diff --git a/configure.ac b/configure.ac
index 5f0fe25..13d4ee1 100644
--- a/configure.ac
+++ b/configure.ac
@@ -24,8 +24,8 @@ AS_CASE(["x${with_ns_mode}"],
     [NS_FIXED=0])
 AM_CONDITIONAL(NS_FIXED, [test "x${NS_FIXED}" = "x1"])
 
-COMMON_CFLAGS="-DNDEBUG -I\$(top_srcdir)"
-COMMON_CXXFLAGS="-std=c++11 -DNDEBUG -I\$(top_srcdir)"
+COMMON_CFLAGS="-DWEBRTC_POSIX -DWEBRTC_LINUX -DNDEBUG -I\$(top_srcdir)"
+COMMON_CXXFLAGS="-std=c++11 -DWEBRTC_POSIX -DWEBRTC_LINUX -DNDEBUG -I\$(top_srcdir)"
 
 AC_SUBST([COMMON_CFLAGS])
 AC_SUBST([COMMON_CXXFLAGS])
@@ -33,15 +33,12 @@ AC_CONFIG_FILES([
 webrtc-audio-processing.pc
 Makefile
 webrtc/Makefile
+webrtc/base/Makefile
 webrtc/common_audio/Makefile
 webrtc/system_wrappers/Makefile
 webrtc/modules/Makefile
+webrtc/modules/audio_coding/Makefile
 webrtc/modules/audio_processing/Makefile
-webrtc/modules/audio_processing/utility/Makefile
-webrtc/modules/audio_processing/ns/Makefile
-webrtc/modules/audio_processing/aec/Makefile
-webrtc/modules/audio_processing/aecm/Makefile
-webrtc/modules/audio_processing/agc/Makefile
 ])
 
 AC_OUTPUT
diff --git a/webrtc/BUILD.gn b/webrtc/BUILD.gn
new file mode 100644
index 0000000..ac14d7d
--- /dev/null
+++ b/webrtc/BUILD.gn
@@ -0,0 +1,281 @@
+# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+# TODO(kjellander): Rebase this to webrtc/build/common.gypi changes after r6330.
+
+import("//build/config/crypto.gni")
+import("//build/config/linux/pkg_config.gni")
+import("build/webrtc.gni")
+import("//third_party/protobuf/proto_library.gni")
+
+# Contains the defines and includes in common.gypi that are duplicated both as
+# target_defaults and direct_dependent_settings.
+config("common_inherited_config") {
+  defines = []
+  if (build_with_mozilla) {
+    defines += [ "WEBRTC_MOZILLA_BUILD" ]
+  }
+  if (build_with_chromium) {
+    defines = [ "WEBRTC_CHROMIUM_BUILD" ]
+    include_dirs = [
+      # The overrides must be included first as that is the mechanism for
+      # selecting the override headers in Chromium.
+      "../webrtc_overrides",
+
+      # Allow includes to be prefixed with webrtc/ in case it is not an
+      # immediate subdirectory of the top-level.
+      "..",
+    ]
+  }
+  if (is_posix) {
+    defines += [ "WEBRTC_POSIX" ]
+  }
+  if (is_ios) {
+    defines += [
+      "WEBRTC_MAC",
+      "WEBRTC_IOS",
+    ]
+  }
+  if (is_ios && rtc_use_objc_h264) {
+    defines += [ "WEBRTC_OBJC_H264" ]
+  }
+  if (is_linux) {
+    defines += [ "WEBRTC_LINUX" ]
+  }
+  if (is_mac) {
+    defines += [ "WEBRTC_MAC" ]
+  }
+  if (is_win) {
+    defines += [ "WEBRTC_WIN" ]
+  }
+  if (is_android) {
+    defines += [
+      "WEBRTC_LINUX",
+      "WEBRTC_ANDROID",
+    ]
+  }
+}
+
+if (rtc_have_dbus_glib) {
+  pkg_config("dbus-glib") {
+    packages = [ "dbus-glib-1" ]
+  }
+}
+
+config("common_config") {
+  cflags = []
+  cflags_cc = []
+  if (rtc_restrict_logging) {
+    defines = [ "WEBRTC_RESTRICT_LOGGING" ]
+  }
+
+  if (rtc_have_dbus_glib) {
+    defines += [ "HAVE_DBUS_GLIB" ]
+
+    # TODO(kjellander): Investigate this, it seems like include <dbus/dbus-glib.h>
+    # is still not found even if the execution of
+    # build/config/linux/pkg-config.py dbus-glib-1 returns correct include
+    # dirs on Linux.
+    all_dependent_configs = [ "dbus-glib" ]
+  }
+
+  if (build_with_chromium) {
+    defines += [ "LOGGING_INSIDE_WEBRTC" ]
+  } else {
+    if (is_posix) {
+      # -Wextra is currently disabled in Chromium's common.gypi. Enable
+      # for targets that can handle it. For Android/arm64 right now
+      # there will be an "enumeral and non-enumeral type in conditional
+      # expression" warning in android_tools/ndk_experimental's version
+      # of stlport.
+      # See: https://code.google.com/p/chromium/issues/detail?id=379699
+      if (current_cpu != "arm64" || !is_android) {
+        cflags = [
+          "-Wextra",
+
+          # We need to repeat some flags from Chromium's common.gypi
+          # here that get overridden by -Wextra.
+          "-Wno-unused-parameter",
+          "-Wno-missing-field-initializers",
+          "-Wno-strict-overflow",
+        ]
+        cflags_cc = [
+          "-Wnon-virtual-dtor",
+
+          # This is enabled for clang; enable for gcc as well.
+          "-Woverloaded-virtual",
+        ]
+      }
+    }
+
+    if (is_clang) {
+      cflags += [ "-Wthread-safety" ]
+    }
+  }
+
+  if (current_cpu == "arm64") {
+    defines += [ "WEBRTC_ARCH_ARM64" ]
+    defines += [ "WEBRTC_HAS_NEON" ]
+  }
+
+  if (current_cpu == "arm") {
+    defines += [ "WEBRTC_ARCH_ARM" ]
+    if (arm_version >= 7) {
+      defines += [ "WEBRTC_ARCH_ARM_V7" ]
+      if (arm_use_neon) {
+        defines += [ "WEBRTC_HAS_NEON" ]
+      } else if (arm_optionally_use_neon) {
+        defines += [ "WEBRTC_DETECT_NEON" ]
+      }
+    }
+  }
+
+  if (current_cpu == "mipsel") {
+    defines += [ "MIPS32_LE" ]
+    if (mips_float_abi == "hard") {
+      defines += [ "MIPS_FPU_LE" ]
+    }
+    if (mips_arch_variant == "r2") {
+      defines += [ "MIPS32_R2_LE" ]
+    }
+    if (mips_dsp_rev == 1) {
+      defines += [ "MIPS_DSP_R1_LE" ]
+    } else if (mips_dsp_rev == 2) {
+      defines += [
+        "MIPS_DSP_R1_LE",
+        "MIPS_DSP_R2_LE",
+      ]
+    }
+  }
+
+  if (is_android && !is_clang) {
+    # The Android NDK doesn't provide optimized versions of these
+    # functions. Ensure they are disabled for all compilers.
+ cflags += [ + "-fno-builtin-cos", + "-fno-builtin-sin", + "-fno-builtin-cosf", + "-fno-builtin-sinf", + ] + } +} + +source_set("webrtc") { + sources = [ + "call.h", + "config.h", + "frame_callback.h", + "transport.h", + ] + + defines = [] + configs += [ ":common_config" ] + public_configs = [ ":common_inherited_config" ] + + deps = [ + "audio", + ":webrtc_common", + "base:rtc_base", + "call", + "common_audio", + "common_video", + "modules/audio_coding", + "modules/audio_conference_mixer", + "modules/audio_device", + "modules/audio_processing", + "modules/bitrate_controller", + "modules/desktop_capture", + "modules/media_file", + "modules/rtp_rtcp", + "modules/utility", + "modules/video_coding", + "modules/video_processing", + "system_wrappers", + "tools", + "video", + "voice_engine", + ] + + if (build_with_chromium) { + deps += [ + "modules/video_capture", + "modules/video_render", + ] + } + + if (rtc_enable_protobuf) { + defines += [ "ENABLE_RTC_EVENT_LOG" ] + deps += [ ":rtc_event_log_proto" ] + } +} + +if (!build_with_chromium) { + executable("webrtc_tests") { + testonly = true + deps = [ + ":webrtc", + "modules/video_render:video_render_internal_impl", + "modules/video_capture:video_capture_internal_impl", + "test", + ] + } +} + +source_set("webrtc_common") { + sources = [ + "common_types.cc", + "common_types.h", + "config.cc", + "config.h", + "engine_configurations.h", + "typedefs.h", + ] + + configs += [ ":common_config" ] + public_configs = [ ":common_inherited_config" ] +} + +source_set("gtest_prod") { + sources = [ + "test/testsupport/gtest_prod_util.h", + ] +} + +if (rtc_enable_protobuf) { + proto_library("rtc_event_log_proto") { + sources = [ + "call/rtc_event_log.proto", + ] + proto_out_dir = "webrtc/call" + } +} + +source_set("rtc_event_log") { + sources = [ + "call/rtc_event_log.cc", + "call/rtc_event_log.h", + ] + + defines = [] + configs += [ ":common_config" ] + public_configs = [ ":common_inherited_config" ] + + deps = [ + ":webrtc_common", + ] + + if (rtc_enable_protobuf) { + defines += [ "ENABLE_RTC_EVENT_LOG" ] + deps += [ ":rtc_event_log_proto" ] + } + if (is_clang && !is_nacl) { + # Suppress warnings from Chrome's Clang plugins. + # See http://code.google.com/p/webrtc/issues/detail?id=163 for details. + configs -= [ "//build/config/clang:find_bad_constructs" ] + } +} diff --git a/webrtc/LICENSE_THIRD_PARTY b/webrtc/LICENSE_THIRD_PARTY new file mode 100644 index 0000000..b64dbba --- /dev/null +++ b/webrtc/LICENSE_THIRD_PARTY @@ -0,0 +1,419 @@ +This source tree contains third party source code which is governed by third +party licenses. Paths to the files and associated licenses are collected here. 
+ +Files governed by third party licenses: +common_audio/fft4g.c +common_audio/signal_processing/spl_sqrt_floor.c +common_audio/signal_processing/spl_sqrt_floor_arm.S +modules/audio_coding/codecs/g711/main/source/g711.c +modules/audio_coding/codecs/g711/main/source/g711.h +modules/audio_coding/codecs/g722/main/source/g722_decode.c +modules/audio_coding/codecs/g722/main/source/g722_enc_dec.h +modules/audio_coding/codecs/g722/main/source/g722_encode.c +modules/audio_coding/codecs/isac/main/source/fft.c +modules/audio_device/mac/portaudio/pa_memorybarrier.h +modules/audio_device/mac/portaudio/pa_ringbuffer.c +modules/audio_device/mac/portaudio/pa_ringbuffer.h +modules/audio_processing/aec/aec_rdft.c +system_wrappers/source/condition_variable_event_win.cc +system_wrappers/source/set_thread_name_win.h +system_wrappers/source/spreadsortlib/constants.hpp +system_wrappers/source/spreadsortlib/spreadsort.hpp + +Individual licenses for each file: +------------------------------------------------------------------------------- +Files: +common_audio/signal_processing/spl_sqrt_floor.c +common_audio/signal_processing/spl_sqrt_floor_arm.S + +License: +/* + * Written by Wilco Dijkstra, 1996. The following email exchange establishes the + * license. + * + * From: Wilco Dijkstra + * Date: Fri, Jun 24, 2011 at 3:20 AM + * Subject: Re: sqrt routine + * To: Kevin Ma + * Hi Kevin, + * Thanks for asking. Those routines are public domain (originally posted to + * comp.sys.arm a long time ago), so you can use them freely for any purpose. + * Cheers, + * Wilco + * + * ----- Original Message ----- + * From: "Kevin Ma" + * To: + * Sent: Thursday, June 23, 2011 11:44 PM + * Subject: Fwd: sqrt routine + * Hi Wilco, + * I saw your sqrt routine from several web sites, including + * http://www.finesse.demon.co.uk/steven/sqrt.html. + * Just wonder if there's any copyright information with your Successive + * approximation routines, or if I can freely use it for any purpose. + * Thanks. + * Kevin + */ +------------------------------------------------------------------------------- +Files: +modules/audio_coding/codecs/g711/main/source/g711.c +modules/audio_coding/codecs/g711/main/source/g711.h + +License: +/* + * SpanDSP - a series of DSP components for telephony + * + * g711.h - In line A-law and u-law conversion routines + * + * Written by Steve Underwood + * + * Copyright (C) 2001 Steve Underwood + * + * Despite my general liking of the GPL, I place this code in the + * public domain for the benefit of all mankind - even the slimy + * ones who might try to proprietize my work and use it to my + * detriment. + */ +------------------------------------------------------------------------------- +Files: +modules/audio_coding/codecs/g722/main/source/g722_decode.c +modules/audio_coding/codecs/g722/main/source/g722_enc_dec.h +modules/audio_coding/codecs/g722/main/source/g722_encode.c + +License: +/* + * SpanDSP - a series of DSP components for telephony + * + * g722_decode.c - The ITU G.722 codec, decode part. + * + * Written by Steve Underwood + * + * Copyright (C) 2005 Steve Underwood + * + * Despite my general liking of the GPL, I place my own contributions + * to this code in the public domain for the benefit of all mankind - + * even the slimy ones who might try to proprietize my work and use it + * to my detriment. 
+ * + * Based in part on a single channel G.722 codec which is: + * + * Copyright (c) CMU 1993 + * Computer Science, Speech Group + * Chengxiang Lu and Alex Hauptmann + */ +------------------------------------------------------------------------------- +Files: +modules/audio_coding/codecs/isac/main/source/fft.c + +License: +/* + * Copyright(c)1995,97 Mark Olesen + * Queen's Univ at Kingston (Canada) + * + * Permission to use, copy, modify, and distribute this software for + * any purpose without fee is hereby granted, provided that this + * entire notice is included in all copies of any software which is + * or includes a copy or modification of this software and in all + * copies of the supporting documentation for such software. + * + * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTY. IN PARTICULAR, NEITHER THE AUTHOR NOR QUEEN'S + * UNIVERSITY AT KINGSTON MAKES ANY REPRESENTATION OR WARRANTY OF ANY + * KIND CONCERNING THE MERCHANTABILITY OF THIS SOFTWARE OR ITS + * FITNESS FOR ANY PARTICULAR PURPOSE. + * + * All of which is to say that you can do what you like with this + * source code provided you don't try to sell it as your own and you + * include an unaltered copy of this message (including the + * copyright). + * + * It is also implicitly understood that bug fixes and improvements + * should make their way back to the general Internet community so + * that everyone benefits. + */ +------------------------------------------------------------------------------- +Files: +modules/audio_device/mac/portaudio/pa_memorybarrier.h +modules/audio_device/mac/portaudio/pa_ringbuffer.c +modules/audio_device/mac/portaudio/pa_ringbuffer.h + +License: +/* + * $Id: pa_memorybarrier.h 1240 2007-07-17 13:05:07Z bjornroche $ + * Portable Audio I/O Library + * Memory barrier utilities + * + * Author: Bjorn Roche, XO Audio, LLC + * + * This program uses the PortAudio Portable Audio Library. + * For more information see: http://www.portaudio.com + * Copyright (c) 1999-2000 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. 
It is also + * requested that these non-binding requests be included along with the + * license above. + */ + +/* + * $Id: pa_ringbuffer.c 1421 2009-11-18 16:09:05Z bjornroche $ + * Portable Audio I/O Library + * Ring Buffer utility. + * + * Author: Phil Burk, http://www.softsynth.com + * modified for SMP safety on Mac OS X by Bjorn Roche + * modified for SMP safety on Linux by Leland Lucius + * also, allowed for const where possible + * modified for multiple-byte-sized data elements by Sven Fischer + * + * Note that this is safe only for a single-thread reader and a + * single-thread writer. + * + * This program uses the PortAudio Portable Audio Library. + * For more information see: http://www.portaudio.com + * Copyright (c) 1999-2000 Ross Bencina and Phil Burk + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files + * (the "Software"), to deal in the Software without restriction, + * including without limitation the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF + * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * The text above constitutes the entire PortAudio license; however, + * the PortAudio community also makes the following non-binding requests: + * + * Any person wishing to distribute modifications to the Software is + * requested to send the modifications to the original developer so that + * they can be incorporated into the canonical version. It is also + * requested that these non-binding requests be included along with the + * license above. + */ +------------------------------------------------------------------------------- +Files: +common_audio/fft4g.c +modules/audio_processing/aec/aec_rdft.c + +License: +/* + * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html + * Copyright Takuya OOURA, 1996-2001 + * + * You may use, copy, modify and distribute this code for any purpose (include + * commercial use) and without fee. Please refer to this package when you modify + * this code. + */ +------------------------------------------------------------------------------- +Files: +system_wrappers/source/condition_variable_event_win.cc + +Source: +http://www1.cse.wustl.edu/~schmidt/ACE-copying.html + +License: +Copyright and Licensing Information for ACE(TM), TAO(TM), CIAO(TM), DAnCE(TM), +and CoSMIC(TM) + +ACE(TM), TAO(TM), CIAO(TM), DAnCE>(TM), and CoSMIC(TM) (henceforth referred to +as "DOC software") are copyrighted by Douglas C. Schmidt and his research +group at Washington University, University of California, Irvine, and +Vanderbilt University, Copyright (c) 1993-2009, all rights reserved. 
Since DOC +software is open-source, freely available software, you are free to use, +modify, copy, and distribute--perpetually and irrevocably--the DOC software +source code and object code produced from the source, as well as copy and +distribute modified versions of this software. You must, however, include this +copyright statement along with any code built using DOC software that you +release. No copyright statement needs to be provided if you just ship binary +executables of your software products. +You can use DOC software in commercial and/or binary software releases and are +under no obligation to redistribute any of your source code that is built +using DOC software. Note, however, that you may not misappropriate the DOC +software code, such as copyrighting it yourself or claiming authorship of the +DOC software code, in a way that will prevent DOC software from being +distributed freely using an open-source development model. You needn't inform +anyone that you're using DOC software in your software, though we encourage +you to let us know so we can promote your project in the DOC software success +stories. + +The ACE, TAO, CIAO, DAnCE, and CoSMIC web sites are maintained by the DOC +Group at the Institute for Software Integrated Systems (ISIS) and the Center +for Distributed Object Computing of Washington University, St. Louis for the +development of open-source software as part of the open-source software +community. Submissions are provided by the submitter ``as is'' with no +warranties whatsoever, including any warranty of merchantability, +noninfringement of third party intellectual property, or fitness for any +particular purpose. In no event shall the submitter be liable for any direct, +indirect, special, exemplary, punitive, or consequential damages, including +without limitation, lost profits, even if advised of the possibility of such +damages. Likewise, DOC software is provided as is with no warranties of any +kind, including the warranties of design, merchantability, and fitness for a +particular purpose, noninfringement, or arising from a course of dealing, +usage or trade practice. Washington University, UC Irvine, Vanderbilt +University, their employees, and students shall have no liability with respect +to the infringement of copyrights, trade secrets or any patents by DOC +software or any part thereof. Moreover, in no event will Washington +University, UC Irvine, or Vanderbilt University, their employees, or students +be liable for any lost revenue or profits or other special, indirect and +consequential damages. + +DOC software is provided with no support and without any obligation on the +part of Washington University, UC Irvine, Vanderbilt University, their +employees, or students to assist in its use, correction, modification, or +enhancement. A number of companies around the world provide commercial support +for DOC software, however. DOC software is Y2K-compliant, as long as the +underlying OS platform is Y2K-compliant. Likewise, DOC software is compliant +with the new US daylight savings rule passed by Congress as "The Energy Policy +Act of 2005," which established new daylight savings times (DST) rules for the +United States that expand DST as of March 2007. Since DOC software obtains +time/date and calendaring information from operating systems users will not be +affected by the new DST rules as long as they upgrade their operating systems +accordingly. 
+ +The names ACE(TM), TAO(TM), CIAO(TM), DAnCE(TM), CoSMIC(TM), Washington +University, UC Irvine, and Vanderbilt University, may not be used to endorse +or promote products or services derived from this source without express +written permission from Washington University, UC Irvine, or Vanderbilt +University. This license grants no permission to call products or services +derived from this source ACE(TM), TAO(TM), CIAO(TM), DAnCE(TM), or CoSMIC(TM), +nor does it grant permission for the name Washington University, UC Irvine, or +Vanderbilt University to appear in their names. +------------------------------------------------------------------------------- +Files: +system_wrappers/source/set_thread_name_win.h + +Source: +http://msdn.microsoft.com/en-us/cc300389.aspx#P + +License: +This license governs use of code marked as “sample” or “example” available on +this web site without a license agreement, as provided under the section above +titled “NOTICE SPECIFIC TO SOFTWARE AVAILABLE ON THIS WEB SITE.” If you use +such code (the “software”), you accept this license. If you do not accept the +license, do not use the software. + +1. Definitions + +The terms “reproduce,” “reproduction,” “derivative works,” and “distribution” +have the same meaning here as under U.S. copyright law. + +A “contribution” is the original software, or any additions or changes to the +software. + +A “contributor” is any person that distributes its contribution under this +license. + +“Licensed patents” are a contributor’s patent claims that read directly on its +contribution. + +2. Grant of Rights + +(A) Copyright Grant - Subject to the terms of this license, including the +license conditions and limitations in section 3, each contributor grants you a +non-exclusive, worldwide, royalty-free copyright license to reproduce its +contribution, prepare derivative works of its contribution, and distribute its +contribution or any derivative works that you create. + +(B) Patent Grant - Subject to the terms of this license, including the license +conditions and limitations in section 3, each contributor grants you a +non-exclusive, worldwide, royalty-free license under its licensed patents to +make, have made, use, sell, offer for sale, import, and/or otherwise dispose +of its contribution in the software or derivative works of the contribution in +the software. + +3. Conditions and Limitations + +(A) No Trademark License- This license does not grant you rights to use any +contributors’ name, logo, or trademarks. + +(B) If you bring a patent claim against any contributor over patents that you +claim are infringed by the software, your patent license from such contributor +to the software ends automatically. + +(C) If you distribute any portion of the software, you must retain all +copyright, patent, trademark, and attribution notices that are present in the +software. + +(D) If you distribute any portion of the software in source code form, you may +do so only under this license by including a complete copy of this license +with your distribution. If you distribute any portion of the software in +compiled or object code form, you may only do so under a license that complies +with this license. + +(E) The software is licensed “as-is.” You bear the risk of using it. The +contributors give no express warranties, guarantees or conditions. You may +have additional consumer rights under your local laws which this license +cannot change. 
To the extent permitted under your local laws, the contributors +exclude the implied warranties of merchantability, fitness for a particular +purpose and non-infringement. + +(F) Platform Limitation - The licenses granted in sections 2(A) and 2(B) +extend only to the software or derivative works that you create that run on a +Microsoft Windows operating system product. +------------------------------------------------------------------------------- +Files: +system_wrappers/source/spreadsortlib/constants.hpp +system_wrappers/source/spreadsortlib/spreadsort.hpp + +License: +/*Boost Software License - Version 1.0 - August 17th, 2003 + +Permission is hereby granted, free of charge, to any person or organization +obtaining a copy of the software and accompanying documentation covered by +this license (the "Software") to use, reproduce, display, distribute, +execute, and transmit the Software, and to prepare derivative works of the +Software, and to permit third-parties to whom the Software is furnished to +do so, all subject to the following: + +The copyright notices in the Software and this entire statement, including +the above license grant, this restriction and the following disclaimer, +must be included in all copies of the Software, in whole or in part, and +all derivative works of the Software, unless such copies or derivative +works are solely in the form of machine-executable object code generated by +a source language processor. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT +SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE +FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, +ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER +DEALINGS IN THE SOFTWARE.*/ diff --git a/webrtc/Makefile.am b/webrtc/Makefile.am index bf946f9..4088784 100644 --- a/webrtc/Makefile.am +++ b/webrtc/Makefile.am @@ -1 +1,3 @@ -SUBDIRS = common_audio system_wrappers modules +SUBDIRS = base common_audio system_wrappers modules + +noinst_HEADERS = common.h diff --git a/PATENTS b/webrtc/PATENTS similarity index 100% rename from PATENTS rename to webrtc/PATENTS diff --git a/webrtc/base/BUILD.gn b/webrtc/base/BUILD.gn new file mode 100644 index 0000000..1980782 --- /dev/null +++ b/webrtc/base/BUILD.gn @@ -0,0 +1,592 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("//build/config/crypto.gni") +import("//build/config/ui.gni") +import("../build/webrtc.gni") + +config("rtc_base_config") { + include_dirs = [ + "//third_party/jsoncpp/overrides/include", + "//third_party/jsoncpp/source/include", + ] + + defines = [ + "FEATURE_ENABLE_SSL", + "LOGGING=1", + ] + + if (is_posix) { + # TODO(henrike): issue 3307, make rtc_base build without disabling + # these flags. 
+ cflags_cc = [ "-Wno-non-virtual-dtor" ] + } +} + +config("rtc_base_chromium_config") { + defines = [ "NO_MAIN_THREAD_WRAPPING" ] +} + +config("openssl_config") { + defines = [ + "SSL_USE_OPENSSL", + "HAVE_OPENSSL_SSL_H", + ] +} + +config("ios_config") { + libs = [ + "CFNetwork.framework", + + #"Foundation.framework", # Already included in //build/config:default_libs. + "Security.framework", + "SystemConfiguration.framework", + + #"UIKit.framework", # Already included in //build/config:default_libs. + ] +} + +config("mac_config") { + libs = [ + "Cocoa.framework", + + #"Foundation.framework", # Already included in //build/config:default_libs. + #"IOKit.framework", # Already included in //build/config:default_libs. + #"Security.framework", # Already included in //build/config:default_libs. + "SystemConfiguration.framework", + ] +} + +config("mac_x86_config") { + libs = [ + #"Carbon.framework", # Already included in //build/config:default_libs. + ] +} + +if (is_linux && !build_with_chromium) { + # Provides the same functionality as the //crypto:platform target, which + # WebRTC cannot use as we don't sync src/crypto from Chromium. + group("linux_system_ssl") { + if (use_openssl) { + deps = [ + "//third_party/boringssl", + ] + } + } +} + +if (rtc_build_ssl == 0) { + config("external_ssl_library") { + assert(rtc_ssl_root != "", + "You must specify rtc_ssl_root when rtc_build_ssl==0.") + include_dirs = [ rtc_ssl_root ] + } +} + +# The subset of rtc_base approved for use outside of libjingle. +static_library("rtc_base_approved") { + configs += [ "..:common_config" ] + public_configs = [ "..:common_inherited_config" ] + + sources = [ + "atomicops.h", + "bitbuffer.cc", + "bitbuffer.h", + "buffer.cc", + "buffer.h", + "bufferqueue.cc", + "bufferqueue.h", + "bytebuffer.cc", + "bytebuffer.h", + "byteorder.h", + "checks.cc", + "checks.h", + "criticalsection.cc", + "criticalsection.h", + "event.cc", + "event.h", + "event_tracer.cc", + "event_tracer.h", + "exp_filter.cc", + "exp_filter.h", + "md5.cc", + "md5.h", + "md5digest.cc", + "md5digest.h", + "platform_file.cc", + "platform_file.h", + "platform_thread.cc", + "platform_thread.h", + "safe_conversions.h", + "safe_conversions_impl.h", + "scoped_ptr.h", + "stringencode.cc", + "stringencode.h", + "stringutils.cc", + "stringutils.h", + "systeminfo.cc", + "systeminfo.h", + "template_util.h", + "thread_annotations.h", + "thread_checker.h", + "thread_checker_impl.cc", + "thread_checker_impl.h", + "timeutils.cc", + "timeutils.h", + "trace_event.h", + ] + + if (!build_with_chromium) { + sources += [ + "basictypes.h", + "constructormagic.h", + "logging.cc", + "logging.h", + ] + } +} + +static_library("rtc_base") { + cflags = [] + cflags_cc = [] + libs = [] + deps = [ + ":rtc_base_approved", + ] + + configs += [ + "..:common_config", + ":rtc_base_config", + ] + + public_configs = [ + "..:common_inherited_config", + ":rtc_base_config", + ] + + defines = [ "LOGGING=1" ] + + sources = [ + "arraysize.h", + "asyncfile.cc", + "asyncfile.h", + "asyncinvoker-inl.h", + "asyncinvoker.cc", + "asyncinvoker.h", + "asyncpacketsocket.cc", + "asyncpacketsocket.h", + "asyncresolverinterface.cc", + "asyncresolverinterface.h", + "asyncsocket.cc", + "asyncsocket.h", + "asynctcpsocket.cc", + "asynctcpsocket.h", + "asyncudpsocket.cc", + "asyncudpsocket.h", + "autodetectproxy.cc", + "autodetectproxy.h", + "base64.cc", + "base64.h", + "basicdefs.h", + "common.cc", + "common.h", + "crc32.cc", + "crc32.h", + "cryptstring.cc", + "cryptstring.h", + "diskcache.cc", + "diskcache.h", + 
"filerotatingstream.cc", + "filerotatingstream.h", + "fileutils.cc", + "fileutils.h", + "firewallsocketserver.cc", + "firewallsocketserver.h", + "flags.cc", + "flags.h", + "format_macros.h", + "gunit_prod.h", + "helpers.cc", + "helpers.h", + "httpbase.cc", + "httpbase.h", + "httpclient.cc", + "httpclient.h", + "httpcommon-inl.h", + "httpcommon.cc", + "httpcommon.h", + "httprequest.cc", + "httprequest.h", + "iosfilesystem.mm", + "ipaddress.cc", + "ipaddress.h", + "linked_ptr.h", + "mathutils.h", + "messagedigest.cc", + "messagedigest.h", + "messagehandler.cc", + "messagehandler.h", + "messagequeue.cc", + "messagequeue.h", + "nethelpers.cc", + "nethelpers.h", + "network.cc", + "network.h", + "nullsocketserver.h", + "pathutils.cc", + "pathutils.h", + "physicalsocketserver.cc", + "physicalsocketserver.h", + "proxydetect.cc", + "proxydetect.h", + "proxyinfo.cc", + "proxyinfo.h", + "ratelimiter.cc", + "ratelimiter.h", + "ratetracker.cc", + "ratetracker.h", + "rtccertificate.cc", + "rtccertificate.h", + "scoped_autorelease_pool.h", + "scoped_autorelease_pool.mm", + "sha1.cc", + "sha1.h", + "sha1digest.cc", + "sha1digest.h", + "signalthread.cc", + "signalthread.h", + "sigslot.cc", + "sigslot.h", + "sigslotrepeater.h", + "socket.h", + "socketadapters.cc", + "socketadapters.h", + "socketaddress.cc", + "socketaddress.h", + "socketaddresspair.cc", + "socketaddresspair.h", + "socketfactory.h", + "socketpool.cc", + "socketpool.h", + "socketserver.h", + "socketstream.cc", + "socketstream.h", + "ssladapter.cc", + "ssladapter.h", + "sslfingerprint.cc", + "sslfingerprint.h", + "sslidentity.cc", + "sslidentity.h", + "sslsocketfactory.cc", + "sslsocketfactory.h", + "sslstreamadapter.cc", + "sslstreamadapter.h", + "sslstreamadapterhelper.cc", + "sslstreamadapterhelper.h", + "stream.cc", + "stream.h", + "task.cc", + "task.h", + "taskparent.cc", + "taskparent.h", + "taskrunner.cc", + "taskrunner.h", + "thread.cc", + "thread.h", + "timing.cc", + "timing.h", + "urlencode.cc", + "urlencode.h", + "worker.cc", + "worker.h", + ] + + if (is_posix) { + sources += [ + "unixfilesystem.cc", + "unixfilesystem.h", + ] + } + + if (build_with_chromium) { + sources += [ + "../../webrtc_overrides/webrtc/base/logging.cc", + "../../webrtc_overrides/webrtc/base/logging.h", + ] + + deps += [ "..:webrtc_common" ] + + if (is_win) { + sources += [ "../../webrtc_overrides/webrtc/base/win32socketinit.cc" ] + } + + include_dirs = [ + "../../webrtc_overrides", + "../../boringssl/src/include", + ] + + public_configs += [ ":rtc_base_chromium_config" ] + } else { + sources += [ + "bandwidthsmoother.cc", + "bandwidthsmoother.h", + "bind.h", + "bind.h.pump", + "callback.h", + "callback.h.pump", + "fileutils_mock.h", + "genericslot.h", + "genericslot.h.pump", + "httpserver.cc", + "httpserver.h", + "json.cc", + "json.h", + "logsinks.cc", + "logsinks.h", + "mathutils.h", + "multipart.cc", + "multipart.h", + "natserver.cc", + "natserver.h", + "natsocketfactory.cc", + "natsocketfactory.h", + "nattypes.cc", + "nattypes.h", + "optionsfile.cc", + "optionsfile.h", + "profiler.cc", + "profiler.h", + "proxyserver.cc", + "proxyserver.h", + "refcount.h", + "referencecountedsingletonfactory.h", + "rollingaccumulator.h", + "scoped_ref_ptr.h", + "scopedptrcollection.h", + "sec_buffer.h", + "sharedexclusivelock.cc", + "sharedexclusivelock.h", + "sslconfig.h", + "sslroots.h", + "testclient.cc", + "testclient.h", + "transformadapter.cc", + "transformadapter.h", + "versionparsing.cc", + "versionparsing.h", + "virtualsocketserver.cc", + "virtualsocketserver.h", + 
"window.h", + "windowpicker.h", + "windowpickerfactory.h", + ] + + deps += [ "..:webrtc_common" ] + + if (is_posix) { + sources += [ + "latebindingsymboltable.cc", + "latebindingsymboltable.cc.def", + "latebindingsymboltable.h", + "latebindingsymboltable.h.def", + "posix.cc", + "posix.h", + ] + } + + if (is_linux) { + sources += [ + "dbus.cc", + "dbus.h", + "libdbusglibsymboltable.cc", + "libdbusglibsymboltable.h", + "linuxfdwalk.c", + "linuxfdwalk.h", + ] + } + + if (is_mac) { + sources += [ + "macasyncsocket.cc", + "macasyncsocket.h", + "maccocoasocketserver.h", + "maccocoasocketserver.mm", + "macsocketserver.cc", + "macsocketserver.h", + "macwindowpicker.cc", + "macwindowpicker.h", + ] + } + + if (is_win) { + sources += [ + "diskcache_win32.cc", + "diskcache_win32.h", + "win32regkey.cc", + "win32regkey.h", + "win32socketinit.cc", + "win32socketinit.h", + "win32socketserver.cc", + "win32socketserver.h", + ] + } + if (rtc_build_json) { + deps += [ "//third_party/jsoncpp" ] + } else { + include_dirs += [ rtc_jsoncpp_root ] + + # When defined changes the include path for json.h to where it is + # expected to be when building json outside of the standalone build. + defines += [ "WEBRTC_EXTERNAL_JSON" ] + } + } # !build_with_chromium + + # TODO(henrike): issue 3307, make rtc_base build with the Chromium default + # compiler settings. + configs -= [ "//build/config/compiler:chromium_code" ] + configs += [ "//build/config/compiler:no_chromium_code" ] + if (!is_win) { + cflags += [ "-Wno-uninitialized" ] + cflags_cc += [ "-Wno-non-virtual-dtor" ] + } + + if (use_openssl) { + public_configs += [ ":openssl_config" ] + if (rtc_build_ssl) { + deps += [ "//third_party/boringssl" ] + } else { + configs += [ "external_ssl_library" ] + } + sources += [ + "openssl.h", + "openssladapter.cc", + "openssladapter.h", + "openssldigest.cc", + "openssldigest.h", + "opensslidentity.cc", + "opensslidentity.h", + "opensslstreamadapter.cc", + "opensslstreamadapter.h", + ] + } + + if (is_android) { + sources += [ + "ifaddrs-android.cc", + "ifaddrs-android.h", + ] + + libs += [ + "log", + "GLESv2", + ] + } + + if (is_ios) { + all_dependent_configs = [ ":ios_config" ] + + sources += [ + "macconversion.cc", + "macconversion.h", + ] + } + + if (use_x11) { + sources += [ + "x11windowpicker.cc", + "x11windowpicker.h", + ] + libs += [ + "dl", + "rt", + "Xext", + "X11", + "Xcomposite", + "Xrender", + ] + } + + if (is_linux) { + libs += [ + "dl", + "rt", + ] + } + + if (is_mac) { + sources += [ + "maccocoathreadhelper.h", + "maccocoathreadhelper.mm", + "macconversion.cc", + "macconversion.h", + "macutils.cc", + "macutils.h", + ] + + all_dependent_configs = [ ":mac_config" ] + + if (current_cpu == "x86") { + all_dependent_configs += [ ":mac_x86_config" ] + } + } + + if (is_win) { + sources += [ + "win32.cc", + "win32.h", + "win32filesystem.cc", + "win32filesystem.h", + "win32securityerrors.cc", + "win32window.cc", + "win32window.h", + "win32windowpicker.cc", + "win32windowpicker.h", + "winfirewall.cc", + "winfirewall.h", + "winping.cc", + "winping.h", + ] + + libs += [ + "crypt32.lib", + "iphlpapi.lib", + "secur32.lib", + ] + + cflags += [ + # Suppress warnings about WIN32_LEAN_AND_MEAN. + "/wd4005", + "/wd4703", + ] + + defines += [ "_CRT_NONSTDC_NO_DEPRECATE" ] + } + + if (is_posix && is_debug) { + # The Chromium build/common.gypi defines this for all posix + # _except_ for ios & mac. We want it there as well, e.g. + # because ASSERT and friends trigger off of it. 
+ defines += [ "_DEBUG" ] + } + + if (is_ios || (is_mac && current_cpu != "x86")) { + defines += [ "CARBON_DEPRECATED=YES" ] + } + + if (is_linux || is_android) { + sources += [ + "linux.cc", + "linux.h", + ] + } + + if (is_nacl) { + deps += [ "//native_client_sdk/src/libraries/nacl_io" ] + defines += [ "timezone=_timezone" ] + } +} diff --git a/webrtc/base/Makefile.am b/webrtc/base/Makefile.am new file mode 100644 index 0000000..f0307e5 --- /dev/null +++ b/webrtc/base/Makefile.am @@ -0,0 +1,27 @@ +noinst_LTLIBRARIES = libbase.la + +noinst_HEADERS = arraysize.h \ + basictypes.h \ + checks.h \ + constructormagic.h \ + safe_conversions.h \ + safe_conversions_impl.h \ + scoped_ptr.h \ + template_util.h \ + thread_annotations.h + +libbase_la_SOURCES = criticalsection.cc \ + criticalsection.h \ + event.cc \ + event.h \ + platform_thread.cc \ + platform_thread.h \ + platform_file.cc \ + platform_file.h \ + stringutils.cc \ + stringutils.h \ + thread_checker.h \ + thread_checker_impl.cc \ + thread_checker_impl.h + +libbase_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS) diff --git a/webrtc/base/arraysize.h b/webrtc/base/arraysize.h new file mode 100644 index 0000000..56a1039 --- /dev/null +++ b/webrtc/base/arraysize.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_BASE_ARRAYSIZE_H_ +#define WEBRTC_BASE_ARRAYSIZE_H_ + +#include + +// This file defines the arraysize() macro and is derived from Chromium's +// base/macros.h. + +// The arraysize(arr) macro returns the # of elements in an array arr. +// The expression is a compile-time constant, and therefore can be +// used in defining new arrays, for example. If you use arraysize on +// a pointer by mistake, you will get a compile-time error. + +// This template function declaration is used in defining arraysize. +// Note that the function doesn't need an implementation, as we only +// use its type. +template char (&ArraySizeHelper(T (&array)[N]))[N]; + +#define arraysize(array) (sizeof(ArraySizeHelper(array))) + +#endif // WEBRTC_BASE_ARRAYSIZE_H_ diff --git a/webrtc/base/atomicops.h b/webrtc/base/atomicops.h new file mode 100644 index 0000000..a863566 --- /dev/null +++ b/webrtc/base/atomicops.h @@ -0,0 +1,68 @@ +/* + * Copyright 2011 The WebRTC Project Authors. All rights reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_BASE_ATOMICOPS_H_ +#define WEBRTC_BASE_ATOMICOPS_H_ + +#if defined(WEBRTC_WIN) +// Include winsock2.h before including to maintain consistency with +// win32.h. We can't include win32.h directly here since it pulls in +// headers such as basictypes.h which causes problems in Chromium where webrtc +// exists as two separate projects, webrtc and libjingle. 
diff --git a/webrtc/base/atomicops.h b/webrtc/base/atomicops.h
new file mode 100644
index 0000000..a863566
--- /dev/null
+++ b/webrtc/base/atomicops.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright 2011 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_BASE_ATOMICOPS_H_
+#define WEBRTC_BASE_ATOMICOPS_H_
+
+#if defined(WEBRTC_WIN)
+// Include winsock2.h before including <windows.h> to maintain consistency with
+// win32.h. We can't include win32.h directly here since it pulls in
+// headers such as basictypes.h which causes problems in Chromium where webrtc
+// exists as two separate projects, webrtc and libjingle.
+#include <winsock2.h>
+#include <windows.h>
+#endif  // defined(WEBRTC_WIN)
+
+namespace rtc {
+class AtomicOps {
+ public:
+#if defined(WEBRTC_WIN)
+  // Assumes sizeof(int) == sizeof(LONG), which it is on Win32 and Win64.
+  static int Increment(volatile int* i) {
+    return ::InterlockedIncrement(reinterpret_cast<volatile LONG*>(i));
+  }
+  static int Decrement(volatile int* i) {
+    return ::InterlockedDecrement(reinterpret_cast<volatile LONG*>(i));
+  }
+  static int AcquireLoad(volatile const int* i) {
+    return *i;
+  }
+  static void ReleaseStore(volatile int* i, int value) {
+    *i = value;
+  }
+  static int CompareAndSwap(volatile int* i, int old_value, int new_value) {
+    return ::InterlockedCompareExchange(reinterpret_cast<volatile LONG*>(i),
+                                        new_value,
+                                        old_value);
+  }
+#else
+  static int Increment(volatile int* i) {
+    return __sync_add_and_fetch(i, 1);
+  }
+  static int Decrement(volatile int* i) {
+    return __sync_sub_and_fetch(i, 1);
+  }
+  static int AcquireLoad(volatile const int* i) {
+    return __atomic_load_n(i, __ATOMIC_ACQUIRE);
+  }
+  static void ReleaseStore(volatile int* i, int value) {
+    __atomic_store_n(i, value, __ATOMIC_RELEASE);
+  }
+  static int CompareAndSwap(volatile int* i, int old_value, int new_value) {
+    return __sync_val_compare_and_swap(i, old_value, new_value);
+  }
+#endif
+};
+
+}
+
+#endif  // WEBRTC_BASE_ATOMICOPS_H_
diff --git a/webrtc/base/basictypes.h b/webrtc/base/basictypes.h
new file mode 100644
index 0000000..4c3d5d1
--- /dev/null
+++ b/webrtc/base/basictypes.h
@@ -0,0 +1,74 @@
+/*
+ * Copyright 2004 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_BASE_BASICTYPES_H_
+#define WEBRTC_BASE_BASICTYPES_H_
+
+#include <stddef.h>  // for NULL, size_t
+#include <stdint.h>  // for uintptr_t and (u)int_t types.
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"  // NOLINT
+#endif
+
+// Detect compiler is for x86 or x64.
+#if defined(__x86_64__) || defined(_M_X64) || \
+    defined(__i386__) || defined(_M_IX86)
+#define CPU_X86 1
+#endif
+
+// Detect compiler is for arm.
+#if defined(__arm__) || defined(_M_ARM)
+#define CPU_ARM 1
+#endif
+
+#if defined(CPU_X86) && defined(CPU_ARM)
+#error CPU_X86 and CPU_ARM both defined.
+#endif
+
+#if !defined(RTC_ARCH_CPU_BIG_ENDIAN) && !defined(RTC_ARCH_CPU_LITTLE_ENDIAN)
+// x86, arm or GCC provided __BYTE_ORDER__ macros
+#if CPU_X86 || CPU_ARM || \
+    (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__)
+#define RTC_ARCH_CPU_LITTLE_ENDIAN
+#elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+#define RTC_ARCH_CPU_BIG_ENDIAN
+#else
+#error RTC_ARCH_CPU_BIG_ENDIAN or RTC_ARCH_CPU_LITTLE_ENDIAN should be defined.
+#endif
+#endif
+
+#if defined(RTC_ARCH_CPU_BIG_ENDIAN) && defined(RTC_ARCH_CPU_LITTLE_ENDIAN)
+#error RTC_ARCH_CPU_BIG_ENDIAN and RTC_ARCH_CPU_LITTLE_ENDIAN both defined.
+#endif
+
+#if defined(WEBRTC_WIN)
+typedef int socklen_t;
+#endif
+
+// The following only works for C++
+#ifdef __cplusplus
+
+#ifndef ALIGNP
+#define ALIGNP(p, t)                                             \
+  (reinterpret_cast<uint8_t*>(((reinterpret_cast<uintptr_t>(p) + \
+                                ((t) - 1)) & ~((t) - 1))))
+#endif
+
+#define RTC_IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a) - 1)))
+
+// Use these to declare and define a static local variable that gets leaked so
+// that its destructors are not called at exit.
+#define RTC_DEFINE_STATIC_LOCAL(type, name, arguments) \
+  static type& name = *new type arguments
+
+#endif  // __cplusplus
+
+#endif  // WEBRTC_BASE_BASICTYPES_H_
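
AtomicOps is the lowest-level primitive here: on Windows it maps to the
Interlocked API, elsewhere to GCC/Clang builtins. A minimal sketch of a
release-acquire flag handoff between two threads using the methods above
(the flag name is illustrative, not part of the patch):

    #include "webrtc/base/atomicops.h"

    static volatile int g_ready = 0;  // hypothetical flag shared between threads

    void Producer() {
      // ... fill in the data the consumer will read ...
      rtc::AtomicOps::ReleaseStore(&g_ready, 1);  // publish: prior writes visible first
    }

    bool ConsumerSeesData() {
      // observe publication; pairs with the release store above
      return rtc::AtomicOps::AcquireLoad(&g_ready) == 1;
    }
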
diff --git a/webrtc/base/criticalsection.cc b/webrtc/base/criticalsection.cc
new file mode 100644
index 0000000..1f50c23
--- /dev/null
+++ b/webrtc/base/criticalsection.cc
@@ -0,0 +1,169 @@
+/*
+ * Copyright 2015 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/base/criticalsection.h"
+
+#include "webrtc/base/checks.h"
+
+namespace rtc {
+
+CriticalSection::CriticalSection() {
+#if defined(WEBRTC_WIN)
+  InitializeCriticalSection(&crit_);
+#else
+  pthread_mutexattr_t mutex_attribute;
+  pthread_mutexattr_init(&mutex_attribute);
+  pthread_mutexattr_settype(&mutex_attribute, PTHREAD_MUTEX_RECURSIVE);
+  pthread_mutex_init(&mutex_, &mutex_attribute);
+  pthread_mutexattr_destroy(&mutex_attribute);
+  CS_DEBUG_CODE(thread_ = 0);
+  CS_DEBUG_CODE(recursion_count_ = 0);
+#endif
+}
+
+CriticalSection::~CriticalSection() {
+#if defined(WEBRTC_WIN)
+  DeleteCriticalSection(&crit_);
+#else
+  pthread_mutex_destroy(&mutex_);
+#endif
+}
+
+void CriticalSection::Enter() EXCLUSIVE_LOCK_FUNCTION() {
+#if defined(WEBRTC_WIN)
+  EnterCriticalSection(&crit_);
+#else
+  pthread_mutex_lock(&mutex_);
+#if CS_DEBUG_CHECKS
+  if (!recursion_count_) {
+    RTC_DCHECK(!thread_);
+    thread_ = pthread_self();
+  } else {
+    RTC_DCHECK(CurrentThreadIsOwner());
+  }
+  ++recursion_count_;
+#endif
+#endif
+}
+
+bool CriticalSection::TryEnter() EXCLUSIVE_TRYLOCK_FUNCTION(true) {
+#if defined(WEBRTC_WIN)
+  return TryEnterCriticalSection(&crit_) != FALSE;
+#else
+  if (pthread_mutex_trylock(&mutex_) != 0)
+    return false;
+#if CS_DEBUG_CHECKS
+  if (!recursion_count_) {
+    RTC_DCHECK(!thread_);
+    thread_ = pthread_self();
+  } else {
+    RTC_DCHECK(CurrentThreadIsOwner());
+  }
+  ++recursion_count_;
+#endif
+  return true;
+#endif
+}
+
+void CriticalSection::Leave() UNLOCK_FUNCTION() {
+  RTC_DCHECK(CurrentThreadIsOwner());
+#if defined(WEBRTC_WIN)
+  LeaveCriticalSection(&crit_);
+#else
+#if CS_DEBUG_CHECKS
+  --recursion_count_;
+  RTC_DCHECK(recursion_count_ >= 0);
+  if (!recursion_count_)
+    thread_ = 0;
+#endif
+  pthread_mutex_unlock(&mutex_);
+#endif
+}
+
+bool CriticalSection::CurrentThreadIsOwner() const {
+#if defined(WEBRTC_WIN)
+  // OwningThread has type HANDLE but actually contains the Thread ID:
+  // http://stackoverflow.com/questions/12675301/why-is-the-owningthread-member-of-critical-section-of-type-handle-when-it-is-de
+  // Converting through size_t avoids the VS 2015 warning C4312: conversion from
+  // 'type1' to 'type2' of greater size
+  return crit_.OwningThread ==
+         reinterpret_cast<HANDLE>(static_cast<size_t>(GetCurrentThreadId()));
+#else
+#if CS_DEBUG_CHECKS
+  return pthread_equal(thread_, pthread_self());
+#else
+  return true;
+#endif  // CS_DEBUG_CHECKS
+#endif
+}
+
+bool CriticalSection::IsLocked() const {
+#if defined(WEBRTC_WIN)
+  return crit_.LockCount != -1;
+#else
+#if CS_DEBUG_CHECKS
+  return thread_ != 0;
+#else
+  return true;
+#endif
+#endif
+}
+
+CritScope::CritScope(CriticalSection* cs) : cs_(cs) { cs_->Enter(); }
+CritScope::~CritScope() { cs_->Leave(); }
+
+TryCritScope::TryCritScope(CriticalSection* cs)
+    : cs_(cs),
+      locked_(cs->TryEnter()) {
+  CS_DEBUG_CODE(lock_was_called_ = false);
+}
+
+TryCritScope::~TryCritScope() {
+  CS_DEBUG_CODE(RTC_DCHECK(lock_was_called_));
+  if (locked_)
+    cs_->Leave();
+}
+
+bool TryCritScope::locked() const {
+  CS_DEBUG_CODE(lock_was_called_ = true);
+  return locked_;
+}
+
+void GlobalLockPod::Lock() {
+#if !defined(WEBRTC_WIN)
+  const struct timespec ts_null = {0};
+#endif
+
+  while (AtomicOps::CompareAndSwap(&lock_acquired, 0, 1)) {
+#if defined(WEBRTC_WIN)
+    ::Sleep(0);
+#else
+    nanosleep(&ts_null, nullptr);
+#endif
+  }
+}
+
+void GlobalLockPod::Unlock() {
+  int old_value = AtomicOps::CompareAndSwap(&lock_acquired, 1, 0);
+  RTC_DCHECK_EQ(1, old_value) << "Unlock called without calling Lock first";
+}
+
+GlobalLock::GlobalLock() {
+  lock_acquired = 0;
+}
+
+GlobalLockScope::GlobalLockScope(GlobalLockPod* lock)
+    : lock_(lock) {
+  lock_->Lock();
+}
+
+GlobalLockScope::~GlobalLockScope() {
+  lock_->Unlock();
+}
+
+}  // namespace rtc
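
GlobalLockPod is kept a POD on purpose: a static instance is zero-initialized
before any dynamic initializer runs, so the spin lock (built on
AtomicOps::CompareAndSwap above) is usable during static initialization, when
a CriticalSection constructor could not yet be trusted to have run. A sketch
of the intended pattern (names are hypothetical):

    #include "webrtc/base/criticalsection.h"

    static rtc::GlobalLockPod g_init_lock;  // POD: zero-initialized at load time

    void TouchSharedGlobal() {
      rtc::GlobalLockScope ls(&g_init_lock);  // spins until the CAS succeeds
      // ... safely mutate the protected global state ...
    }
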
diff --git a/webrtc/base/criticalsection.h b/webrtc/base/criticalsection.h
new file mode 100644
index 0000000..ddbf857
--- /dev/null
+++ b/webrtc/base/criticalsection.h
@@ -0,0 +1,129 @@
+/*
+ * Copyright 2004 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_BASE_CRITICALSECTION_H_
+#define WEBRTC_BASE_CRITICALSECTION_H_
+
+#include "webrtc/base/atomicops.h"
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/base/thread_annotations.h"
+
+#if defined(WEBRTC_WIN)
+// Include winsock2.h before including <windows.h> to maintain consistency with
+// win32.h. We can't include win32.h directly here since it pulls in
+// headers such as basictypes.h which causes problems in Chromium where webrtc
+// exists as two separate projects, webrtc and libjingle.
+#include <winsock2.h>
+#include <windows.h>
+#include <sal.h>  // must come after windows headers.
+#endif  // defined(WEBRTC_WIN)
+
+#if defined(WEBRTC_POSIX)
+#include <pthread.h>
+#endif
+
+#if (!defined(NDEBUG) || defined(DCHECK_ALWAYS_ON))
+#define CS_DEBUG_CHECKS 1
+#endif
+
+#if CS_DEBUG_CHECKS
+#define CS_DEBUG_CODE(x) x
+#else  // !CS_DEBUG_CHECKS
+#define CS_DEBUG_CODE(x)
+#endif  // !CS_DEBUG_CHECKS
+
+namespace rtc {
+
+class LOCKABLE CriticalSection {
+ public:
+  CriticalSection();
+  ~CriticalSection();
+
+  void Enter() EXCLUSIVE_LOCK_FUNCTION();
+  bool TryEnter() EXCLUSIVE_TRYLOCK_FUNCTION(true);
+  void Leave() UNLOCK_FUNCTION();
+
+  // Use only for RTC_DCHECKing.
+  bool CurrentThreadIsOwner() const;
+  // Use only for RTC_DCHECKing.
+  bool IsLocked() const;
+
+ private:
+#if defined(WEBRTC_WIN)
+  CRITICAL_SECTION crit_;
+#elif defined(WEBRTC_POSIX)
+  pthread_mutex_t mutex_;
+  CS_DEBUG_CODE(pthread_t thread_);
+  CS_DEBUG_CODE(int recursion_count_);
+#endif
+};
+
+// CritScope, for serializing execution through a scope.
+class SCOPED_LOCKABLE CritScope {
+ public:
+  explicit CritScope(CriticalSection* cs) EXCLUSIVE_LOCK_FUNCTION(cs);
+  ~CritScope() UNLOCK_FUNCTION();
+ private:
+  CriticalSection* const cs_;
+  RTC_DISALLOW_COPY_AND_ASSIGN(CritScope);
+};
+
+// Tries to lock a critical section on construction via
+// CriticalSection::TryEnter, and unlocks on destruction if the
+// lock was taken. Never blocks.
+//
+// IMPORTANT: Unlike CritScope, the lock may not be owned by this thread in
+// subsequent code. Users *must* check locked() to determine if the
+// lock was taken. If you're not calling locked(), you're doing it wrong!
+class TryCritScope {
+ public:
+  explicit TryCritScope(CriticalSection* cs);
+  ~TryCritScope();
+#if defined(WEBRTC_WIN)
+  _Check_return_ bool locked() const;
+#else
+  bool locked() const __attribute__((warn_unused_result));
+#endif
+ private:
+  CriticalSection* const cs_;
+  const bool locked_;
+  CS_DEBUG_CODE(mutable bool lock_was_called_);
+  RTC_DISALLOW_COPY_AND_ASSIGN(TryCritScope);
+};
+
+// A POD lock used to protect global variables. Do NOT use for other purposes.
+// No custom constructor or private data member should be added.
+class LOCKABLE GlobalLockPod {
+ public:
+  void Lock() EXCLUSIVE_LOCK_FUNCTION();
+
+  void Unlock() UNLOCK_FUNCTION();
+
+  volatile int lock_acquired;
+};
+
+class GlobalLock : public GlobalLockPod {
+ public:
+  GlobalLock();
+};
+
+// GlobalLockScope, for serializing execution through a scope.
+class SCOPED_LOCKABLE GlobalLockScope {
+ public:
+  explicit GlobalLockScope(GlobalLockPod* lock) EXCLUSIVE_LOCK_FUNCTION(lock);
+  ~GlobalLockScope() UNLOCK_FUNCTION();
+ private:
+  GlobalLockPod* const lock_;
+  RTC_DISALLOW_COPY_AND_ASSIGN(GlobalLockScope);
+};
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_CRITICALSECTION_H_
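
The LOCKABLE/EXCLUSIVE_LOCK_FUNCTION annotations let Clang's -Wthread-safety
(enabled in common_config above) verify callers. A minimal sketch of the two
scope guards, with illustrative class and member names:

    #include "webrtc/base/criticalsection.h"

    class Counter {
     public:
      void Add(int n) {
        rtc::CritScope lock(&crit_);  // blocks until the lock is acquired
        value_ += n;
      }
      bool TryAdd(int n) {
        rtc::TryCritScope lock(&crit_);
        if (!lock.locked())  // mandatory check; the lock may not be held
          return false;
        value_ += n;
        return true;
      }
     private:
      rtc::CriticalSection crit_;
      int value_ = 0;
    };
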
diff --git a/webrtc/base/event.cc b/webrtc/base/event.cc
new file mode 100644
index 0000000..a9af208
--- /dev/null
+++ b/webrtc/base/event.cc
@@ -0,0 +1,135 @@
+/*
+ * Copyright 2004 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/base/event.h"
+
+#if defined(WEBRTC_WIN)
+#include <windows.h>
+#elif defined(WEBRTC_POSIX)
+#include <pthread.h>
+#include <sys/time.h>
+#include <time.h>
+#else
+#error "Must define either WEBRTC_WIN or WEBRTC_POSIX."
+#endif
+
+#include "webrtc/base/checks.h"
+
+namespace rtc {
+
+#if defined(WEBRTC_WIN)
+
+Event::Event(bool manual_reset, bool initially_signaled) {
+  event_handle_ = ::CreateEvent(NULL,  // Security attributes.
+                                manual_reset,
+                                initially_signaled,
+                                NULL);  // Name.
+  RTC_CHECK(event_handle_);
+}
+
+Event::~Event() {
+  CloseHandle(event_handle_);
+}
+
+void Event::Set() {
+  SetEvent(event_handle_);
+}
+
+void Event::Reset() {
+  ResetEvent(event_handle_);
+}
+
+bool Event::Wait(int milliseconds) {
+  DWORD ms = (milliseconds == kForever) ? INFINITE : milliseconds;
+  return (WaitForSingleObject(event_handle_, ms) == WAIT_OBJECT_0);
+}
+
+#elif defined(WEBRTC_POSIX)
+
+Event::Event(bool manual_reset, bool initially_signaled)
+    : is_manual_reset_(manual_reset),
+      event_status_(initially_signaled) {
+  RTC_CHECK(pthread_mutex_init(&event_mutex_, NULL) == 0);
+  RTC_CHECK(pthread_cond_init(&event_cond_, NULL) == 0);
+}
+
+Event::~Event() {
+  pthread_mutex_destroy(&event_mutex_);
+  pthread_cond_destroy(&event_cond_);
+}
+
+void Event::Set() {
+  pthread_mutex_lock(&event_mutex_);
+  event_status_ = true;
+  pthread_cond_broadcast(&event_cond_);
+  pthread_mutex_unlock(&event_mutex_);
+}
+
+void Event::Reset() {
+  pthread_mutex_lock(&event_mutex_);
+  event_status_ = false;
+  pthread_mutex_unlock(&event_mutex_);
+}
+
+bool Event::Wait(int milliseconds) {
+  pthread_mutex_lock(&event_mutex_);
+  int error = 0;
+
+  if (milliseconds != kForever) {
+    // Converting from seconds and microseconds (1e-6) plus
+    // milliseconds (1e-3) to seconds and nanoseconds (1e-9).
+
+    struct timespec ts;
+#if HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
+    // Use relative time version, which tends to be more efficient for
+    // pthread implementations where provided (like on Android).
+    ts.tv_sec = milliseconds / 1000;
+    ts.tv_nsec = (milliseconds % 1000) * 1000000;
+#else
+    struct timeval tv;
+    gettimeofday(&tv, NULL);
+
+    ts.tv_sec = tv.tv_sec + (milliseconds / 1000);
+    ts.tv_nsec = tv.tv_usec * 1000 + (milliseconds % 1000) * 1000000;
+
+    // Handle overflow.
+    if (ts.tv_nsec >= 1000000000) {
+      ts.tv_sec++;
+      ts.tv_nsec -= 1000000000;
+    }
+#endif
+
+    while (!event_status_ && error == 0) {
+#if HAVE_PTHREAD_COND_TIMEDWAIT_RELATIVE
+      error = pthread_cond_timedwait_relative_np(
+          &event_cond_, &event_mutex_, &ts);
+#else
+      error = pthread_cond_timedwait(&event_cond_, &event_mutex_, &ts);
+#endif
+    }
+  } else {
+    while (!event_status_ && error == 0)
+      error = pthread_cond_wait(&event_cond_, &event_mutex_);
+  }
+
+  // NOTE(liulk): Exactly one thread will auto-reset this event. All
+  // the other threads will think it's unsignaled. This seems to be
+  // consistent with auto-reset events in WEBRTC_WIN.
+  if (error == 0 && !is_manual_reset_)
+    event_status_ = false;
+
+  pthread_mutex_unlock(&event_mutex_);
+
+  return (error == 0);
+}
+
+#endif
+
+}  // namespace rtc
diff --git a/webrtc/base/event.h b/webrtc/base/event.h
new file mode 100644
index 0000000..5237151
--- /dev/null
+++ b/webrtc/base/event.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright 2004 The WebRTC Project Authors. All rights reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_BASE_EVENT_H__
+#define WEBRTC_BASE_EVENT_H__
+
+#if defined(WEBRTC_WIN)
+#include "webrtc/base/win32.h"  // NOLINT: consider this a system header.
+#elif defined(WEBRTC_POSIX)
+#include <pthread.h>
+#else
+#error "Must define either WEBRTC_WIN or WEBRTC_POSIX."
+#endif
+
+#include "webrtc/base/basictypes.h"
+
+namespace rtc {
+
+class Event {
+ public:
+  static const int kForever = -1;
+
+  Event(bool manual_reset, bool initially_signaled);
+  ~Event();
+
+  void Set();
+  void Reset();
+
+  // Wait for the event to become signaled, for the specified number of
+  // |milliseconds|. To wait indefinitely, pass kForever.
+  bool Wait(int milliseconds);
+
+ private:
+#if defined(WEBRTC_WIN)
+  HANDLE event_handle_;
+#elif defined(WEBRTC_POSIX)
+  pthread_mutex_t event_mutex_;
+  pthread_cond_t event_cond_;
+  const bool is_manual_reset_;
+  bool event_status_;
+#endif
+};
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_EVENT_H__
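
Event mirrors Win32 event semantics on POSIX: an auto-reset event releases
exactly one waiter per Set(), while a manual-reset event stays signaled until
Reset(). A minimal thread-handoff sketch (names and timeout are illustrative):

    #include "webrtc/base/event.h"

    rtc::Event started(false /* manual_reset */, false /* initially_signaled */);

    // Worker thread, once its initialization is done:
    //   started.Set();

    // Controlling thread: wait up to 5 seconds for the worker,
    // or pass rtc::Event::kForever to block indefinitely.
    bool ok = started.Wait(5000);
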
+  bool Wait(int milliseconds);
+
+ private:
+#if defined(WEBRTC_WIN)
+  HANDLE event_handle_;
+#elif defined(WEBRTC_POSIX)
+  pthread_mutex_t event_mutex_;
+  pthread_cond_t event_cond_;
+  const bool is_manual_reset_;
+  bool event_status_;
+#endif
+};
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_EVENT_H__
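A usage sketch for rtc::Event with one signaling and one waiting thread; the names and the 5-second timeout are invented for this illustration:

    #include "webrtc/base/event.h"

    rtc::Event done(false /* manual_reset */, false /* initially_signaled */);

    void Worker() {
      // ... produce a result ...
      done.Set();  // Wakes one waiter; the event then auto-resets.
    }

    bool WaitForWorker() {
      return done.Wait(5000);  // true if signaled, false on timeout.
      // done.Wait(rtc::Event::kForever) would block indefinitely.
    }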
diff --git a/webrtc/base/platform_file.cc b/webrtc/base/platform_file.cc
new file mode 100644
index 0000000..d518b74
--- /dev/null
+++ b/webrtc/base/platform_file.cc
@@ -0,0 +1,49 @@
+/*
+ *  Copyright 2014 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/base/platform_file.h"
+
+#if defined(WEBRTC_WIN)
+#include <io.h>
+#else
+#include <unistd.h>
+#endif
+
+namespace rtc {
+
+#if defined(WEBRTC_WIN)
+const PlatformFile kInvalidPlatformFileValue = INVALID_HANDLE_VALUE;
+
+FILE* FdopenPlatformFileForWriting(PlatformFile file) {
+  if (file == kInvalidPlatformFileValue)
+    return NULL;
+  int fd = _open_osfhandle(reinterpret_cast<intptr_t>(file), 0);
+  if (fd < 0)
+    return NULL;
+
+  return _fdopen(fd, "w");
+}
+
+bool ClosePlatformFile(PlatformFile file) {
+  return CloseHandle(file) != 0;
+}
+#else
+const PlatformFile kInvalidPlatformFileValue = -1;
+
+FILE* FdopenPlatformFileForWriting(PlatformFile file) {
+  return fdopen(file, "w");
+}
+
+bool ClosePlatformFile(PlatformFile file) {
+  return close(file) == 0;
+}
+#endif
+
+}  // namespace rtc
diff --git a/webrtc/base/platform_file.h b/webrtc/base/platform_file.h
new file mode 100644
index 0000000..12e08e9
--- /dev/null
+++ b/webrtc/base/platform_file.h
@@ -0,0 +1,44 @@
+/*
+ *  Copyright 2014 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_BASE_PLATFORM_FILE_H_
+#define WEBRTC_BASE_PLATFORM_FILE_H_
+
+#include <stdio.h>
+
+#if defined(WEBRTC_WIN)
+#include <windows.h>
+#endif
+
+namespace rtc {
+
+#if defined(WEBRTC_WIN)
+typedef HANDLE PlatformFile;
+#elif defined(WEBRTC_POSIX)
+typedef int PlatformFile;
+#else
+#error Unsupported platform
+#endif
+
+extern const PlatformFile kInvalidPlatformFileValue;
+
+// Associates a standard FILE stream with an existing PlatformFile.
+// Note that after this function has returned a valid FILE stream,
+// the PlatformFile should no longer be used.
+FILE* FdopenPlatformFileForWriting(PlatformFile file);
+
+// Closes a PlatformFile. Returns true on success.
+// Don't use ClosePlatformFile to close a file opened with FdopenPlatformFile.
+// Use fclose instead.
+bool ClosePlatformFile(PlatformFile file);
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_PLATFORM_FILE_H_
diff --git a/webrtc/base/platform_thread.cc b/webrtc/base/platform_thread.cc
new file mode 100644
index 0000000..4167392
--- /dev/null
+++ b/webrtc/base/platform_thread.cc
@@ -0,0 +1,82 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/base/platform_thread.h"
+
+#include <string.h>
+
+#include "webrtc/base/checks.h"
+
+#if defined(WEBRTC_LINUX)
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#endif
+
+namespace rtc {
+
+PlatformThreadId CurrentThreadId() {
+  PlatformThreadId ret;
+#if defined(WEBRTC_WIN)
+  ret = GetCurrentThreadId();
+#elif defined(WEBRTC_POSIX)
+#if defined(WEBRTC_MAC) || defined(WEBRTC_IOS)
+  ret = pthread_mach_thread_np(pthread_self());
+#elif defined(WEBRTC_LINUX)
+  ret = syscall(__NR_gettid);
+#elif defined(WEBRTC_ANDROID)
+  ret = gettid();
+#else
+  // Default implementation for nacl and solaris.
+  ret = reinterpret_cast<pid_t>(pthread_self());
+#endif
+#endif  // defined(WEBRTC_POSIX)
+  RTC_DCHECK(ret);
+  return ret;
+}
+
+PlatformThreadRef CurrentThreadRef() {
+#if defined(WEBRTC_WIN)
+  return GetCurrentThreadId();
+#elif defined(WEBRTC_POSIX)
+  return pthread_self();
+#endif
+}
+
+bool IsThreadRefEqual(const PlatformThreadRef& a, const PlatformThreadRef& b) {
+#if defined(WEBRTC_WIN)
+  return a == b;
+#elif defined(WEBRTC_POSIX)
+  return pthread_equal(a, b);
+#endif
+}
+
+void SetCurrentThreadName(const char* name) {
+  RTC_DCHECK(strlen(name) < 64);
+#if defined(WEBRTC_WIN)
+  struct {
+    DWORD dwType;
+    LPCSTR szName;
+    DWORD dwThreadID;
+    DWORD dwFlags;
+  } threadname_info = {0x1000, name, static_cast<DWORD>(-1), 0};
+
+  __try {
+    ::RaiseException(0x406D1388, 0, sizeof(threadname_info) / sizeof(DWORD),
+                     reinterpret_cast<ULONG_PTR*>(&threadname_info));
+  } __except (EXCEPTION_EXECUTE_HANDLER) {
+  }
+#elif defined(WEBRTC_LINUX) || defined(WEBRTC_ANDROID)
+  prctl(PR_SET_NAME, reinterpret_cast<unsigned long>(name));
+#elif defined(WEBRTC_MAC) || defined(WEBRTC_IOS)
+  pthread_setname_np(name);
+#endif
+}
+
+}  // namespace rtc
diff --git a/webrtc/base/platform_thread.h b/webrtc/base/platform_thread.h
new file mode 100644
index 0000000..50033b3
--- /dev/null
+++ b/webrtc/base/platform_thread.h
@@ -0,0 +1,43 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_BASE_PLATFORM_THREAD_H_
+#define WEBRTC_BASE_PLATFORM_THREAD_H_
+
+#if defined(WEBRTC_WIN)
+#include <winsock2.h>
+#include <windows.h>
+#elif defined(WEBRTC_POSIX)
+#include <pthread.h>
+#include <unistd.h>
+#endif
+
+namespace rtc {
+
+#if defined(WEBRTC_WIN)
+typedef DWORD PlatformThreadId;
+typedef DWORD PlatformThreadRef;
+#elif defined(WEBRTC_POSIX)
+typedef pid_t PlatformThreadId;
+typedef pthread_t PlatformThreadRef;
+#endif
+
+PlatformThreadId CurrentThreadId();
+PlatformThreadRef CurrentThreadRef();
+
+// Compares two thread identifiers for equality.
+bool IsThreadRefEqual(const PlatformThreadRef& a, const PlatformThreadRef& b);
+
+// Sets the current thread name.
+void SetCurrentThreadName(const char* name);
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_PLATFORM_THREAD_H_
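A sketch of how a thread entry point might use these helpers; the thread name is an arbitrary example and must stay under the 64-character limit checked above:

    #include "webrtc/base/platform_thread.h"

    void WorkerMain() {
      rtc::SetCurrentThreadName("audio_worker");  // Visible in debuggers/tracing.
      const rtc::PlatformThreadId tid = rtc::CurrentThreadId();
      // ... include tid in log lines to correlate them with this thread ...
    }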
diff --git a/webrtc/base/safe_conversions.h b/webrtc/base/safe_conversions.h
new file mode 100644
index 0000000..51239bc
--- /dev/null
+++ b/webrtc/base/safe_conversions.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright 2014 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Borrowed from Chromium's src/base/numerics/safe_conversions.h.
+
+#ifndef WEBRTC_BASE_SAFE_CONVERSIONS_H_
+#define WEBRTC_BASE_SAFE_CONVERSIONS_H_
+
+#include <limits>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/safe_conversions_impl.h"
+
+namespace rtc {
+
+// Convenience function that returns true if the supplied value is in range
+// for the destination type.
+template <typename Dst, typename Src>
+inline bool IsValueInRangeForNumericType(Src value) {
+  return internal::RangeCheck<Dst>(value) == internal::TYPE_VALID;
+}
+
+// checked_cast<> is analogous to static_cast<> for numeric types,
+// except that it CHECKs that the specified numeric conversion will not
+// overflow or underflow. NaN source will always trigger a CHECK.
+template <typename Dst, typename Src>
+inline Dst checked_cast(Src value) {
+  RTC_CHECK(IsValueInRangeForNumericType<Dst>(value));
+  return static_cast<Dst>(value);
+}
+
+// saturated_cast<> is analogous to static_cast<> for numeric types, except
+// that the specified numeric conversion will saturate rather than overflow or
+// underflow. NaN assignment to an integral will trigger a RTC_CHECK condition.
+template <typename Dst, typename Src>
+inline Dst saturated_cast(Src value) {
+  // Optimization for floating point values, which already saturate.
+  if (std::numeric_limits<Dst>::is_iec559)
+    return static_cast<Dst>(value);
+
+  switch (internal::RangeCheck<Dst>(value)) {
+    case internal::TYPE_VALID:
+      return static_cast<Dst>(value);
+
+    case internal::TYPE_UNDERFLOW:
+      return std::numeric_limits<Dst>::min();
+
+    case internal::TYPE_OVERFLOW:
+      return std::numeric_limits<Dst>::max();
+
+    // Should fail only on attempting to assign NaN to a saturated integer.
+    case internal::TYPE_INVALID:
+      FATAL();
+      return std::numeric_limits<Dst>::max();
+  }
+
+  FATAL();
+  return static_cast<Dst>(value);
+}
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_SAFE_CONVERSIONS_H_
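Concrete behavior of the two casts above, with values chosen purely for illustration:

    #include "webrtc/base/safe_conversions.h"

    int16_t a = rtc::saturated_cast<int16_t>(100000);   // 32767 (clamped to max).
    int16_t b = rtc::saturated_cast<int16_t>(-100000);  // -32768 (clamped to min).
    uint8_t c = rtc::checked_cast<uint8_t>(255);        // OK: value is in range.
    // rtc::checked_cast<uint8_t>(256) would fail the RTC_CHECK and abort.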
diff --git a/webrtc/base/safe_conversions_impl.h b/webrtc/base/safe_conversions_impl.h
new file mode 100644
index 0000000..52e52ef
--- /dev/null
+++ b/webrtc/base/safe_conversions_impl.h
@@ -0,0 +1,188 @@
+/*
+ *  Copyright 2014 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Borrowed from Chromium's src/base/numerics/safe_conversions_impl.h.
+
+#ifndef WEBRTC_BASE_SAFE_CONVERSIONS_IMPL_H_
+#define WEBRTC_BASE_SAFE_CONVERSIONS_IMPL_H_
+
+#include <limits>
+
+namespace rtc {
+namespace internal {
+
+enum DstSign {
+  DST_UNSIGNED,
+  DST_SIGNED
+};
+
+enum SrcSign {
+  SRC_UNSIGNED,
+  SRC_SIGNED
+};
+
+enum DstRange {
+  OVERLAPS_RANGE,
+  CONTAINS_RANGE
+};
+
+// Helper templates to statically determine if our destination type can contain
+// all values represented by the source type.
+
+template <typename Dst, typename Src,
+          DstSign IsDstSigned = std::numeric_limits<Dst>::is_signed ?
+                                DST_SIGNED : DST_UNSIGNED,
+          SrcSign IsSrcSigned = std::numeric_limits<Src>::is_signed ?
+                                SRC_SIGNED : SRC_UNSIGNED>
+struct StaticRangeCheck {};
+
+template <typename Dst, typename Src>
+struct StaticRangeCheck<Dst, Src, DST_SIGNED, SRC_SIGNED> {
+  typedef std::numeric_limits<Dst> DstLimits;
+  typedef std::numeric_limits<Src> SrcLimits;
+  // Compare based on max_exponent, which we must compute for integrals.
+  static const size_t kDstMaxExponent = DstLimits::is_iec559 ?
+                                        DstLimits::max_exponent :
+                                        (sizeof(Dst) * 8 - 1);
+  static const size_t kSrcMaxExponent = SrcLimits::is_iec559 ?
+                                        SrcLimits::max_exponent :
+                                        (sizeof(Src) * 8 - 1);
+  static const DstRange value = kDstMaxExponent >= kSrcMaxExponent ?
+                                CONTAINS_RANGE : OVERLAPS_RANGE;
+};
+
+template <typename Dst, typename Src>
+struct StaticRangeCheck<Dst, Src, DST_UNSIGNED, SRC_UNSIGNED> {
+  static const DstRange value = sizeof(Dst) >= sizeof(Src) ?
+                                CONTAINS_RANGE : OVERLAPS_RANGE;
+};
+
+template <typename Dst, typename Src>
+struct StaticRangeCheck<Dst, Src, DST_SIGNED, SRC_UNSIGNED> {
+  typedef std::numeric_limits<Dst> DstLimits;
+  typedef std::numeric_limits<Src> SrcLimits;
+  // Compare based on max_exponent, which we must compute for integrals.
+  static const size_t kDstMaxExponent = DstLimits::is_iec559 ?
+                                        DstLimits::max_exponent :
+                                        (sizeof(Dst) * 8 - 1);
+  static const size_t kSrcMaxExponent = sizeof(Src) * 8;
+  static const DstRange value = kDstMaxExponent >= kSrcMaxExponent ?
+                                CONTAINS_RANGE : OVERLAPS_RANGE;
+};
+
+template <typename Dst, typename Src>
+struct StaticRangeCheck<Dst, Src, DST_UNSIGNED, SRC_SIGNED> {
+  static const DstRange value = OVERLAPS_RANGE;
+};
+
+
+enum RangeCheckResult {
+  TYPE_VALID = 0,      // Value can be represented by the destination type.
+  TYPE_UNDERFLOW = 1,  // Value would underflow.
+  TYPE_OVERFLOW = 2,   // Value would overflow.
+  TYPE_INVALID = 3     // Source value is invalid (i.e. NaN).
+};
+
+// This macro creates a RangeCheckResult from an upper and lower bound
+// check by taking advantage of the fact that only NaN can be out of range in
+// both directions at once.
+#define BASE_NUMERIC_RANGE_CHECK_RESULT(is_in_upper_bound, is_in_lower_bound) \
+    RangeCheckResult(((is_in_upper_bound) ? 0 : TYPE_OVERFLOW) |              \
+                     ((is_in_lower_bound) ? 0 : TYPE_UNDERFLOW))
+
+template <typename Dst, typename Src,
+          DstSign IsDstSigned = std::numeric_limits<Dst>::is_signed ?
+                                DST_SIGNED : DST_UNSIGNED,
+          SrcSign IsSrcSigned = std::numeric_limits<Src>::is_signed ?
+                                SRC_SIGNED : SRC_UNSIGNED,
+          DstRange IsSrcRangeContained = StaticRangeCheck<Dst, Src>::value>
+struct RangeCheckImpl {};
+
+// The following templates are for ranges that must be verified at runtime. We
+// split it into checks based on signedness to avoid confusing casts and
+// compiler warnings on signed and unsigned comparisons.
+
+// Dst range always contains the result: nothing to check.
+template <typename Dst, typename Src, DstSign IsDstSigned, SrcSign IsSrcSigned>
+struct RangeCheckImpl<Dst, Src, IsDstSigned, IsSrcSigned, CONTAINS_RANGE> {
+  static RangeCheckResult Check(Src value) {
+    return TYPE_VALID;
+  }
+};
+
+// Signed to signed narrowing.
+template <typename Dst, typename Src>
+struct RangeCheckImpl<Dst, Src, DST_SIGNED, SRC_SIGNED, OVERLAPS_RANGE> {
+  static RangeCheckResult Check(Src value) {
+    typedef std::numeric_limits<Dst> DstLimits;
+    return DstLimits::is_iec559 ?
+           BASE_NUMERIC_RANGE_CHECK_RESULT(
+               value <= static_cast<Src>(DstLimits::max()),
+               value >= static_cast<Src>(DstLimits::max() * -1)) :
+           BASE_NUMERIC_RANGE_CHECK_RESULT(
+               value <= static_cast<Src>(DstLimits::max()),
+               value >= static_cast<Src>(DstLimits::min()));
+  }
+};
+
+// Unsigned to unsigned narrowing.
+template <typename Dst, typename Src>
+struct RangeCheckImpl<Dst, Src, DST_UNSIGNED, SRC_UNSIGNED, OVERLAPS_RANGE> {
+  static RangeCheckResult Check(Src value) {
+    typedef std::numeric_limits<Dst> DstLimits;
+    return BASE_NUMERIC_RANGE_CHECK_RESULT(
+               value <= static_cast<Src>(DstLimits::max()), true);
+  }
+};
+
+// Unsigned to signed.
+template <typename Dst, typename Src>
+struct RangeCheckImpl<Dst, Src, DST_SIGNED, SRC_UNSIGNED, OVERLAPS_RANGE> {
+  static RangeCheckResult Check(Src value) {
+    typedef std::numeric_limits<Dst> DstLimits;
+    return sizeof(Dst) > sizeof(Src) ? TYPE_VALID :
+           BASE_NUMERIC_RANGE_CHECK_RESULT(
+               value <= static_cast<Src>(DstLimits::max()), true);
+  }
+};
+
+// Signed to unsigned.
+template <typename Dst, typename Src>
+struct RangeCheckImpl<Dst, Src, DST_UNSIGNED, SRC_SIGNED, OVERLAPS_RANGE> {
+  static RangeCheckResult Check(Src value) {
+    typedef std::numeric_limits<Dst> DstLimits;
+    typedef std::numeric_limits<Src> SrcLimits;
+    // Compare based on max_exponent, which we must compute for integrals.
+    static const size_t kDstMaxExponent = sizeof(Dst) * 8;
+    static const size_t kSrcMaxExponent = SrcLimits::is_iec559 ?
+                                          SrcLimits::max_exponent :
+                                          (sizeof(Src) * 8 - 1);
+    return (kDstMaxExponent >= kSrcMaxExponent) ?
+           BASE_NUMERIC_RANGE_CHECK_RESULT(true,
+                                           value >= static_cast<Src>(0)) :
+           BASE_NUMERIC_RANGE_CHECK_RESULT(
+               value <= static_cast<Src>(DstLimits::max()),
+               value >= static_cast<Src>(0));
+  }
+};
+
+template <typename Dst, typename Src>
+inline RangeCheckResult RangeCheck(Src value) {
+  static_assert(std::numeric_limits<Src>::is_specialized,
+                "argument must be numeric");
+  static_assert(std::numeric_limits<Dst>::is_specialized,
+                "result must be numeric");
+  return RangeCheckImpl<Dst, Src>::Check(value);
+}
+
+}  // namespace internal
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_SAFE_CONVERSIONS_IMPL_H_
diff --git a/webrtc/base/stringutils.cc b/webrtc/base/stringutils.cc
new file mode 100644
index 0000000..868e475
--- /dev/null
+++ b/webrtc/base/stringutils.cc
@@ -0,0 +1,133 @@
+/*
+ *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/stringutils.h"
+
+namespace rtc {
+
+bool memory_check(const void* memory, int c, size_t count) {
+  const char* char_memory = static_cast<const char*>(memory);
+  char char_c = static_cast<char>(c);
+  for (size_t i = 0; i < count; ++i) {
+    if (char_memory[i] != char_c) {
+      return false;
+    }
+  }
+  return true;
+}
+
+bool string_match(const char* target, const char* pattern) {
+  while (*pattern) {
+    if (*pattern == '*') {
+      if (!*++pattern) {
+        return true;
+      }
+      while (*target) {
+        if ((toupper(*pattern) == toupper(*target))
+            && string_match(target + 1, pattern + 1)) {
+          return true;
+        }
+        ++target;
+      }
+      return false;
+    } else {
+      if (toupper(*pattern) != toupper(*target)) {
+        return false;
+      }
+      ++target;
+      ++pattern;
+    }
+  }
+  return !*target;
+}
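Example calls matching the documented semantics ('*' spans any run of characters, and letters compare case-insensitively):

    bool m1 = rtc::string_match("www.TEST.GOOGLE.COM", "www.*.com");  // true
    bool m2 = rtc::string_match("stun.example.org", "*.example.*");   // true
    bool m3 = rtc::string_match("example.org", "*.com");              // false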
+#if defined(WEBRTC_WIN)
+int ascii_string_compare(const wchar_t* s1, const char* s2, size_t n,
+                         CharacterTransformation transformation) {
+  wchar_t c1, c2;
+  while (true) {
+    if (n-- == 0) return 0;
+    c1 = transformation(*s1);
+    // Double check that characters are not UTF-8
+    RTC_DCHECK_LT(static_cast<unsigned char>(*s2), 128);
+    // Note: *s2 gets implicitly promoted to wchar_t
+    c2 = transformation(*s2);
+    if (c1 != c2) return (c1 < c2) ? -1 : 1;
+    if (!c1) return 0;
+    ++s1;
+    ++s2;
+  }
+}
+
+size_t asccpyn(wchar_t* buffer, size_t buflen,
+               const char* source, size_t srclen) {
+  if (buflen <= 0)
+    return 0;
+
+  if (srclen == SIZE_UNKNOWN) {
+    srclen = strlenn(source, buflen - 1);
+  } else if (srclen >= buflen) {
+    srclen = buflen - 1;
+  }
+#if _DEBUG
+  // Double check that characters are not UTF-8
+  for (size_t pos = 0; pos < srclen; ++pos)
+    RTC_DCHECK_LT(static_cast<unsigned char>(source[pos]), 128);
+#endif  // _DEBUG
+  std::copy(source, source + srclen, buffer);
+  buffer[srclen] = 0;
+  return srclen;
+}
+
+#endif  // WEBRTC_WIN
+
+void replace_substrs(const char *search,
+                     size_t search_len,
+                     const char *replace,
+                     size_t replace_len,
+                     std::string *s) {
+  size_t pos = 0;
+  while ((pos = s->find(search, pos, search_len)) != std::string::npos) {
+    s->replace(pos, search_len, replace, replace_len);
+    pos += replace_len;
+  }
+}
+
+bool starts_with(const char *s1, const char *s2) {
+  return strncmp(s1, s2, strlen(s2)) == 0;
+}
+
+bool ends_with(const char *s1, const char *s2) {
+  size_t s1_length = strlen(s1);
+  size_t s2_length = strlen(s2);
+
+  if (s2_length > s1_length) {
+    return false;
+  }
+
+  const char* start = s1 + (s1_length - s2_length);
+  return strncmp(start, s2, s2_length) == 0;
+}
+
+static const char kWhitespace[] = " \n\r\t";
+
+std::string string_trim(const std::string& s) {
+  std::string::size_type first = s.find_first_not_of(kWhitespace);
+  std::string::size_type last  = s.find_last_not_of(kWhitespace);
+
+  if (first == std::string::npos || last == std::string::npos) {
+    return std::string("");
+  }
+
+  return s.substr(first, last - first + 1);
+}
+
+}  // namespace rtc
diff --git a/webrtc/base/stringutils.h b/webrtc/base/stringutils.h
new file mode 100644
index 0000000..5a6f42a
--- /dev/null
+++ b/webrtc/base/stringutils.h
@@ -0,0 +1,318 @@
+/*
+ *  Copyright 2004 The WebRTC Project Authors. All rights reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_BASE_STRINGUTILS_H__
+#define WEBRTC_BASE_STRINGUTILS_H__
+
+#include <ctype.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#if defined(WEBRTC_WIN)
+#include <malloc.h>
+#include <wchar.h>
+#define alloca _alloca
+#endif  // WEBRTC_WIN
+
+#if defined(WEBRTC_POSIX)
+#ifdef BSD
+#include <stdlib.h>
+#else  // BSD
+#include <alloca.h>
+#endif  // !BSD
+#endif  // WEBRTC_POSIX
+
+#include <string>
+
+#include "webrtc/base/basictypes.h"
+
+///////////////////////////////////////////////////////////////////////////////
+// Generic string/memory utilities
+///////////////////////////////////////////////////////////////////////////////
+
+#define STACK_ARRAY(TYPE, LEN) static_cast<TYPE*>(::alloca((LEN)*sizeof(TYPE)))
+
+namespace rtc {
+
+// Complement to memset. Verifies memory consists of count bytes of value c.
+bool memory_check(const void* memory, int c, size_t count);
+
+// Determines whether the simple wildcard pattern matches target.
+// Alpha characters in pattern match case-insensitively.
+// Asterisks in pattern match 0 or more characters.
+// Ex: string_match("www.TEST.GOOGLE.COM", "www.*.com") -> true
+bool string_match(const char* target, const char* pattern);
+
+}  // namespace rtc
+
+///////////////////////////////////////////////////////////////////////////////
+// Rename a bunch of common string functions so they are consistent across
+// platforms and between char and wchar_t variants.
+// Here is the full list of functions that are unified:
+//  strlen, strcmp, stricmp, strncmp, strnicmp
+//  strchr, vsnprintf, strtoul, tolowercase
+// tolowercase is like tolower, but not compatible with end-of-file value
+//
+// It's not clear if we will ever use wchar_t strings on unix. In theory,
+// all strings should be Utf8 all the time, except when interfacing with Win32
+// APIs that require Utf16.
+///////////////////////////////////////////////////////////////////////////////
+
+inline char tolowercase(char c) {
+  return static_cast<char>(tolower(c));
+}
+
+#if defined(WEBRTC_WIN)
+
+inline size_t strlen(const wchar_t* s) {
+  return wcslen(s);
+}
+inline int strcmp(const wchar_t* s1, const wchar_t* s2) {
+  return wcscmp(s1, s2);
+}
+inline int stricmp(const wchar_t* s1, const wchar_t* s2) {
+  return _wcsicmp(s1, s2);
+}
+inline int strncmp(const wchar_t* s1, const wchar_t* s2, size_t n) {
+  return wcsncmp(s1, s2, n);
+}
+inline int strnicmp(const wchar_t* s1, const wchar_t* s2, size_t n) {
+  return _wcsnicmp(s1, s2, n);
+}
+inline const wchar_t* strchr(const wchar_t* s, wchar_t c) {
+  return wcschr(s, c);
+}
+inline const wchar_t* strstr(const wchar_t* haystack, const wchar_t* needle) {
+  return wcsstr(haystack, needle);
+}
+#ifndef vsnprintf
+inline int vsnprintf(wchar_t* buf, size_t n, const wchar_t* fmt, va_list args) {
+  return _vsnwprintf(buf, n, fmt, args);
+}
+#endif  // !vsnprintf
+inline unsigned long strtoul(const wchar_t* snum, wchar_t** end, int base) {
+  return wcstoul(snum, end, base);
+}
+inline wchar_t tolowercase(wchar_t c) {
+  return static_cast<wchar_t>(towlower(c));
+}
+
+#endif  // WEBRTC_WIN
+
+#if defined(WEBRTC_POSIX)
+
+inline int _stricmp(const char* s1, const char* s2) {
+  return strcasecmp(s1, s2);
+}
+inline int _strnicmp(const char* s1, const char* s2, size_t n) {
+  return strncasecmp(s1, s2, n);
+}
+
+#endif  // WEBRTC_POSIX
+
+///////////////////////////////////////////////////////////////////////////////
+// Traits simplifies porting string functions to be CTYPE-agnostic
+///////////////////////////////////////////////////////////////////////////////
+
+namespace rtc {
+
+const size_t SIZE_UNKNOWN = static_cast<size_t>(-1);
+
+template <class CTYPE>
+struct Traits {
+  // STL string type
+  //typedef XXX string;
+  // Null-terminated string
+  //inline static const CTYPE* empty_str();
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// String utilities which work with char or wchar_t
+///////////////////////////////////////////////////////////////////////////////
+
+template <class CTYPE>
+inline const CTYPE* nonnull(const CTYPE* str, const CTYPE* def_str = NULL) {
+  return str ? str : (def_str ? def_str : Traits<CTYPE>::empty_str());
+}
+template <class CTYPE>
+const CTYPE* strchr(const CTYPE* str, const CTYPE* chs) {
+  for (size_t i = 0; str[i]; ++i) {
+    for (size_t j = 0; chs[j]; ++j) {
+      if (str[i] == chs[j]) {
+        return str + i;
+      }
+    }
+  }
+  return 0;
+}
+
+template <class CTYPE>
+const CTYPE* strchrn(const CTYPE* str, size_t slen, CTYPE ch) {
+  for (size_t i = 0; i < slen; ++i) {
+    if (str[i] == ch) {
+      return str + i;
+    }
+  }
+  return 0;
+}
+
+template <class CTYPE>
+size_t strlenn(const CTYPE* buffer, size_t buflen) {
+  size_t bufpos = 0;
+  while (buffer[bufpos] && (bufpos < buflen)) {
+    ++bufpos;
+  }
+  return bufpos;
+}
+
+// Safe versions of strncpy, strncat, snprintf and vsnprintf that always
+// null-terminate.
+
+template <class CTYPE>
+size_t strcpyn(CTYPE* buffer, size_t buflen,
+               const CTYPE* source, size_t srclen = SIZE_UNKNOWN) {
+  if (buflen <= 0)
+    return 0;
+
+  if (srclen == SIZE_UNKNOWN) {
+    srclen = strlenn(source, buflen - 1);
+  } else if (srclen >= buflen) {
+    srclen = buflen - 1;
+  }
+  memcpy(buffer, source, srclen * sizeof(CTYPE));
+  buffer[srclen] = 0;
+  return srclen;
+}
+
+template <class CTYPE>
+size_t strcatn(CTYPE* buffer, size_t buflen,
+               const CTYPE* source, size_t srclen = SIZE_UNKNOWN) {
+  if (buflen <= 0)
+    return 0;
+
+  size_t bufpos = strlenn(buffer, buflen - 1);
+  return bufpos + strcpyn(buffer + bufpos, buflen - bufpos, source, srclen);
+}
+
+// Some compilers (clang specifically) require vsprintfn be defined before
+// sprintfn.
+template <class CTYPE>
+size_t vsprintfn(CTYPE* buffer, size_t buflen, const CTYPE* format,
+                 va_list args) {
+  int len = vsnprintf(buffer, buflen, format, args);
+  if ((len < 0) || (static_cast<size_t>(len) >= buflen)) {
+    len = static_cast<int>(buflen - 1);
+    buffer[len] = 0;
+  }
+  return len;
+}
+
+template <class CTYPE>
+size_t sprintfn(CTYPE* buffer, size_t buflen, const CTYPE* format, ...);
+template <class CTYPE>
+size_t sprintfn(CTYPE* buffer, size_t buflen, const CTYPE* format, ...) {
+  va_list args;
+  va_start(args, format);
+  size_t len = vsprintfn(buffer, buflen, format, args);
+  va_end(args);
+  return len;
+}
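A sketch of the truncation behavior of the always-terminating helpers above; the buffer size and input string are arbitrary:

    char buf[8];
    size_t copied = rtc::strcpyn(buf, sizeof(buf), "overflowing input");
    // copied == 7; buf holds "overflo" with buf[7] == '\0'.
    rtc::sprintfn(buf, sizeof(buf), "%d", 42);  // buf holds "42".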
+///////////////////////////////////////////////////////////////////////////////
+// Allow safe comparing and copying ascii (not UTF-8) with both wide and
+// non-wide character strings.
+///////////////////////////////////////////////////////////////////////////////
+
+inline int asccmp(const char* s1, const char* s2) {
+  return strcmp(s1, s2);
+}
+inline int ascicmp(const char* s1, const char* s2) {
+  return _stricmp(s1, s2);
+}
+inline int ascncmp(const char* s1, const char* s2, size_t n) {
+  return strncmp(s1, s2, n);
+}
+inline int ascnicmp(const char* s1, const char* s2, size_t n) {
+  return _strnicmp(s1, s2, n);
+}
+inline size_t asccpyn(char* buffer, size_t buflen,
+                      const char* source, size_t srclen = SIZE_UNKNOWN) {
+  return strcpyn(buffer, buflen, source, srclen);
+}
+
+#if defined(WEBRTC_WIN)
+
+typedef wchar_t (*CharacterTransformation)(wchar_t);
+inline wchar_t identity(wchar_t c) { return c; }
+int ascii_string_compare(const wchar_t* s1, const char* s2, size_t n,
+                         CharacterTransformation transformation);
+
+inline int asccmp(const wchar_t* s1, const char* s2) {
+  return ascii_string_compare(s1, s2, static_cast<size_t>(-1), identity);
+}
+inline int ascicmp(const wchar_t* s1, const char* s2) {
+  return ascii_string_compare(s1, s2, static_cast<size_t>(-1), tolowercase);
+}
+inline int ascncmp(const wchar_t* s1, const char* s2, size_t n) {
+  return ascii_string_compare(s1, s2, n, identity);
+}
+inline int ascnicmp(const wchar_t* s1, const char* s2, size_t n) {
+  return ascii_string_compare(s1, s2, n, tolowercase);
+}
+size_t asccpyn(wchar_t* buffer, size_t buflen,
+               const char* source, size_t srclen = SIZE_UNKNOWN);
+
+#endif  // WEBRTC_WIN
+
+///////////////////////////////////////////////////////////////////////////////
+// Traits specializations
+///////////////////////////////////////////////////////////////////////////////
+
+template<>
+struct Traits<char> {
+  typedef std::string string;
+  inline static const char* empty_str() { return ""; }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// Traits specializations (Windows only, currently)
+///////////////////////////////////////////////////////////////////////////////
+
+#if defined(WEBRTC_WIN)
+
+template<>
+struct Traits<wchar_t> {
+  typedef std::wstring string;
+  inline static const wchar_t* empty_str() { return L""; }
+};
+
+#endif  // WEBRTC_WIN
+
+// Replaces all occurrences of "search" with "replace".
+void replace_substrs(const char *search,
+                     size_t search_len,
+                     const char *replace,
+                     size_t replace_len,
+                     std::string *s);
+
+// True iff s1 starts with s2.
+bool starts_with(const char *s1, const char *s2);
+
+// True iff s1 ends with s2.
+bool ends_with(const char *s1, const char *s2);
+
+// Remove leading and trailing whitespaces.
+std::string string_trim(const std::string& s);
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_STRINGUTILS_H__
diff --git a/webrtc/base/thread_checker.h b/webrtc/base/thread_checker.h
new file mode 100644
index 0000000..6cd7d7b
--- /dev/null
+++ b/webrtc/base/thread_checker.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Borrowed from Chromium's src/base/threading/thread_checker.h.
+ +#ifndef WEBRTC_BASE_THREAD_CHECKER_H_ +#define WEBRTC_BASE_THREAD_CHECKER_H_ + +// Apart from debug builds, we also enable the thread checker in +// builds with DCHECK_ALWAYS_ON so that trybots and waterfall bots +// with this define will get the same level of thread checking as +// debug bots. +// +// Note that this does not perfectly match situations where RTC_DCHECK is +// enabled. For example a non-official release build may have +// DCHECK_ALWAYS_ON undefined (and therefore ThreadChecker would be +// disabled) but have RTC_DCHECKs enabled at runtime. +#if (!defined(NDEBUG) || defined(DCHECK_ALWAYS_ON)) +#define ENABLE_THREAD_CHECKER 1 +#else +#define ENABLE_THREAD_CHECKER 0 +#endif + +#include "webrtc/base/thread_checker_impl.h" + +namespace rtc { + +// Do nothing implementation, for use in release mode. +// +// Note: You should almost always use the ThreadChecker class to get the +// right version for your build configuration. +class ThreadCheckerDoNothing { + public: + bool CalledOnValidThread() const { + return true; + } + + void DetachFromThread() {} +}; + +// ThreadChecker is a helper class used to help verify that some methods of a +// class are called from the same thread. It provides identical functionality to +// base::NonThreadSafe, but it is meant to be held as a member variable, rather +// than inherited from base::NonThreadSafe. +// +// While inheriting from base::NonThreadSafe may give a clear indication about +// the thread-safety of a class, it may also lead to violations of the style +// guide with regard to multiple inheritance. The choice between having a +// ThreadChecker member and inheriting from base::NonThreadSafe should be based +// on whether: +// - Derived classes need to know the thread they belong to, as opposed to +// having that functionality fully encapsulated in the base class. +// - Derived classes should be able to reassign the base class to another +// thread, via DetachFromThread. +// +// If neither of these are true, then having a ThreadChecker member and calling +// CalledOnValidThread is the preferable solution. +// +// Example: +// class MyClass { +// public: +// void Foo() { +// RTC_DCHECK(thread_checker_.CalledOnValidThread()); +// ... (do stuff) ... +// } +// +// private: +// ThreadChecker thread_checker_; +// } +// +// In Release mode, CalledOnValidThread will always return true. +#if ENABLE_THREAD_CHECKER +class ThreadChecker : public ThreadCheckerImpl { +}; +#else +class ThreadChecker : public ThreadCheckerDoNothing { +}; +#endif // ENABLE_THREAD_CHECKER + +#undef ENABLE_THREAD_CHECKER + +} // namespace rtc + +#endif // WEBRTC_BASE_THREAD_CHECKER_H_ diff --git a/webrtc/base/thread_checker_impl.cc b/webrtc/base/thread_checker_impl.cc new file mode 100644 index 0000000..ea88308 --- /dev/null +++ b/webrtc/base/thread_checker_impl.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Borrowed from Chromium's src/base/threading/thread_checker_impl.cc. 
+
+#include "webrtc/base/thread_checker_impl.h"
+
+namespace rtc {
+
+ThreadCheckerImpl::ThreadCheckerImpl() : valid_thread_(CurrentThreadRef()) {
+}
+
+ThreadCheckerImpl::~ThreadCheckerImpl() {
+}
+
+bool ThreadCheckerImpl::CalledOnValidThread() const {
+  const PlatformThreadRef current_thread = CurrentThreadRef();
+  CritScope scoped_lock(&lock_);
+  if (!valid_thread_)  // Set if previously detached.
+    valid_thread_ = current_thread;
+  return IsThreadRefEqual(valid_thread_, current_thread);
+}
+
+void ThreadCheckerImpl::DetachFromThread() {
+  CritScope scoped_lock(&lock_);
+  valid_thread_ = 0;
+}
+
+}  // namespace rtc
diff --git a/webrtc/base/thread_checker_impl.h b/webrtc/base/thread_checker_impl.h
new file mode 100644
index 0000000..7b39ada
--- /dev/null
+++ b/webrtc/base/thread_checker_impl.h
@@ -0,0 +1,48 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Borrowed from Chromium's src/base/threading/thread_checker_impl.h.
+
+#ifndef WEBRTC_BASE_THREAD_CHECKER_IMPL_H_
+#define WEBRTC_BASE_THREAD_CHECKER_IMPL_H_
+
+#include "webrtc/base/criticalsection.h"
+#include "webrtc/base/platform_thread.h"
+
+namespace rtc {
+
+// Real implementation of ThreadChecker, for use in debug mode, or
+// for temporary use in release mode (e.g. to RTC_CHECK on a threading issue
+// seen only in the wild).
+//
+// Note: You should almost always use the ThreadChecker class to get the
+// right version for your build configuration.
+class ThreadCheckerImpl {
+ public:
+  ThreadCheckerImpl();
+  ~ThreadCheckerImpl();
+
+  bool CalledOnValidThread() const;
+
+  // Changes the thread that is checked for in CalledOnValidThread. This may
+  // be useful when an object may be created on one thread and then used
+  // exclusively on another thread.
+  void DetachFromThread();
+
+ private:
+  mutable CriticalSection lock_;
+  // This is mutable so that CalledOnValidThread can set it.
+  // It's guarded by |lock_|.
+  mutable PlatformThreadRef valid_thread_;
+};
+
+}  // namespace rtc
+
+#endif  // WEBRTC_BASE_THREAD_CHECKER_IMPL_H_
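A sketch of the detach-then-rebind pattern enabled by DetachFromThread; the Pipeline class is hypothetical:

    #include "webrtc/base/checks.h"
    #include "webrtc/base/thread_checker.h"

    class Pipeline {
     public:
      Pipeline() { checker_.DetachFromThread(); }  // Built on thread A...
      void Process() {
        // ...but bound to whichever thread calls Process() first.
        RTC_DCHECK(checker_.CalledOnValidThread());
        // ... do work ...
      }
     private:
      rtc::ThreadChecker checker_;
    };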
diff --git a/webrtc/common.h b/webrtc/common.h
new file mode 100644
index 0000000..dda045e
--- /dev/null
+++ b/webrtc/common.h
@@ -0,0 +1,124 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_H_
+#define WEBRTC_COMMON_H_
+
+#include <map>
+
+#include "webrtc/base/basictypes.h"
+
+namespace webrtc {
+
+// Class Config is designed to ease passing a set of options across webrtc
+// code. Options are identified by typename in order to avoid incorrect casts.
+//
+// Usage:
+// * declaring an option:
+//    struct Algo1_CostFunction {
+//      virtual float cost(int x) const { return x; }
+//      virtual ~Algo1_CostFunction() {}
+//    };
+//
+// * accessing an option:
+//    config.Get<Algo1_CostFunction>().cost(value);
+//
+// * setting an option:
+//    struct SqrCost : Algo1_CostFunction {
+//      virtual float cost(int x) const { return x*x; }
+//    };
+//    config.Set<Algo1_CostFunction>(new SqrCost());
+//
+// Note: This class is thread-compatible (like STL containers).
+class Config {
+ public:
+  // Returns the option if set or a default constructed one.
+  // Callers that access options too often are encouraged to cache the result.
+  // Returned references are owned by this.
+  //
+  // Requires std::is_default_constructible<T>
+  template<typename T> const T& Get() const;
+
+  // Set the option, deleting any previous instance of the same.
+  // This instance gets ownership of the newly set value.
+  template<typename T> void Set(T* value);
+
+  Config() {}
+  ~Config() {
+    // Note: this method is inline so webrtc public API depends only
+    // on the headers.
+    for (OptionMap::iterator it = options_.begin();
+         it != options_.end(); ++it) {
+      delete it->second;
+    }
+  }
+
+ private:
+  typedef void* OptionIdentifier;
+
+  struct BaseOption {
+    virtual ~BaseOption() {}
+  };
+
+  template<typename T>
+  struct Option : BaseOption {
+    explicit Option(T* v): value(v) {}
+    ~Option() {
+      delete value;
+    }
+    T* value;
+  };
+
+  // Own implementation of rtti-subset to avoid depending on rtti and its costs.
+  template<typename T>
+  static OptionIdentifier identifier() {
+    static char id_placeholder;
+    return &id_placeholder;
+  }
+
+  // Used to instantiate a default constructed object that doesn't need to be
+  // owned. This allows Get<T> to be implemented without requiring explicit
+  // locks.
+  template<typename T>
+  static const T& default_value() {
+    RTC_DEFINE_STATIC_LOCAL(const T, def, ());
+    return def;
+  }
+
+  typedef std::map<OptionIdentifier, BaseOption*> OptionMap;
+  OptionMap options_;
+
+  // RTC_DISALLOW_COPY_AND_ASSIGN
+  Config(const Config&);
+  void operator=(const Config&);
+};
+
+template<typename T>
+const T& Config::Get() const {
+  OptionMap::const_iterator it = options_.find(identifier<T>());
+  if (it != options_.end()) {
+    const T* t = static_cast<Option<T>*>(it->second)->value;
+    if (t) {
+      return *t;
+    }
+  }
+  return default_value<T>();
+}
+
+template<typename T>
+void Config::Set(T* value) {
+  BaseOption*& it = options_[identifier<T>()];
+  delete it;
+  it = new Option<T>(value);
+}
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_H_
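A usage sketch for Config; DelayAgnostic here is a stand-in option type invented for the example:

    struct DelayAgnostic {
      DelayAgnostic() : enabled(false) {}
      explicit DelayAgnostic(bool enabled) : enabled(enabled) {}
      bool enabled;
    };

    webrtc::Config config;
    config.Set<DelayAgnostic>(new DelayAgnostic(true));  // Config takes ownership.
    bool on = config.Get<DelayAgnostic>().enabled;  // true here; a default-
                                                    // constructed value if unset.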
diff --git a/webrtc/common_audio/Makefile.am b/webrtc/common_audio/Makefile.am
index 3c9fcce..c7dc373 100644
--- a/webrtc/common_audio/Makefile.am
+++ b/webrtc/common_audio/Makefile.am
@@ -1,6 +1,18 @@
 noinst_LTLIBRARIES = libcommon_audio.la
 
-libcommon_audio_la_SOURCES = signal_processing/include/real_fft.h \
+libcommon_audio_la_SOURCES = \
+	resampler/include/push_resampler.h \
+	resampler/include/resampler.h \
+	resampler/push_sinc_resampler.h \
+	resampler/sinc_resampler.h \
+	resampler/sinusoidal_linear_chirp_source.h \
+	resampler/push_resampler.cc \
+	resampler/push_sinc_resampler.cc \
+	resampler/resampler.cc \
+	resampler/sinc_resampler.cc \
+	resampler/sinc_resampler_sse.cc \
+	resampler/sinusoidal_linear_chirp_source.cc \
+	signal_processing/include/real_fft.h \
 	signal_processing/include/signal_processing_library.h \
 	signal_processing/include/spl_inl.h \
 	signal_processing/include/spl_inl_armv7.h \
@@ -51,24 +63,59 @@ libcommon_audio_la_SOURCES = signal_processing/include/real_fft.h \
 	vad/vad_gmm.h \
 	vad/vad_sp.c \
 	vad/vad_sp.h \
-	vad/webrtc_vad.c
+	vad/webrtc_vad.c \
+	audio_converter.cc \
+	audio_converter.h \
+	audio_ring_buffer.cc \
+	audio_ring_buffer.h \
+	blocker.cc \
+	blocker.h \
+	channel_buffer.cc \
+	channel_buffer.h \
+	fft4g.c \
+	fft4g.h \
+	fir_filter.cc \
+	fir_filter.h \
+	fir_filter_sse.cc \
+	fir_filter_sse.h \
+	lapped_transform.cc \
+	lapped_transform.h \
+	real_fourier.cc \
+	real_fourier.h \
+	real_fourier_ooura.cc \
+	real_fourier_ooura.h \
+	real_fourier_openmax.h \
+	ring_buffer.h \
+	ring_buffer.c \
+	sparse_fir_filter.cc \
+	sparse_fir_filter.h \
+	wav_file.h \
+	wav_file.cc \
+	wav_header.h \
+	wav_header.cc \
+	window_generator.h \
+	window_generator.cc
 
 libcommon_audio_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS)
 libcommon_audio_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS)
 
 # FIXME:
-# if ARM - signal_processing/complex_bit_reverse_arm.S
-#          signal_processing/spl_sqrt_floor_arm.S
-# ARM7 - signal_processing/filter_ar_fast_q12_armv7.S
-# NEON - signal_processing/cross_correlation_neon.c
-#        signal_processing/downsample_fast_neon.c
-#        signal_processing/min_max_operations_neon.c
-# if MIPS - signal_processing/complex_bit_reverse_mips.c
-#           signal_processing/complex_fft_mips.c
-#           signal_processing/cross_correlation_mips.c
-#           signal_processing/downsample_fast_mips.c
-#           signal_processing/filter_ar_fast_q12_mips.c
-#           signal_processing/min_max_operations_mips.c
-#           signal_processing/resample_by_2_mips.c
-#           signal_processing/spl_sqrt_floor_mips.c
-#           signal_processing/vector_scaling_operations_mips.c
+# x86  - resampler/sinc_resampler_sse.cc
+#        fir_filter_sse.cc
+# ARM  - signal_processing/complex_bit_reverse_arm.S
+#        signal_processing/spl_sqrt_floor_arm.S
+# ARM7 - signal_processing/filter_ar_fast_q12_armv7.S
+# NEON - resampler/sinc_resampler_neon.cc \
+#        signal_processing/cross_correlation_neon.c
+#        signal_processing/downsample_fast_neon.c
+#        signal_processing/min_max_operations_neon.c
+#        fir_filter_neon.c
+# MIPS - signal_processing/complex_bit_reverse_mips.c
+#        signal_processing/complex_fft_mips.c
+#        signal_processing/cross_correlation_mips.c
+#        signal_processing/downsample_fast_mips.c
+#        signal_processing/filter_ar_fast_q12_mips.c
+#        signal_processing/min_max_operations_mips.c
+#        signal_processing/resample_by_2_mips.c
+#        signal_processing/spl_sqrt_floor_mips.c
+#        signal_processing/vector_scaling_operations_mips.c
diff --git a/webrtc/common_audio/audio_converter.cc b/webrtc/common_audio/audio_converter.cc
new file mode 100644
index 0000000..07e5c6b
--- /dev/null
+++ b/webrtc/common_audio/audio_converter.cc
@@ -0,0 +1,200 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/audio_converter.h"
+
+#include <cstring>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/safe_conversions.h"
+#include "webrtc/common_audio/channel_buffer.h"
+#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
+#include "webrtc/system_wrappers/interface/scoped_vector.h"
+
+using rtc::checked_cast;
+
+namespace webrtc {
+
+class CopyConverter : public AudioConverter {
+ public:
+  CopyConverter(int src_channels, size_t src_frames, int dst_channels,
+                size_t dst_frames)
+      : AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {}
+  ~CopyConverter() override {};
+
+  void Convert(const float* const* src, size_t src_size, float* const* dst,
+               size_t dst_capacity) override {
+    CheckSizes(src_size, dst_capacity);
+    if (src != dst) {
+      for (int i = 0; i < src_channels(); ++i)
+        std::memcpy(dst[i], src[i], dst_frames() * sizeof(*dst[i]));
+    }
+  }
+};
+
+class UpmixConverter : public AudioConverter {
+ public:
+  UpmixConverter(int src_channels, size_t src_frames, int dst_channels,
+                 size_t dst_frames)
+      : AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {}
+  ~UpmixConverter() override {};
+
+  void Convert(const float* const* src, size_t src_size, float* const* dst,
+               size_t dst_capacity) override {
+    CheckSizes(src_size, dst_capacity);
+    for (size_t i = 0; i < dst_frames(); ++i) {
+      const float value = src[0][i];
+      for (int j = 0; j < dst_channels(); ++j)
+        dst[j][i] = value;
+    }
+  }
+};
+
+class DownmixConverter : public AudioConverter {
+ public:
+  DownmixConverter(int src_channels, size_t src_frames, int dst_channels,
+                   size_t dst_frames)
+      : AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {
+  }
+  ~DownmixConverter() override {};
+
+  void Convert(const float* const* src, size_t src_size, float* const* dst,
+               size_t dst_capacity) override {
+    CheckSizes(src_size, dst_capacity);
+    float* dst_mono = dst[0];
+    for (size_t i = 0; i < src_frames(); ++i) {
+      float sum = 0;
+      for (int j = 0; j < src_channels(); ++j)
+        sum += src[j][i];
+      dst_mono[i] = sum / src_channels();
+    }
+  }
+};
+
+class ResampleConverter : public AudioConverter {
+ public:
+  ResampleConverter(int src_channels, size_t src_frames, int dst_channels,
+                    size_t dst_frames)
+      : AudioConverter(src_channels, src_frames, dst_channels, dst_frames) {
+    resamplers_.reserve(src_channels);
+    for (int i = 0; i < src_channels; ++i)
+      resamplers_.push_back(new PushSincResampler(src_frames, dst_frames));
+  }
+  ~ResampleConverter() override {};
+
+  void Convert(const float* const* src, size_t src_size, float* const* dst,
+               size_t dst_capacity) override {
+    CheckSizes(src_size, dst_capacity);
+    for (size_t i = 0; i < resamplers_.size(); ++i)
+      resamplers_[i]->Resample(src[i], src_frames(), dst[i], dst_frames());
+  }
+
+ private:
+  ScopedVector<PushSincResampler> resamplers_;
+};
+
+// Apply a vector of converters in serial, in the order given. At least two
+// converters must be provided.
+class CompositionConverter : public AudioConverter {
+ public:
+  CompositionConverter(ScopedVector<AudioConverter> converters)
+      : converters_(converters.Pass()) {
+    RTC_CHECK_GE(converters_.size(), 2u);
+    // We need an intermediate buffer after every converter.
+    for (auto it = converters_.begin(); it != converters_.end() - 1; ++it)
+      buffers_.push_back(new ChannelBuffer<float>((*it)->dst_frames(),
+                                                  (*it)->dst_channels()));
+  }
+  ~CompositionConverter() override {};
+
+  void Convert(const float* const* src, size_t src_size, float* const* dst,
+               size_t dst_capacity) override {
+    converters_.front()->Convert(src, src_size, buffers_.front()->channels(),
+                                 buffers_.front()->size());
+    for (size_t i = 2; i < converters_.size(); ++i) {
+      auto src_buffer = buffers_[i - 2];
+      auto dst_buffer = buffers_[i - 1];
+      converters_[i - 1]->Convert(src_buffer->channels(),
+                                  src_buffer->size(),
+                                  dst_buffer->channels(),
+                                  dst_buffer->size());
+    }
+    converters_.back()->Convert(buffers_.back()->channels(),
+                                buffers_.back()->size(), dst, dst_capacity);
+  }
+
+ private:
+  ScopedVector<AudioConverter> converters_;
+  ScopedVector<ChannelBuffer<float>> buffers_;
+};
+
+rtc::scoped_ptr<AudioConverter> AudioConverter::Create(int src_channels,
+                                                       size_t src_frames,
+                                                       int dst_channels,
+                                                       size_t dst_frames) {
+  rtc::scoped_ptr<AudioConverter> sp;
+  if (src_channels > dst_channels) {
+    if (src_frames != dst_frames) {
+      ScopedVector<AudioConverter> converters;
+      converters.push_back(new DownmixConverter(src_channels, src_frames,
+                                                dst_channels, src_frames));
+      converters.push_back(new ResampleConverter(dst_channels, src_frames,
+                                                 dst_channels, dst_frames));
+      sp.reset(new CompositionConverter(converters.Pass()));
+    } else {
+      sp.reset(new DownmixConverter(src_channels, src_frames, dst_channels,
+                                    dst_frames));
+    }
+  } else if (src_channels < dst_channels) {
+    if (src_frames != dst_frames) {
+      ScopedVector<AudioConverter> converters;
+      converters.push_back(new ResampleConverter(src_channels, src_frames,
+                                                 src_channels, dst_frames));
+      converters.push_back(new UpmixConverter(src_channels, dst_frames,
+                                              dst_channels, dst_frames));
+      sp.reset(new CompositionConverter(converters.Pass()));
+    } else {
+      sp.reset(new UpmixConverter(src_channels, src_frames, dst_channels,
+                                  dst_frames));
+    }
+  } else if (src_frames != dst_frames) {
+    sp.reset(new ResampleConverter(src_channels, src_frames, dst_channels,
+                                   dst_frames));
+  } else {
+    sp.reset(new CopyConverter(src_channels, src_frames, dst_channels,
+                               dst_frames));
+  }
+
+  return sp.Pass();
+}
+
+// For CompositionConverter.
+AudioConverter::AudioConverter()
+    : src_channels_(0),
+      src_frames_(0),
+      dst_channels_(0),
+      dst_frames_(0) {}
+
+AudioConverter::AudioConverter(int src_channels, size_t src_frames,
+                               int dst_channels, size_t dst_frames)
+    : src_channels_(src_channels),
+      src_frames_(src_frames),
+      dst_channels_(dst_channels),
+      dst_frames_(dst_frames) {
+  RTC_CHECK(dst_channels == src_channels || dst_channels == 1 ||
+            src_channels == 1);
+}
+
+void AudioConverter::CheckSizes(size_t src_size, size_t dst_capacity) const {
+  RTC_CHECK_EQ(src_size, src_channels() * src_frames());
+  RTC_CHECK_GE(dst_capacity, dst_channels() * dst_frames());
+}
+
+}  // namespace webrtc
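A sketch of Create() picking a composition: stereo 10 ms at 48 kHz down to mono 10 ms at 16 kHz (the frame counts imply the sample rates, per the header comment):

    rtc::scoped_ptr<webrtc::AudioConverter> conv =
        webrtc::AudioConverter::Create(2, 480, 1, 160);
    // Internally: DownmixConverter (2 ch -> 1 ch) followed by
    // ResampleConverter (480 -> 160 frames). With src as 2 pointers to
    // 480 floats each and dst as 1 pointer to 160 floats:
    //   conv->Convert(src, 2 * 480, dst, 1 * 160);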
diff --git a/webrtc/common_audio/audio_converter.h b/webrtc/common_audio/audio_converter.h
new file mode 100644
index 0000000..7d1513b
--- /dev/null
+++ b/webrtc/common_audio/audio_converter.h
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_AUDIO_CONVERTER_H_
+#define WEBRTC_COMMON_AUDIO_AUDIO_CONVERTER_H_
+
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/base/scoped_ptr.h"
+
+namespace webrtc {
+
+// Format conversion (remixing and resampling) for audio. Only simple remixing
+// conversions are supported: downmix to mono (i.e. |dst_channels| == 1) or
+// upmix from mono (i.e. |src_channels| == 1).
+//
+// The source and destination chunks have the same duration in time; specifying
+// the number of frames is equivalent to specifying the sample rates.
+class AudioConverter {
+ public:
+  // Returns a new AudioConverter, which will use the supplied format for its
+  // lifetime. Caller is responsible for the memory.
+  static rtc::scoped_ptr<AudioConverter> Create(int src_channels,
+                                                size_t src_frames,
+                                                int dst_channels,
+                                                size_t dst_frames);
+  virtual ~AudioConverter() {};
+
+  // Convert |src|, containing |src_size| samples, to |dst|, having a sample
+  // capacity of |dst_capacity|. Both point to a series of buffers containing
+  // the samples for each channel. The sizes must correspond to the format
+  // passed to Create().
+  virtual void Convert(const float* const* src, size_t src_size,
+                       float* const* dst, size_t dst_capacity) = 0;
+
+  int src_channels() const { return src_channels_; }
+  size_t src_frames() const { return src_frames_; }
+  int dst_channels() const { return dst_channels_; }
+  size_t dst_frames() const { return dst_frames_; }
+
+ protected:
+  AudioConverter();
+  AudioConverter(int src_channels, size_t src_frames, int dst_channels,
+                 size_t dst_frames);
+
+  // Helper to RTC_CHECK that inputs are correctly sized.
+  void CheckSizes(size_t src_size, size_t dst_capacity) const;
+
+ private:
+  const int src_channels_;
+  const size_t src_frames_;
+  const int dst_channels_;
+  const size_t dst_frames_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(AudioConverter);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_AUDIO_CONVERTER_H_
diff --git a/webrtc/common_audio/audio_ring_buffer.cc b/webrtc/common_audio/audio_ring_buffer.cc
new file mode 100644
index 0000000..a29e53a
--- /dev/null
+++ b/webrtc/common_audio/audio_ring_buffer.cc
@@ -0,0 +1,75 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/audio_ring_buffer.h"
+
+#include "webrtc/base/checks.h"
+#include "webrtc/common_audio/ring_buffer.h"
+
+// This is a simple multi-channel wrapper over the ring_buffer.h C interface.
+
+namespace webrtc {
+
+AudioRingBuffer::AudioRingBuffer(size_t channels, size_t max_frames) {
+  buffers_.reserve(channels);
+  for (size_t i = 0; i < channels; ++i)
+    buffers_.push_back(WebRtc_CreateBuffer(max_frames, sizeof(float)));
+}
+
+AudioRingBuffer::~AudioRingBuffer() {
+  for (auto buf : buffers_)
+    WebRtc_FreeBuffer(buf);
+}
+
+void AudioRingBuffer::Write(const float* const* data, size_t channels,
+                            size_t frames) {
+  RTC_DCHECK_EQ(buffers_.size(), channels);
+  for (size_t i = 0; i < channels; ++i) {
+    const size_t written = WebRtc_WriteBuffer(buffers_[i], data[i], frames);
+    RTC_CHECK_EQ(written, frames);
+  }
+}
+
+void AudioRingBuffer::Read(float* const* data, size_t channels,
+                           size_t frames) {
+  RTC_DCHECK_EQ(buffers_.size(), channels);
+  for (size_t i = 0; i < channels; ++i) {
+    const size_t read =
+        WebRtc_ReadBuffer(buffers_[i], nullptr, data[i], frames);
+    RTC_CHECK_EQ(read, frames);
+  }
+}
+
+size_t AudioRingBuffer::ReadFramesAvailable() const {
+  // All buffers have the same amount available.
+  return WebRtc_available_read(buffers_[0]);
+}
+
+size_t AudioRingBuffer::WriteFramesAvailable() const {
+  // All buffers have the same amount available.
+  return WebRtc_available_write(buffers_[0]);
+}
+
+void AudioRingBuffer::MoveReadPositionForward(size_t frames) {
+  for (auto buf : buffers_) {
+    const size_t moved =
+        static_cast<size_t>(WebRtc_MoveReadPtr(buf, static_cast<int>(frames)));
+    RTC_CHECK_EQ(moved, frames);
+  }
+}
+
+void AudioRingBuffer::MoveReadPositionBackward(size_t frames) {
+  for (auto buf : buffers_) {
+    const size_t moved = static_cast<size_t>(
+        -WebRtc_MoveReadPtr(buf, -static_cast<int>(frames)));
+    RTC_CHECK_EQ(moved, frames);
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/audio_ring_buffer.h b/webrtc/common_audio/audio_ring_buffer.h
new file mode 100644
index 0000000..58e543a
--- /dev/null
+++ b/webrtc/common_audio/audio_ring_buffer.h
@@ -0,0 +1,56 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+#ifndef WEBRTC_COMMON_AUDIO_AUDIO_RING_BUFFER_H_
+#define WEBRTC_COMMON_AUDIO_AUDIO_RING_BUFFER_H_
+
+#include <stddef.h>
+#include <vector>
+
+struct RingBuffer;
+
+namespace webrtc {
+
+// A ring buffer tailored for float deinterleaved audio. Any operation that
+// cannot be performed as requested will cause a crash (e.g. insufficient data
+// in the buffer to fulfill a read request.)
+class AudioRingBuffer final {
+ public:
+  // Specify the number of channels and maximum number of frames the buffer
+  // will contain.
+  AudioRingBuffer(size_t channels, size_t max_frames);
+  ~AudioRingBuffer();
+
+  // Copies |data| to the buffer and advances the write pointer. |channels|
+  // must be the same as at creation time.
+  void Write(const float* const* data, size_t channels, size_t frames);
+
+  // Copies from the buffer to |data| and advances the read pointer. |channels|
+  // must be the same as at creation time.
+  void Read(float* const* data, size_t channels, size_t frames);
+
+  size_t ReadFramesAvailable() const;
+  size_t WriteFramesAvailable() const;
+
+  // Moves the read position. The forward version advances the read pointer
+  // towards the write pointer and the backward version withdraws the read
+  // pointer away from the write pointer (i.e. flushing and stuffing the buffer
+  // respectively.)
+  void MoveReadPositionForward(size_t frames);
+  void MoveReadPositionBackward(size_t frames);
+
+ private:
+  // We don't use a ScopedVector because it doesn't support a specialized
+  // deleter (like scoped_ptr for instance.)
+  std::vector<RingBuffer*> buffers_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_AUDIO_RING_BUFFER_H_
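A minimal fill-and-drain sketch; the channel count and sizes are arbitrary, and the caller owns the channel pointer arrays:

    #include "webrtc/common_audio/audio_ring_buffer.h"

    void FillAndDrain(const float* const* input, float* const* block) {
      webrtc::AudioRingBuffer rb(2, 1024);  // 2 channels, 1024-frame capacity.
      rb.Write(input, 2, 480);              // Deposit one 480-frame chunk.
      while (rb.ReadFramesAvailable() >= 256)
        rb.Read(block, 2, 256);             // Drain in 256-frame blocks.
    }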
diff --git a/webrtc/common_audio/blocker.cc b/webrtc/common_audio/blocker.cc new file mode 100644 index 0000000..0133550 --- /dev/null +++ b/webrtc/common_audio/blocker.cc @@ -0,0 +1,236 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/blocker.h" + +#include <string.h> + +#include "webrtc/base/checks.h" + +namespace { + +// Adds |a| and |b| frame by frame into |result| (basically matrix addition). +void AddFrames(const float* const* a, + size_t a_start_index, + const float* const* b, + int b_start_index, + size_t num_frames, + int num_channels, + float* const* result, + size_t result_start_index) { + for (int i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + result[i][j + result_start_index] = + a[i][j + a_start_index] + b[i][j + b_start_index]; + } + } +} + +// Copies |src| into |dst| channel by channel. +void CopyFrames(const float* const* src, + size_t src_start_index, + size_t num_frames, + int num_channels, + float* const* dst, + size_t dst_start_index) { + for (int i = 0; i < num_channels; ++i) { + memcpy(&dst[i][dst_start_index], + &src[i][src_start_index], + num_frames * sizeof(dst[i][dst_start_index])); + } +} + +// Moves |src| into |dst| channel by channel. +void MoveFrames(const float* const* src, + size_t src_start_index, + size_t num_frames, + int num_channels, + float* const* dst, + size_t dst_start_index) { + for (int i = 0; i < num_channels; ++i) { + memmove(&dst[i][dst_start_index], + &src[i][src_start_index], + num_frames * sizeof(dst[i][dst_start_index])); + } +} + +void ZeroOut(float* const* buffer, + size_t starting_idx, + size_t num_frames, + int num_channels) { + for (int i = 0; i < num_channels; ++i) { + memset(&buffer[i][starting_idx], 0, + num_frames * sizeof(buffer[i][starting_idx])); + } +} + +// Pointwise multiplies each channel of |frames| with |window|. Results are +// stored in |frames|. +void ApplyWindow(const float* window, + size_t num_frames, + int num_channels, + float* const* frames) { + for (int i = 0; i < num_channels; ++i) { + for (size_t j = 0; j < num_frames; ++j) { + frames[i][j] = frames[i][j] * window[j]; + } + } +} + +size_t gcd(size_t a, size_t b) { + size_t tmp; + while (b) { + tmp = a; + a = b; + b = tmp % b; + } + return a; +} + +} // namespace + +namespace webrtc { + +Blocker::Blocker(size_t chunk_size, + size_t block_size, + int num_input_channels, + int num_output_channels, + const float* window, + size_t shift_amount, + BlockerCallback* callback) + : chunk_size_(chunk_size), + block_size_(block_size), + num_input_channels_(num_input_channels), + num_output_channels_(num_output_channels), + initial_delay_(block_size_ - gcd(chunk_size, shift_amount)), + frame_offset_(0), + input_buffer_(num_input_channels_, chunk_size_ + initial_delay_), + output_buffer_(chunk_size_ + initial_delay_, num_output_channels_), + input_block_(block_size_, num_input_channels_), + output_block_(block_size_, num_output_channels_), + window_(new float[block_size_]), + shift_amount_(shift_amount), + callback_(callback) { + RTC_CHECK_LE(num_output_channels_, num_input_channels_); + RTC_CHECK_LE(shift_amount_, block_size_); + + memcpy(window_.get(), window, block_size_ * sizeof(*window_.get())); + input_buffer_.MoveReadPositionBackward(initial_delay_); +} + +// When block_size < chunk_size the input and output buffers look like this: +// +// delay* chunk_size chunk_size + delay* +// buffer: <-------------|---------------------|---------------|> +// _a_ _b_ _c_ +// +// On each call to ProcessChunk(): +// 1. New input gets read into sections _b_ and _c_ of the input buffer. +// 2. We block starting from frame_offset. +// 3. We block until we reach a block |bl| that doesn't contain any frames +// from sections _a_ or _b_ of the input buffer. +// 4. We window the current block, fire the callback for processing, window +// again, and overlap/add to the output buffer. +// 5. We copy sections _a_ and _b_ of the output buffer into output. +// 6. For both the input and the output buffers, we copy section _c_ into +// section _a_. +// 7. We set the new frame_offset to be the difference between the first frame +// of |bl| and the border between sections _b_ and _c_. +// +// When block_size > chunk_size the input and output buffers look like this: +// +// chunk_size delay* chunk_size + delay* +// buffer: <-------------|---------------------|---------------|> +// _a_ _b_ _c_ +// +// On each call to ProcessChunk(): +// The procedure is the same as above, except for: +// 1. New input gets read into section _c_ of the input buffer. +// 3. We block until we reach a block |bl| that doesn't contain any frames +// from section _a_ of the input buffer. +// 5. We copy section _a_ of the output buffer into output. +// 6. For both the input and the output buffers, we copy sections _b_ and _c_ +// into sections _a_ and _b_. +// 7. We set the new frame_offset to be the difference between the first frame +// of |bl| and the border between sections _a_ and _b_. +// +// * delay here refers to initial_delay_ +// +// TODO(claguna): Look at using ring buffers to eliminate some copies.
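To make the delay arithmetic above concrete, a worked sketch of the constructor's computation (the sizes are arbitrary examples, not values from the patch):

  // initial_delay_ = block_size_ - gcd(chunk_size, shift_amount).
  size_t chunk_size = 480, block_size = 256, shift_amount = 128;
  size_t initial_delay = block_size - gcd(chunk_size, shift_amount);
  // gcd(480, 128) == 32, so initial_delay == 224: the first 224 output
  // frames are silence while sections _a_ and _b_ fill up.

For the common Hann setup, where shift_amount == block_size / 2 and chunk_size is a multiple of shift_amount, gcd(chunk_size, shift_amount) == shift_amount and the delay reduces to block_size - shift_amount.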
+void Blocker::ProcessChunk(const float* const* input, + size_t chunk_size, + int num_input_channels, + int num_output_channels, + float* const* output) { + RTC_CHECK_EQ(chunk_size, chunk_size_); + RTC_CHECK_EQ(num_input_channels, num_input_channels_); + RTC_CHECK_EQ(num_output_channels, num_output_channels_); + + input_buffer_.Write(input, num_input_channels, chunk_size_); + size_t first_frame_in_block = frame_offset_; + + // Loop through blocks. + while (first_frame_in_block < chunk_size_) { + input_buffer_.Read(input_block_.channels(), num_input_channels, + block_size_); + input_buffer_.MoveReadPositionBackward(block_size_ - shift_amount_); + + ApplyWindow(window_.get(), + block_size_, + num_input_channels_, + input_block_.channels()); + callback_->ProcessBlock(input_block_.channels(), + block_size_, + num_input_channels_, + num_output_channels_, + output_block_.channels()); + ApplyWindow(window_.get(), + block_size_, + num_output_channels_, + output_block_.channels()); + + AddFrames(output_buffer_.channels(), + first_frame_in_block, + output_block_.channels(), + 0, + block_size_, + num_output_channels_, + output_buffer_.channels(), + first_frame_in_block); + + first_frame_in_block += shift_amount_; + } + + // Copy output buffer to output + CopyFrames(output_buffer_.channels(), + 0, + chunk_size_, + num_output_channels_, + output, + 0); + + // Copy output buffer [chunk_size_, chunk_size_ + initial_delay] + // to output buffer [0, initial_delay], zero the rest. + MoveFrames(output_buffer_.channels(), + chunk_size, + initial_delay_, + num_output_channels_, + output_buffer_.channels(), + 0); + ZeroOut(output_buffer_.channels(), + initial_delay_, + chunk_size_, + num_output_channels_); + + // Calculate new starting frames. + frame_offset_ = first_frame_in_block - chunk_size_; +} + +} // namespace webrtc diff --git a/webrtc/common_audio/blocker.h b/webrtc/common_audio/blocker.h new file mode 100644 index 0000000..025638a --- /dev/null +++ b/webrtc/common_audio/blocker.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_ +#define WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_ + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/audio_ring_buffer.h" +#include "webrtc/common_audio/channel_buffer.h" + +namespace webrtc { + +// The callback function to process audio in the time domain. Input has already +// been windowed, and output will be windowed. The number of input channels +// must be >= the number of output channels. +class BlockerCallback { + public: + virtual ~BlockerCallback() {} + + virtual void ProcessBlock(const float* const* input, + size_t num_frames, + int num_input_channels, + int num_output_channels, + float* const* output) = 0; +}; + +// The main purpose of Blocker is to abstract away the fact that often we +// receive a different number of audio frames than our transform takes. For +// example, most FFTs work best when the fft-size is a power of 2, but suppose +// we receive 20ms of audio at a sample rate of 48000. That comes to 960 frames +// of audio, which is not a power of 2. 
Blocker allows us to specify the +// transform and all other necessary processing via the Process() callback +// function without any constraints on the transform-size +// (read: |block_size_|) or received-audio-size (read: |chunk_size_|). +// We handle this for the multichannel audio case, allowing for different +// numbers of input and output channels (for example, beamforming takes 2 or +// more input channels and returns 1 output channel). Audio signals are +// represented as deinterleaved floats in the range [-1, 1]. +// +// Blocker is responsible for: +// - blocking audio while handling potential discontinuities on the edges +// of chunks +// - windowing blocks before sending them to Process() +// - windowing processed blocks, and overlap-adding them together before +// sending back a processed chunk +// +// To use Blocker: +// 1. Implement a BlockerCallback object |bc|. +// 2. Instantiate a Blocker object |b|, passing in |bc|. +// 3. As you receive audio, call b.ProcessChunk() to get processed audio. +// +// A small amount of delay is added to the first received chunk to deal with +// the difference in chunk/block sizes. This delay is <= chunk_size. +// +// Ownership of window is retained by the caller. That is, Blocker makes a +// copy of window and does not attempt to delete it. +class Blocker { + public: + Blocker(size_t chunk_size, + size_t block_size, + int num_input_channels, + int num_output_channels, + const float* window, + size_t shift_amount, + BlockerCallback* callback); + + void ProcessChunk(const float* const* input, + size_t chunk_size, + int num_input_channels, + int num_output_channels, + float* const* output); + + private: + const size_t chunk_size_; + const size_t block_size_; + const int num_input_channels_; + const int num_output_channels_; + + // The number of frames of delay to add at the beginning of the first chunk. + const size_t initial_delay_; + + // The frame index into the input buffer where the first block should be read + // from. This is necessary because shift_amount_ is not necessarily a + // multiple of chunk_size_, so blocks won't line up at the start of the + // buffer. + size_t frame_offset_; + + // Since blocks nearly always overlap, there are certain blocks that require + // frames from the end of one chunk and the beginning of the next chunk. The + // input and output buffers are responsible for saving those frames between + // calls to ProcessChunk(). + // + // Both contain |initial_delay_| + |chunk_size_| frames. The input is a fairly + // standard FIFO, but due to the overlap-add it's harder to use an + // AudioRingBuffer for the output. + AudioRingBuffer input_buffer_; + ChannelBuffer<float> output_buffer_; + + // Space for the input block (can't wrap because of windowing). + ChannelBuffer<float> input_block_; + + // Space for the output block (can't wrap because of overlap/add). + ChannelBuffer<float> output_block_; + + rtc::scoped_ptr<float[]> window_; + + // The number of frames between the start of consecutive blocks. For example, + // |shift_amount_| = |block_size_| / 2 for a Hann window. + size_t shift_amount_; + + BlockerCallback* callback_; +}; + +} // namespace webrtc + +#endif // WEBRTC_INTERNAL_BEAMFORMER_BLOCKER_H_
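As an illustration of the callback contract (a hypothetical example, not part of the patch), a fixed-gain BlockerCallback might look like:

  class GainCallback : public webrtc::BlockerCallback {
   public:
    void ProcessBlock(const float* const* input, size_t num_frames,
                      int num_input_channels, int num_output_channels,
                      float* const* output) override {
      // Blocker guarantees num_output_channels <= num_input_channels.
      for (int c = 0; c < num_output_channels; ++c)
        for (size_t i = 0; i < num_frames; ++i)
          output[c][i] = 0.5f * input[c][i];  // Fixed gain of roughly -6 dB.
    }
  };

A Blocker constructed with this callback and a matching window then turns each ProcessChunk() call into windowed, overlap-added block processing.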
diff --git a/webrtc/common_audio/channel_buffer.cc b/webrtc/common_audio/channel_buffer.cc new file mode 100644 index 0000000..d3dc7c0 --- /dev/null +++ b/webrtc/common_audio/channel_buffer.cc @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/channel_buffer.h" + +namespace webrtc { + +IFChannelBuffer::IFChannelBuffer(size_t num_frames, + int num_channels, + size_t num_bands) + : ivalid_(true), + ibuf_(num_frames, num_channels, num_bands), + fvalid_(true), + fbuf_(num_frames, num_channels, num_bands) {} + +ChannelBuffer<int16_t>* IFChannelBuffer::ibuf() { + RefreshI(); + fvalid_ = false; + return &ibuf_; +} + +ChannelBuffer<float>* IFChannelBuffer::fbuf() { + RefreshF(); + ivalid_ = false; + return &fbuf_; +} + +const ChannelBuffer<int16_t>* IFChannelBuffer::ibuf_const() const { + RefreshI(); + return &ibuf_; +} + +const ChannelBuffer<float>* IFChannelBuffer::fbuf_const() const { + RefreshF(); + return &fbuf_; +} + +void IFChannelBuffer::RefreshF() const { + if (!fvalid_) { + assert(ivalid_); + const int16_t* const* int_channels = ibuf_.channels(); + float* const* float_channels = fbuf_.channels(); + for (int i = 0; i < ibuf_.num_channels(); ++i) { + for (size_t j = 0; j < ibuf_.num_frames(); ++j) { + float_channels[i][j] = int_channels[i][j]; + } + } + fvalid_ = true; + } +} + +void IFChannelBuffer::RefreshI() const { + if (!ivalid_) { + assert(fvalid_); + int16_t* const* int_channels = ibuf_.channels(); + const float* const* float_channels = fbuf_.channels(); + for (int i = 0; i < ibuf_.num_channels(); ++i) { + FloatS16ToS16(float_channels[i], + ibuf_.num_frames(), + int_channels[i]); + } + ivalid_ = true; + } +} + +} // namespace webrtc diff --git a/webrtc/common_audio/channel_buffer.h b/webrtc/common_audio/channel_buffer.h new file mode 100644 index 0000000..6050090 --- /dev/null +++ b/webrtc/common_audio/channel_buffer.h @@ -0,0 +1,169 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_CHANNEL_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_CHANNEL_BUFFER_H_ + +#include <string.h> + +#include "webrtc/base/checks.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/include/audio_util.h" +#include "webrtc/test/testsupport/gtest_prod_util.h" + +namespace webrtc { + +// Helper to encapsulate a contiguous data buffer, full or split into frequency +// bands, with access to pointer arrays of the deinterleaved channels and +// bands. The buffer is zero-initialized at creation.
+// +// The buffer structure is shown below for a 2-channel, 2-band case: +// +// |data_|: +// { [ --- b1ch1 --- ] [ --- b2ch1 --- ] [ --- b1ch2 --- ] [ --- b2ch2 --- ] } +// +// The pointer arrays for the same example are as follows: +// +// |channels_|: +// { [ b1ch1* ] [ b1ch2* ] [ b2ch1* ] [ b2ch2* ] } +// +// |bands_|: +// { [ b1ch1* ] [ b2ch1* ] [ b1ch2* ] [ b2ch2* ] } +template <typename T> +class ChannelBuffer { + public: + ChannelBuffer(size_t num_frames, + int num_channels, + size_t num_bands = 1) + : data_(new T[num_frames * num_channels]()), + channels_(new T*[num_channels * num_bands]), + bands_(new T*[num_channels * num_bands]), + num_frames_(num_frames), + num_frames_per_band_(num_frames / num_bands), + num_channels_(num_channels), + num_bands_(num_bands) { + for (int i = 0; i < num_channels_; ++i) { + for (size_t j = 0; j < num_bands_; ++j) { + channels_[j * num_channels_ + i] = + &data_[i * num_frames_ + j * num_frames_per_band_]; + bands_[i * num_bands_ + j] = channels_[j * num_channels_ + i]; + } + } + } + + // Returns a pointer array to the full-band channels (or lower band channels). + // Usage: + // channels()[channel][sample]. + // Where: + // 0 <= channel < |num_channels_| + // 0 <= sample < |num_frames_| + T* const* channels() { return channels(0); } + const T* const* channels() const { return channels(0); } + + // Returns a pointer array to the channels for a specific band. + // Usage: + // channels(band)[channel][sample]. + // Where: + // 0 <= band < |num_bands_| + // 0 <= channel < |num_channels_| + // 0 <= sample < |num_frames_per_band_| + const T* const* channels(size_t band) const { + RTC_DCHECK_LT(band, num_bands_); + return &channels_[band * num_channels_]; + } + T* const* channels(size_t band) { + const ChannelBuffer* t = this; + return const_cast<T* const*>(t->channels(band)); + } + + // Returns a pointer array to the bands for a specific channel. + // Usage: + // bands(channel)[band][sample]. + // Where: + // 0 <= channel < |num_channels_| + // 0 <= band < |num_bands_| + // 0 <= sample < |num_frames_per_band_| + const T* const* bands(int channel) const { + RTC_DCHECK_LT(channel, num_channels_); + RTC_DCHECK_GE(channel, 0); + return &bands_[channel * num_bands_]; + } + T* const* bands(int channel) { + const ChannelBuffer* t = this; + return const_cast<T* const*>(t->bands(channel)); + } + + // Sets the |slice| pointers to the |start_frame| position for each channel. + // Returns |slice| for convenience. + const T* const* Slice(T** slice, size_t start_frame) const { + RTC_DCHECK_LT(start_frame, num_frames_); + for (int i = 0; i < num_channels_; ++i) + slice[i] = &channels_[i][start_frame]; + return slice; + } + T** Slice(T** slice, size_t start_frame) { + const ChannelBuffer* t = this; + return const_cast<T**>(t->Slice(slice, start_frame)); + } + + size_t num_frames() const { return num_frames_; } + size_t num_frames_per_band() const { return num_frames_per_band_; } + int num_channels() const { return num_channels_; } + size_t num_bands() const { return num_bands_; } + size_t size() const { return num_frames_ * num_channels_; } + + void SetDataForTesting(const T* data, size_t size) { + RTC_CHECK_EQ(size, this->size()); + memcpy(data_.get(), data, size * sizeof(*data)); + } + + private: + rtc::scoped_ptr<T[]> data_; + rtc::scoped_ptr<T*[]> channels_; + rtc::scoped_ptr<T*[]> bands_; + const size_t num_frames_; + const size_t num_frames_per_band_; + const int num_channels_; + const size_t num_bands_; +};
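A short sketch of how the two pointer arrays above are indexed (illustrative only; the sizes are arbitrary):

  // 160 frames, 2 channels, 2 bands (80 frames per band).
  webrtc::ChannelBuffer<float> buf(160, 2, 2);
  float* b1ch2 = buf.channels(0)[1];  // Band 1 (lower), channel 2.
  float* b2ch1 = buf.bands(0)[1];     // Channel 1, band 2 (upper).
  buf.channels()[0][0] = 0.25f;       // First sample of b1ch1.

Both accessors alias the same |data_| storage; they differ only in how the per-band channel pointers are grouped.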
+ +// One int16_t and one float ChannelBuffer that are kept in sync. The sync is +// broken when someone requests write access to either ChannelBuffer, and +// reestablished when someone requests the outdated ChannelBuffer. It is +// therefore safe to use the return value of ibuf_const() and fbuf_const() +// until the next call to ibuf() or fbuf(), and the return value of ibuf() and +// fbuf() until the next call to any of the other functions. +class IFChannelBuffer { + public: + IFChannelBuffer(size_t num_frames, int num_channels, size_t num_bands = 1); + + ChannelBuffer<int16_t>* ibuf(); + ChannelBuffer<float>* fbuf(); + const ChannelBuffer<int16_t>* ibuf_const() const; + const ChannelBuffer<float>* fbuf_const() const; + + size_t num_frames() const { return ibuf_.num_frames(); } + size_t num_frames_per_band() const { return ibuf_.num_frames_per_band(); } + int num_channels() const { return ibuf_.num_channels(); } + size_t num_bands() const { return ibuf_.num_bands(); } + + private: + void RefreshF() const; + void RefreshI() const; + + mutable bool ivalid_; + mutable ChannelBuffer<int16_t> ibuf_; + mutable bool fvalid_; + mutable ChannelBuffer<float> fbuf_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_CHANNEL_BUFFER_H_ diff --git a/webrtc/modules/audio_processing/utility/fft4g.c b/webrtc/common_audio/fft4g.c similarity index 90% rename from webrtc/modules/audio_processing/utility/fft4g.c rename to webrtc/common_audio/fft4g.c index 9a84368..9cf7b9f 100644 --- a/webrtc/modules/audio_processing/utility/fft4g.c +++ b/webrtc/common_audio/fft4g.c @@ -27,7 +27,7 @@ functions dfst: Sine Transform of RDFT (Real Anti-symmetric DFT) function prototypes void cdft(int, int, float *, int *, float *); - void rdft(int, int, float *, int *, float *); + void rdft(size_t, int, float *, size_t *, float *); void ddct(int, int, float *, int *, float *); void ddst(int, int, float *, int *, float *); void dfct(int, float *, float *, int *, float *); @@ -94,7 +94,7 @@ function prototypes ip[0] = 0; // first time only rdft(n, -1, a, ip, w); [parameters] - n :data length (int) + n :data length (size_t) n >= 2, n = power of 2 a[0...n-1] :input/output data (float *) @@ -107,7 +107,7 @@ function prototypes a[2*j] = R[j], 0<=j<n/2 a[2*j+1] = I[j], 0<j<n/2 a[1] = R[n/2] - ip[0...*] :work area for bit reversal (int *) + ip[0...*] :work area for bit reversal (size_t *) length of ip >= 2+sqrt(n/2) strictly, length of ip >= @@ -286,14 +286,29 @@ Appendix : w[] and ip[] are compatible with all routines. */ -void cdft(int n, int isgn, float *a, int *ip, float *w) -{ - void makewt(int nw, int *ip, float *w); - void bitrv2(int n, int *ip, float *a); - void bitrv2conj(int n, int *ip, float *a); - void cftfsub(int n, float *a, float *w); - void cftbsub(int n, float *a, float *w); +#include <stddef.h> +static void makewt(size_t nw, size_t *ip, float *w); +static void makect(size_t nc, size_t *ip, float *c); +static void bitrv2(size_t n, size_t *ip, float *a); +#if 0 // Not used. +static void bitrv2conj(int n, int *ip, float *a); +#endif +static void cftfsub(size_t n, float *a, float *w); +static void cftbsub(size_t n, float *a, float *w); +static void cft1st(size_t n, float *a, float *w); +static void cftmdl(size_t n, size_t l, float *a, float *w); +static void rftfsub(size_t n, float *a, size_t nc, float *c); +static void rftbsub(size_t n, float *a, size_t nc, float *c); +#if 0 // Not used. +static void dctsub(int n, float *a, int nc, float *c); +static void dstsub(int n, float *a, int nc, float *c); +#endif + + +#if 0 // Not used.
+void WebRtc_cdft(int n, int isgn, float *a, int *ip, float *w) +{ if (n > (ip[0] << 2)) { makewt(n >> 2, ip, w); } @@ -309,18 +324,12 @@ void cdft(int n, int isgn, float *a, int *ip, float *w) cftfsub(n, a, w); } } +#endif -void rdft(int n, int isgn, float *a, int *ip, float *w) +void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w) { - void makewt(int nw, int *ip, float *w); - void makect(int nc, int *ip, float *c); - void bitrv2(int n, int *ip, float *a); - void cftfsub(int n, float *a, float *w); - void cftbsub(int n, float *a, float *w); - void rftfsub(int n, float *a, int nc, float *c); - void rftbsub(int n, float *a, int nc, float *c); - int nw, nc; + size_t nw, nc; float xi; nw = ip[0]; @@ -357,17 +366,9 @@ void rdft(int n, int isgn, float *a, int *ip, float *w) } } - -void ddct(int n, int isgn, float *a, int *ip, float *w) +#if 0 // Not used. +static void ddct(int n, int isgn, float *a, int *ip, float *w) { - void makewt(int nw, int *ip, float *w); - void makect(int nc, int *ip, float *c); - void bitrv2(int n, int *ip, float *a); - void cftfsub(int n, float *a, float *w); - void cftbsub(int n, float *a, float *w); - void rftfsub(int n, float *a, int nc, float *c); - void rftbsub(int n, float *a, int nc, float *c); - void dctsub(int n, float *a, int nc, float *c); int j, nw, nc; float xr; @@ -417,16 +418,8 @@ void ddct(int n, int isgn, float *a, int *ip, float *w) } -void ddst(int n, int isgn, float *a, int *ip, float *w) +static void ddst(int n, int isgn, float *a, int *ip, float *w) { - void makewt(int nw, int *ip, float *w); - void makect(int nc, int *ip, float *c); - void bitrv2(int n, int *ip, float *a); - void cftfsub(int n, float *a, float *w); - void cftbsub(int n, float *a, float *w); - void rftfsub(int n, float *a, int nc, float *c); - void rftbsub(int n, float *a, int nc, float *c); - void dstsub(int n, float *a, int nc, float *c); int j, nw, nc; float xr; @@ -476,14 +469,8 @@ void ddst(int n, int isgn, float *a, int *ip, float *w) } -void dfct(int n, float *a, float *t, int *ip, float *w) +static void dfct(int n, float *a, float *t, int *ip, float *w) { - void makewt(int nw, int *ip, float *w); - void makect(int nc, int *ip, float *c); - void bitrv2(int n, int *ip, float *a); - void cftfsub(int n, float *a, float *w); - void rftfsub(int n, float *a, int nc, float *c); - void dctsub(int n, float *a, int nc, float *c); int j, k, l, m, mh, nw, nc; float xr, xi, yr, yi; @@ -571,15 +558,8 @@ void dfct(int n, float *a, float *t, int *ip, float *w) } } - -void dfst(int n, float *a, float *t, int *ip, float *w) +static void dfst(int n, float *a, float *t, int *ip, float *w) { - void makewt(int nw, int *ip, float *w); - void makect(int nc, int *ip, float *c); - void bitrv2(int n, int *ip, float *a); - void cftfsub(int n, float *a, float *w); - void rftfsub(int n, float *a, int nc, float *c); - void dstsub(int n, float *a, int nc, float *c); int j, k, l, m, mh, nw, nc; float xr, xi, yr, yi; @@ -657,6 +637,7 @@ void dfst(int n, float *a, float *t, int *ip, float *w) } a[0] = 0; } +#endif // Not used. 
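For reference, a minimal sketch of driving the renamed transform (illustrative only; the 128-point size is arbitrary):

  const size_t n = 128;          // Must be a power of 2.
  float a[128];                  // In/out: time samples, then packed spectrum.
  size_t ip[16] = {0};           // ip[0] == 0 requests table initialization;
                                 // length must be >= 2 + sqrt(n / 2).
  float w[64];                   // Cos/sin work table (n / 2 floats).
  // ... fill a[] with samples ...
  WebRtc_rdft(n, 1, a, ip, w);   // Forward: a[2k] = Re, a[2k+1] = Im, a[1] = R[n/2].
  WebRtc_rdft(n, -1, a, ip, w);  // Inverse; unscaled, so the caller multiplies by
                                 // 2.0f / n (as real_fourier_ooura.cc does below).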
/* -------- initializing routines -------- */ @@ -664,17 +645,16 @@ void dfst(int n, float *a, float *t, int *ip, float *w) #include -void makewt(int nw, int *ip, float *w) +static void makewt(size_t nw, size_t *ip, float *w) { - void bitrv2(int n, int *ip, float *a); - int j, nwh; + size_t j, nwh; float delta, x, y; ip[0] = nw; ip[1] = 1; if (nw > 2) { nwh = nw >> 1; - delta = (float)atan(1.0f) / nwh; + delta = atanf(1.0f) / nwh; w[0] = 1; w[1] = 0; w[nwh] = (float)cos(delta * nwh); @@ -694,15 +674,15 @@ void makewt(int nw, int *ip, float *w) } -void makect(int nc, int *ip, float *c) +static void makect(size_t nc, size_t *ip, float *c) { - int j, nch; + size_t j, nch; float delta; ip[1] = nc; if (nc > 1) { nch = nc >> 1; - delta = (float)atan(1.0f) / nch; + delta = atanf(1.0f) / nch; c[0] = (float)cos(delta * nch); c[nch] = 0.5f * c[0]; for (j = 1; j < nch; j++) { @@ -716,9 +696,9 @@ void makect(int nc, int *ip, float *c) /* -------- child routines -------- */ -void bitrv2(int n, int *ip, float *a) +static void bitrv2(size_t n, size_t *ip, float *a) { - int j, j1, k, k1, l, m, m2; + size_t j, j1, k, k1, l, m, m2; float xr, xi, yr, yi; ip[0] = 0; @@ -815,8 +795,8 @@ void bitrv2(int n, int *ip, float *a) } } - -void bitrv2conj(int n, int *ip, float *a) +#if 0 // Not used. +static void bitrv2conj(int n, int *ip, float *a) { int j, j1, k, k1, l, m, m2; float xr, xi, yr, yi; @@ -923,13 +903,11 @@ void bitrv2conj(int n, int *ip, float *a) } } } +#endif - -void cftfsub(int n, float *a, float *w) +static void cftfsub(size_t n, float *a, float *w) { - void cft1st(int n, float *a, float *w); - void cftmdl(int n, int l, float *a, float *w); - int j, j1, j2, j3, l; + size_t j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; @@ -977,11 +955,9 @@ void cftfsub(int n, float *a, float *w) } -void cftbsub(int n, float *a, float *w) +static void cftbsub(size_t n, float *a, float *w) { - void cft1st(int n, float *a, float *w); - void cftmdl(int n, int l, float *a, float *w); - int j, j1, j2, j3, l; + size_t j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; l = 2; @@ -1029,9 +1005,9 @@ void cftbsub(int n, float *a, float *w) } -void cft1st(int n, float *a, float *w) +static void cft1st(size_t n, float *a, float *w) { - int j, k1, k2; + size_t j, k1, k2; float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; @@ -1134,9 +1110,9 @@ void cft1st(int n, float *a, float *w) } -void cftmdl(int n, int l, float *a, float *w) +static void cftmdl(size_t n, size_t l, float *a, float *w) { - int j, j1, j2, j3, k, k1, k2, m, m2; + size_t j, j1, j2, j3, k, k1, k2, m, m2; float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; @@ -1261,9 +1237,9 @@ void cftmdl(int n, int l, float *a, float *w) } -void rftfsub(int n, float *a, int nc, float *c) +static void rftfsub(size_t n, float *a, size_t nc, float *c) { - int j, k, kk, ks, m; + size_t j, k, kk, ks, m; float wkr, wki, xr, xi, yr, yi; m = n >> 1; @@ -1286,9 +1262,9 @@ void rftfsub(int n, float *a, int nc, float *c) } -void rftbsub(int n, float *a, int nc, float *c) +static void rftbsub(size_t n, float *a, size_t nc, float *c) { - int j, k, kk, ks, m; + size_t j, k, kk, ks, m; float wkr, wki, xr, xi, yr, yi; a[1] = -a[1]; @@ -1312,8 +1288,8 @@ void rftbsub(int n, float *a, int nc, float *c) a[m + 1] = -a[m + 1]; } - -void dctsub(int n, float *a, int nc, float *c) +#if 0 // Not used. 
+static void dctsub(int n, float *a, int nc, float *c) { int j, k, kk, ks, m; float wkr, wki, xr; @@ -1334,7 +1310,7 @@ void dctsub(int n, float *a, int nc, float *c) } -void dstsub(int n, float *a, int nc, float *c) +static void dstsub(int n, float *a, int nc, float *c) { int j, k, kk, ks, m; float wkr, wki, xr; @@ -1353,4 +1329,4 @@ void dstsub(int n, float *a, int nc, float *c) } a[m] *= c[0]; } - +#endif // Not used. diff --git a/webrtc/modules/audio_processing/utility/fft4g.h b/webrtc/common_audio/fft4g.h similarity index 58% rename from webrtc/modules/audio_processing/utility/fft4g.h rename to webrtc/common_audio/fft4g.h index 373ff14..6dd792f 100644 --- a/webrtc/modules/audio_processing/utility/fft4g.h +++ b/webrtc/common_audio/fft4g.h @@ -8,11 +8,18 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_FFT4G_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_FFT4G_H_ - -void rdft(int, int, float *, int *, float *); -void cdft(int, int, float *, int *, float *); +#ifndef WEBRTC_COMMON_AUDIO_FFT4G_H_ +#define WEBRTC_COMMON_AUDIO_FFT4G_H_ +#if defined(__cplusplus) +extern "C" { #endif +// Refer to fft4g.c for documentation. +void WebRtc_rdft(size_t n, int isgn, float *a, size_t *ip, float *w); + +#if defined(__cplusplus) +} +#endif + +#endif // WEBRTC_COMMON_AUDIO_FFT4G_H_ diff --git a/webrtc/common_audio/fir_filter.cc b/webrtc/common_audio/fir_filter.cc new file mode 100644 index 0000000..1c5548d --- /dev/null +++ b/webrtc/common_audio/fir_filter.cc @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/fir_filter.h" + +#include +#include + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/fir_filter_neon.h" +#include "webrtc/common_audio/fir_filter_sse.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" + +namespace webrtc { + +class FIRFilterC : public FIRFilter { + public: + FIRFilterC(const float* coefficients, + size_t coefficients_length); + + void Filter(const float* in, size_t length, float* out) override; + + private: + size_t coefficients_length_; + size_t state_length_; + rtc::scoped_ptr coefficients_; + rtc::scoped_ptr state_; +}; + +FIRFilter* FIRFilter::Create(const float* coefficients, + size_t coefficients_length, + size_t max_input_length) { + if (!coefficients || coefficients_length <= 0 || max_input_length <= 0) { + assert(false); + return NULL; + } + + FIRFilter* filter = NULL; +// If we know the minimum architecture at compile time, avoid CPU detection. +#if defined(WEBRTC_ARCH_X86_FAMILY) +#if defined(__SSE2__) + filter = + new FIRFilterSSE2(coefficients, coefficients_length, max_input_length); +#else + // x86 CPU detection required. 
+ if (WebRtc_GetCPUInfo(kSSE2)) { + filter = + new FIRFilterSSE2(coefficients, coefficients_length, max_input_length); + } else { + filter = new FIRFilterC(coefficients, coefficients_length); + } +#endif +#elif defined(WEBRTC_HAS_NEON) + filter = + new FIRFilterNEON(coefficients, coefficients_length, max_input_length); +#elif defined(WEBRTC_DETECT_NEON) + if (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) { + filter = + new FIRFilterNEON(coefficients, coefficients_length, max_input_length); + } else { + filter = new FIRFilterC(coefficients, coefficients_length); + } +#else + filter = new FIRFilterC(coefficients, coefficients_length); +#endif + + return filter; +} + +FIRFilterC::FIRFilterC(const float* coefficients, size_t coefficients_length) + : coefficients_length_(coefficients_length), + state_length_(coefficients_length - 1), + coefficients_(new float[coefficients_length_]), + state_(new float[state_length_]) { + for (size_t i = 0; i < coefficients_length_; ++i) { + coefficients_[i] = coefficients[coefficients_length_ - i - 1]; + } + memset(state_.get(), 0, state_length_ * sizeof(state_[0])); +} + +void FIRFilterC::Filter(const float* in, size_t length, float* out) { + assert(length > 0); + + // Convolves the input signal |in| with the filter kernel |coefficients_| + // taking into account the previous state. + for (size_t i = 0; i < length; ++i) { + out[i] = 0.f; + size_t j; + for (j = 0; state_length_ > i && j < state_length_ - i; ++j) { + out[i] += state_[i + j] * coefficients_[j]; + } + for (; j < coefficients_length_; ++j) { + out[i] += in[j + i - state_length_] * coefficients_[j]; + } + } + + // Update current state. + if (length >= state_length_) { + memcpy( + state_.get(), &in[length - state_length_], state_length_ * sizeof(*in)); + } else { + memmove(state_.get(), + &state_[length], + (state_length_ - length) * sizeof(state_[0])); + memcpy(&state_[state_length_ - length], in, length * sizeof(*in)); + } +} + +} // namespace webrtc diff --git a/webrtc/common_audio/fir_filter.h b/webrtc/common_audio/fir_filter.h new file mode 100644 index 0000000..a5dc6ec --- /dev/null +++ b/webrtc/common_audio/fir_filter.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_AUDIO_FIR_FILTER_H_ +#define WEBRTC_COMMON_AUDIO_FIR_FILTER_H_ + +#include + +namespace webrtc { + +// Finite Impulse Response filter using floating-point arithmetic. +class FIRFilter { + public: + // Creates a filter with the given coefficients. All initial state values will + // be zeros. + // The length of the chunks fed to the filter should never be greater than + // |max_input_length|. This is needed because, when vectorizing it is + // necessary to concatenate the input after the state, and resizing this array + // dynamically is expensive. + static FIRFilter* Create(const float* coefficients, + size_t coefficients_length, + size_t max_input_length); + + virtual ~FIRFilter() {} + + // Filters the |in| data supplied. + // |out| must be previously allocated and it must be at least of |length|. 
+ virtual void Filter(const float* in, size_t length, float* out) = 0; +}; + +} // namespace webrtc + +#endif // WEBRTC_COMMON_AUDIO_FIR_FILTER_H_ diff --git a/webrtc/common_audio/fir_filter_neon.cc b/webrtc/common_audio/fir_filter_neon.cc new file mode 100644 index 0000000..97a75db --- /dev/null +++ b/webrtc/common_audio/fir_filter_neon.cc @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/fir_filter_neon.h" + +#include +#include +#include + +#include "webrtc/system_wrappers/interface/aligned_malloc.h" + +namespace webrtc { + +FIRFilterNEON::FIRFilterNEON(const float* coefficients, + size_t coefficients_length, + size_t max_input_length) + : // Closest higher multiple of four. + coefficients_length_((coefficients_length + 3) & ~0x03), + state_length_(coefficients_length_ - 1), + coefficients_(static_cast( + AlignedMalloc(sizeof(float) * coefficients_length_, 16))), + state_(static_cast( + AlignedMalloc(sizeof(float) * (max_input_length + state_length_), + 16))) { + // Add zeros at the end of the coefficients. + size_t padding = coefficients_length_ - coefficients_length; + memset(coefficients_.get(), 0.f, padding * sizeof(coefficients_[0])); + // The coefficients are reversed to compensate for the order in which the + // input samples are acquired (most recent last). + for (size_t i = 0; i < coefficients_length; ++i) { + coefficients_[i + padding] = coefficients[coefficients_length - i - 1]; + } + memset(state_.get(), + 0.f, + (max_input_length + state_length_) * sizeof(state_[0])); +} + +void FIRFilterNEON::Filter(const float* in, size_t length, float* out) { + assert(length > 0); + + memcpy(&state_[state_length_], in, length * sizeof(*in)); + + // Convolves the input signal |in| with the filter kernel |coefficients_| + // taking into account the previous state. + for (size_t i = 0; i < length; ++i) { + float* in_ptr = &state_[i]; + float* coef_ptr = coefficients_.get(); + + float32x4_t m_sum = vmovq_n_f32(0); + float32x4_t m_in; + + for (size_t j = 0; j < coefficients_length_; j += 4) { + m_in = vld1q_f32(in_ptr + j); + m_sum = vmlaq_f32(m_sum, m_in, vld1q_f32(coef_ptr + j)); + } + + float32x2_t m_half = vadd_f32(vget_high_f32(m_sum), vget_low_f32(m_sum)); + out[i] = vget_lane_f32(vpadd_f32(m_half, m_half), 0); + } + + // Update current state. + memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0])); +} + +} // namespace webrtc diff --git a/webrtc/common_audio/fir_filter_neon.h b/webrtc/common_audio/fir_filter_neon.h new file mode 100644 index 0000000..d7399ad --- /dev/null +++ b/webrtc/common_audio/fir_filter_neon.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_COMMON_AUDIO_FIR_FILTER_NEON_H_ +#define WEBRTC_COMMON_AUDIO_FIR_FILTER_NEON_H_ + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/fir_filter.h" +#include "webrtc/system_wrappers/interface/aligned_malloc.h" + +namespace webrtc { + +class FIRFilterNEON : public FIRFilter { + public: + FIRFilterNEON(const float* coefficients, + size_t coefficients_length, + size_t max_input_length); + + void Filter(const float* in, size_t length, float* out) override; + + private: + size_t coefficients_length_; + size_t state_length_; + rtc::scoped_ptr coefficients_; + rtc::scoped_ptr state_; +}; + +} // namespace webrtc + +#endif // WEBRTC_COMMON_AUDIO_FIR_FILTER_NEON_H_ diff --git a/webrtc/common_audio/fir_filter_sse.cc b/webrtc/common_audio/fir_filter_sse.cc new file mode 100644 index 0000000..6e7ae70 --- /dev/null +++ b/webrtc/common_audio/fir_filter_sse.cc @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/fir_filter_sse.h" + +#include +#include +#include + +#include "webrtc/system_wrappers/interface/aligned_malloc.h" + +namespace webrtc { + +FIRFilterSSE2::FIRFilterSSE2(const float* coefficients, + size_t coefficients_length, + size_t max_input_length) + : // Closest higher multiple of four. + coefficients_length_((coefficients_length + 3) & ~0x03), + state_length_(coefficients_length_ - 1), + coefficients_(static_cast( + AlignedMalloc(sizeof(float) * coefficients_length_, 16))), + state_(static_cast( + AlignedMalloc(sizeof(float) * (max_input_length + state_length_), + 16))) { + // Add zeros at the end of the coefficients. + size_t padding = coefficients_length_ - coefficients_length; + memset(coefficients_.get(), 0, padding * sizeof(coefficients_[0])); + // The coefficients are reversed to compensate for the order in which the + // input samples are acquired (most recent last). + for (size_t i = 0; i < coefficients_length; ++i) { + coefficients_[i + padding] = coefficients[coefficients_length - i - 1]; + } + memset(state_.get(), + 0, + (max_input_length + state_length_) * sizeof(state_[0])); +} + +void FIRFilterSSE2::Filter(const float* in, size_t length, float* out) { + assert(length > 0); + + memcpy(&state_[state_length_], in, length * sizeof(*in)); + + // Convolves the input signal |in| with the filter kernel |coefficients_| + // taking into account the previous state. + for (size_t i = 0; i < length; ++i) { + float* in_ptr = &state_[i]; + float* coef_ptr = coefficients_.get(); + + __m128 m_sum = _mm_setzero_ps(); + __m128 m_in; + + // Depending on if the pointer is aligned with 16 bytes or not it is loaded + // differently. 
+ if (reinterpret_cast<uintptr_t>(in_ptr) & 0x0F) { + for (size_t j = 0; j < coefficients_length_; j += 4) { + m_in = _mm_loadu_ps(in_ptr + j); + m_sum = _mm_add_ps(m_sum, _mm_mul_ps(m_in, _mm_load_ps(coef_ptr + j))); + } + } else { + for (size_t j = 0; j < coefficients_length_; j += 4) { + m_in = _mm_load_ps(in_ptr + j); + m_sum = _mm_add_ps(m_sum, _mm_mul_ps(m_in, _mm_load_ps(coef_ptr + j))); + } + } + m_sum = _mm_add_ps(_mm_movehl_ps(m_sum, m_sum), m_sum); + _mm_store_ss(out + i, _mm_add_ss(m_sum, _mm_shuffle_ps(m_sum, m_sum, 1))); + } + + // Update current state. + memmove(state_.get(), &state_[length], state_length_ * sizeof(state_[0])); +} + +} // namespace webrtc diff --git a/webrtc/common_audio/fir_filter_sse.h b/webrtc/common_audio/fir_filter_sse.h new file mode 100644 index 0000000..d396831 --- /dev/null +++ b/webrtc/common_audio/fir_filter_sse.h @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_AUDIO_FIR_FILTER_SSE_H_ +#define WEBRTC_COMMON_AUDIO_FIR_FILTER_SSE_H_ + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/fir_filter.h" +#include "webrtc/system_wrappers/interface/aligned_malloc.h" + +namespace webrtc { + +class FIRFilterSSE2 : public FIRFilter { + public: + FIRFilterSSE2(const float* coefficients, + size_t coefficients_length, + size_t max_input_length); + + void Filter(const float* in, size_t length, float* out) override; + + private: + size_t coefficients_length_; + size_t state_length_; + rtc::scoped_ptr<float[], AlignedFreeDeleter> coefficients_; + rtc::scoped_ptr<float[], AlignedFreeDeleter> state_; +}; + +} // namespace webrtc + +#endif // WEBRTC_COMMON_AUDIO_FIR_FILTER_SSE_H_ diff --git a/webrtc/common_audio/include/audio_util.h b/webrtc/common_audio/include/audio_util.h new file mode 100644 index 0000000..2c0028c --- /dev/null +++ b/webrtc/common_audio/include/audio_util.h @@ -0,0 +1,188 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ +#define WEBRTC_COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ + +#include <algorithm> +#include <limits> + +#include "webrtc/base/checks.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +typedef std::numeric_limits<int16_t> limits_int16; + +// The conversion functions use the following naming convention: +// S16: int16_t [-32768, 32767] +// Float: float [-1.0, 1.0] +// FloatS16: float [-32768.0, 32767.0] +static inline int16_t FloatToS16(float v) { + if (v > 0) + return v >= 1 ? limits_int16::max() + : static_cast<int16_t>(v * limits_int16::max() + 0.5f); + return v <= -1 ? limits_int16::min() + : static_cast<int16_t>(-v * limits_int16::min() - 0.5f); +} + +static inline float S16ToFloat(int16_t v) { + static const float kMaxInt16Inverse = 1.f / limits_int16::max(); + static const float kMinInt16Inverse = 1.f / limits_int16::min(); + return v * (v > 0 ?
kMaxInt16Inverse : -kMinInt16Inverse); +} + +static inline int16_t FloatS16ToS16(float v) { + static const float kMaxRound = limits_int16::max() - 0.5f; + static const float kMinRound = limits_int16::min() + 0.5f; + if (v > 0) + return v >= kMaxRound ? limits_int16::max() + : static_cast<int16_t>(v + 0.5f); + return v <= kMinRound ? limits_int16::min() : static_cast<int16_t>(v - 0.5f); +} + +static inline float FloatToFloatS16(float v) { + return v * (v > 0 ? limits_int16::max() : -limits_int16::min()); +} + +static inline float FloatS16ToFloat(float v) { + static const float kMaxInt16Inverse = 1.f / limits_int16::max(); + static const float kMinInt16Inverse = 1.f / limits_int16::min(); + return v * (v > 0 ? kMaxInt16Inverse : -kMinInt16Inverse); +} + +void FloatToS16(const float* src, size_t size, int16_t* dest); +void S16ToFloat(const int16_t* src, size_t size, float* dest); +void FloatS16ToS16(const float* src, size_t size, int16_t* dest); +void FloatToFloatS16(const float* src, size_t size, float* dest); +void FloatS16ToFloat(const float* src, size_t size, float* dest); + +// Copy audio from |src| channels to |dest| channels unless |src| and |dest| +// point to the same address. |src| and |dest| must have the same number of +// channels, and there must be sufficient space allocated in |dest|. +template <typename T> +void CopyAudioIfNeeded(const T* const* src, + int num_frames, + int num_channels, + T* const* dest) { + for (int i = 0; i < num_channels; ++i) { + if (src[i] != dest[i]) { + std::copy(src[i], src[i] + num_frames, dest[i]); + } + } +} + +// Deinterleave audio from |interleaved| to the channel buffers pointed to +// by |deinterleaved|. There must be sufficient space allocated in the +// |deinterleaved| buffers (|num_channels| buffers with |samples_per_channel| +// per buffer). +template <typename T> +void Deinterleave(const T* interleaved, + size_t samples_per_channel, + int num_channels, + T* const* deinterleaved) { + for (int i = 0; i < num_channels; ++i) { + T* channel = deinterleaved[i]; + int interleaved_idx = i; + for (size_t j = 0; j < samples_per_channel; ++j) { + channel[j] = interleaved[interleaved_idx]; + interleaved_idx += num_channels; + } + } +} + +// Interleave audio from the channel buffers pointed to by |deinterleaved| to +// |interleaved|. There must be sufficient space allocated in |interleaved| +// (|samples_per_channel| * |num_channels|). +template <typename T> +void Interleave(const T* const* deinterleaved, + size_t samples_per_channel, + int num_channels, + T* interleaved) { + for (int i = 0; i < num_channels; ++i) { + const T* channel = deinterleaved[i]; + int interleaved_idx = i; + for (size_t j = 0; j < samples_per_channel; ++j) { + interleaved[interleaved_idx] = channel[j]; + interleaved_idx += num_channels; + } + } +}
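A quick sketch combining the two helpers above (illustrative only):

  int16_t stereo[8] = {0, 100, 1, 101, 2, 102, 3, 103};  // L/R interleaved.
  int16_t left[4], right[4];
  int16_t* channels[] = {left, right};
  webrtc::Deinterleave(stereo, 4, 2, channels);  // left  == {0, 1, 2, 3},
                                                 // right == {100, 101, 102, 103}.
  webrtc::Interleave(channels, 4, 2, stereo);    // Restores the original layout.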
+ +// Copies audio from a single channel buffer pointed to by |mono| to each +// channel of |interleaved|. There must be sufficient space allocated in +// |interleaved| (|num_frames| * |num_channels|). +template <typename T> +void UpmixMonoToInterleaved(const T* mono, + int num_frames, + int num_channels, + T* interleaved) { + int interleaved_idx = 0; + for (int i = 0; i < num_frames; ++i) { + for (int j = 0; j < num_channels; ++j) { + interleaved[interleaved_idx++] = mono[i]; + } + } +} + +template <typename T, typename Intermediate> +void DownmixToMono(const T* const* input_channels, + size_t num_frames, + int num_channels, + T* out) { + for (size_t i = 0; i < num_frames; ++i) { + Intermediate value = input_channels[0][i]; + for (int j = 1; j < num_channels; ++j) { + value += input_channels[j][i]; + } + out[i] = value / num_channels; + } +} + +// Downmixes an interleaved multichannel signal to a single channel by averaging +// all channels. +template <typename T, typename Intermediate> +void DownmixInterleavedToMonoImpl(const T* interleaved, + size_t num_frames, + int num_channels, + T* deinterleaved) { + RTC_DCHECK_GT(num_channels, 0); + RTC_DCHECK_GT(num_frames, 0u); + + const T* const end = interleaved + num_frames * num_channels; + + while (interleaved < end) { + const T* const frame_end = interleaved + num_channels; + + Intermediate value = *interleaved++; + while (interleaved < frame_end) { + value += *interleaved++; + } + + *deinterleaved++ = value / num_channels; + } +} + +template <typename T> +void DownmixInterleavedToMono(const T* interleaved, + size_t num_frames, + int num_channels, + T* deinterleaved); + +template <> +void DownmixInterleavedToMono<int16_t>(const int16_t* interleaved, + size_t num_frames, + int num_channels, + int16_t* deinterleaved); + +} // namespace webrtc + +#endif // WEBRTC_COMMON_AUDIO_INCLUDE_AUDIO_UTIL_H_ diff --git a/webrtc/common_audio/lapped_transform.cc b/webrtc/common_audio/lapped_transform.cc new file mode 100644 index 0000000..c01f1d9 --- /dev/null +++ b/webrtc/common_audio/lapped_transform.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
+ */ + +#include "webrtc/common_audio/lapped_transform.h" + +#include +#include +#include + +#include "webrtc/base/checks.h" +#include "webrtc/common_audio/real_fourier.h" + +namespace webrtc { + +void LappedTransform::BlockThunk::ProcessBlock(const float* const* input, + size_t num_frames, + int num_input_channels, + int num_output_channels, + float* const* output) { + RTC_CHECK_EQ(num_input_channels, parent_->num_in_channels_); + RTC_CHECK_EQ(num_output_channels, parent_->num_out_channels_); + RTC_CHECK_EQ(parent_->block_length_, num_frames); + + for (int i = 0; i < num_input_channels; ++i) { + memcpy(parent_->real_buf_.Row(i), input[i], + num_frames * sizeof(*input[0])); + parent_->fft_->Forward(parent_->real_buf_.Row(i), + parent_->cplx_pre_.Row(i)); + } + + size_t block_length = RealFourier::ComplexLength( + RealFourier::FftOrder(num_frames)); + RTC_CHECK_EQ(parent_->cplx_length_, block_length); + parent_->block_processor_->ProcessAudioBlock(parent_->cplx_pre_.Array(), + num_input_channels, + parent_->cplx_length_, + num_output_channels, + parent_->cplx_post_.Array()); + + for (int i = 0; i < num_output_channels; ++i) { + parent_->fft_->Inverse(parent_->cplx_post_.Row(i), + parent_->real_buf_.Row(i)); + memcpy(output[i], parent_->real_buf_.Row(i), + num_frames * sizeof(*input[0])); + } +} + +LappedTransform::LappedTransform(int num_in_channels, + int num_out_channels, + size_t chunk_length, + const float* window, + size_t block_length, + size_t shift_amount, + Callback* callback) + : blocker_callback_(this), + num_in_channels_(num_in_channels), + num_out_channels_(num_out_channels), + block_length_(block_length), + chunk_length_(chunk_length), + block_processor_(callback), + blocker_(chunk_length_, + block_length_, + num_in_channels_, + num_out_channels_, + window, + shift_amount, + &blocker_callback_), + fft_(RealFourier::Create(RealFourier::FftOrder(block_length_))), + cplx_length_(RealFourier::ComplexLength(fft_->order())), + real_buf_(num_in_channels, + block_length_, + RealFourier::kFftBufferAlignment), + cplx_pre_(num_in_channels, + cplx_length_, + RealFourier::kFftBufferAlignment), + cplx_post_(num_out_channels, + cplx_length_, + RealFourier::kFftBufferAlignment) { + RTC_CHECK(num_in_channels_ > 0 && num_out_channels_ > 0); + RTC_CHECK_GT(block_length_, 0u); + RTC_CHECK_GT(chunk_length_, 0u); + RTC_CHECK(block_processor_); + + // block_length_ power of 2? + RTC_CHECK_EQ(0u, block_length_ & (block_length_ - 1)); +} + +void LappedTransform::ProcessChunk(const float* const* in_chunk, + float* const* out_chunk) { + blocker_.ProcessChunk(in_chunk, chunk_length_, num_in_channels_, + num_out_channels_, out_chunk); +} + +} // namespace webrtc diff --git a/webrtc/common_audio/lapped_transform.h b/webrtc/common_audio/lapped_transform.h new file mode 100644 index 0000000..75af186 --- /dev/null +++ b/webrtc/common_audio/lapped_transform.h @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_COMMON_AUDIO_LAPPED_TRANSFORM_H_ +#define WEBRTC_COMMON_AUDIO_LAPPED_TRANSFORM_H_ + +#include <complex> + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/blocker.h" +#include "webrtc/common_audio/real_fourier.h" +#include "webrtc/system_wrappers/interface/aligned_array.h" + +namespace webrtc { + +// Helper class for audio processing modules which operate on frequency domain +// input derived from the windowed time domain audio stream. +// +// The input audio chunk is sliced into possibly overlapping blocks, multiplied +// by a window and transformed with an FFT implementation. The transformed data +// is supplied to the given callback for processing. The processed output is +// then inverse transformed into the time domain and spliced back into a chunk +// which constitutes the final output of this processing module. +class LappedTransform { + public: + class Callback { + public: + virtual ~Callback() {} + + virtual void ProcessAudioBlock(const std::complex<float>* const* in_block, + int num_in_channels, size_t frames, + int num_out_channels, + std::complex<float>* const* out_block) = 0; + }; + + // Construct a transform instance. |chunk_length| is the number of samples in + // each channel. |window| defines the window, owned by the caller (a copy is + // made internally); |window| should have length equal to |block_length|. + // |block_length| defines the length of a block, in samples. + // |shift_amount| is in samples. |callback| is the caller-owned audio + // processing function called for each block of the input chunk. + LappedTransform(int num_in_channels, + int num_out_channels, + size_t chunk_length, + const float* window, + size_t block_length, + size_t shift_amount, + Callback* callback); + ~LappedTransform() {} + + // Main audio processing helper method. Internally slices |in_chunk| into + // blocks, transforms them to frequency domain, calls the callback for each + // block and returns a de-blocked time domain chunk of audio through + // |out_chunk|. Both buffers are caller-owned. + void ProcessChunk(const float* const* in_chunk, float* const* out_chunk); + + // Get the chunk length. + // + // The chunk length is the number of samples per channel that must be passed + // to ProcessChunk via the parameter in_chunk. + // + // Returns the same chunk_length passed to the LappedTransform constructor. + size_t chunk_length() const { return chunk_length_; } + + // Get the number of input channels. + // + // This is the number of arrays that must be passed to ProcessChunk via + // in_chunk. + // + // Returns the same num_in_channels passed to the LappedTransform constructor. + int num_in_channels() const { return num_in_channels_; } + + // Get the number of output channels. + // + // This is the number of arrays that must be passed to ProcessChunk via + // out_chunk. + // + // Returns the same num_out_channels passed to the LappedTransform + // constructor. + int num_out_channels() const { return num_out_channels_; } + + private: + // Internal middleware callback, given to the blocker. Transforms each block + // and hands it over to the processing method given at construction time.
+ class BlockThunk : public BlockerCallback { + public: + explicit BlockThunk(LappedTransform* parent) : parent_(parent) {} + + virtual void ProcessBlock(const float* const* input, size_t num_frames, + int num_input_channels, int num_output_channels, + float* const* output); + + private: + LappedTransform* const parent_; + } blocker_callback_; + + const int num_in_channels_; + const int num_out_channels_; + + const size_t block_length_; + const size_t chunk_length_; + + Callback* const block_processor_; + Blocker blocker_; + + rtc::scoped_ptr<RealFourier> fft_; + const size_t cplx_length_; + AlignedArray<float> real_buf_; + AlignedArray<std::complex<float> > cplx_pre_; + AlignedArray<std::complex<float> > cplx_post_; +}; + +} // namespace webrtc + +#endif // WEBRTC_COMMON_AUDIO_LAPPED_TRANSFORM_H_ + diff --git a/webrtc/common_audio/real_fourier.cc b/webrtc/common_audio/real_fourier.cc new file mode 100644 index 0000000..fef3c60 --- /dev/null +++ b/webrtc/common_audio/real_fourier.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/common_audio/real_fourier.h" + +#include "webrtc/base/checks.h" +#include "webrtc/common_audio/real_fourier_ooura.h" +#include "webrtc/common_audio/real_fourier_openmax.h" +#include "webrtc/common_audio/signal_processing/include/spl_inl.h" + +namespace webrtc { + +using std::complex; + +const int RealFourier::kFftBufferAlignment = 32; + +rtc::scoped_ptr<RealFourier> RealFourier::Create(int fft_order) { +#if defined(RTC_USE_OPENMAX_DL) + return rtc::scoped_ptr<RealFourier>(new RealFourierOpenmax(fft_order)); +#else + return rtc::scoped_ptr<RealFourier>(new RealFourierOoura(fft_order)); +#endif +} + +int RealFourier::FftOrder(size_t length) { + RTC_CHECK_GT(length, 0U); + return WebRtcSpl_GetSizeInBits(static_cast<uint32_t>(length - 1)); +} + +size_t RealFourier::FftLength(int order) { + RTC_CHECK_GE(order, 0); + return static_cast<size_t>(1 << order); +} + +size_t RealFourier::ComplexLength(int order) { + return FftLength(order) / 2 + 1; +} + +RealFourier::fft_real_scoper RealFourier::AllocRealBuffer(int count) { + return fft_real_scoper(static_cast<float*>( + AlignedMalloc(sizeof(float) * count, kFftBufferAlignment))); +} + +RealFourier::fft_cplx_scoper RealFourier::AllocCplxBuffer(int count) { + return fft_cplx_scoper(static_cast<complex<float>*>( + AlignedMalloc(sizeof(complex<float>) * count, kFftBufferAlignment))); +} + +} // namespace webrtc + diff --git a/webrtc/common_audio/real_fourier.h b/webrtc/common_audio/real_fourier.h new file mode 100644 index 0000000..ef4fec8 --- /dev/null +++ b/webrtc/common_audio/real_fourier.h @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
diff --git a/webrtc/common_audio/real_fourier.h b/webrtc/common_audio/real_fourier.h
new file mode 100644
index 0000000..ef4fec8
--- /dev/null
+++ b/webrtc/common_audio/real_fourier.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_REAL_FOURIER_H_
+#define WEBRTC_COMMON_AUDIO_REAL_FOURIER_H_
+
+#include <complex>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/system_wrappers/interface/aligned_malloc.h"
+
+// Uniform interface class for the real DFT and its inverse, for power-of-2
+// input lengths. Also contains helper functions for buffer allocation, taking
+// care of any memory alignment requirements the underlying library might have.
+
+namespace webrtc {
+
+class RealFourier {
+ public:
+  // Shorthand typenames for the scopers used by the buffer allocation helpers.
+  typedef rtc::scoped_ptr<float[], AlignedFreeDeleter> fft_real_scoper;
+  typedef rtc::scoped_ptr<std::complex<float>[], AlignedFreeDeleter>
+      fft_cplx_scoper;
+
+  // The alignment required for all input and output buffers, in bytes.
+  static const int kFftBufferAlignment;
+
+  // Construct a wrapper instance for the given input order, which must be
+  // between 1 and kMaxFftOrder, inclusively.
+  static rtc::scoped_ptr<RealFourier> Create(int fft_order);
+  virtual ~RealFourier() {};
+
+  // Helper to compute the smallest FFT order (a power of 2) which will contain
+  // the given input length.
+  static int FftOrder(size_t length);
+
+  // Helper to compute the input length from the FFT order.
+  static size_t FftLength(int order);
+
+  // Helper to compute the exact length, in complex floats, of the transform
+  // output (i.e. |2^order / 2 + 1|).
+  static size_t ComplexLength(int order);
+
+  // Buffer allocation helpers. The buffers are large enough to hold |count|
+  // floats/complexes and suitably aligned for use by the implementation.
+  // The returned scopers are set up with proper deleters; the caller owns
+  // the allocated memory.
+  static fft_real_scoper AllocRealBuffer(int count);
+  static fft_cplx_scoper AllocCplxBuffer(int count);
+
+  // Main forward transform interface. The output array need only be big
+  // enough for |2^order / 2 + 1| elements - the conjugate pairs are not
+  // returned. Input and output must be properly aligned (e.g. through
+  // AllocRealBuffer and AllocCplxBuffer) and input length must be
+  // |2^order| (same as given at construction time).
+  virtual void Forward(const float* src, std::complex<float>* dest) const = 0;
+
+  // Inverse transform. Same input format as output above, conjugate pairs
+  // not needed.
+  virtual void Inverse(const std::complex<float>* src, float* dest) const = 0;
+
+  virtual int order() const = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_REAL_FOURIER_H_
+
diff --git a/webrtc/common_audio/real_fourier_ooura.cc b/webrtc/common_audio/real_fourier_ooura.cc
new file mode 100644
index 0000000..8cd4c86
--- /dev/null
+++ b/webrtc/common_audio/real_fourier_ooura.cc
@@ -0,0 +1,85 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/real_fourier_ooura.h"
+
+#include <algorithm>
+#include <cmath>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/common_audio/fft4g.h"
+
+namespace webrtc {
+
+using std::complex;
+
+namespace {
+
+void Conjugate(complex<float>* array, size_t complex_length) {
+  std::for_each(array, array + complex_length,
+                [=](complex<float>& v) { v = std::conj(v); });
+}
+
+size_t ComputeWorkIpSize(size_t fft_length) {
+  return static_cast<size_t>(2 + std::ceil(std::sqrt(
+      static_cast<float>(fft_length))));
+}
+
+}  // namespace
+
+RealFourierOoura::RealFourierOoura(int fft_order)
+    : order_(fft_order),
+      length_(FftLength(order_)),
+      complex_length_(ComplexLength(order_)),
+      // Zero-initializing work_ip_ will cause rdft to initialize these work
+      // arrays on the first call.
+      work_ip_(new size_t[ComputeWorkIpSize(length_)]()),
+      work_w_(new float[complex_length_]()) {
+  RTC_CHECK_GE(fft_order, 1);
+}
+
+void RealFourierOoura::Forward(const float* src, complex<float>* dest) const {
+  {
+    // This cast is well-defined since C++11. See "Non-static data members" at:
+    // http://en.cppreference.com/w/cpp/numeric/complex
+    auto dest_float = reinterpret_cast<float*>(dest);
+    std::copy(src, src + length_, dest_float);
+    WebRtc_rdft(length_, 1, dest_float, work_ip_.get(), work_w_.get());
+  }
+
+  // Ooura places real[n/2] in imag[0].
+  dest[complex_length_ - 1] = complex<float>(dest[0].imag(), 0.0f);
+  dest[0] = complex<float>(dest[0].real(), 0.0f);
+  // Ooura returns the conjugate of the usual Fourier definition.
+  Conjugate(dest, complex_length_);
+}
+
+void RealFourierOoura::Inverse(const complex<float>* src, float* dest) const {
+  {
+    auto dest_complex = reinterpret_cast<complex<float>*>(dest);
+    // The real output array is shorter than the input complex array by one
+    // complex element.
+    const size_t dest_complex_length = complex_length_ - 1;
+    std::copy(src, src + dest_complex_length, dest_complex);
+    // Restore Ooura's conjugate definition.
+    Conjugate(dest_complex, dest_complex_length);
+    // Restore real[n/2] to imag[0].
+    dest_complex[0] = complex<float>(dest_complex[0].real(),
+                                     src[complex_length_ - 1].real());
+  }
+
+  WebRtc_rdft(length_, -1, dest, work_ip_.get(), work_w_.get());
+
+  // Ooura returns a scaled version.
+  const float scale = 2.0f / length_;
+  std::for_each(dest, dest + length_, [scale](float& v) { v *= scale; });
+}
+
+}  // namespace webrtc
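Given the definitions above, Forward() followed by Inverse() should reproduce the input, since Inverse() already applies Ooura's 2/length normalization. A sketch (illustrative only, not part of the patch; assumes the round trip is an identity, which matches the fft4g documentation):

  rtc::scoped_ptr<webrtc::RealFourier> fft(webrtc::RealFourier::Create(4));
  webrtc::RealFourier::fft_real_scoper in =
      webrtc::RealFourier::AllocRealBuffer(16);
  webrtc::RealFourier::fft_cplx_scoper freq =
      webrtc::RealFourier::AllocCplxBuffer(9);
  webrtc::RealFourier::fft_real_scoper out =
      webrtc::RealFourier::AllocRealBuffer(16);
  // ... fill in.get()[0..15] ...
  fft->Forward(in.get(), freq.get());   // 9 == ComplexLength(4) bins
  fft->Inverse(freq.get(), out.get());  // out ~= in, up to float rounding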
diff --git a/webrtc/common_audio/real_fourier_ooura.h b/webrtc/common_audio/real_fourier_ooura.h
new file mode 100644
index 0000000..8d094bf
--- /dev/null
+++ b/webrtc/common_audio/real_fourier_ooura.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_REAL_FOURIER_OOURA_H_
+#define WEBRTC_COMMON_AUDIO_REAL_FOURIER_OOURA_H_
+
+#include <complex>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/common_audio/real_fourier.h"
+
+namespace webrtc {
+
+class RealFourierOoura : public RealFourier {
+ public:
+  explicit RealFourierOoura(int fft_order);
+
+  void Forward(const float* src, std::complex<float>* dest) const override;
+  void Inverse(const std::complex<float>* src, float* dest) const override;
+
+  int order() const override {
+    return order_;
+  }
+
+ private:
+  const int order_;
+  const size_t length_;
+  const size_t complex_length_;
+  // These are work arrays for Ooura. The names are based on the comments in
+  // fft4g.c.
+  const rtc::scoped_ptr<size_t[]> work_ip_;
+  const rtc::scoped_ptr<float[]> work_w_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_REAL_FOURIER_OOURA_H_
+
diff --git a/webrtc/common_audio/real_fourier_openmax.h b/webrtc/common_audio/real_fourier_openmax.h
new file mode 100644
index 0000000..63ce5ba
--- /dev/null
+++ b/webrtc/common_audio/real_fourier_openmax.h
@@ -0,0 +1,44 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_REAL_FOURIER_OPENMAX_H_
+#define WEBRTC_COMMON_AUDIO_REAL_FOURIER_OPENMAX_H_
+
+#include <complex>
+
+#include "webrtc/common_audio/real_fourier.h"
+
+namespace webrtc {
+
+class RealFourierOpenmax : public RealFourier {
+ public:
+  explicit RealFourierOpenmax(int fft_order);
+  ~RealFourierOpenmax() override;
+
+  void Forward(const float* src, std::complex<float>* dest) const override;
+  void Inverse(const std::complex<float>* src, float* dest) const override;
+
+  int order() const override {
+    return order_;
+  }
+
+ private:
+  // Basically a forward declare of OMXFFTSpec_R_F32. To get rid of the
+  // dependency on openmax.
+  typedef void OMXFFTSpec_R_F32_;
+  const int order_;
+
+  OMXFFTSpec_R_F32_* const omx_spec_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_REAL_FOURIER_OPENMAX_H_
+
diff --git a/webrtc/common_audio/resampler/include/push_resampler.h b/webrtc/common_audio/resampler/include/push_resampler.h
new file mode 100644
index 0000000..b5c0003
--- /dev/null
+++ b/webrtc/common_audio/resampler/include/push_resampler.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_RESAMPLER_INCLUDE_PUSH_RESAMPLER_H_
+#define WEBRTC_COMMON_AUDIO_RESAMPLER_INCLUDE_PUSH_RESAMPLER_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class PushSincResampler;
+
+// Wraps PushSincResampler to provide stereo support.
+// TODO(ajm): add support for an arbitrary number of channels.
+template <typename T>
+class PushResampler {
+ public:
+  PushResampler();
+  virtual ~PushResampler();
+
+  // Must be called whenever the parameters change. Free to be called at any
+  // time as it is a no-op if parameters have not changed since the last call.
+  int InitializeIfNeeded(int src_sample_rate_hz, int dst_sample_rate_hz,
+                         int num_channels);
+
+  // Returns the total number of samples provided in destination (e.g. 32 kHz,
+  // 2 channel audio gives 640 samples).
+  int Resample(const T* src, size_t src_length, T* dst, size_t dst_capacity);
+
+ private:
+  rtc::scoped_ptr<PushSincResampler> sinc_resampler_;
+  rtc::scoped_ptr<PushSincResampler> sinc_resampler_right_;
+  int src_sample_rate_hz_;
+  int dst_sample_rate_hz_;
+  int num_channels_;
+  rtc::scoped_ptr<T[]> src_left_;
+  rtc::scoped_ptr<T[]> src_right_;
+  rtc::scoped_ptr<T[]> dst_left_;
+  rtc::scoped_ptr<T[]> dst_right_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_RESAMPLER_INCLUDE_PUSH_RESAMPLER_H_
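A usage sketch for the class above (illustrative only, not part of the patch), using the 10 ms sizing described in its comments, for stereo 48 kHz down to 32 kHz:

  webrtc::PushResampler<int16_t> resampler;
  resampler.InitializeIfNeeded(48000, 32000, 2);  // returns 0 on success
  int16_t src[960] = {0};  // 10 ms: 480 frames x 2 channels, interleaved
  int16_t dst[640];        // 10 ms: 320 frames x 2 channels
  int produced = resampler.Resample(src, 960, dst, 640);
  // produced == 640, matching the "32 kHz, 2 channel" example above.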
diff --git a/webrtc/common_audio/resampler/include/resampler.h b/webrtc/common_audio/resampler/include/resampler.h
new file mode 100644
index 0000000..0d4c1af
--- /dev/null
+++ b/webrtc/common_audio/resampler/include/resampler.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * A wrapper for resampling a large number of sampling combinations.
+ */
+
+#ifndef WEBRTC_RESAMPLER_RESAMPLER_H_
+#define WEBRTC_RESAMPLER_RESAMPLER_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// All methods return 0 on success and -1 on failure.
+class Resampler
+{
+
+public:
+    Resampler();
+    Resampler(int inFreq, int outFreq, int num_channels);
+    ~Resampler();
+
+    // Reset all states
+    int Reset(int inFreq, int outFreq, int num_channels);
+
+    // Reset all states if any parameter has changed
+    int ResetIfNeeded(int inFreq, int outFreq, int num_channels);
+
+    // Resample samplesIn to samplesOut.
+    int Push(const int16_t* samplesIn, size_t lengthIn, int16_t* samplesOut,
+             size_t maxLen, size_t &outLen);
+
+private:
+    enum ResamplerMode
+    {
+        kResamplerMode1To1,
+        kResamplerMode1To2,
+        kResamplerMode1To3,
+        kResamplerMode1To4,
+        kResamplerMode1To6,
+        kResamplerMode1To12,
+        kResamplerMode2To3,
+        kResamplerMode2To11,
+        kResamplerMode4To11,
+        kResamplerMode8To11,
+        kResamplerMode11To16,
+        kResamplerMode11To32,
+        kResamplerMode2To1,
+        kResamplerMode3To1,
+        kResamplerMode4To1,
+        kResamplerMode6To1,
+        kResamplerMode12To1,
+        kResamplerMode3To2,
+        kResamplerMode11To2,
+        kResamplerMode11To4,
+        kResamplerMode11To8
+    };
+
+    // Generic pointers since we don't know what states we'll need
+    void* state1_;
+    void* state2_;
+    void* state3_;
+
+    // Storage if needed
+    int16_t* in_buffer_;
+    int16_t* out_buffer_;
+    size_t in_buffer_size_;
+    size_t out_buffer_size_;
+    size_t in_buffer_size_max_;
+    size_t out_buffer_size_max_;
+
+    int my_in_frequency_khz_;
+    int my_out_frequency_khz_;
+    ResamplerMode my_mode_;
+    int num_channels_;
+
+    // Extra instance for stereo
+    Resampler* slave_left_;
+    Resampler* slave_right_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_RESAMPLER_RESAMPLER_H_
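For the fixed-point wrapper above, a usage sketch (illustrative only, not part of the patch). Note that several modes only accept fixed block multiples; 48 kHz to 16 kHz is the 3:1 mode, which works on multiples of 480 samples:

  webrtc::Resampler resampler(48000, 16000, 1);  // mono 48 kHz -> 16 kHz
  int16_t in[480] = {0};  // one 10 ms block
  int16_t out[160];
  size_t out_len = 0;
  int err = resampler.Push(in, 480, out, 160, out_len);
  // err == 0 and out_len == 160 on success.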
diff --git a/webrtc/common_audio/resampler/push_resampler.cc b/webrtc/common_audio/resampler/push_resampler.cc
new file mode 100644
index 0000000..566acde
--- /dev/null
+++ b/webrtc/common_audio/resampler/push_resampler.cc
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/resampler/include/push_resampler.h"
+
+#include <string.h>
+
+#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/common_audio/resampler/include/resampler.h"
+#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
+
+namespace webrtc {
+
+template <typename T>
+PushResampler<T>::PushResampler()
+    : src_sample_rate_hz_(0),
+      dst_sample_rate_hz_(0),
+      num_channels_(0) {
+}
+
+template <typename T>
+PushResampler<T>::~PushResampler() {
+}
+
+template <typename T>
+int PushResampler<T>::InitializeIfNeeded(int src_sample_rate_hz,
+                                         int dst_sample_rate_hz,
+                                         int num_channels) {
+  if (src_sample_rate_hz == src_sample_rate_hz_ &&
+      dst_sample_rate_hz == dst_sample_rate_hz_ &&
+      num_channels == num_channels_)
+    // No-op if settings haven't changed.
+    return 0;
+
+  if (src_sample_rate_hz <= 0 || dst_sample_rate_hz <= 0 ||
+      num_channels <= 0 || num_channels > 2)
+    return -1;
+
+  src_sample_rate_hz_ = src_sample_rate_hz;
+  dst_sample_rate_hz_ = dst_sample_rate_hz;
+  num_channels_ = num_channels;
+
+  const size_t src_size_10ms_mono =
+      static_cast<size_t>(src_sample_rate_hz / 100);
+  const size_t dst_size_10ms_mono =
+      static_cast<size_t>(dst_sample_rate_hz / 100);
+  sinc_resampler_.reset(new PushSincResampler(src_size_10ms_mono,
+                                              dst_size_10ms_mono));
+  if (num_channels_ == 2) {
+    src_left_.reset(new T[src_size_10ms_mono]);
+    src_right_.reset(new T[src_size_10ms_mono]);
+    dst_left_.reset(new T[dst_size_10ms_mono]);
+    dst_right_.reset(new T[dst_size_10ms_mono]);
+    sinc_resampler_right_.reset(new PushSincResampler(src_size_10ms_mono,
+                                                      dst_size_10ms_mono));
+  }
+
+  return 0;
+}
+
+template <typename T>
+int PushResampler<T>::Resample(const T* src, size_t src_length, T* dst,
+                               size_t dst_capacity) {
+  const size_t src_size_10ms =
+      static_cast<size_t>(src_sample_rate_hz_ * num_channels_ / 100);
+  const size_t dst_size_10ms =
+      static_cast<size_t>(dst_sample_rate_hz_ * num_channels_ / 100);
+  if (src_length != src_size_10ms || dst_capacity < dst_size_10ms)
+    return -1;
+
+  if (src_sample_rate_hz_ == dst_sample_rate_hz_) {
+    // The old resampler provides this memcpy facility in the case of matching
+    // sample rates, so reproduce it here for the sinc resampler.
+    memcpy(dst, src, src_length * sizeof(T));
+    return static_cast<int>(src_length);
+  }
+  if (num_channels_ == 2) {
+    const size_t src_length_mono = src_length / num_channels_;
+    const size_t dst_capacity_mono = dst_capacity / num_channels_;
+    T* deinterleaved[] = {src_left_.get(), src_right_.get()};
+    Deinterleave(src, src_length_mono, num_channels_, deinterleaved);
+
+    size_t dst_length_mono =
+        sinc_resampler_->Resample(src_left_.get(), src_length_mono,
+                                  dst_left_.get(), dst_capacity_mono);
+    sinc_resampler_right_->Resample(src_right_.get(), src_length_mono,
+                                    dst_right_.get(), dst_capacity_mono);
+
+    deinterleaved[0] = dst_left_.get();
+    deinterleaved[1] = dst_right_.get();
+    Interleave(deinterleaved, dst_length_mono, num_channels_, dst);
+    return static_cast<int>(dst_length_mono * num_channels_);
+  } else {
+    return static_cast<int>(
+        sinc_resampler_->Resample(src, src_length, dst, dst_capacity));
+  }
+}
+
+// Explicitly generate required instantiations.
+template class PushResampler<float>;
+template class PushResampler<int16_t>;
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/resampler/push_sinc_resampler.cc b/webrtc/common_audio/resampler/push_sinc_resampler.cc
new file mode 100644
index 0000000..a740423
--- /dev/null
+++ b/webrtc/common_audio/resampler/push_sinc_resampler.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
+
+#include <cstring>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/common_audio/include/audio_util.h"
+
+namespace webrtc {
+
+PushSincResampler::PushSincResampler(size_t source_frames,
+                                     size_t destination_frames)
+    : resampler_(new SincResampler(source_frames * 1.0 / destination_frames,
+                                   source_frames,
+                                   this)),
+      source_ptr_(nullptr),
+      source_ptr_int_(nullptr),
+      destination_frames_(destination_frames),
+      first_pass_(true),
+      source_available_(0) {}
+
+PushSincResampler::~PushSincResampler() {
+}
+
+size_t PushSincResampler::Resample(const int16_t* source,
+                                   size_t source_length,
+                                   int16_t* destination,
+                                   size_t destination_capacity) {
+  if (!float_buffer_.get())
+    float_buffer_.reset(new float[destination_frames_]);
+
+  source_ptr_int_ = source;
+  // Pass nullptr as the float source to have Run() read from the int16
+  // source.
+  Resample(nullptr, source_length, float_buffer_.get(), destination_frames_);
+  FloatS16ToS16(float_buffer_.get(), destination_frames_, destination);
+  source_ptr_int_ = nullptr;
+  return destination_frames_;
+}
+
+size_t PushSincResampler::Resample(const float* source,
+                                   size_t source_length,
+                                   float* destination,
+                                   size_t destination_capacity) {
+  RTC_CHECK_EQ(source_length, resampler_->request_frames());
+  RTC_CHECK_GE(destination_capacity, destination_frames_);
+  // Cache the source pointer. Calling Resample() will immediately trigger
+  // the Run() callback whereupon we provide the cached value.
+  source_ptr_ = source;
+  source_available_ = source_length;
+
+  // On the first pass, we call Resample() twice. During the first call, we
+  // provide dummy input and discard the output. This is done to prime the
+  // SincResampler buffer with the correct delay (half the kernel size),
+  // thereby ensuring that all later Resample() calls will only result in one
+  // input request through Run().
+  //
+  // If this wasn't done, SincResampler would call Run() twice on the first
+  // pass, and we'd have to introduce an entire |source_frames| of delay,
+  // rather than the minimum half kernel.
+  //
+  // It works out that ChunkSize() is exactly the amount of output we need to
+  // request in order to prime the buffer with a single Run() request for
+  // |source_frames|.
+  if (first_pass_)
+    resampler_->Resample(resampler_->ChunkSize(), destination);
+
+  resampler_->Resample(destination_frames_, destination);
+  source_ptr_ = nullptr;
+  return destination_frames_;
+}
+
+void PushSincResampler::Run(size_t frames, float* destination) {
+  // Ensure we are only asked for the available samples. This would fail if
+  // Run() was triggered more than once per Resample() call.
+  RTC_CHECK_EQ(source_available_, frames);
+
+  if (first_pass_) {
+    // Provide dummy input on the first pass, the output of which will be
+    // discarded, as described in Resample().
+    std::memset(destination, 0, frames * sizeof(*destination));
+    first_pass_ = false;
+    return;
+  }
+
+  if (source_ptr_) {
+    std::memcpy(destination, source_ptr_, frames * sizeof(*destination));
+  } else {
+    for (size_t i = 0; i < frames; ++i)
+      destination[i] = static_cast<float>(source_ptr_int_[i]);
+  }
+  source_available_ -= frames;
+}
+
+}  // namespace webrtc
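A worked number for the priming logic above (illustrative): with kKernelSize == 32, the residual delay is half a kernel, i.e. 16 input samples. At a 48 kHz source rate that is 16 / 48000, roughly 0.33 ms, which is exactly what AlgorithmicDelaySeconds(48000) in the header below evaluates to.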
diff --git a/webrtc/common_audio/resampler/push_sinc_resampler.h b/webrtc/common_audio/resampler/push_sinc_resampler.h
new file mode 100644
index 0000000..cefc62a
--- /dev/null
+++ b/webrtc/common_audio/resampler/push_sinc_resampler.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
+#define WEBRTC_COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
+
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/common_audio/resampler/sinc_resampler.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// A thin wrapper over SincResampler to provide a push-based interface as
+// required by WebRTC. SincResampler uses a pull-based interface, and will
+// use SincResamplerCallback::Run() to request data upon a call to Resample().
+// These Run() calls will happen on the same thread Resample() is called on.
+class PushSincResampler : public SincResamplerCallback {
+ public:
+  // Provide the size of the source and destination blocks in samples. These
+  // must correspond to the same time duration (typically 10 ms) as the sample
+  // ratio is inferred from them.
+  PushSincResampler(size_t source_frames, size_t destination_frames);
+  ~PushSincResampler() override;
+
+  // Perform the resampling. |source_frames| must always equal the
+  // |source_frames| provided at construction. |destination_capacity| must be
+  // at least as large as |destination_frames|. Returns the number of samples
+  // provided in destination (for convenience, since this will always be equal
+  // to |destination_frames|).
+  size_t Resample(const int16_t* source, size_t source_frames,
+                  int16_t* destination, size_t destination_capacity);
+  size_t Resample(const float* source,
+                  size_t source_frames,
+                  float* destination,
+                  size_t destination_capacity);
+
+  // Delay due to the filter kernel. Essentially, the time after which an
+  // input sample will appear in the resampled output.
+  static float AlgorithmicDelaySeconds(int source_rate_hz) {
+    return 1.f / source_rate_hz * SincResampler::kKernelSize / 2;
+  }
+
+ protected:
+  // Implements SincResamplerCallback.
+  void Run(size_t frames, float* destination) override;
+
+ private:
+  friend class PushSincResamplerTest;
+  SincResampler* get_resampler_for_testing() { return resampler_.get(); }
+
+  rtc::scoped_ptr<SincResampler> resampler_;
+  rtc::scoped_ptr<float[]> float_buffer_;
+  const float* source_ptr_;
+  const int16_t* source_ptr_int_;
+  const size_t destination_frames_;
+
+  // True on the first call to Resample(), to prime the SincResampler buffer.
+  bool first_pass_;
+
+  // Used to assert we are only requested for as much data as is available.
+  size_t source_available_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(PushSincResampler);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_RESAMPLER_PUSH_SINC_RESAMPLER_H_
diff --git a/webrtc/common_audio/resampler/resampler.cc b/webrtc/common_audio/resampler/resampler.cc
new file mode 100644
index 0000000..c9e7a1f
--- /dev/null
+++ b/webrtc/common_audio/resampler/resampler.cc
@@ -0,0 +1,959 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+
+/*
+ * A wrapper for resampling a large number of sampling combinations.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/common_audio/resampler/include/resampler.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+
+namespace webrtc {
+
+Resampler::Resampler()
+    : state1_(nullptr),
+      state2_(nullptr),
+      state3_(nullptr),
+      in_buffer_(nullptr),
+      out_buffer_(nullptr),
+      in_buffer_size_(0),
+      out_buffer_size_(0),
+      in_buffer_size_max_(0),
+      out_buffer_size_max_(0),
+      my_in_frequency_khz_(0),
+      my_out_frequency_khz_(0),
+      my_mode_(kResamplerMode1To1),
+      num_channels_(0),
+      slave_left_(nullptr),
+      slave_right_(nullptr) {
+}
+
+Resampler::Resampler(int inFreq, int outFreq, int num_channels)
+    : Resampler() {
+  Reset(inFreq, outFreq, num_channels);
+}
+
+Resampler::~Resampler()
+{
+    if (state1_)
+    {
+        free(state1_);
+    }
+    if (state2_)
+    {
+        free(state2_);
+    }
+    if (state3_)
+    {
+        free(state3_);
+    }
+    if (in_buffer_)
+    {
+        free(in_buffer_);
+    }
+    if (out_buffer_)
+    {
+        free(out_buffer_);
+    }
+    if (slave_left_)
+    {
+        delete slave_left_;
+    }
+    if (slave_right_)
+    {
+        delete slave_right_;
+    }
+}
+
+int Resampler::ResetIfNeeded(int inFreq, int outFreq, int num_channels)
+{
+    int tmpInFreq_kHz = inFreq / 1000;
+    int tmpOutFreq_kHz = outFreq / 1000;
+
+    if ((tmpInFreq_kHz != my_in_frequency_khz_)
+        || (tmpOutFreq_kHz != my_out_frequency_khz_)
+        || (num_channels != num_channels_))
+    {
+        return Reset(inFreq, outFreq, num_channels);
+    } else
+    {
+        return 0;
+    }
+}
+
+int Resampler::Reset(int inFreq, int outFreq, int num_channels)
+{
+    if (num_channels != 1 && num_channels != 2) {
+        return -1;
+    }
+    num_channels_ = num_channels;
+
+    if (state1_)
+    {
+        free(state1_);
+        state1_ = NULL;
+    }
+    if (state2_)
+    {
+        free(state2_);
+        state2_ = NULL;
+    }
+    if (state3_)
+    {
+        free(state3_);
+        state3_ = NULL;
+    }
+    if (in_buffer_)
+    {
+        free(in_buffer_);
+        in_buffer_ = NULL;
+    }
+    if (out_buffer_)
+    {
+        free(out_buffer_);
+        out_buffer_ = NULL;
+    }
+    if (slave_left_)
+    {
+        delete slave_left_;
+        slave_left_ = NULL;
+    }
+    if (slave_right_)
+    {
+        delete slave_right_;
+        slave_right_ = NULL;
+    }
+
+    in_buffer_size_ = 0;
+    out_buffer_size_ = 0;
+    in_buffer_size_max_ = 0;
+    out_buffer_size_max_ = 0;
+
+    // Start with a math exercise, Euclid's algorithm to find the gcd:
+    int a = inFreq;
+    int b = outFreq;
+    int c = a % b;
+    while (c != 0)
+    {
+        a = b;
+        b = c;
+        c = a % b;
+    }
+    // b is now the gcd;
+
+    // We need to track what domain we're in.
+    my_in_frequency_khz_ = inFreq / 1000;
+    my_out_frequency_khz_ = outFreq / 1000;
+
+    // Scale with GCD
+    inFreq = inFreq / b;
+    outFreq = outFreq / b;
+
+    if (num_channels_ == 2)
+    {
+        // Create two mono resamplers.
+ slave_left_ = new Resampler(inFreq, outFreq, 1); + slave_right_ = new Resampler(inFreq, outFreq, 1); + } + + if (inFreq == outFreq) + { + my_mode_ = kResamplerMode1To1; + } else if (inFreq == 1) + { + switch (outFreq) + { + case 2: + my_mode_ = kResamplerMode1To2; + break; + case 3: + my_mode_ = kResamplerMode1To3; + break; + case 4: + my_mode_ = kResamplerMode1To4; + break; + case 6: + my_mode_ = kResamplerMode1To6; + break; + case 12: + my_mode_ = kResamplerMode1To12; + break; + default: + return -1; + } + } else if (outFreq == 1) + { + switch (inFreq) + { + case 2: + my_mode_ = kResamplerMode2To1; + break; + case 3: + my_mode_ = kResamplerMode3To1; + break; + case 4: + my_mode_ = kResamplerMode4To1; + break; + case 6: + my_mode_ = kResamplerMode6To1; + break; + case 12: + my_mode_ = kResamplerMode12To1; + break; + default: + return -1; + } + } else if ((inFreq == 2) && (outFreq == 3)) + { + my_mode_ = kResamplerMode2To3; + } else if ((inFreq == 2) && (outFreq == 11)) + { + my_mode_ = kResamplerMode2To11; + } else if ((inFreq == 4) && (outFreq == 11)) + { + my_mode_ = kResamplerMode4To11; + } else if ((inFreq == 8) && (outFreq == 11)) + { + my_mode_ = kResamplerMode8To11; + } else if ((inFreq == 3) && (outFreq == 2)) + { + my_mode_ = kResamplerMode3To2; + } else if ((inFreq == 11) && (outFreq == 2)) + { + my_mode_ = kResamplerMode11To2; + } else if ((inFreq == 11) && (outFreq == 4)) + { + my_mode_ = kResamplerMode11To4; + } else if ((inFreq == 11) && (outFreq == 16)) + { + my_mode_ = kResamplerMode11To16; + } else if ((inFreq == 11) && (outFreq == 32)) + { + my_mode_ = kResamplerMode11To32; + } else if ((inFreq == 11) && (outFreq == 8)) + { + my_mode_ = kResamplerMode11To8; + } else + { + return -1; + } + + // Now create the states we need + switch (my_mode_) + { + case kResamplerMode1To1: + // No state needed; + break; + case kResamplerMode1To2: + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + break; + case kResamplerMode1To3: + state1_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz)); + WebRtcSpl_ResetResample16khzTo48khz((WebRtcSpl_State16khzTo48khz *)state1_); + break; + case kResamplerMode1To4: + // 1:2 + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + // 2:4 + state2_ = malloc(8 * sizeof(int32_t)); + memset(state2_, 0, 8 * sizeof(int32_t)); + break; + case kResamplerMode1To6: + // 1:2 + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + // 2:6 + state2_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz)); + WebRtcSpl_ResetResample16khzTo48khz((WebRtcSpl_State16khzTo48khz *)state2_); + break; + case kResamplerMode1To12: + // 1:2 + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + // 2:4 + state2_ = malloc(8 * sizeof(int32_t)); + memset(state2_, 0, 8 * sizeof(int32_t)); + // 4:12 + state3_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz)); + WebRtcSpl_ResetResample16khzTo48khz( + (WebRtcSpl_State16khzTo48khz*) state3_); + break; + case kResamplerMode2To3: + // 2:6 + state1_ = malloc(sizeof(WebRtcSpl_State16khzTo48khz)); + WebRtcSpl_ResetResample16khzTo48khz((WebRtcSpl_State16khzTo48khz *)state1_); + // 6:3 + state2_ = malloc(8 * sizeof(int32_t)); + memset(state2_, 0, 8 * sizeof(int32_t)); + break; + case kResamplerMode2To11: + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + + state2_ = malloc(sizeof(WebRtcSpl_State8khzTo22khz)); + WebRtcSpl_ResetResample8khzTo22khz((WebRtcSpl_State8khzTo22khz *)state2_); + break; + case 
kResamplerMode4To11: + state1_ = malloc(sizeof(WebRtcSpl_State8khzTo22khz)); + WebRtcSpl_ResetResample8khzTo22khz((WebRtcSpl_State8khzTo22khz *)state1_); + break; + case kResamplerMode8To11: + state1_ = malloc(sizeof(WebRtcSpl_State16khzTo22khz)); + WebRtcSpl_ResetResample16khzTo22khz((WebRtcSpl_State16khzTo22khz *)state1_); + break; + case kResamplerMode11To16: + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + + state2_ = malloc(sizeof(WebRtcSpl_State22khzTo16khz)); + WebRtcSpl_ResetResample22khzTo16khz((WebRtcSpl_State22khzTo16khz *)state2_); + break; + case kResamplerMode11To32: + // 11 -> 22 + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + + // 22 -> 16 + state2_ = malloc(sizeof(WebRtcSpl_State22khzTo16khz)); + WebRtcSpl_ResetResample22khzTo16khz((WebRtcSpl_State22khzTo16khz *)state2_); + + // 16 -> 32 + state3_ = malloc(8 * sizeof(int32_t)); + memset(state3_, 0, 8 * sizeof(int32_t)); + + break; + case kResamplerMode2To1: + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + break; + case kResamplerMode3To1: + state1_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz)); + WebRtcSpl_ResetResample48khzTo16khz((WebRtcSpl_State48khzTo16khz *)state1_); + break; + case kResamplerMode4To1: + // 4:2 + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + // 2:1 + state2_ = malloc(8 * sizeof(int32_t)); + memset(state2_, 0, 8 * sizeof(int32_t)); + break; + case kResamplerMode6To1: + // 6:2 + state1_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz)); + WebRtcSpl_ResetResample48khzTo16khz((WebRtcSpl_State48khzTo16khz *)state1_); + // 2:1 + state2_ = malloc(8 * sizeof(int32_t)); + memset(state2_, 0, 8 * sizeof(int32_t)); + break; + case kResamplerMode12To1: + // 12:4 + state1_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz)); + WebRtcSpl_ResetResample48khzTo16khz( + (WebRtcSpl_State48khzTo16khz*) state1_); + // 4:2 + state2_ = malloc(8 * sizeof(int32_t)); + memset(state2_, 0, 8 * sizeof(int32_t)); + // 2:1 + state3_ = malloc(8 * sizeof(int32_t)); + memset(state3_, 0, 8 * sizeof(int32_t)); + break; + case kResamplerMode3To2: + // 3:6 + state1_ = malloc(8 * sizeof(int32_t)); + memset(state1_, 0, 8 * sizeof(int32_t)); + // 6:2 + state2_ = malloc(sizeof(WebRtcSpl_State48khzTo16khz)); + WebRtcSpl_ResetResample48khzTo16khz((WebRtcSpl_State48khzTo16khz *)state2_); + break; + case kResamplerMode11To2: + state1_ = malloc(sizeof(WebRtcSpl_State22khzTo8khz)); + WebRtcSpl_ResetResample22khzTo8khz((WebRtcSpl_State22khzTo8khz *)state1_); + + state2_ = malloc(8 * sizeof(int32_t)); + memset(state2_, 0, 8 * sizeof(int32_t)); + + break; + case kResamplerMode11To4: + state1_ = malloc(sizeof(WebRtcSpl_State22khzTo8khz)); + WebRtcSpl_ResetResample22khzTo8khz((WebRtcSpl_State22khzTo8khz *)state1_); + break; + case kResamplerMode11To8: + state1_ = malloc(sizeof(WebRtcSpl_State22khzTo16khz)); + WebRtcSpl_ResetResample22khzTo16khz((WebRtcSpl_State22khzTo16khz *)state1_); + break; + + } + + return 0; +} + +// Synchronous resampling, all output samples are written to samplesOut +int Resampler::Push(const int16_t * samplesIn, size_t lengthIn, + int16_t* samplesOut, size_t maxLen, size_t &outLen) +{ + if (num_channels_ == 2) + { + // Split up the signal and call the slave object for each channel + int16_t* left = (int16_t*)malloc(lengthIn * sizeof(int16_t) / 2); + int16_t* right = (int16_t*)malloc(lengthIn * sizeof(int16_t) / 2); + int16_t* out_left = (int16_t*)malloc(maxLen / 2 * sizeof(int16_t)); + 
int16_t* out_right = + (int16_t*)malloc(maxLen / 2 * sizeof(int16_t)); + int res = 0; + for (size_t i = 0; i < lengthIn; i += 2) + { + left[i >> 1] = samplesIn[i]; + right[i >> 1] = samplesIn[i + 1]; + } + + // It's OK to overwrite the local parameter, since it's just a copy + lengthIn = lengthIn / 2; + + size_t actualOutLen_left = 0; + size_t actualOutLen_right = 0; + // Do resampling for right channel + res |= slave_left_->Push(left, lengthIn, out_left, maxLen / 2, actualOutLen_left); + res |= slave_right_->Push(right, lengthIn, out_right, maxLen / 2, actualOutLen_right); + if (res || (actualOutLen_left != actualOutLen_right)) + { + free(left); + free(right); + free(out_left); + free(out_right); + return -1; + } + + // Reassemble the signal + for (size_t i = 0; i < actualOutLen_left; i++) + { + samplesOut[i * 2] = out_left[i]; + samplesOut[i * 2 + 1] = out_right[i]; + } + outLen = 2 * actualOutLen_left; + + free(left); + free(right); + free(out_left); + free(out_right); + + return 0; + } + + // Containers for temp samples + int16_t* tmp; + int16_t* tmp_2; + // tmp data for resampling routines + int32_t* tmp_mem; + + switch (my_mode_) + { + case kResamplerMode1To1: + memcpy(samplesOut, samplesIn, lengthIn * sizeof(int16_t)); + outLen = lengthIn; + break; + case kResamplerMode1To2: + if (maxLen < (lengthIn * 2)) + { + return -1; + } + WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, samplesOut, (int32_t*)state1_); + outLen = lengthIn * 2; + return 0; + case kResamplerMode1To3: + + // We can only handle blocks of 160 samples + // Can be fixed, but I don't think it's needed + if ((lengthIn % 160) != 0) + { + return -1; + } + if (maxLen < (lengthIn * 3)) + { + return -1; + } + tmp_mem = (int32_t*)malloc(336 * sizeof(int32_t)); + + for (size_t i = 0; i < lengthIn; i += 160) + { + WebRtcSpl_Resample16khzTo48khz(samplesIn + i, samplesOut + i * 3, + (WebRtcSpl_State16khzTo48khz *)state1_, + tmp_mem); + } + outLen = lengthIn * 3; + free(tmp_mem); + return 0; + case kResamplerMode1To4: + if (maxLen < (lengthIn * 4)) + { + return -1; + } + + tmp = (int16_t*)malloc(sizeof(int16_t) * 2 * lengthIn); + // 1:2 + WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp, (int32_t*)state1_); + // 2:4 + WebRtcSpl_UpsampleBy2(tmp, lengthIn * 2, samplesOut, (int32_t*)state2_); + outLen = lengthIn * 4; + free(tmp); + return 0; + case kResamplerMode1To6: + // We can only handle blocks of 80 samples + // Can be fixed, but I don't think it's needed + if ((lengthIn % 80) != 0) + { + return -1; + } + if (maxLen < (lengthIn * 6)) + { + return -1; + } + + //1:2 + + tmp_mem = (int32_t*)malloc(336 * sizeof(int32_t)); + tmp = (int16_t*)malloc(sizeof(int16_t) * 2 * lengthIn); + + WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp, (int32_t*)state1_); + outLen = lengthIn * 2; + + for (size_t i = 0; i < outLen; i += 160) + { + WebRtcSpl_Resample16khzTo48khz(tmp + i, samplesOut + i * 3, + (WebRtcSpl_State16khzTo48khz *)state2_, + tmp_mem); + } + outLen = outLen * 3; + free(tmp_mem); + free(tmp); + + return 0; + case kResamplerMode1To12: + // We can only handle blocks of 40 samples + // Can be fixed, but I don't think it's needed + if ((lengthIn % 40) != 0) { + return -1; + } + if (maxLen < (lengthIn * 12)) { + return -1; + } + + tmp_mem = (int32_t*) malloc(336 * sizeof(int32_t)); + tmp = (int16_t*) malloc(sizeof(int16_t) * 4 * lengthIn); + //1:2 + WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, samplesOut, + (int32_t*) state1_); + outLen = lengthIn * 2; + //2:4 + WebRtcSpl_UpsampleBy2(samplesOut, outLen, tmp, (int32_t*) state2_); + outLen = outLen * 
2;
+            // 4:12
+            for (size_t i = 0; i < outLen; i += 160) {
+                // WebRtcSpl_Resample16khzTo48khz() takes a block of 160 samples
+                // as input and outputs a resampled block of 480 samples. The
+                // data is now actually in 32 kHz sampling rate, despite the
+                // function name, and with a resampling factor of three becomes
+                // 96 kHz.
+                WebRtcSpl_Resample16khzTo48khz(tmp + i, samplesOut + i * 3,
+                                               (WebRtcSpl_State16khzTo48khz*) state3_,
+                                               tmp_mem);
+            }
+            outLen = outLen * 3;
+            free(tmp_mem);
+            free(tmp);
+
+            return 0;
+        case kResamplerMode2To3:
+            if (maxLen < (lengthIn * 3 / 2))
+            {
+                return -1;
+            }
+            // 2:6
+            // We can only handle blocks of 160 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 160) != 0)
+            {
+                return -1;
+            }
+            tmp = static_cast<int16_t*> (malloc(sizeof(int16_t) * lengthIn * 3));
+            tmp_mem = (int32_t*)malloc(336 * sizeof(int32_t));
+            for (size_t i = 0; i < lengthIn; i += 160)
+            {
+                WebRtcSpl_Resample16khzTo48khz(samplesIn + i, tmp + i * 3,
+                                               (WebRtcSpl_State16khzTo48khz *)state1_,
+                                               tmp_mem);
+            }
+            lengthIn = lengthIn * 3;
+            // 6:3
+            WebRtcSpl_DownsampleBy2(tmp, lengthIn, samplesOut, (int32_t*)state2_);
+            outLen = lengthIn / 2;
+            free(tmp);
+            free(tmp_mem);
+            return 0;
+        case kResamplerMode2To11:
+
+            // We can only handle blocks of 80 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 80) != 0)
+            {
+                return -1;
+            }
+            if (maxLen < ((lengthIn * 11) / 2))
+            {
+                return -1;
+            }
+            tmp = (int16_t*)malloc(sizeof(int16_t) * 2 * lengthIn);
+            // 1:2
+            WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp, (int32_t*)state1_);
+            lengthIn *= 2;
+
+            tmp_mem = (int32_t*)malloc(98 * sizeof(int32_t));
+
+            for (size_t i = 0; i < lengthIn; i += 80)
+            {
+                WebRtcSpl_Resample8khzTo22khz(tmp + i, samplesOut + (i * 11) / 4,
+                                              (WebRtcSpl_State8khzTo22khz *)state2_,
+                                              tmp_mem);
+            }
+            outLen = (lengthIn * 11) / 4;
+            free(tmp_mem);
+            free(tmp);
+            return 0;
+        case kResamplerMode4To11:
+
+            // We can only handle blocks of 80 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 80) != 0)
+            {
+                return -1;
+            }
+            if (maxLen < ((lengthIn * 11) / 4))
+            {
+                return -1;
+            }
+            tmp_mem = (int32_t*)malloc(98 * sizeof(int32_t));
+
+            for (size_t i = 0; i < lengthIn; i += 80)
+            {
+                WebRtcSpl_Resample8khzTo22khz(samplesIn + i, samplesOut + (i * 11) / 4,
+                                              (WebRtcSpl_State8khzTo22khz *)state1_,
+                                              tmp_mem);
+            }
+            outLen = (lengthIn * 11) / 4;
+            free(tmp_mem);
+            return 0;
+        case kResamplerMode8To11:
+            // We can only handle blocks of 160 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 160) != 0)
+            {
+                return -1;
+            }
+            if (maxLen < ((lengthIn * 11) / 8))
+            {
+                return -1;
+            }
+            tmp_mem = (int32_t*)malloc(88 * sizeof(int32_t));
+
+            for (size_t i = 0; i < lengthIn; i += 160)
+            {
+                WebRtcSpl_Resample16khzTo22khz(samplesIn + i, samplesOut + (i * 11) / 8,
+                                               (WebRtcSpl_State16khzTo22khz *)state1_,
+                                               tmp_mem);
+            }
+            outLen = (lengthIn * 11) / 8;
+            free(tmp_mem);
+            return 0;
+
+        case kResamplerMode11To16:
+            // We can only handle blocks of 110 samples
+            if ((lengthIn % 110) != 0)
+            {
+                return -1;
+            }
+            if (maxLen < ((lengthIn * 16) / 11))
+            {
+                return -1;
+            }
+
+            tmp_mem = (int32_t*)malloc(104 * sizeof(int32_t));
+            tmp = (int16_t*)malloc((sizeof(int16_t) * lengthIn * 2));
+
+            WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp, (int32_t*)state1_);
+
+            for (size_t i = 0; i < (lengthIn * 2); i += 220)
+            {
+                WebRtcSpl_Resample22khzTo16khz(tmp + i, samplesOut + (i / 220) * 160,
+                                               (WebRtcSpl_State22khzTo16khz *)state2_,
+                                               tmp_mem);
+            }
+
+            outLen = (lengthIn * 16) / 11;
+ + free(tmp_mem); + free(tmp); + return 0; + + case kResamplerMode11To32: + + // We can only handle blocks of 110 samples + if ((lengthIn % 110) != 0) + { + return -1; + } + if (maxLen < ((lengthIn * 32) / 11)) + { + return -1; + } + + tmp_mem = (int32_t*)malloc(104 * sizeof(int32_t)); + tmp = (int16_t*)malloc((sizeof(int16_t) * lengthIn * 2)); + + // 11 -> 22 kHz in samplesOut + WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, samplesOut, (int32_t*)state1_); + + // 22 -> 16 in tmp + for (size_t i = 0; i < (lengthIn * 2); i += 220) + { + WebRtcSpl_Resample22khzTo16khz(samplesOut + i, tmp + (i / 220) * 160, + (WebRtcSpl_State22khzTo16khz *)state2_, + tmp_mem); + } + + // 16 -> 32 in samplesOut + WebRtcSpl_UpsampleBy2(tmp, (lengthIn * 16) / 11, samplesOut, + (int32_t*)state3_); + + outLen = (lengthIn * 32) / 11; + + free(tmp_mem); + free(tmp); + return 0; + + case kResamplerMode2To1: + if (maxLen < (lengthIn / 2)) + { + return -1; + } + WebRtcSpl_DownsampleBy2(samplesIn, lengthIn, samplesOut, (int32_t*)state1_); + outLen = lengthIn / 2; + return 0; + case kResamplerMode3To1: + // We can only handle blocks of 480 samples + // Can be fixed, but I don't think it's needed + if ((lengthIn % 480) != 0) + { + return -1; + } + if (maxLen < (lengthIn / 3)) + { + return -1; + } + tmp_mem = (int32_t*)malloc(496 * sizeof(int32_t)); + + for (size_t i = 0; i < lengthIn; i += 480) + { + WebRtcSpl_Resample48khzTo16khz(samplesIn + i, samplesOut + i / 3, + (WebRtcSpl_State48khzTo16khz *)state1_, + tmp_mem); + } + outLen = lengthIn / 3; + free(tmp_mem); + return 0; + case kResamplerMode4To1: + if (maxLen < (lengthIn / 4)) + { + return -1; + } + tmp = (int16_t*)malloc(sizeof(int16_t) * lengthIn / 2); + // 4:2 + WebRtcSpl_DownsampleBy2(samplesIn, lengthIn, tmp, (int32_t*)state1_); + // 2:1 + WebRtcSpl_DownsampleBy2(tmp, lengthIn / 2, samplesOut, (int32_t*)state2_); + outLen = lengthIn / 4; + free(tmp); + return 0; + + case kResamplerMode6To1: + // We can only handle blocks of 480 samples + // Can be fixed, but I don't think it's needed + if ((lengthIn % 480) != 0) + { + return -1; + } + if (maxLen < (lengthIn / 6)) + { + return -1; + } + + tmp_mem = (int32_t*)malloc(496 * sizeof(int32_t)); + tmp = (int16_t*)malloc((sizeof(int16_t) * lengthIn) / 3); + + for (size_t i = 0; i < lengthIn; i += 480) + { + WebRtcSpl_Resample48khzTo16khz(samplesIn + i, tmp + i / 3, + (WebRtcSpl_State48khzTo16khz *)state1_, + tmp_mem); + } + outLen = lengthIn / 3; + free(tmp_mem); + WebRtcSpl_DownsampleBy2(tmp, outLen, samplesOut, (int32_t*)state2_); + free(tmp); + outLen = outLen / 2; + return 0; + case kResamplerMode12To1: + // We can only handle blocks of 480 samples + // Can be fixed, but I don't think it's needed + if ((lengthIn % 480) != 0) { + return -1; + } + if (maxLen < (lengthIn / 12)) { + return -1; + } + + tmp_mem = (int32_t*) malloc(496 * sizeof(int32_t)); + tmp = (int16_t*) malloc((sizeof(int16_t) * lengthIn) / 3); + tmp_2 = (int16_t*) malloc((sizeof(int16_t) * lengthIn) / 6); + // 12:4 + for (size_t i = 0; i < lengthIn; i += 480) { + // WebRtcSpl_Resample48khzTo16khz() takes a block of 480 samples + // as input and outputs a resampled block of 160 samples. The + // data is now actually in 96 kHz sampling rate, despite the + // function name, and with a resampling factor of 1/3 becomes + // 32 kHz. 
+                WebRtcSpl_Resample48khzTo16khz(samplesIn + i, tmp + i / 3,
+                                               (WebRtcSpl_State48khzTo16khz*) state1_,
+                                               tmp_mem);
+            }
+            outLen = lengthIn / 3;
+            free(tmp_mem);
+            // 4:2
+            WebRtcSpl_DownsampleBy2(tmp, outLen, tmp_2, (int32_t*) state2_);
+            outLen = outLen / 2;
+            free(tmp);
+            // 2:1
+            WebRtcSpl_DownsampleBy2(tmp_2, outLen, samplesOut,
+                                    (int32_t*) state3_);
+            free(tmp_2);
+            outLen = outLen / 2;
+            return 0;
+        case kResamplerMode3To2:
+            if (maxLen < (lengthIn * 2 / 3))
+            {
+                return -1;
+            }
+            // 3:6
+            tmp = static_cast<int16_t*> (malloc(sizeof(int16_t) * lengthIn * 2));
+            WebRtcSpl_UpsampleBy2(samplesIn, lengthIn, tmp, (int32_t*)state1_);
+            lengthIn *= 2;
+            // 6:2
+            // We can only handle blocks of 480 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 480) != 0)
+            {
+                free(tmp);
+                return -1;
+            }
+            tmp_mem = (int32_t*)malloc(496 * sizeof(int32_t));
+            for (size_t i = 0; i < lengthIn; i += 480)
+            {
+                WebRtcSpl_Resample48khzTo16khz(tmp + i, samplesOut + i / 3,
+                                               (WebRtcSpl_State48khzTo16khz *)state2_,
+                                               tmp_mem);
+            }
+            outLen = lengthIn / 3;
+            free(tmp);
+            free(tmp_mem);
+            return 0;
+        case kResamplerMode11To2:
+            // We can only handle blocks of 220 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 220) != 0)
+            {
+                return -1;
+            }
+            if (maxLen < ((lengthIn * 2) / 11))
+            {
+                return -1;
+            }
+            tmp_mem = (int32_t*)malloc(126 * sizeof(int32_t));
+            tmp = (int16_t*)malloc((lengthIn * 4) / 11 * sizeof(int16_t));
+
+            for (size_t i = 0; i < lengthIn; i += 220)
+            {
+                WebRtcSpl_Resample22khzTo8khz(samplesIn + i, tmp + (i * 4) / 11,
+                                              (WebRtcSpl_State22khzTo8khz *)state1_,
+                                              tmp_mem);
+            }
+            lengthIn = (lengthIn * 4) / 11;
+
+            WebRtcSpl_DownsampleBy2(tmp, lengthIn, samplesOut,
+                                    (int32_t*)state2_);
+            outLen = lengthIn / 2;
+
+            free(tmp_mem);
+            free(tmp);
+            return 0;
+        case kResamplerMode11To4:
+            // We can only handle blocks of 220 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 220) != 0)
+            {
+                return -1;
+            }
+            if (maxLen < ((lengthIn * 4) / 11))
+            {
+                return -1;
+            }
+            tmp_mem = (int32_t*)malloc(126 * sizeof(int32_t));
+
+            for (size_t i = 0; i < lengthIn; i += 220)
+            {
+                WebRtcSpl_Resample22khzTo8khz(samplesIn + i, samplesOut + (i * 4) / 11,
+                                              (WebRtcSpl_State22khzTo8khz *)state1_,
+                                              tmp_mem);
+            }
+            outLen = (lengthIn * 4) / 11;
+            free(tmp_mem);
+            return 0;
+        case kResamplerMode11To8:
+            // We can only handle blocks of 220 samples
+            // Can be fixed, but I don't think it's needed
+            if ((lengthIn % 220) != 0)
+            {
+                return -1;
+            }
+            if (maxLen < ((lengthIn * 8) / 11))
+            {
+                return -1;
+            }
+            tmp_mem = (int32_t*)malloc(104 * sizeof(int32_t));
+
+            for (size_t i = 0; i < lengthIn; i += 220)
+            {
+                WebRtcSpl_Resample22khzTo16khz(samplesIn + i, samplesOut + (i * 8) / 11,
+                                               (WebRtcSpl_State22khzTo16khz *)state1_,
+                                               tmp_mem);
+            }
+            outLen = (lengthIn * 8) / 11;
+            free(tmp_mem);
+            return 0;
+            break;
+
+    }
+    return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/resampler/sinc_resampler.cc b/webrtc/common_audio/resampler/sinc_resampler.cc
new file mode 100644
index 0000000..c4f1488
--- /dev/null
+++ b/webrtc/common_audio/resampler/sinc_resampler.cc
@@ -0,0 +1,378 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Modified from the Chromium original:
+// src/media/base/sinc_resampler.cc
+
+// Initial input buffer layout, dividing into regions r0_ to r4_ (note: r0_,
+// r3_ and r4_ will move after the first load):
+//
+// |----------------|-----------------------------------------|----------------|
+//
+//                                 request_frames_
+//                   <--------------------------------------------------------->
+//                                r0_ (during first load)
+//
+//  kKernelSize / 2   kKernelSize / 2         kKernelSize / 2   kKernelSize / 2
+// <---------------> <--------------->       <---------------> <--------------->
+//        r1_               r2_                     r3_               r4_
+//
+//                          block_size_ == r4_ - r2_
+//                   <--------------------------------------->
+//
+//                                                  request_frames_
+//                                    <------------------ ... ----------------->
+//                                          r0_ (during second load)
+//
+// On the second request r0_ slides to the right by kKernelSize / 2 and r3_,
+// r4_ and block_size_ are reinitialized via step (3) in the algorithm below.
+//
+// These new regions remain constant until a Flush() occurs. While complicated,
+// this allows us to reduce jitter by always requesting the same amount from
+// the provided callback.
+//
+// The algorithm:
+//
+// 1) Allocate input_buffer of size: request_frames_ + kKernelSize; this
+//    ensures there's enough room to read request_frames_ from the callback
+//    into region r0_ (which will move between the first and subsequent
+//    passes).
+//
+// 2) Let r1_, r2_ each represent half the kernel centered around r0_:
+//
+//        r0_ = input_buffer_ + kKernelSize / 2
+//        r1_ = input_buffer_
+//        r2_ = r0_
+//
+//    r0_ is always request_frames_ in size. r1_, r2_ are kKernelSize / 2 in
+//    size. r1_ must be zero initialized to avoid convolution with garbage
+//    (see step (5) for why).
+//
+// 3) Let r3_, r4_ each represent half the kernel right aligned with the end
+//    of r0_ and choose block_size_ as the distance in frames between r4_ and
+//    r2_:
+//
+//        r3_ = r0_ + request_frames_ - kKernelSize
+//        r4_ = r0_ + request_frames_ - kKernelSize / 2
+//        block_size_ = r4_ - r2_ = request_frames_ - kKernelSize / 2
+//
+// 4) Consume request_frames_ frames into r0_.
+//
+// 5) Position kernel centered at start of r2_ and generate output frames
+//    until the kernel is centered at the start of r4_ or we've finished
+//    generating all the output frames.
+//
+// 6) Wrap left over data from the r3_ to r1_ and r4_ to r2_.
+//
+// 7) If we're on the second load, in order to avoid overwriting the frames we
+//    just wrapped from r4_ we need to slide r0_ to the right by the size of
+//    r4_, which is kKernelSize / 2:
+//
+//        r0_ = r0_ + kKernelSize / 2 = input_buffer_ + kKernelSize
+//
+//    r3_, r4_, and block_size_ then need to be reinitialized, so goto (3).
+//
+// 8) Else, if we're not on the second load, goto (4).
+//
+// Note: we're glossing over how the sub-sample handling works with
+// |virtual_source_idx_|, etc.
+
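+// Worked example (illustrative note, not part of the original source): with
+// request_frames_ == kDefaultRequestSize == 512 and kKernelSize == 32,
+// step (3) gives r3_ = r0_ + 480, r4_ = r0_ + 496 and
+// block_size_ = request_frames_ - kKernelSize / 2 = 496, so ChunkSize()
+// returns 496 / io_sample_rate_ratio_ -- the most output that can be
+// produced per single Run() request of 512 input frames.
+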
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include "webrtc/common_audio/resampler/sinc_resampler.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include <limits>
+
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+namespace {
+
+double SincScaleFactor(double io_ratio) {
+  // |sinc_scale_factor| is basically the normalized cutoff frequency of the
+  // low-pass filter.
+  double sinc_scale_factor = io_ratio > 1.0 ? 1.0 / io_ratio : 1.0;
+
+  // The sinc function is an idealized brick-wall filter, but since we're
+  // windowing it the transition from pass to stop does not happen right away.
+  // So we should adjust the low pass filter cutoff slightly downward to avoid
+  // some aliasing at the very high-end.
+  // TODO(crogers): this value is empirical and to be more exact should vary
+  // depending on kKernelSize.
+  sinc_scale_factor *= 0.9;
+
+  return sinc_scale_factor;
+}
+
+}  // namespace
+
+// If we know the minimum architecture at compile time, avoid CPU detection.
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+#if defined(__SSE2__)
+#define CONVOLVE_FUNC Convolve_SSE
+void SincResampler::InitializeCPUSpecificFeatures() {}
+#else
+// x86 CPU detection required. Function will be set by
+// InitializeCPUSpecificFeatures().
+// TODO(dalecurtis): Once Chrome moves to an SSE baseline this can be removed.
+#define CONVOLVE_FUNC convolve_proc_
+
+void SincResampler::InitializeCPUSpecificFeatures() {
+  convolve_proc_ = WebRtc_GetCPUInfo(kSSE2) ? Convolve_SSE : Convolve_C;
+}
+#endif
+#elif defined(WEBRTC_HAS_NEON)
+#define CONVOLVE_FUNC Convolve_NEON
+void SincResampler::InitializeCPUSpecificFeatures() {}
+#elif defined(WEBRTC_DETECT_NEON)
+#define CONVOLVE_FUNC convolve_proc_
+void SincResampler::InitializeCPUSpecificFeatures() {
+  convolve_proc_ = WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON ?
+      Convolve_NEON : Convolve_C;
+}
+#else
+// Unknown architecture.
+#define CONVOLVE_FUNC Convolve_C
+void SincResampler::InitializeCPUSpecificFeatures() {}
+#endif
+
+SincResampler::SincResampler(double io_sample_rate_ratio,
+                             size_t request_frames,
+                             SincResamplerCallback* read_cb)
+    : io_sample_rate_ratio_(io_sample_rate_ratio),
+      read_cb_(read_cb),
+      request_frames_(request_frames),
+      input_buffer_size_(request_frames_ + kKernelSize),
+      // Create input buffers with a 16-byte alignment for SSE optimizations.
+      kernel_storage_(static_cast<float*>(
+          AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
+      kernel_pre_sinc_storage_(static_cast<float*>(
+          AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
+      kernel_window_storage_(static_cast<float*>(
+          AlignedMalloc(sizeof(float) * kKernelStorageSize, 16))),
+      input_buffer_(static_cast<float*>(
+          AlignedMalloc(sizeof(float) * input_buffer_size_, 16))),
+#if defined(WEBRTC_CPU_DETECTION)
+      convolve_proc_(NULL),
+#endif
+      r1_(input_buffer_.get()),
+      r2_(input_buffer_.get() + kKernelSize / 2) {
+#if defined(WEBRTC_CPU_DETECTION)
+  InitializeCPUSpecificFeatures();
+  assert(convolve_proc_);
+#endif
+  assert(request_frames_ > 0);
+  Flush();
+  assert(block_size_ > kKernelSize);
+
+  memset(kernel_storage_.get(), 0,
+         sizeof(*kernel_storage_.get()) * kKernelStorageSize);
+  memset(kernel_pre_sinc_storage_.get(), 0,
+         sizeof(*kernel_pre_sinc_storage_.get()) * kKernelStorageSize);
+  memset(kernel_window_storage_.get(), 0,
+         sizeof(*kernel_window_storage_.get()) * kKernelStorageSize);
+
+  InitializeKernel();
+}
+
+SincResampler::~SincResampler() {}
+
+void SincResampler::UpdateRegions(bool second_load) {
+  // Setup various region pointers in the buffer (see diagram above). If we're
+  // on the second load we need to slide r0_ to the right by kKernelSize / 2.
+  r0_ = input_buffer_.get() + (second_load ? kKernelSize : kKernelSize / 2);
+  r3_ = r0_ + request_frames_ - kKernelSize;
+  r4_ = r0_ + request_frames_ - kKernelSize / 2;
+  block_size_ = r4_ - r2_;
+
+  // r1_ at the beginning of the buffer.
+  assert(r1_ == input_buffer_.get());
+  // r1_ left of r2_, r4_ left of r3_ and size correct.
+  assert(r2_ - r1_ == r4_ - r3_);
+  // r2_ left of r3.
+  assert(r2_ < r3_);
+}
+
+void SincResampler::InitializeKernel() {
+  // Blackman window parameters.
+  static const double kAlpha = 0.16;
+  static const double kA0 = 0.5 * (1.0 - kAlpha);
+  static const double kA1 = 0.5;
+  static const double kA2 = 0.5 * kAlpha;
+
+  // Generates a set of windowed sinc() kernels.
+  // We generate a range of sub-sample offsets from 0.0 to 1.0.
+  const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
+  for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
+    const float subsample_offset =
+        static_cast<float>(offset_idx) / kKernelOffsetCount;
+
+    for (size_t i = 0; i < kKernelSize; ++i) {
+      const size_t idx = i + offset_idx * kKernelSize;
+      const float pre_sinc = static_cast<float>(M_PI *
+          (static_cast<int>(i) - static_cast<int>(kKernelSize / 2) -
+           subsample_offset));
+      kernel_pre_sinc_storage_[idx] = pre_sinc;
+
+      // Compute Blackman window, matching the offset of the sinc().
+      const float x = (i - subsample_offset) / kKernelSize;
+      const float window = static_cast<float>(kA0 - kA1 * cos(2.0 * M_PI * x) +
+          kA2 * cos(4.0 * M_PI * x));
+      kernel_window_storage_[idx] = window;
+
+      // Compute the sinc with offset, then window the sinc() function and
+      // store at the correct offset.
+      kernel_storage_[idx] = static_cast<float>(window *
+          ((pre_sinc == 0) ?
+              sinc_scale_factor :
+              (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
+    }
+  }
+}
+
+void SincResampler::SetRatio(double io_sample_rate_ratio) {
+  if (fabs(io_sample_rate_ratio_ - io_sample_rate_ratio) <
+      std::numeric_limits<double>::epsilon()) {
+    return;
+  }
+
+  io_sample_rate_ratio_ = io_sample_rate_ratio;
+
+  // Optimize reinitialization by reusing values which are independent of
+  // |sinc_scale_factor|. Provides a 3x speedup.
+  const double sinc_scale_factor = SincScaleFactor(io_sample_rate_ratio_);
+  for (size_t offset_idx = 0; offset_idx <= kKernelOffsetCount; ++offset_idx) {
+    for (size_t i = 0; i < kKernelSize; ++i) {
+      const size_t idx = i + offset_idx * kKernelSize;
+      const float window = kernel_window_storage_[idx];
+      const float pre_sinc = kernel_pre_sinc_storage_[idx];
+
+      kernel_storage_[idx] = static_cast<float>(window *
+          ((pre_sinc == 0) ?
+              sinc_scale_factor :
+              (sin(sinc_scale_factor * pre_sinc) / pre_sinc)));
+    }
+  }
+}
+
+void SincResampler::Resample(size_t frames, float* destination) {
+  size_t remaining_frames = frames;
+
+  // Step (1) -- Prime the input buffer at the start of the input stream.
+  if (!buffer_primed_ && remaining_frames) {
+    read_cb_->Run(request_frames_, r0_);
+    buffer_primed_ = true;
+  }
+
+  // Step (2) -- Resample! const what we can outside of the loop for speed. It
+  // actually has an impact on ARM performance. See inner loop comment below.
+  const double current_io_ratio = io_sample_rate_ratio_;
+  const float* const kernel_ptr = kernel_storage_.get();
+  while (remaining_frames) {
+    // |i| may be negative if the last Resample() call ended on an iteration
+    // that put |virtual_source_idx_| over the limit.
+    //
+    // Note: The loop construct here can severely impact performance on ARM
+    // or when built with clang. See https://codereview.chromium.org/18566009/
+    for (int i = static_cast<int>(
+             ceil((block_size_ - virtual_source_idx_) / current_io_ratio));
+         i > 0; --i) {
+      assert(virtual_source_idx_ < block_size_);
+
+      // |virtual_source_idx_| lies in between two kernel offsets so figure
+      // out what they are.
+      const int source_idx = static_cast<int>(virtual_source_idx_);
+      const double subsample_remainder = virtual_source_idx_ - source_idx;
+
+      const double virtual_offset_idx =
+          subsample_remainder * kKernelOffsetCount;
+      const int offset_idx = static_cast<int>(virtual_offset_idx);
+
+      // We'll compute "convolutions" for the two kernels which straddle
+      // |virtual_source_idx_|.
+      const float* const k1 = kernel_ptr + offset_idx * kKernelSize;
+      const float* const k2 = k1 + kKernelSize;
+
+      // Ensure |k1|, |k2| are 16-byte aligned for SIMD usage. Should always
+      // be true so long as kKernelSize is a multiple of 16.
+      assert(0u == (reinterpret_cast<uintptr_t>(k1) & 0x0F));
+      assert(0u == (reinterpret_cast<uintptr_t>(k2) & 0x0F));
+
+      // Initialize input pointer based on quantized |virtual_source_idx_|.
+      const float* const input_ptr = r1_ + source_idx;
+
+      // Figure out how much to weight each kernel's "convolution".
+      const double kernel_interpolation_factor =
+          virtual_offset_idx - offset_idx;
+      *destination++ = CONVOLVE_FUNC(
+          input_ptr, k1, k2, kernel_interpolation_factor);
+
+      // Advance the virtual index.
+      virtual_source_idx_ += current_io_ratio;
+
+      if (!--remaining_frames)
+        return;
+    }
+
+    // Wrap back around to the start.
+    virtual_source_idx_ -= block_size_;
+
+    // Step (3) -- Copy r3_, r4_ to r1_, r2_.
+    // This wraps the last input frames back to the start of the buffer.
+    memcpy(r1_, r3_, sizeof(*input_buffer_.get()) * kKernelSize);
+
+    // Step (4) -- Reinitialize regions if necessary.
+    if (r0_ == r2_)
+      UpdateRegions(true);
+
+    // Step (5) -- Refresh the buffer with more input.
+    read_cb_->Run(request_frames_, r0_);
+  }
+}
+
+#undef CONVOLVE_FUNC
+
+size_t SincResampler::ChunkSize() const {
+  return static_cast<size_t>(block_size_ / io_sample_rate_ratio_);
+}
+
+void SincResampler::Flush() {
+  virtual_source_idx_ = 0;
+  buffer_primed_ = false;
+  memset(input_buffer_.get(), 0,
+         sizeof(*input_buffer_.get()) * input_buffer_size_);
+  UpdateRegions(false);
+}
+
+float SincResampler::Convolve_C(const float* input_ptr, const float* k1,
+                                const float* k2,
+                                double kernel_interpolation_factor) {
+  float sum1 = 0;
+  float sum2 = 0;
+
+  // Generate a single output sample. Unrolling this loop hurt performance in
+  // local testing.
+  size_t n = kKernelSize;
+  while (n--) {
+    sum1 += *input_ptr * *k1++;
+    sum2 += *input_ptr++ * *k2++;
+  }
+
+  // Linearly interpolate the two "convolutions".
+  return static_cast<float>((1.0 - kernel_interpolation_factor) * sum1 +
+      kernel_interpolation_factor * sum2);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/resampler/sinc_resampler.h b/webrtc/common_audio/resampler/sinc_resampler.h
new file mode 100644
index 0000000..189112b
--- /dev/null
+++ b/webrtc/common_audio/resampler/sinc_resampler.h
@@ -0,0 +1,170 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */ + +// Modified from the Chromium original here: +// src/media/base/sinc_resampler.h + +#ifndef WEBRTC_COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_ +#define WEBRTC_COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_ + +#include "webrtc/base/constructormagic.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/system_wrappers/interface/aligned_malloc.h" +#include "webrtc/test/testsupport/gtest_prod_util.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Callback class for providing more data into the resampler. Expects |frames| +// of data to be rendered into |destination|; zero padded if not enough frames +// are available to satisfy the request. +class SincResamplerCallback { + public: + virtual ~SincResamplerCallback() {} + virtual void Run(size_t frames, float* destination) = 0; +}; + +// SincResampler is a high-quality single-channel sample-rate converter. +class SincResampler { + public: + // The kernel size can be adjusted for quality (higher is better) at the + // expense of performance. Must be a multiple of 32. + // TODO(dalecurtis): Test performance to see if we can jack this up to 64+. + static const size_t kKernelSize = 32; + + // Default request size. Affects how often and for how much SincResampler + // calls back for input. Must be greater than kKernelSize. + static const size_t kDefaultRequestSize = 512; + + // The kernel offset count is used for interpolation and is the number of + // sub-sample kernel shifts. Can be adjusted for quality (higher is better) + // at the expense of allocating more memory. + static const size_t kKernelOffsetCount = 32; + static const size_t kKernelStorageSize = + kKernelSize * (kKernelOffsetCount + 1); + + // Constructs a SincResampler with the specified |read_cb|, which is used to + // acquire audio data for resampling. |io_sample_rate_ratio| is the ratio + // of input / output sample rates. |request_frames| controls the size in + // frames of the buffer requested by each |read_cb| call. The value must be + // greater than kKernelSize. Specify kDefaultRequestSize if there are no + // request size constraints. + SincResampler(double io_sample_rate_ratio, + size_t request_frames, + SincResamplerCallback* read_cb); + virtual ~SincResampler(); + + // Resample |frames| of data from |read_cb_| into |destination|. + void Resample(size_t frames, float* destination); + + // The maximum size in frames that guarantees Resample() will only make a + // single call to |read_cb_| for more data. + size_t ChunkSize() const; + + size_t request_frames() const { return request_frames_; } + + // Flush all buffered data and reset internal indices. Not thread safe, do + // not call while Resample() is in progress. + void Flush(); + + // Update |io_sample_rate_ratio_|. SetRatio() will cause a reconstruction of + // the kernels used for resampling. Not thread safe, do not call while + // Resample() is in progress. + // + // TODO(ajm): Use this in PushSincResampler rather than reconstructing + // SincResampler. We would also need a way to update |request_frames_|. + void SetRatio(double io_sample_rate_ratio); + + float* get_kernel_for_testing() { return kernel_storage_.get(); } + + private: + FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, Convolve); + FRIEND_TEST_ALL_PREFIXES(SincResamplerTest, ConvolveBenchmark); + + void InitializeKernel(); + void UpdateRegions(bool second_load); + + // Selects runtime specific CPU features like SSE. Must be called before + // using SincResampler. + // TODO(ajm): Currently managed by the class internally. 
 See the note with
+  // |convolve_proc_| below.
+  void InitializeCPUSpecificFeatures();
+
+  // Compute convolution of |k1| and |k2| over |input_ptr|, resultant sums are
+  // linearly interpolated using |kernel_interpolation_factor|. On x86 and ARM
+  // the underlying implementation is chosen at run time.
+  static float Convolve_C(const float* input_ptr, const float* k1,
+                          const float* k2, double kernel_interpolation_factor);
+#if defined(WEBRTC_ARCH_X86_FAMILY)
+  static float Convolve_SSE(const float* input_ptr, const float* k1,
+                            const float* k2,
+                            double kernel_interpolation_factor);
+#elif defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+  static float Convolve_NEON(const float* input_ptr, const float* k1,
+                             const float* k2,
+                             double kernel_interpolation_factor);
+#endif
+
+  // The ratio of input / output sample rates.
+  double io_sample_rate_ratio_;
+
+  // An index on the source input buffer with sub-sample precision. It must be
+  // double precision to avoid drift.
+  double virtual_source_idx_;
+
+  // The buffer is primed once at the very beginning of processing.
+  bool buffer_primed_;
+
+  // Source of data for resampling.
+  SincResamplerCallback* read_cb_;
+
+  // The size (in samples) to request from each |read_cb_| execution.
+  const size_t request_frames_;
+
+  // The number of source frames processed per pass.
+  size_t block_size_;
+
+  // The size (in samples) of the internal buffer used by the resampler.
+  const size_t input_buffer_size_;
+
+  // Contains kKernelOffsetCount kernels back-to-back, each of size
+  // kKernelSize. The kernel offsets are sub-sample shifts of a windowed sinc
+  // shifted from 0.0 to 1.0 sample.
+  rtc::scoped_ptr<float[], AlignedFreeDeleter> kernel_storage_;
+  rtc::scoped_ptr<float[], AlignedFreeDeleter> kernel_pre_sinc_storage_;
+  rtc::scoped_ptr<float[], AlignedFreeDeleter> kernel_window_storage_;
+
+  // Data from the source is copied into this buffer for each processing pass.
+  rtc::scoped_ptr<float[], AlignedFreeDeleter> input_buffer_;
+
+  // Stores the runtime selection of which Convolve function to use.
+  // TODO(ajm): Move to using a global static which must only be initialized
+  // once by the user. We're not doing this initially, because we don't have
+  // e.g. a LazyInstance helper in webrtc.
+#if defined(WEBRTC_CPU_DETECTION)
+  typedef float (*ConvolveProc)(const float*, const float*, const float*,
+                                double);
+  ConvolveProc convolve_proc_;
+#endif
+
+  // Pointers to the various regions inside |input_buffer_|. See the diagram
+  // at the top of the .cc file for more information.
+  float* r0_;
+  float* const r1_;
+  float* const r2_;
+  float* r3_;
+  float* r4_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(SincResampler);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_RESAMPLER_SINC_RESAMPLER_H_
diff --git a/webrtc/common_audio/resampler/sinc_resampler_neon.cc b/webrtc/common_audio/resampler/sinc_resampler_neon.cc
new file mode 100644
index 0000000..e909a6c
--- /dev/null
+++ b/webrtc/common_audio/resampler/sinc_resampler_neon.cc
@@ -0,0 +1,47 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Modified from the Chromium original:
+// src/media/base/sinc_resampler.cc
+
+#include "webrtc/common_audio/resampler/sinc_resampler.h"
+
+#include <arm_neon.h>
+
+namespace webrtc {
+
+float SincResampler::Convolve_NEON(const float* input_ptr, const float* k1,
+                                   const float* k2,
+                                   double kernel_interpolation_factor) {
+  float32x4_t m_input;
+  float32x4_t m_sums1 = vmovq_n_f32(0);
+  float32x4_t m_sums2 = vmovq_n_f32(0);
+
+  const float* upper = input_ptr + kKernelSize;
+  for (; input_ptr < upper; ) {
+    m_input = vld1q_f32(input_ptr);
+    input_ptr += 4;
+    m_sums1 = vmlaq_f32(m_sums1, m_input, vld1q_f32(k1));
+    k1 += 4;
+    m_sums2 = vmlaq_f32(m_sums2, m_input, vld1q_f32(k2));
+    k2 += 4;
+  }
+
+  // Linearly interpolate the two "convolutions".
+  m_sums1 = vmlaq_f32(
+      vmulq_f32(m_sums1, vmovq_n_f32(1.0 - kernel_interpolation_factor)),
+      m_sums2, vmovq_n_f32(kernel_interpolation_factor));
+
+  // Sum components together.
+  float32x2_t m_half = vadd_f32(vget_high_f32(m_sums1), vget_low_f32(m_sums1));
+  return vget_lane_f32(vpadd_f32(m_half, m_half), 0);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/resampler/sinc_resampler_sse.cc b/webrtc/common_audio/resampler/sinc_resampler_sse.cc
new file mode 100644
index 0000000..9e3953f
--- /dev/null
+++ b/webrtc/common_audio/resampler/sinc_resampler_sse.cc
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Modified from the Chromium original:
+// src/media/base/simd/sinc_resampler_sse.cc
+
+#include "webrtc/common_audio/resampler/sinc_resampler.h"
+
+#include <xmmintrin.h>
+
+namespace webrtc {
+
+float SincResampler::Convolve_SSE(const float* input_ptr, const float* k1,
+                                  const float* k2,
+                                  double kernel_interpolation_factor) {
+  __m128 m_input;
+  __m128 m_sums1 = _mm_setzero_ps();
+  __m128 m_sums2 = _mm_setzero_ps();
+
+  // Based on |input_ptr| alignment, we need to use loadu or load. Unrolling
+  // these loops hurt performance in local testing.
+  if (reinterpret_cast<uintptr_t>(input_ptr) & 0x0F) {
+    for (size_t i = 0; i < kKernelSize; i += 4) {
+      m_input = _mm_loadu_ps(input_ptr + i);
+      m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
+      m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
+    }
+  } else {
+    for (size_t i = 0; i < kKernelSize; i += 4) {
+      m_input = _mm_load_ps(input_ptr + i);
+      m_sums1 = _mm_add_ps(m_sums1, _mm_mul_ps(m_input, _mm_load_ps(k1 + i)));
+      m_sums2 = _mm_add_ps(m_sums2, _mm_mul_ps(m_input, _mm_load_ps(k2 + i)));
+    }
+  }
+
+  // Linearly interpolate the two "convolutions".
+  m_sums1 = _mm_mul_ps(m_sums1, _mm_set_ps1(
+      static_cast<float>(1.0 - kernel_interpolation_factor)));
+  m_sums2 = _mm_mul_ps(m_sums2, _mm_set_ps1(
+      static_cast<float>(kernel_interpolation_factor)));
+  m_sums1 = _mm_add_ps(m_sums1, m_sums2);
+
+  // Sum components together.
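+  // The movehl/add step below folds the upper two lanes of |m_sums1| onto
+  // the lower two, and the shuffle/add_ss step then adds lane 1 into lane 0,
+  // leaving the horizontal sum of all four lanes in the low lane.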
+  float result;
+  m_sums2 = _mm_add_ps(_mm_movehl_ps(m_sums1, m_sums1), m_sums1);
+  _mm_store_ss(&result, _mm_add_ss(m_sums2, _mm_shuffle_ps(
+      m_sums2, m_sums2, 1)));
+
+  return result;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.cc b/webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.cc
new file mode 100644
index 0000000..5d21568
--- /dev/null
+++ b/webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.cc
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include "webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.h"
+
+#include <math.h>
+
+namespace webrtc {
+
+SinusoidalLinearChirpSource::SinusoidalLinearChirpSource(int sample_rate,
+                                                         size_t samples,
+                                                         double max_frequency,
+                                                         double delay_samples)
+    : sample_rate_(sample_rate),
+      total_samples_(samples),
+      max_frequency_(max_frequency),
+      current_index_(0),
+      delay_samples_(delay_samples) {
+  // Chirp rate.
+  double duration = static_cast<double>(total_samples_) / sample_rate_;
+  k_ = (max_frequency_ - kMinFrequency) / duration;
+}
+
+void SinusoidalLinearChirpSource::Run(size_t frames, float* destination) {
+  for (size_t i = 0; i < frames; ++i, ++current_index_) {
+    // Filter out frequencies higher than Nyquist.
+    if (Frequency(current_index_) > 0.5 * sample_rate_) {
+      destination[i] = 0;
+    } else {
+      // Calculate time in seconds.
+      if (current_index_ < delay_samples_) {
+        destination[i] = 0;
+      } else {
+        // Sinusoidal linear chirp.
+        double t = (current_index_ - delay_samples_) / sample_rate_;
+        destination[i] =
+            sin(2 * M_PI * (kMinFrequency * t + (k_ / 2) * t * t));
+      }
+    }
+  }
+}
+
+double SinusoidalLinearChirpSource::Frequency(size_t position) {
+  return kMinFrequency + (position - delay_samples_) *
+      (max_frequency_ - kMinFrequency) / total_samples_;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.h b/webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.h
new file mode 100644
index 0000000..1807f86
--- /dev/null
+++ b/webrtc/common_audio/resampler/sinusoidal_linear_chirp_source.h
@@ -0,0 +1,55 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Modified from the Chromium original here:
+// src/media/base/sinc_resampler_unittest.cc
+
+#ifndef WEBRTC_COMMON_AUDIO_RESAMPLER_SINUSOIDAL_LINEAR_CHIRP_SOURCE_H_
+#define WEBRTC_COMMON_AUDIO_RESAMPLER_SINUSOIDAL_LINEAR_CHIRP_SOURCE_H_
+
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/common_audio/resampler/sinc_resampler.h"
+
+namespace webrtc {
+
+// Fake audio source for testing the resampler.
 Generates a sinusoidal linear
+// chirp (http://en.wikipedia.org/wiki/Chirp) which can be tuned to stress the
+// resampler for the specific sample rate conversion being used.
+class SinusoidalLinearChirpSource : public SincResamplerCallback {
+ public:
+  // |delay_samples| can be used to insert a fractional sample delay into the
+  // source. It will produce zeros until non-negative time is reached.
+  SinusoidalLinearChirpSource(int sample_rate, size_t samples,
+                              double max_frequency, double delay_samples);
+
+  virtual ~SinusoidalLinearChirpSource() {}
+
+  void Run(size_t frames, float* destination) override;
+
+  double Frequency(size_t position);
+
+ private:
+  enum {
+    kMinFrequency = 5
+  };
+
+  int sample_rate_;
+  size_t total_samples_;
+  double max_frequency_;
+  double k_;
+  size_t current_index_;
+  double delay_samples_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(SinusoidalLinearChirpSource);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_RESAMPLER_SINUSOIDAL_LINEAR_CHIRP_SOURCE_H_
diff --git a/webrtc/common_audio/ring_buffer.c b/webrtc/common_audio/ring_buffer.c
new file mode 100644
index 0000000..60fb5df
--- /dev/null
+++ b/webrtc/common_audio/ring_buffer.c
@@ -0,0 +1,247 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#include "webrtc/common_audio/ring_buffer.h"
+
+#include <stddef.h>  // size_t
+#include <stdlib.h>
+#include <string.h>
+
+enum Wrap {
+  SAME_WRAP,
+  DIFF_WRAP
+};
+
+struct RingBuffer {
+  size_t read_pos;
+  size_t write_pos;
+  size_t element_count;
+  size_t element_size;
+  enum Wrap rw_wrap;
+  char* data;
+};
+
+// Get address of region(s) from which we can read data.
+// If the region is contiguous, |data_ptr_bytes_2| will be zero.
+// If non-contiguous, |data_ptr_bytes_2| will be the size in bytes of the
+// second region. Returns room available to be read or |element_count|,
+// whichever is smaller.
+static size_t GetBufferReadRegions(RingBuffer* buf,
+                                   size_t element_count,
+                                   void** data_ptr_1,
+                                   size_t* data_ptr_bytes_1,
+                                   void** data_ptr_2,
+                                   size_t* data_ptr_bytes_2) {
+
+  const size_t readable_elements = WebRtc_available_read(buf);
+  const size_t read_elements = (readable_elements < element_count ?
+      readable_elements : element_count);
+  const size_t margin = buf->element_count - buf->read_pos;
+
+  // Check to see if read is not contiguous.
+  if (read_elements > margin) {
+    // Read data in two blocks that wrap the buffer.
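+    // For example, with element_count = 8, read_pos = 6 and five readable
+    // elements, margin = 2: region 1 covers elements 6-7 at the end of the
+    // buffer and region 2 covers elements 0-2 at the start.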
+ *data_ptr_1 = buf->data + buf->read_pos * buf->element_size; + *data_ptr_bytes_1 = margin * buf->element_size; + *data_ptr_2 = buf->data; + *data_ptr_bytes_2 = (read_elements - margin) * buf->element_size; + } else { + *data_ptr_1 = buf->data + buf->read_pos * buf->element_size; + *data_ptr_bytes_1 = read_elements * buf->element_size; + *data_ptr_2 = NULL; + *data_ptr_bytes_2 = 0; + } + + return read_elements; +} + +RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size) { + RingBuffer* self = NULL; + if (element_count == 0 || element_size == 0) { + return NULL; + } + + self = malloc(sizeof(RingBuffer)); + if (!self) { + return NULL; + } + + self->data = malloc(element_count * element_size); + if (!self->data) { + free(self); + self = NULL; + return NULL; + } + + self->element_count = element_count; + self->element_size = element_size; + WebRtc_InitBuffer(self); + + return self; +} + +void WebRtc_InitBuffer(RingBuffer* self) { + self->read_pos = 0; + self->write_pos = 0; + self->rw_wrap = SAME_WRAP; + + // Initialize buffer to zeros + memset(self->data, 0, self->element_count * self->element_size); +} + +void WebRtc_FreeBuffer(void* handle) { + RingBuffer* self = (RingBuffer*)handle; + if (!self) { + return; + } + + free(self->data); + free(self); +} + +size_t WebRtc_ReadBuffer(RingBuffer* self, + void** data_ptr, + void* data, + size_t element_count) { + + if (self == NULL) { + return 0; + } + if (data == NULL) { + return 0; + } + + { + void* buf_ptr_1 = NULL; + void* buf_ptr_2 = NULL; + size_t buf_ptr_bytes_1 = 0; + size_t buf_ptr_bytes_2 = 0; + const size_t read_count = GetBufferReadRegions(self, + element_count, + &buf_ptr_1, + &buf_ptr_bytes_1, + &buf_ptr_2, + &buf_ptr_bytes_2); + + if (buf_ptr_bytes_2 > 0) { + // We have a wrap around when reading the buffer. Copy the buffer data to + // |data| and point to it. + memcpy(data, buf_ptr_1, buf_ptr_bytes_1); + memcpy(((char*) data) + buf_ptr_bytes_1, buf_ptr_2, buf_ptr_bytes_2); + buf_ptr_1 = data; + } else if (!data_ptr) { + // No wrap, but a memcpy was requested. + memcpy(data, buf_ptr_1, buf_ptr_bytes_1); + } + if (data_ptr) { + // |buf_ptr_1| == |data| in the case of a wrap. + *data_ptr = buf_ptr_1; + } + + // Update read position + WebRtc_MoveReadPtr(self, (int) read_count); + + return read_count; + } +} + +size_t WebRtc_WriteBuffer(RingBuffer* self, + const void* data, + size_t element_count) { + if (!self) { + return 0; + } + if (!data) { + return 0; + } + + { + const size_t free_elements = WebRtc_available_write(self); + const size_t write_elements = (free_elements < element_count ? free_elements + : element_count); + size_t n = write_elements; + const size_t margin = self->element_count - self->write_pos; + + if (write_elements > margin) { + // Buffer wrap around when writing. + memcpy(self->data + self->write_pos * self->element_size, + data, margin * self->element_size); + self->write_pos = 0; + n -= margin; + self->rw_wrap = DIFF_WRAP; + } + memcpy(self->data + self->write_pos * self->element_size, + ((const char*) data) + ((write_elements - n) * self->element_size), + n * self->element_size); + self->write_pos += n; + + return write_elements; + } +} + +int WebRtc_MoveReadPtr(RingBuffer* self, int element_count) { + if (!self) { + return 0; + } + + { + // We need to be able to take care of negative changes, hence use "int" + // instead of "size_t". 
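+    // A positive |element_count| consumes readable data (flushing), while a
+    // negative count rewinds the read position into already-read data
+    // (stuffing); the clamps below keep the move within the buffer contents.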
+    const int free_elements = (int) WebRtc_available_write(self);
+    const int readable_elements = (int) WebRtc_available_read(self);
+    int read_pos = (int) self->read_pos;
+
+    if (element_count > readable_elements) {
+      element_count = readable_elements;
+    }
+    if (element_count < -free_elements) {
+      element_count = -free_elements;
+    }
+
+    read_pos += element_count;
+    if (read_pos > (int) self->element_count) {
+      // Buffer wrap around. Restart read position and wrap indicator.
+      read_pos -= (int) self->element_count;
+      self->rw_wrap = SAME_WRAP;
+    }
+    if (read_pos < 0) {
+      // Buffer wrap around. Restart read position and wrap indicator.
+      read_pos += (int) self->element_count;
+      self->rw_wrap = DIFF_WRAP;
+    }
+
+    self->read_pos = (size_t) read_pos;
+
+    return element_count;
+  }
+}
+
+size_t WebRtc_available_read(const RingBuffer* self) {
+  if (!self) {
+    return 0;
+  }
+
+  if (self->rw_wrap == SAME_WRAP) {
+    return self->write_pos - self->read_pos;
+  } else {
+    return self->element_count - self->read_pos + self->write_pos;
+  }
+}
+
+size_t WebRtc_available_write(const RingBuffer* self) {
+  if (!self) {
+    return 0;
+  }
+
+  return self->element_count - WebRtc_available_read(self);
+}
diff --git a/webrtc/common_audio/ring_buffer.h b/webrtc/common_audio/ring_buffer.h
new file mode 100644
index 0000000..4125c48
--- /dev/null
+++ b/webrtc/common_audio/ring_buffer.h
@@ -0,0 +1,66 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// A ring buffer to hold arbitrary data. Provides no thread safety. Unless
+// otherwise specified, functions return 0 on success and -1 on error.
+
+#ifndef WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+#define WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>  // size_t
+
+typedef struct RingBuffer RingBuffer;
+
+// Creates and initializes the buffer. Returns NULL on failure.
+RingBuffer* WebRtc_CreateBuffer(size_t element_count, size_t element_size);
+void WebRtc_InitBuffer(RingBuffer* handle);
+void WebRtc_FreeBuffer(void* handle);
+
+// Reads data from the buffer. The |data_ptr| will point to the address where
+// it is located. If all |element_count| data are feasible to read without
+// buffer wrap around, |data_ptr| will point to the location in the buffer.
+// Otherwise, the data will be copied to |data| (memory allocation done by the
+// user) and |data_ptr| points to the address of |data|. |data_ptr| is only
+// guaranteed to be valid until the next call to WebRtc_WriteBuffer().
+//
+// To force a copying to |data|, pass a NULL |data_ptr|.
+//
+// Returns number of elements read.
+size_t WebRtc_ReadBuffer(RingBuffer* handle,
+                         void** data_ptr,
+                         void* data,
+                         size_t element_count);
+
+// Writes |data| to buffer and returns the number of elements written.
+size_t WebRtc_WriteBuffer(RingBuffer* handle, const void* data,
+                          size_t element_count);
+
+// Moves the buffer read position and returns the number of elements moved.
+// Positive |element_count| moves the read position towards the write
+// position, that is, flushing the buffer. Negative |element_count| moves the
+// read position away from the write position, that is, stuffing the buffer.
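+// For example, WebRtc_MoveReadPtr(handle, -10) re-exposes the 10 most
+// recently read elements so the next WebRtc_ReadBuffer() call returns them
+// again (provided they have not been overwritten in the meantime).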
+// Returns number of elements moved.
+int WebRtc_MoveReadPtr(RingBuffer* handle, int element_count);
+
+// Returns number of available elements to read.
+size_t WebRtc_available_read(const RingBuffer* handle);
+
+// Returns number of available elements for write.
+size_t WebRtc_available_write(const RingBuffer* handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_RING_BUFFER_H_
diff --git a/webrtc/common_audio/sparse_fir_filter.cc b/webrtc/common_audio/sparse_fir_filter.cc
new file mode 100644
index 0000000..5862b7c
--- /dev/null
+++ b/webrtc/common_audio/sparse_fir_filter.cc
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/sparse_fir_filter.h"
+
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+
+SparseFIRFilter::SparseFIRFilter(const float* nonzero_coeffs,
+                                 size_t num_nonzero_coeffs,
+                                 size_t sparsity,
+                                 size_t offset)
+    : sparsity_(sparsity),
+      offset_(offset),
+      nonzero_coeffs_(nonzero_coeffs, nonzero_coeffs + num_nonzero_coeffs),
+      state_(sparsity_ * (num_nonzero_coeffs - 1) + offset_, 0.f) {
+  RTC_CHECK_GE(num_nonzero_coeffs, 1u);
+  RTC_CHECK_GE(sparsity, 1u);
+}
+
+void SparseFIRFilter::Filter(const float* in, size_t length, float* out) {
+  // Convolves the input signal |in| with the filter kernel |nonzero_coeffs_|
+  // taking into account the previous state.
+  for (size_t i = 0; i < length; ++i) {
+    out[i] = 0.f;
+    size_t j;
+    for (j = 0; i >= j * sparsity_ + offset_ &&
+         j < nonzero_coeffs_.size(); ++j) {
+      out[i] += in[i - j * sparsity_ - offset_] * nonzero_coeffs_[j];
+    }
+    for (; j < nonzero_coeffs_.size(); ++j) {
+      out[i] += state_[i + (nonzero_coeffs_.size() - j - 1) * sparsity_] *
+          nonzero_coeffs_[j];
+    }
+  }
+
+  // Update current state.
+  if (state_.size() > 0u) {
+    if (length >= state_.size()) {
+      std::memcpy(&state_[0],
+                  &in[length - state_.size()],
+                  state_.size() * sizeof(*in));
+    } else {
+      std::memmove(&state_[0],
+                   &state_[length],
+                   (state_.size() - length) * sizeof(state_[0]));
+      std::memcpy(&state_[state_.size() - length], in, length * sizeof(*in));
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/sparse_fir_filter.h b/webrtc/common_audio/sparse_fir_filter.h
new file mode 100644
index 0000000..2ba5cf4
--- /dev/null
+++ b/webrtc/common_audio/sparse_fir_filter.h
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_SPARSE_FIR_FILTER_H_
+#define WEBRTC_COMMON_AUDIO_SPARSE_FIR_FILTER_H_
+
+#include <cstring>
+#include <vector>
+
+#include "webrtc/base/constructormagic.h"
+
+namespace webrtc {
+
+// A Finite Impulse Response filter implementation which takes advantage of a
+// sparse structure with uniformly distributed non-zero coefficients.
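+// In other words, the filter computes
+//   y[n] = sum_j coeffs[j] * x[n - offset - j * sparsity]
+// over the non-zero coefficients only, rather than over the full kernel.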
+class SparseFIRFilter final {
+ public:
+  // |num_nonzero_coeffs| is the number of non-zero coefficients,
+  // |nonzero_coeffs|. They are assumed to be uniformly distributed every
+  // |sparsity| samples and with an initial |offset|. The rest of the filter
+  // coefficients will be assumed zeros. For example, with sparsity = 3, and
+  // offset = 1 the filter coefficients will be:
+  // B = [0 coeffs[0] 0 0 coeffs[1] 0 0 coeffs[2] ... ]
+  // All initial state values will be zeros.
+  SparseFIRFilter(const float* nonzero_coeffs,
+                  size_t num_nonzero_coeffs,
+                  size_t sparsity,
+                  size_t offset);
+
+  // Filters the |in| data supplied.
+  // |out| must be previously allocated and it must be at least of |length|.
+  void Filter(const float* in, size_t length, float* out);
+
+ private:
+  const size_t sparsity_;
+  const size_t offset_;
+  const std::vector<float> nonzero_coeffs_;
+  std::vector<float> state_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(SparseFIRFilter);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_SPARSE_FIR_FILTER_H_
diff --git a/webrtc/common_audio/wav_file.cc b/webrtc/common_audio/wav_file.cc
new file mode 100644
index 0000000..8dae7d6
--- /dev/null
+++ b/webrtc/common_audio/wav_file.cc
@@ -0,0 +1,174 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/common_audio/wav_file.h"
+
+#include <algorithm>
+#include <cstdio>
+#include <limits>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/safe_conversions.h"
+#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/common_audio/wav_header.h"
+
+namespace webrtc {
+
+// We write 16-bit PCM WAV files.
+static const WavFormat kWavFormat = kWavFormatPcm;
+static const int kBytesPerSample = 2;
+
+// Doesn't take ownership of the file handle and won't close it.
+class ReadableWavFile : public ReadableWav {
+ public:
+  explicit ReadableWavFile(FILE* file) : file_(file) {}
+  virtual size_t Read(void* buf, size_t num_bytes) {
+    return fread(buf, 1, num_bytes, file_);
+  }
+
+ private:
+  FILE* file_;
+};
+
+WavReader::WavReader(const std::string& filename)
+    : file_handle_(fopen(filename.c_str(), "rb")) {
+  RTC_CHECK(file_handle_ && "Could not open wav file for reading.");
+
+  ReadableWavFile readable(file_handle_);
+  WavFormat format;
+  int bytes_per_sample;
+  RTC_CHECK(ReadWavHeader(&readable, &num_channels_, &sample_rate_, &format,
+                          &bytes_per_sample, &num_samples_));
+  num_samples_remaining_ = num_samples_;
+  RTC_CHECK_EQ(kWavFormat, format);
+  RTC_CHECK_EQ(kBytesPerSample, bytes_per_sample);
+}
+
+WavReader::~WavReader() {
+  Close();
+}
+
+size_t WavReader::ReadSamples(size_t num_samples, int16_t* samples) {
+#ifndef WEBRTC_ARCH_LITTLE_ENDIAN
+#error "Need to convert samples to big-endian when reading from WAV file"
+#endif
+  // There could be metadata after the audio; ensure we don't read it.
+  num_samples = std::min(rtc::checked_cast<uint32_t>(num_samples),
+                         num_samples_remaining_);
+  const size_t read =
+      fread(samples, sizeof(*samples), num_samples, file_handle_);
+  // If we didn't read what was requested, ensure we've reached the EOF.
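+  // A short read is expected only at end-of-file; anything else would
+  // indicate a truncated file or an I/O error.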
+  RTC_CHECK(read == num_samples || feof(file_handle_));
+  RTC_CHECK_LE(read, num_samples_remaining_);
+  num_samples_remaining_ -= rtc::checked_cast<uint32_t>(read);
+  return read;
+}
+
+size_t WavReader::ReadSamples(size_t num_samples, float* samples) {
+  static const size_t kChunksize = 4096 / sizeof(uint16_t);
+  size_t read = 0;
+  for (size_t i = 0; i < num_samples; i += kChunksize) {
+    int16_t isamples[kChunksize];
+    size_t chunk = std::min(kChunksize, num_samples - i);
+    chunk = ReadSamples(chunk, isamples);
+    for (size_t j = 0; j < chunk; ++j)
+      samples[i + j] = isamples[j];
+    read += chunk;
+  }
+  return read;
+}
+
+void WavReader::Close() {
+  RTC_CHECK_EQ(0, fclose(file_handle_));
+  file_handle_ = NULL;
+}
+
+WavWriter::WavWriter(const std::string& filename, int sample_rate,
+                     int num_channels)
+    : sample_rate_(sample_rate),
+      num_channels_(num_channels),
+      num_samples_(0),
+      file_handle_(fopen(filename.c_str(), "wb")) {
+  RTC_CHECK(file_handle_ && "Could not open wav file for writing.");
+  RTC_CHECK(CheckWavParameters(num_channels_, sample_rate_, kWavFormat,
+                               kBytesPerSample, num_samples_));
+
+  // Write a blank placeholder header, since we need to know the total number
+  // of samples before we can fill in the real data.
+  static const uint8_t blank_header[kWavHeaderSize] = {0};
+  RTC_CHECK_EQ(1u, fwrite(blank_header, kWavHeaderSize, 1, file_handle_));
+}
+
+WavWriter::~WavWriter() {
+  Close();
+}
+
+void WavWriter::WriteSamples(const int16_t* samples, size_t num_samples) {
+#ifndef WEBRTC_ARCH_LITTLE_ENDIAN
+#error "Need to convert samples to little-endian when writing to WAV file"
+#endif
+  const size_t written =
+      fwrite(samples, sizeof(*samples), num_samples, file_handle_);
+  RTC_CHECK_EQ(num_samples, written);
+  num_samples_ += static_cast<uint32_t>(written);
+  RTC_CHECK(written <= std::numeric_limits<uint32_t>::max() ||
+            num_samples_ >= written);  // detect uint32_t overflow
+}
+
+void WavWriter::WriteSamples(const float* samples, size_t num_samples) {
+  static const size_t kChunksize = 4096 / sizeof(uint16_t);
+  for (size_t i = 0; i < num_samples; i += kChunksize) {
+    int16_t isamples[kChunksize];
+    const size_t chunk = std::min(kChunksize, num_samples - i);
+    FloatS16ToS16(samples + i, chunk, isamples);
+    WriteSamples(isamples, chunk);
+  }
+}
+
+void WavWriter::Close() {
+  RTC_CHECK_EQ(0, fseek(file_handle_, 0, SEEK_SET));
+  uint8_t header[kWavHeaderSize];
+  WriteWavHeader(header, num_channels_, sample_rate_, kWavFormat,
+                 kBytesPerSample, num_samples_);
+  RTC_CHECK_EQ(1u, fwrite(header, kWavHeaderSize, 1, file_handle_));
+  RTC_CHECK_EQ(0, fclose(file_handle_));
+  file_handle_ = NULL;
+}
+
+}  // namespace webrtc
+
+rtc_WavWriter* rtc_WavOpen(const char* filename,
+                           int sample_rate,
+                           int num_channels) {
+  return reinterpret_cast<rtc_WavWriter*>(
+      new webrtc::WavWriter(filename, sample_rate, num_channels));
+}
+
+void rtc_WavClose(rtc_WavWriter* wf) {
+  delete reinterpret_cast<webrtc::WavWriter*>(wf);
+}
+
+void rtc_WavWriteSamples(rtc_WavWriter* wf,
+                         const float* samples,
+                         size_t num_samples) {
+  reinterpret_cast<webrtc::WavWriter*>(wf)->WriteSamples(samples, num_samples);
+}
+
+int rtc_WavSampleRate(const rtc_WavWriter* wf) {
+  return reinterpret_cast<const webrtc::WavWriter*>(wf)->sample_rate();
+}
+
+int rtc_WavNumChannels(const rtc_WavWriter* wf) {
+  return reinterpret_cast<const webrtc::WavWriter*>(wf)->num_channels();
+}
+
+uint32_t rtc_WavNumSamples(const rtc_WavWriter* wf) {
+  return reinterpret_cast<const webrtc::WavWriter*>(wf)->num_samples();
+}
diff --git a/webrtc/common_audio/wav_file.h b/webrtc/common_audio/wav_file.h
new file mode 100644
index 0000000..2eadd3f
--- /dev/null
+++ b/webrtc/common_audio/wav_file.h
@@ -0,0 +1,115 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+#define WEBRTC_COMMON_AUDIO_WAV_FILE_H_
+
+#ifdef __cplusplus
+
+#include <stdint.h>
+#include <cstddef>
+#include <string>
+
+#include "webrtc/base/constructormagic.h"
+
+namespace webrtc {
+
+// Interface to provide access to WAV file parameters.
+class WavFile {
+ public:
+  virtual ~WavFile() {}
+
+  virtual int sample_rate() const = 0;
+  virtual int num_channels() const = 0;
+  virtual uint32_t num_samples() const = 0;
+};
+
+// Simple C++ class for writing 16-bit PCM WAV files. All error handling is
+// by calls to RTC_CHECK(), making it unsuitable for anything but debug code.
+class WavWriter final : public WavFile {
+ public:
+  // Open a new WAV file for writing.
+  WavWriter(const std::string& filename, int sample_rate, int num_channels);
+
+  // Close the WAV file, after writing its header.
+  ~WavWriter();
+
+  // Write additional samples to the file. Each sample is in the range
+  // [-32768,32767], and there must be the previously specified number of
+  // interleaved channels.
+  void WriteSamples(const float* samples, size_t num_samples);
+  void WriteSamples(const int16_t* samples, size_t num_samples);
+
+  int sample_rate() const override { return sample_rate_; }
+  int num_channels() const override { return num_channels_; }
+  uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+  void Close();
+  const int sample_rate_;
+  const int num_channels_;
+  uint32_t num_samples_;  // Total number of samples written to file.
+  FILE* file_handle_;  // Output file, owned by this class.
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(WavWriter);
+};
+
+// Follows the conventions of WavWriter.
+class WavReader final : public WavFile {
+ public:
+  // Opens an existing WAV file for reading.
+  explicit WavReader(const std::string& filename);
+
+  // Close the WAV file.
+  ~WavReader();
+
+  // Returns the number of samples read. If this is less than requested,
+  // verifies that the end of the file was reached.
+  size_t ReadSamples(size_t num_samples, float* samples);
+  size_t ReadSamples(size_t num_samples, int16_t* samples);
+
+  int sample_rate() const override { return sample_rate_; }
+  int num_channels() const override { return num_channels_; }
+  uint32_t num_samples() const override { return num_samples_; }
+
+ private:
+  void Close();
+  int sample_rate_;
+  int num_channels_;
+  uint32_t num_samples_;  // Total number of samples in the file.
+  uint32_t num_samples_remaining_;
+  FILE* file_handle_;  // Input file, owned by this class.
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(WavReader);
+};
+
+}  // namespace webrtc
+
+extern "C" {
+#endif  // __cplusplus
+
+// C wrappers for the WavWriter class.
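+// A minimal usage sketch (file name and buffer here are hypothetical):
+//   rtc_WavWriter* w = rtc_WavOpen("out.wav", 48000, 1);
+//   rtc_WavWriteSamples(w, buffer, buffer_len);
+//   rtc_WavClose(w);  // writes the final header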
+typedef struct rtc_WavWriter rtc_WavWriter;
+rtc_WavWriter* rtc_WavOpen(const char* filename,
+                           int sample_rate,
+                           int num_channels);
+void rtc_WavClose(rtc_WavWriter* wf);
+void rtc_WavWriteSamples(rtc_WavWriter* wf,
+                         const float* samples,
+                         size_t num_samples);
+int rtc_WavSampleRate(const rtc_WavWriter* wf);
+int rtc_WavNumChannels(const rtc_WavWriter* wf);
+uint32_t rtc_WavNumSamples(const rtc_WavWriter* wf);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  // WEBRTC_COMMON_AUDIO_WAV_FILE_H_
diff --git a/webrtc/common_audio/wav_header.cc b/webrtc/common_audio/wav_header.cc
new file mode 100644
index 0000000..61cfffe
--- /dev/null
+++ b/webrtc/common_audio/wav_header.cc
@@ -0,0 +1,242 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Based on the WAV file format documentation at
+// https://ccrma.stanford.edu/courses/422/projects/WaveFormat/ and
+// http://www-mmsp.ece.mcgill.ca/Documents/AudioFormats/WAVE/WAVE.html
+
+#include "webrtc/common_audio/wav_header.h"
+
+#include <algorithm>
+#include <cstring>
+#include <limits>
+#include <string>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/common_audio/include/audio_util.h"
+
+namespace webrtc {
+namespace {
+
+struct ChunkHeader {
+  uint32_t ID;
+  uint32_t Size;
+};
+static_assert(sizeof(ChunkHeader) == 8, "ChunkHeader size");
+
+// We can't nest this definition in WavHeader, because VS2013 gives an error
+// on sizeof(WavHeader::fmt): "error C2070: 'unknown': illegal sizeof
+// operand".
+struct FmtSubchunk {
+  ChunkHeader header;
+  uint16_t AudioFormat;
+  uint16_t NumChannels;
+  uint32_t SampleRate;
+  uint32_t ByteRate;
+  uint16_t BlockAlign;
+  uint16_t BitsPerSample;
+};
+static_assert(sizeof(FmtSubchunk) == 24, "FmtSubchunk size");
+const uint32_t kFmtSubchunkSize = sizeof(FmtSubchunk) - sizeof(ChunkHeader);
+
+struct WavHeader {
+  struct {
+    ChunkHeader header;
+    uint32_t Format;
+  } riff;
+  FmtSubchunk fmt;
+  struct {
+    ChunkHeader header;
+  } data;
+};
+static_assert(sizeof(WavHeader) == kWavHeaderSize, "no padding in header");
+
+}  // namespace
+
+bool CheckWavParameters(int num_channels,
+                        int sample_rate,
+                        WavFormat format,
+                        int bytes_per_sample,
+                        uint32_t num_samples) {
+  // num_channels, sample_rate, and bytes_per_sample must be positive, must
+  // fit in their respective fields, and their product must fit in the 32-bit
+  // ByteRate field.
+  if (num_channels <= 0 || sample_rate <= 0 || bytes_per_sample <= 0)
+    return false;
+  if (static_cast<uint64_t>(sample_rate) >
+      std::numeric_limits<uint32_t>::max())
+    return false;
+  if (static_cast<uint64_t>(num_channels) >
+      std::numeric_limits<uint16_t>::max())
+    return false;
+  if (static_cast<uint64_t>(bytes_per_sample) * 8 >
+      std::numeric_limits<uint16_t>::max())
+    return false;
+  if (static_cast<uint64_t>(sample_rate) * num_channels * bytes_per_sample >
+      std::numeric_limits<uint32_t>::max())
+    return false;
+
+  // format and bytes_per_sample must agree.
+  switch (format) {
+    case kWavFormatPcm:
+      // Other values may be OK, but for now we're conservative:
+      if (bytes_per_sample != 1 && bytes_per_sample != 2)
+        return false;
+      break;
+    case kWavFormatALaw:
+    case kWavFormatMuLaw:
+      if (bytes_per_sample != 1)
+        return false;
+      break;
+    default:
+      return false;
+  }
+
+  // The number of bytes in the file, not counting the first ChunkHeader, must
+  // be less than 2^32; otherwise, the ChunkSize field overflows.
+  const uint32_t max_samples =
+      (std::numeric_limits<uint32_t>::max()
+       - (kWavHeaderSize - sizeof(ChunkHeader))) /
+      bytes_per_sample;
+  if (num_samples > max_samples)
+    return false;
+
+  // Each channel must have the same number of samples.
+  if (num_samples % num_channels != 0)
+    return false;
+
+  return true;
+}
+
+#ifdef WEBRTC_ARCH_LITTLE_ENDIAN
+static inline void WriteLE16(uint16_t* f, uint16_t x) { *f = x; }
+static inline void WriteLE32(uint32_t* f, uint32_t x) { *f = x; }
+static inline void WriteFourCC(uint32_t* f, char a, char b, char c, char d) {
+  *f = static_cast<uint32_t>(a)
+      | static_cast<uint32_t>(b) << 8
+      | static_cast<uint32_t>(c) << 16
+      | static_cast<uint32_t>(d) << 24;
+}
+
+static inline uint16_t ReadLE16(uint16_t x) { return x; }
+static inline uint32_t ReadLE32(uint32_t x) { return x; }
+static inline std::string ReadFourCC(uint32_t x) {
+  return std::string(reinterpret_cast<char*>(&x), 4);
+}
+#else
+#error "Write be-to-le conversion functions"
+#endif
+
+static inline uint32_t RiffChunkSize(uint32_t bytes_in_payload) {
+  return bytes_in_payload + kWavHeaderSize - sizeof(ChunkHeader);
+}
+
+static inline uint32_t ByteRate(int num_channels, int sample_rate,
+                                int bytes_per_sample) {
+  return static_cast<uint32_t>(num_channels) * sample_rate * bytes_per_sample;
+}
+
+static inline uint16_t BlockAlign(int num_channels, int bytes_per_sample) {
+  return num_channels * bytes_per_sample;
+}
+
+void WriteWavHeader(uint8_t* buf,
+                    int num_channels,
+                    int sample_rate,
+                    WavFormat format,
+                    int bytes_per_sample,
+                    uint32_t num_samples) {
+  RTC_CHECK(CheckWavParameters(num_channels, sample_rate, format,
+                               bytes_per_sample, num_samples));
+
+  WavHeader header;
+  const uint32_t bytes_in_payload = bytes_per_sample * num_samples;
+
+  WriteFourCC(&header.riff.header.ID, 'R', 'I', 'F', 'F');
+  WriteLE32(&header.riff.header.Size, RiffChunkSize(bytes_in_payload));
+  WriteFourCC(&header.riff.Format, 'W', 'A', 'V', 'E');
+
+  WriteFourCC(&header.fmt.header.ID, 'f', 'm', 't', ' ');
+  WriteLE32(&header.fmt.header.Size, kFmtSubchunkSize);
+  WriteLE16(&header.fmt.AudioFormat, format);
+  WriteLE16(&header.fmt.NumChannels, num_channels);
+  WriteLE32(&header.fmt.SampleRate, sample_rate);
+  WriteLE32(&header.fmt.ByteRate, ByteRate(num_channels, sample_rate,
+                                           bytes_per_sample));
+  WriteLE16(&header.fmt.BlockAlign, BlockAlign(num_channels,
+                                               bytes_per_sample));
+  WriteLE16(&header.fmt.BitsPerSample, 8 * bytes_per_sample);
+
+  WriteFourCC(&header.data.header.ID, 'd', 'a', 't', 'a');
+  WriteLE32(&header.data.header.Size, bytes_in_payload);
+
+  // Do an extra copy rather than writing everything to buf directly, since
+  // buf might not be correctly aligned.
+  memcpy(buf, &header, kWavHeaderSize);
+}
+
+bool ReadWavHeader(ReadableWav* readable,
+                   int* num_channels,
+                   int* sample_rate,
+                   WavFormat* format,
+                   int* bytes_per_sample,
+                   uint32_t* num_samples) {
+  WavHeader header;
+  if (readable->Read(&header, kWavHeaderSize - sizeof(header.data)) !=
+      kWavHeaderSize - sizeof(header.data))
+    return false;
+
+  const uint32_t fmt_size = ReadLE32(header.fmt.header.Size);
+  if (fmt_size != kFmtSubchunkSize) {
+    // There is an optional two-byte extension field permitted to be present
+    // with PCM, but which must be zero.
+    int16_t ext_size;
+    if (kFmtSubchunkSize + sizeof(ext_size) != fmt_size)
+      return false;
+    if (readable->Read(&ext_size, sizeof(ext_size)) != sizeof(ext_size))
+      return false;
+    if (ext_size != 0)
+      return false;
+  }
+  if (readable->Read(&header.data, sizeof(header.data)) !=
+      sizeof(header.data))
+    return false;
+
+  // Parse needed fields.
+  *format = static_cast<WavFormat>(ReadLE16(header.fmt.AudioFormat));
+  *num_channels = ReadLE16(header.fmt.NumChannels);
+  *sample_rate = ReadLE32(header.fmt.SampleRate);
+  *bytes_per_sample = ReadLE16(header.fmt.BitsPerSample) / 8;
+  const uint32_t bytes_in_payload = ReadLE32(header.data.header.Size);
+  if (*bytes_per_sample <= 0)
+    return false;
+  *num_samples = bytes_in_payload / *bytes_per_sample;
+
+  // Sanity check remaining fields.
+  if (ReadFourCC(header.riff.header.ID) != "RIFF")
+    return false;
+  if (ReadFourCC(header.riff.Format) != "WAVE")
+    return false;
+  if (ReadFourCC(header.fmt.header.ID) != "fmt ")
+    return false;
+  if (ReadFourCC(header.data.header.ID) != "data")
+    return false;
+
+  if (ReadLE32(header.riff.header.Size) < RiffChunkSize(bytes_in_payload))
+    return false;
+  if (ReadLE32(header.fmt.ByteRate) !=
+      ByteRate(*num_channels, *sample_rate, *bytes_per_sample))
+    return false;
+  if (ReadLE16(header.fmt.BlockAlign) !=
+      BlockAlign(*num_channels, *bytes_per_sample))
+    return false;
+
+  return CheckWavParameters(*num_channels, *sample_rate, *format,
+                            *bytes_per_sample, *num_samples);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/common_audio/wav_header.h b/webrtc/common_audio/wav_header.h
new file mode 100644
index 0000000..1a0fd7c
--- /dev/null
+++ b/webrtc/common_audio/wav_header.h
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_WAV_HEADER_H_
+#define WEBRTC_COMMON_AUDIO_WAV_HEADER_H_
+
+#include <stddef.h>
+#include <stdint.h>
+
+namespace webrtc {
+
+static const size_t kWavHeaderSize = 44;
+
+class ReadableWav {
+ public:
+  // Returns the number of bytes read.
+  size_t virtual Read(void* buf, size_t num_bytes) = 0;
+  virtual ~ReadableWav() {}
+};
+
+enum WavFormat {
+  kWavFormatPcm   = 1,  // PCM, each sample of size bytes_per_sample
+  kWavFormatALaw  = 6,  // 8-bit ITU-T G.711 A-law
+  kWavFormatMuLaw = 7,  // 8-bit ITU-T G.711 mu-law
+};
+
+// Return true if the given parameters will make a well-formed WAV header.
+bool CheckWavParameters(int num_channels,
+                        int sample_rate,
+                        WavFormat format,
+                        int bytes_per_sample,
+                        uint32_t num_samples);
+
+// Write a kWavHeaderSize bytes long WAV header to buf.
 The payload that
+// follows the header is supposed to have the specified number of interleaved
+// channels and contain the specified total number of samples of the
+// specified type. CHECKs the input parameters for validity.
+void WriteWavHeader(uint8_t* buf,
+                    int num_channels,
+                    int sample_rate,
+                    WavFormat format,
+                    int bytes_per_sample,
+                    uint32_t num_samples);
+
+// Read a WAV header from an implemented ReadableWav and parse the values into
+// the provided output parameters. ReadableWav is used because the header can
+// be variably sized. Returns false if the header is invalid.
+bool ReadWavHeader(ReadableWav* readable,
+                   int* num_channels,
+                   int* sample_rate,
+                   WavFormat* format,
+                   int* bytes_per_sample,
+                   uint32_t* num_samples);
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_WAV_HEADER_H_
diff --git a/webrtc/common_audio/window_generator.cc b/webrtc/common_audio/window_generator.cc
new file mode 100644
index 0000000..ab983b7
--- /dev/null
+++ b/webrtc/common_audio/window_generator.cc
@@ -0,0 +1,72 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#define _USE_MATH_DEFINES
+
+#include "webrtc/common_audio/window_generator.h"
+
+#include <cmath>
+#include <complex>
+
+#include "webrtc/base/checks.h"
+
+using std::complex;
+
+namespace {
+
+// Modified Bessel function of order 0 for complex inputs.
+complex<float> I0(complex<float> x) {
+  complex<float> y = x / 3.75f;
+  y *= y;
+  return 1.0f + y * (
+      3.5156229f + y * (
+          3.0899424f + y * (
+              1.2067492f + y * (
+                  0.2659732f + y * (
+                      0.360768e-1f + y * 0.45813e-2f)))));
+}
+
+}  // namespace
+
+namespace webrtc {
+
+void WindowGenerator::Hanning(int length, float* window) {
+  RTC_CHECK_GT(length, 1);
+  RTC_CHECK(window != nullptr);
+  for (int i = 0; i < length; ++i) {
+    window[i] = 0.5f * (1 - cosf(2 * static_cast<float>(M_PI) * i /
+                                 (length - 1)));
+  }
+}
+
+void WindowGenerator::KaiserBesselDerived(float alpha, size_t length,
+                                          float* window) {
+  RTC_CHECK_GT(length, 1U);
+  RTC_CHECK(window != nullptr);
+
+  const size_t half = (length + 1) / 2;
+  float sum = 0.0f;
+
+  for (size_t i = 0; i <= half; ++i) {
+    complex<float> r = (4.0f * i) / length - 1.0f;
+    sum += I0(static_cast<float>(M_PI) * alpha * sqrt(1.0f - r * r)).real();
+    window[i] = sum;
+  }
+  for (size_t i = length - 1; i >= half; --i) {
+    window[length - i - 1] = sqrtf(window[length - i - 1] / sum);
+    window[i] = window[length - i - 1];
+  }
+  if (length % 2 == 1) {
+    window[half - 1] = sqrtf(window[half - 1] / sum);
+  }
+}
+
+}  // namespace webrtc
+
diff --git a/webrtc/common_audio/window_generator.h b/webrtc/common_audio/window_generator.h
new file mode 100644
index 0000000..25dd233
--- /dev/null
+++ b/webrtc/common_audio/window_generator.h
@@ -0,0 +1,33 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_COMMON_AUDIO_WINDOW_GENERATOR_H_
+#define WEBRTC_COMMON_AUDIO_WINDOW_GENERATOR_H_
+
+#include <stddef.h>
+
+#include "webrtc/base/constructormagic.h"
+
+namespace webrtc {
+
+// Helper class with generators for various signal transform windows.
+class WindowGenerator {
+ public:
+  static void Hanning(int length, float* window);
+  static void KaiserBesselDerived(float alpha, size_t length, float* window);
+
+ private:
+  RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(WindowGenerator);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_COMMON_AUDIO_WINDOW_GENERATOR_H_
+
diff --git a/webrtc/modules/Makefile.am b/webrtc/modules/Makefile.am
index 4b0bbc4..a2b4429 100644
--- a/webrtc/modules/Makefile.am
+++ b/webrtc/modules/Makefile.am
@@ -1 +1 @@
-SUBDIRS = audio_processing
+SUBDIRS = audio_coding audio_processing
diff --git a/webrtc/modules/audio_coding/BUILD.gn b/webrtc/modules/audio_coding/BUILD.gn
new file mode 100644
index 0000000..7bbcd3a
--- /dev/null
+++ b/webrtc/modules/audio_coding/BUILD.gn
@@ -0,0 +1,835 @@
+# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+#
+# Use of this source code is governed by a BSD-style license
+# that can be found in the LICENSE file in the root of the source
+# tree. An additional intellectual property rights grant can be found
+# in the file PATENTS. All contributing project authors may
+# be found in the AUTHORS file in the root of the source tree.
+
+import("//build/config/arm.gni")
+import("../../build/webrtc.gni")
+
+config("audio_coding_config") {
+  include_dirs = [
+    "main/interface",
+    "../interface",
+  ]
+}
+
+source_set("audio_coding") {
+  sources = [
+    "main/acm2/acm_codec_database.cc",
+    "main/acm2/acm_codec_database.h",
+    "main/acm2/acm_common_defs.h",
+    "main/acm2/acm_receiver.cc",
+    "main/acm2/acm_receiver.h",
+    "main/acm2/acm_resampler.cc",
+    "main/acm2/acm_resampler.h",
+    "main/acm2/audio_coding_module.cc",
+    "main/acm2/audio_coding_module_impl.cc",
+    "main/acm2/audio_coding_module_impl.h",
+    "main/acm2/call_statistics.cc",
+    "main/acm2/call_statistics.h",
+    "main/acm2/codec_manager.cc",
+    "main/acm2/codec_manager.h",
+    "main/acm2/codec_owner.cc",
+    "main/acm2/codec_owner.h",
+    "main/acm2/initial_delay_manager.cc",
+    "main/acm2/initial_delay_manager.h",
+    "main/acm2/nack.cc",
+    "main/acm2/nack.h",
+    "main/interface/audio_coding_module.h",
+    "main/interface/audio_coding_module_typedefs.h",
+  ]
+
+  defines = []
+
+  configs += [ "../..:common_config" ]
+
+  public_configs = [
+    "../..:common_inherited_config",
+    ":audio_coding_config",
+  ]
+
+  if (is_win) {
+    cflags = [
+      # TODO(kjellander): Bug 261: fix this warning.
+      "/wd4373",  # virtual function override.
+    ]
+  }
+
+  if (is_clang) {
+    # Suppress warnings from Chrome's Clang plugins.
+    # See http://code.google.com/p/webrtc/issues/detail?id=163 for details.
+ configs -= [ "//build/config/clang:find_bad_constructs" ] + } + + deps = [ + ":cng", + ":g711", + ":neteq", + ":pcm16b", + "../..:rtc_event_log", + "../..:webrtc_common", + "../../common_audio", + "../../system_wrappers", + ] + + if (rtc_include_opus) { + defines += [ "WEBRTC_CODEC_OPUS" ] + deps += [ ":webrtc_opus" ] + } + if (!build_with_mozilla) { + if (current_cpu == "arm") { + defines += [ "WEBRTC_CODEC_ISACFX" ] + deps += [ ":isac_fix" ] + } else { + defines += [ "WEBRTC_CODEC_ISAC" ] + deps += [ ":isac" ] + } + defines += [ "WEBRTC_CODEC_G722" ] + deps += [ ":g722" ] + } + if (!build_with_mozilla && !build_with_chromium) { + defines += [ + "WEBRTC_CODEC_ILBC", + "WEBRTC_CODEC_RED", + ] + deps += [ + ":ilbc", + ":red", + ] + } +} + +source_set("audio_decoder_interface") { + sources = [ + "codecs/audio_decoder.cc", + "codecs/audio_decoder.h", + ] + configs += [ "../..:common_config" ] + public_configs = [ "../..:common_inherited_config" ] + deps = [ + "../..:webrtc_common", + ] +} + +source_set("audio_encoder_interface") { + sources = [ + "codecs/audio_encoder.cc", + "codecs/audio_encoder.h", + ] + configs += [ "../..:common_config" ] + public_configs = [ "../..:common_inherited_config" ] + deps = [ + "../..:webrtc_common", + ] +} + +config("cng_config") { + include_dirs = [ + "../../..", + "codecs/cng/include", + ] +} + +source_set("cng") { + sources = [ + "codecs/cng/audio_encoder_cng.cc", + "codecs/cng/cng_helpfuns.c", + "codecs/cng/cng_helpfuns.h", + "codecs/cng/include/audio_encoder_cng.h", + "codecs/cng/include/webrtc_cng.h", + "codecs/cng/webrtc_cng.c", + ] + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":cng_config", + ] + + deps = [ + "../../common_audio", + ":audio_encoder_interface", + ] +} + +config("red_config") { + include_dirs = [ "codecs/red" ] +} + +source_set("red") { + sources = [ + "codecs/red/audio_encoder_copy_red.cc", + "codecs/red/audio_encoder_copy_red.h", + ] + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":red_config", + ] + + deps = [ + "../../common_audio", + ":audio_encoder_interface", + ] +} + +config("g711_config") { + include_dirs = [ + "../../..", + "codecs/g711/include", + ] +} + +source_set("g711") { + sources = [ + "codecs/g711/audio_decoder_pcm.cc", + "codecs/g711/audio_encoder_pcm.cc", + "codecs/g711/g711.c", + "codecs/g711/g711.h", + "codecs/g711/g711_interface.c", + "codecs/g711/include/audio_decoder_pcm.h", + "codecs/g711/include/audio_encoder_pcm.h", + "codecs/g711/include/g711_interface.h", + ] + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":g711_config", + ] + + deps = [ + ":audio_encoder_interface", + ] +} + +config("g722_config") { + include_dirs = [ + "../../..", + "codecs/g722/include", + ] +} + +source_set("g722") { + sources = [ + "codecs/g722/audio_decoder_g722.cc", + "codecs/g722/audio_encoder_g722.cc", + "codecs/g722/g722_decode.c", + "codecs/g722/g722_enc_dec.h", + "codecs/g722/g722_encode.c", + "codecs/g722/g722_interface.c", + "codecs/g722/include/audio_decoder_g722.h", + "codecs/g722/include/audio_encoder_g722.h", + "codecs/g722/include/g722_interface.h", + ] + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":g722_config", + ] + + deps = [ + ":audio_encoder_interface", + ] +} + +config("ilbc_config") { + include_dirs = [ + "../../..", + "codecs/ilbc/interface", + ] +} + +source_set("ilbc") { + sources = [ + 
"codecs/ilbc/abs_quant.c", + "codecs/ilbc/abs_quant.h", + "codecs/ilbc/abs_quant_loop.c", + "codecs/ilbc/abs_quant_loop.h", + "codecs/ilbc/audio_decoder_ilbc.cc", + "codecs/ilbc/audio_encoder_ilbc.cc", + "codecs/ilbc/augmented_cb_corr.c", + "codecs/ilbc/augmented_cb_corr.h", + "codecs/ilbc/bw_expand.c", + "codecs/ilbc/bw_expand.h", + "codecs/ilbc/cb_construct.c", + "codecs/ilbc/cb_construct.h", + "codecs/ilbc/cb_mem_energy.c", + "codecs/ilbc/cb_mem_energy.h", + "codecs/ilbc/cb_mem_energy_augmentation.c", + "codecs/ilbc/cb_mem_energy_augmentation.h", + "codecs/ilbc/cb_mem_energy_calc.c", + "codecs/ilbc/cb_mem_energy_calc.h", + "codecs/ilbc/cb_search.c", + "codecs/ilbc/cb_search.h", + "codecs/ilbc/cb_search_core.c", + "codecs/ilbc/cb_search_core.h", + "codecs/ilbc/cb_update_best_index.c", + "codecs/ilbc/cb_update_best_index.h", + "codecs/ilbc/chebyshev.c", + "codecs/ilbc/chebyshev.h", + "codecs/ilbc/comp_corr.c", + "codecs/ilbc/comp_corr.h", + "codecs/ilbc/constants.c", + "codecs/ilbc/constants.h", + "codecs/ilbc/create_augmented_vec.c", + "codecs/ilbc/create_augmented_vec.h", + "codecs/ilbc/decode.c", + "codecs/ilbc/decode.h", + "codecs/ilbc/decode_residual.c", + "codecs/ilbc/decode_residual.h", + "codecs/ilbc/decoder_interpolate_lsf.c", + "codecs/ilbc/decoder_interpolate_lsf.h", + "codecs/ilbc/defines.h", + "codecs/ilbc/do_plc.c", + "codecs/ilbc/do_plc.h", + "codecs/ilbc/encode.c", + "codecs/ilbc/encode.h", + "codecs/ilbc/energy_inverse.c", + "codecs/ilbc/energy_inverse.h", + "codecs/ilbc/enh_upsample.c", + "codecs/ilbc/enh_upsample.h", + "codecs/ilbc/enhancer.c", + "codecs/ilbc/enhancer.h", + "codecs/ilbc/enhancer_interface.c", + "codecs/ilbc/enhancer_interface.h", + "codecs/ilbc/filtered_cb_vecs.c", + "codecs/ilbc/filtered_cb_vecs.h", + "codecs/ilbc/frame_classify.c", + "codecs/ilbc/frame_classify.h", + "codecs/ilbc/gain_dequant.c", + "codecs/ilbc/gain_dequant.h", + "codecs/ilbc/gain_quant.c", + "codecs/ilbc/gain_quant.h", + "codecs/ilbc/get_cd_vec.c", + "codecs/ilbc/get_cd_vec.h", + "codecs/ilbc/get_lsp_poly.c", + "codecs/ilbc/get_lsp_poly.h", + "codecs/ilbc/get_sync_seq.c", + "codecs/ilbc/get_sync_seq.h", + "codecs/ilbc/hp_input.c", + "codecs/ilbc/hp_input.h", + "codecs/ilbc/hp_output.c", + "codecs/ilbc/hp_output.h", + "codecs/ilbc/ilbc.c", + "codecs/ilbc/include/audio_decoder_ilbc.h", + "codecs/ilbc/include/audio_encoder_ilbc.h", + "codecs/ilbc/index_conv_dec.c", + "codecs/ilbc/index_conv_dec.h", + "codecs/ilbc/index_conv_enc.c", + "codecs/ilbc/index_conv_enc.h", + "codecs/ilbc/init_decode.c", + "codecs/ilbc/init_decode.h", + "codecs/ilbc/init_encode.c", + "codecs/ilbc/init_encode.h", + "codecs/ilbc/interface/ilbc.h", + "codecs/ilbc/interpolate.c", + "codecs/ilbc/interpolate.h", + "codecs/ilbc/interpolate_samples.c", + "codecs/ilbc/interpolate_samples.h", + "codecs/ilbc/lpc_encode.c", + "codecs/ilbc/lpc_encode.h", + "codecs/ilbc/lsf_check.c", + "codecs/ilbc/lsf_check.h", + "codecs/ilbc/lsf_interpolate_to_poly_dec.c", + "codecs/ilbc/lsf_interpolate_to_poly_dec.h", + "codecs/ilbc/lsf_interpolate_to_poly_enc.c", + "codecs/ilbc/lsf_interpolate_to_poly_enc.h", + "codecs/ilbc/lsf_to_lsp.c", + "codecs/ilbc/lsf_to_lsp.h", + "codecs/ilbc/lsf_to_poly.c", + "codecs/ilbc/lsf_to_poly.h", + "codecs/ilbc/lsp_to_lsf.c", + "codecs/ilbc/lsp_to_lsf.h", + "codecs/ilbc/my_corr.c", + "codecs/ilbc/my_corr.h", + "codecs/ilbc/nearest_neighbor.c", + "codecs/ilbc/nearest_neighbor.h", + "codecs/ilbc/pack_bits.c", + "codecs/ilbc/pack_bits.h", + "codecs/ilbc/poly_to_lsf.c", + "codecs/ilbc/poly_to_lsf.h", + 
"codecs/ilbc/poly_to_lsp.c", + "codecs/ilbc/poly_to_lsp.h", + "codecs/ilbc/refiner.c", + "codecs/ilbc/refiner.h", + "codecs/ilbc/simple_interpolate_lsf.c", + "codecs/ilbc/simple_interpolate_lsf.h", + "codecs/ilbc/simple_lpc_analysis.c", + "codecs/ilbc/simple_lpc_analysis.h", + "codecs/ilbc/simple_lsf_dequant.c", + "codecs/ilbc/simple_lsf_dequant.h", + "codecs/ilbc/simple_lsf_quant.c", + "codecs/ilbc/simple_lsf_quant.h", + "codecs/ilbc/smooth.c", + "codecs/ilbc/smooth.h", + "codecs/ilbc/smooth_out_data.c", + "codecs/ilbc/smooth_out_data.h", + "codecs/ilbc/sort_sq.c", + "codecs/ilbc/sort_sq.h", + "codecs/ilbc/split_vq.c", + "codecs/ilbc/split_vq.h", + "codecs/ilbc/state_construct.c", + "codecs/ilbc/state_construct.h", + "codecs/ilbc/state_search.c", + "codecs/ilbc/state_search.h", + "codecs/ilbc/swap_bytes.c", + "codecs/ilbc/swap_bytes.h", + "codecs/ilbc/unpack_bits.c", + "codecs/ilbc/unpack_bits.h", + "codecs/ilbc/vq3.c", + "codecs/ilbc/vq3.h", + "codecs/ilbc/vq4.c", + "codecs/ilbc/vq4.h", + "codecs/ilbc/window32_w32.c", + "codecs/ilbc/window32_w32.h", + "codecs/ilbc/xcorr_coef.c", + "codecs/ilbc/xcorr_coef.h", + ] + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":ilbc_config", + ] + + deps = [ + "../../common_audio", + ":audio_encoder_interface", + ] +} + +source_set("isac_common") { + sources = [ + "codecs/isac/audio_encoder_isac_t.h", + "codecs/isac/audio_encoder_isac_t_impl.h", + "codecs/isac/locked_bandwidth_info.cc", + "codecs/isac/locked_bandwidth_info.h", + ] + public_configs = [ "../..:common_inherited_config" ] +} + +config("isac_config") { + include_dirs = [ + "../../..", + "codecs/isac/main/interface", + ] +} + +source_set("isac") { + sources = [ + "codecs/isac/main/interface/audio_decoder_isac.h", + "codecs/isac/main/interface/audio_encoder_isac.h", + "codecs/isac/main/interface/isac.h", + "codecs/isac/main/source/arith_routines.c", + "codecs/isac/main/source/arith_routines.h", + "codecs/isac/main/source/arith_routines_hist.c", + "codecs/isac/main/source/arith_routines_logist.c", + "codecs/isac/main/source/audio_decoder_isac.cc", + "codecs/isac/main/source/audio_encoder_isac.cc", + "codecs/isac/main/source/bandwidth_estimator.c", + "codecs/isac/main/source/bandwidth_estimator.h", + "codecs/isac/main/source/codec.h", + "codecs/isac/main/source/crc.c", + "codecs/isac/main/source/crc.h", + "codecs/isac/main/source/decode.c", + "codecs/isac/main/source/decode_bwe.c", + "codecs/isac/main/source/encode.c", + "codecs/isac/main/source/encode_lpc_swb.c", + "codecs/isac/main/source/encode_lpc_swb.h", + "codecs/isac/main/source/entropy_coding.c", + "codecs/isac/main/source/entropy_coding.h", + "codecs/isac/main/source/fft.c", + "codecs/isac/main/source/fft.h", + "codecs/isac/main/source/filter_functions.c", + "codecs/isac/main/source/filterbank_tables.c", + "codecs/isac/main/source/filterbank_tables.h", + "codecs/isac/main/source/filterbanks.c", + "codecs/isac/main/source/intialize.c", + "codecs/isac/main/source/isac.c", + "codecs/isac/main/source/isac_float_type.h", + "codecs/isac/main/source/lattice.c", + "codecs/isac/main/source/lpc_analysis.c", + "codecs/isac/main/source/lpc_analysis.h", + "codecs/isac/main/source/lpc_gain_swb_tables.c", + "codecs/isac/main/source/lpc_gain_swb_tables.h", + "codecs/isac/main/source/lpc_shape_swb12_tables.c", + "codecs/isac/main/source/lpc_shape_swb12_tables.h", + "codecs/isac/main/source/lpc_shape_swb16_tables.c", + "codecs/isac/main/source/lpc_shape_swb16_tables.h", + 
"codecs/isac/main/source/lpc_tables.c", + "codecs/isac/main/source/lpc_tables.h", + "codecs/isac/main/source/os_specific_inline.h", + "codecs/isac/main/source/pitch_estimator.c", + "codecs/isac/main/source/pitch_estimator.h", + "codecs/isac/main/source/pitch_filter.c", + "codecs/isac/main/source/pitch_gain_tables.c", + "codecs/isac/main/source/pitch_gain_tables.h", + "codecs/isac/main/source/pitch_lag_tables.c", + "codecs/isac/main/source/pitch_lag_tables.h", + "codecs/isac/main/source/settings.h", + "codecs/isac/main/source/spectrum_ar_model_tables.c", + "codecs/isac/main/source/spectrum_ar_model_tables.h", + "codecs/isac/main/source/structs.h", + "codecs/isac/main/source/transform.c", + ] + + if (is_linux) { + libs = [ "m" ] + } + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":isac_config", + ] + + deps = [ + ":audio_decoder_interface", + ":audio_encoder_interface", + ":isac_common", + "../../common_audio", + ] +} + +config("isac_fix_config") { + include_dirs = [ + "../../..", + "codecs/isac/fix/interface", + ] +} + +source_set("isac_fix") { + sources = [ + "codecs/isac/fix/interface/audio_decoder_isacfix.h", + "codecs/isac/fix/interface/audio_encoder_isacfix.h", + "codecs/isac/fix/interface/isacfix.h", + "codecs/isac/fix/source/arith_routines.c", + "codecs/isac/fix/source/arith_routines_hist.c", + "codecs/isac/fix/source/arith_routines_logist.c", + "codecs/isac/fix/source/arith_routins.h", + "codecs/isac/fix/source/audio_decoder_isacfix.cc", + "codecs/isac/fix/source/audio_encoder_isacfix.cc", + "codecs/isac/fix/source/bandwidth_estimator.c", + "codecs/isac/fix/source/bandwidth_estimator.h", + "codecs/isac/fix/source/codec.h", + "codecs/isac/fix/source/decode.c", + "codecs/isac/fix/source/decode_bwe.c", + "codecs/isac/fix/source/decode_plc.c", + "codecs/isac/fix/source/encode.c", + "codecs/isac/fix/source/entropy_coding.c", + "codecs/isac/fix/source/entropy_coding.h", + "codecs/isac/fix/source/fft.c", + "codecs/isac/fix/source/fft.h", + "codecs/isac/fix/source/filterbank_tables.c", + "codecs/isac/fix/source/filterbank_tables.h", + "codecs/isac/fix/source/filterbanks.c", + "codecs/isac/fix/source/filters.c", + "codecs/isac/fix/source/initialize.c", + "codecs/isac/fix/source/isac_fix_type.h", + "codecs/isac/fix/source/isacfix.c", + "codecs/isac/fix/source/lattice.c", + "codecs/isac/fix/source/lattice_c.c", + "codecs/isac/fix/source/lpc_masking_model.c", + "codecs/isac/fix/source/lpc_masking_model.h", + "codecs/isac/fix/source/lpc_tables.c", + "codecs/isac/fix/source/lpc_tables.h", + "codecs/isac/fix/source/pitch_estimator.c", + "codecs/isac/fix/source/pitch_estimator.h", + "codecs/isac/fix/source/pitch_estimator_c.c", + "codecs/isac/fix/source/pitch_filter.c", + "codecs/isac/fix/source/pitch_filter_c.c", + "codecs/isac/fix/source/pitch_gain_tables.c", + "codecs/isac/fix/source/pitch_gain_tables.h", + "codecs/isac/fix/source/pitch_lag_tables.c", + "codecs/isac/fix/source/pitch_lag_tables.h", + "codecs/isac/fix/source/settings.h", + "codecs/isac/fix/source/spectrum_ar_model_tables.c", + "codecs/isac/fix/source/spectrum_ar_model_tables.h", + "codecs/isac/fix/source/structs.h", + "codecs/isac/fix/source/transform.c", + "codecs/isac/fix/source/transform_tables.c", + ] + + if (!is_win) { + defines = [ "WEBRTC_LINUX" ] + } + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":isac_fix_config", + ] + + deps = [ + ":audio_encoder_interface", + ":isac_common", + "../../common_audio", + 
"../../system_wrappers", + ] + + if (rtc_build_with_neon) { + deps += [ ":isac_neon" ] + } + + if (current_cpu == "arm" && arm_version >= 7) { + sources += [ + "codecs/isac/fix/source/lattice_armv7.S", + "codecs/isac/fix/source/pitch_filter_armv6.S", + ] + sources -= [ + "codecs/isac/fix/source/lattice_c.c", + "codecs/isac/fix/source/pitch_filter_c.c", + ] + } + + if (current_cpu == "mipsel") { + sources += [ + "codecs/isac/fix/source/entropy_coding_mips.c", + "codecs/isac/fix/source/filters_mips.c", + "codecs/isac/fix/source/lattice_mips.c", + "codecs/isac/fix/source/pitch_estimator_mips.c", + "codecs/isac/fix/source/transform_mips.c", + ] + sources -= [ + "codecs/isac/fix/source/lattice_c.c", + "codecs/isac/fix/source/pitch_estimator_c.c", + ] + if (mips_dsp_rev > 0) { + sources += [ "codecs/isac/fix/source/filterbanks_mips.c" ] + } + if (mips_dsp_rev > 1) { + sources += [ + "codecs/isac/fix/source/lpc_masking_model_mips.c", + "codecs/isac/fix/source/pitch_filter_mips.c", + ] + sources -= [ "codecs/isac/fix/source/pitch_filter_c.c" ] + } + } +} + +if (rtc_build_with_neon) { + source_set("isac_neon") { + sources = [ + "codecs/isac/fix/source/entropy_coding_neon.c", + "codecs/isac/fix/source/filterbanks_neon.c", + "codecs/isac/fix/source/filters_neon.c", + "codecs/isac/fix/source/lattice_neon.c", + "codecs/isac/fix/source/transform_neon.c", + ] + + if (current_cpu != "arm64") { + # Enable compilation for the NEON instruction set. This is needed + # since //build/config/arm.gni only enables NEON for iOS, not Android. + # This provides the same functionality as webrtc/build/arm_neon.gypi. + configs -= [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + # Disable LTO on NEON targets due to compiler bug. + # TODO(fdegans): Enable this. See crbug.com/408997. + if (rtc_use_lto) { + cflags -= [ + "-flto", + "-ffat-lto-objects", + ] + } + + configs += [ "../..:common_config" ] + public_configs = [ "../..:common_inherited_config" ] + + deps = [ + "../../common_audio", + ] + } +} + +config("pcm16b_config") { + include_dirs = [ + "../../..", + "codecs/pcm16b/include", + ] +} + +source_set("pcm16b") { + sources = [ + "codecs/pcm16b/audio_decoder_pcm16b.cc", + "codecs/pcm16b/audio_encoder_pcm16b.cc", + "codecs/pcm16b/include/audio_decoder_pcm16b.h", + "codecs/pcm16b/include/audio_encoder_pcm16b.h", + "codecs/pcm16b/include/pcm16b.h", + "codecs/pcm16b/pcm16b.c", + ] + + deps = [ + ":audio_encoder_interface", + ":g711", + ] + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":pcm16b_config", + ] +} + +config("opus_config") { + include_dirs = [ "../../.." ] +} + +source_set("webrtc_opus") { + sources = [ + "codecs/opus/audio_decoder_opus.cc", + "codecs/opus/audio_encoder_opus.cc", + "codecs/opus/interface/audio_decoder_opus.h", + "codecs/opus/interface/audio_encoder_opus.h", + "codecs/opus/interface/opus_interface.h", + "codecs/opus/opus_inst.h", + "codecs/opus/opus_interface.c", + ] + + deps = [ + ":audio_encoder_interface", + ] + + if (rtc_build_opus) { + configs += [ "../..:common_config" ] + public_configs = [ "../..:common_inherited_config" ] + + public_deps = [ + rtc_opus_dir, + ] + } else if (build_with_mozilla) { + include_dirs = [ getenv("DIST") + "/include/opus" ] + } +} + +config("neteq_config") { + include_dirs = [ + # Need Opus header files for the audio classifier. 
+ "//third_party/opus/src/celt", + "//third_party/opus/src/src", + ] +} + +source_set("neteq") { + sources = [ + "neteq/accelerate.cc", + "neteq/accelerate.h", + "neteq/audio_classifier.cc", + "neteq/audio_classifier.h", + "neteq/audio_decoder_impl.cc", + "neteq/audio_decoder_impl.h", + "neteq/audio_multi_vector.cc", + "neteq/audio_multi_vector.h", + "neteq/audio_vector.cc", + "neteq/audio_vector.h", + "neteq/background_noise.cc", + "neteq/background_noise.h", + "neteq/buffer_level_filter.cc", + "neteq/buffer_level_filter.h", + "neteq/comfort_noise.cc", + "neteq/comfort_noise.h", + "neteq/decision_logic.cc", + "neteq/decision_logic.h", + "neteq/decision_logic_fax.cc", + "neteq/decision_logic_fax.h", + "neteq/decision_logic_normal.cc", + "neteq/decision_logic_normal.h", + "neteq/decoder_database.cc", + "neteq/decoder_database.h", + "neteq/defines.h", + "neteq/delay_manager.cc", + "neteq/delay_manager.h", + "neteq/delay_peak_detector.cc", + "neteq/delay_peak_detector.h", + "neteq/dsp_helper.cc", + "neteq/dsp_helper.h", + "neteq/dtmf_buffer.cc", + "neteq/dtmf_buffer.h", + "neteq/dtmf_tone_generator.cc", + "neteq/dtmf_tone_generator.h", + "neteq/expand.cc", + "neteq/expand.h", + "neteq/interface/neteq.h", + "neteq/merge.cc", + "neteq/merge.h", + "neteq/neteq.cc", + "neteq/neteq_impl.cc", + "neteq/neteq_impl.h", + "neteq/normal.cc", + "neteq/normal.h", + "neteq/packet_buffer.cc", + "neteq/packet_buffer.h", + "neteq/payload_splitter.cc", + "neteq/payload_splitter.h", + "neteq/post_decode_vad.cc", + "neteq/post_decode_vad.h", + "neteq/preemptive_expand.cc", + "neteq/preemptive_expand.h", + "neteq/random_vector.cc", + "neteq/random_vector.h", + "neteq/rtcp.cc", + "neteq/rtcp.h", + "neteq/statistics_calculator.cc", + "neteq/statistics_calculator.h", + "neteq/sync_buffer.cc", + "neteq/sync_buffer.h", + "neteq/time_stretch.cc", + "neteq/time_stretch.h", + "neteq/timestamp_scaler.cc", + "neteq/timestamp_scaler.h", + ] + + configs += [ "../..:common_config" ] + + public_configs = [ + "../..:common_inherited_config", + ":neteq_config", + ] + + deps = [ + ":audio_decoder_interface", + ":cng", + ":g711", + ":pcm16b", + "../..:webrtc_common", + "../../common_audio", + "../../system_wrappers", + ] + + defines = [] + + if (rtc_include_opus) { + defines += [ "WEBRTC_CODEC_OPUS" ] + deps += [ ":webrtc_opus" ] + } + if (!build_with_mozilla) { + if (current_cpu == "arm") { + defines += [ "WEBRTC_CODEC_ISACFX" ] + deps += [ ":isac_fix" ] + } else { + defines += [ "WEBRTC_CODEC_ISAC" ] + deps += [ ":isac" ] + } + defines += [ "WEBRTC_CODEC_G722" ] + deps += [ ":g722" ] + } + if (!build_with_mozilla && !build_with_chromium) { + defines += [ "WEBRTC_CODEC_ILBC" ] + deps += [ ":ilbc" ] + } +} diff --git a/webrtc/modules/audio_coding/Makefile.am b/webrtc/modules/audio_coding/Makefile.am new file mode 100644 index 0000000..7b726d2 --- /dev/null +++ b/webrtc/modules/audio_coding/Makefile.am @@ -0,0 +1,35 @@ +noinst_LTLIBRARIES = libaudio_coding.la + +libaudio_coding_la_SOURCES = codecs/isac/main/interface/isac.h \ + codecs/isac/main/source/arith_routines.c \ + codecs/isac/main/source/arith_routines.h \ + codecs/isac/main/source/codec.h \ + codecs/isac/main/source/encode_lpc_swb.c \ + codecs/isac/main/source/encode_lpc_swb.h \ + codecs/isac/main/source/entropy_coding.c \ + codecs/isac/main/source/entropy_coding.h \ + codecs/isac/main/source/lpc_analysis.c \ + codecs/isac/main/source/lpc_analysis.h \ + codecs/isac/main/source/lpc_gain_swb_tables.c \ + codecs/isac/main/source/lpc_gain_swb_tables.h \ + 
codecs/isac/main/source/lpc_shape_swb12_tables.c \
+	codecs/isac/main/source/lpc_shape_swb12_tables.h \
+	codecs/isac/main/source/lpc_shape_swb16_tables.c \
+	codecs/isac/main/source/lpc_shape_swb16_tables.h \
+	codecs/isac/main/source/lpc_tables.c \
+	codecs/isac/main/source/lpc_tables.h \
+	codecs/isac/main/source/os_specific_inline.h \
+	codecs/isac/main/source/pitch_estimator.c \
+	codecs/isac/main/source/pitch_estimator.h \
+	codecs/isac/main/source/pitch_gain_tables.c \
+	codecs/isac/main/source/pitch_gain_tables.h \
+	codecs/isac/main/source/pitch_lag_tables.c \
+	codecs/isac/main/source/pitch_lag_tables.h \
+	codecs/isac/main/source/settings.h \
+	codecs/isac/main/source/spectrum_ar_model_tables.c \
+	codecs/isac/main/source/spectrum_ar_model_tables.h \
+	codecs/isac/main/source/structs.h \
+	codecs/isac/bandwidth_info.h
+
+libaudio_coding_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS)
+libaudio_coding_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS)
diff --git a/webrtc/modules/audio_coding/codecs/isac/bandwidth_info.h b/webrtc/modules/audio_coding/codecs/isac/bandwidth_info.h
new file mode 100644
index 0000000..1e3f4c9
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/bandwidth_info.h
@@ -0,0 +1,24 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
+
+#include "webrtc/typedefs.h"
+
+typedef struct {
+  int in_use;
+  int32_t send_bw_avg;
+  int32_t send_max_delay_avg;
+  int16_t bottleneck_idx;
+  int16_t jitter_info;
+} IsacBandwidthInfo;
+
+#endif  // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_BANDWIDTH_INFO_H_
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h b/webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h
new file mode 100644
index 0000000..1f5aeb3
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h
@@ -0,0 +1,724 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_INTERFACE_ISAC_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_INTERFACE_ISAC_H_
+
+#include <stddef.h>
+
+#include "webrtc/modules/audio_coding/codecs/isac/bandwidth_info.h"
+#include "webrtc/typedefs.h"
+
+typedef struct WebRtcISACStruct ISACStruct;
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+  /******************************************************************************
+   * WebRtcIsac_AssignSize(...)
+   *
+   * This function returns the size of the ISAC instance, so that the instance
+   * can be created outside iSAC.
+   *
+   * Input:
+   *        - samplingRate      : sampling rate of the input/output audio.
+   *
+   * Output:
+   *        - sizeinbytes       : number of bytes needed to allocate for the
+   *                              instance.
+   *
+   * Return value               : 0 - Ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_AssignSize(
+      int* sizeinbytes);
+
+
+  /******************************************************************************
+   * WebRtcIsac_Assign(...)
+   *
+   * This function assigns the already created memory to the ISAC instance.
+   *
+   * Input:
+   *        - *ISAC_main_inst   : a pointer to the coder instance.
+   *        - samplingRate      : sampling rate of the input/output audio.
+   *        - ISAC_inst_Addr    : the already allocated memory, where we put
+   *                              the iSAC structure.
+   *
+   * Return value               : 0 - Ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_Assign(
+      ISACStruct** ISAC_main_inst,
+      void* ISAC_inst_Addr);
+
+
+  /******************************************************************************
+   * WebRtcIsac_Create(...)
+   *
+   * This function creates an ISAC instance, which will contain the state
+   * information for one coding/decoding channel.
+   *
+   * Input:
+   *        - *ISAC_main_inst   : a pointer to the coder instance.
+   *
+   * Return value               : 0 - Ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_Create(
+      ISACStruct** ISAC_main_inst);
+
+
+  /******************************************************************************
+   * WebRtcIsac_Free(...)
+   *
+   * This function frees the ISAC instance created at the beginning.
+   *
+   * Input:
+   *        - ISAC_main_inst    : an ISAC instance.
+   *
+   * Return value               : 0 - Ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_Free(
+      ISACStruct* ISAC_main_inst);
+
+
+  /******************************************************************************
+   * WebRtcIsac_EncoderInit(...)
+   *
+   * This function initializes an ISAC instance prior to the encoder calls.
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - CodingMode        : 0 -> Bit rate and frame length are
+   *                              automatically adjusted to the available
+   *                              bandwidth on the transmission channel; only
+   *                              valid if the codec is created to work in
+   *                              wideband mode.
+   *                              1 -> User sets a frame length and a target
+   *                              bit rate which is taken as the maximum
+   *                              short-term average bit rate.
+   *
+   * Return value               : 0 - Ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_EncoderInit(
+      ISACStruct* ISAC_main_inst,
+      int16_t CodingMode);
+
+
+  /******************************************************************************
+   * WebRtcIsac_Encode(...)
+   *
+   * This function encodes 10 ms audio blocks and inserts them into a packet.
+   * The input speech length is 160 samples if operating at a 16 kHz sampling
+   * rate, or 320 samples at 32 kHz. The encoder buffers the input audio until
+   * a whole frame has been buffered, then proceeds with encoding.
+   *
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - speechIn          : input speech vector.
+   *
+   * Output:
+   *        - encoded           : the encoded data vector
+   *
+   * Return value:
+   *                            : >0 - Length (in bytes) of coded data
+   *                            :  0 - The buffer didn't reach the chosen
+   *                                   frame-size so it keeps buffering speech
+   *                                   samples.
+   *                            : -1 - Error
+   */
+
+  int WebRtcIsac_Encode(
+      ISACStruct* ISAC_main_inst,
+      const int16_t* speechIn,
+      uint8_t* encoded);
+
+
+  /******************************************************************************
+   * WebRtcIsac_DecoderInit(...)
+   *
+   * This function initializes an ISAC instance prior to the decoder calls.
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   */
+
+  void WebRtcIsac_DecoderInit(ISACStruct* ISAC_main_inst);
+
+  /******************************************************************************
+   * WebRtcIsac_UpdateBwEstimate(...)
+   *
+   * This function updates the estimate of the bandwidth.
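+   *
+   * Illustrative call sequence (not part of the API documentation; the
+   * names |payload|, |payload_len| and |arrival_ts| are hypothetical): a
+   * receiver would typically call, for every incoming packet,
+   *
+   *   WebRtcIsac_UpdateBwEstimate(inst, payload, payload_len,
+   *                               rtp_seq_number, send_ts, arrival_ts);
+   *
+   * before handing the same payload to WebRtcIsac_Decode().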
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - encoded           : encoded ISAC frame(s).
+   *        - packet_size       : size of the packet.
+   *        - rtp_seq_number    : the RTP sequence number of the packet.
+   *        - send_ts           : the RTP send timestamp, given in samples.
+   *        - arr_ts            : the arrival time of the packet (from NetEq),
+   *                              in samples.
+   *
+   * Return value               : 0 - Ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_UpdateBwEstimate(
+      ISACStruct* ISAC_main_inst,
+      const uint8_t* encoded,
+      size_t packet_size,
+      uint16_t rtp_seq_number,
+      uint32_t send_ts,
+      uint32_t arr_ts);
+
+
+  /******************************************************************************
+   * WebRtcIsac_Decode(...)
+   *
+   * This function decodes an ISAC frame. At a 16 kHz sampling rate, the
+   * length of the output audio could be either 480 or 960 samples, equivalent
+   * to 30 or 60 ms respectively. At a 32 kHz sampling rate, the length of the
+   * output audio is 960 samples, which is 30 ms.
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - encoded           : encoded ISAC frame(s).
+   *        - len               : bytes in encoded vector.
+   *
+   * Output:
+   *        - decoded           : The decoded vector.
+   *
+   * Return value               : >0 - number of samples in decoded vector.
+   *                              -1 - Error.
+   */
+
+  int WebRtcIsac_Decode(
+      ISACStruct* ISAC_main_inst,
+      const uint8_t* encoded,
+      size_t len,
+      int16_t* decoded,
+      int16_t* speechType);
+
+
+  /******************************************************************************
+   * WebRtcIsac_DecodePlc(...)
+   *
+   * This function conducts PLC for ISAC frame(s). The output speech length
+   * will be a multiple of frames, i.e. multiples of 30 ms of audio.
+   * Therefore, the output is a multiple of 480 samples if operating at 16 kHz
+   * and a multiple of 960 if operating at 32 kHz.
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - noOfLostFrames    : Number of PLC frames to produce.
+   *
+   * Output:
+   *        - decoded           : The decoded vector.
+   *
+   * Return value               : Number of samples in decoded PLC vector
+   */
+
+  size_t WebRtcIsac_DecodePlc(
+      ISACStruct* ISAC_main_inst,
+      int16_t* decoded,
+      size_t noOfLostFrames);
+
+
+  /******************************************************************************
+   * WebRtcIsac_Control(...)
+   *
+   * This function sets the limit on the short-term average bit-rate and the
+   * frame length. It should be used only in instantaneous mode. At a 16 kHz
+   * sampling rate, an average bit-rate between 10000 and 32000 bps is valid
+   * and a frame-size of 30 or 60 ms is acceptable. At 32 kHz, an average
+   * bit-rate between 10000 and 56000 bps is acceptable, and the valid
+   * frame-size is 30 ms.
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - rate              : limit on the short-term average bit rate,
+   *                              in bits/second.
+   *        - framesize         : frame-size in milliseconds.
+   *
+   * Return value               : 0 - ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_Control(
+      ISACStruct* ISAC_main_inst,
+      int32_t rate,
+      int framesize);
+
+  void WebRtcIsac_SetInitialBweBottleneck(ISACStruct* ISAC_main_inst,
+                                          int bottleneck_bits_per_second);
+
+  /******************************************************************************
+   * WebRtcIsac_ControlBwe(...)
+   *
+   * This function sets the initial values of bottleneck and frame-size if
+   * iSAC is used in channel-adaptive mode. Therefore, this API is not
+   * applicable if the codec is created to operate in super-wideband mode.
+   *
+   * Through this API, users can enforce a frame-size for all values of
+   * bottleneck. Then iSAC will not automatically change the frame-size.
+   *
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - rateBPS           : initial value of the bottleneck in
+   *                              bits/second; 10000 <= rateBPS <= 56000 is
+   *                              accepted. For the default bottleneck, set
+   *                              rateBPS = 0.
+   *        - frameSizeMs       : number of milliseconds per frame (30 or 60)
+   *        - enforceFrameSize  : 1 to enforce the given frame-size throughout
+   *                              the adaptation process, 0 to let iSAC change
+   *                              the frame-size if required.
+   *
+   * Return value               : 0 - ok
+   *                             -1 - Error
+   */
+
+  int16_t WebRtcIsac_ControlBwe(
+      ISACStruct* ISAC_main_inst,
+      int32_t rateBPS,
+      int frameSizeMs,
+      int16_t enforceFrameSize);
+
+
+  /******************************************************************************
+   * WebRtcIsac_ReadFrameLen(...)
+   *
+   * This function returns the length of the frame represented in the packet.
+   *
+   * Input:
+   *        - encoded           : Encoded bit-stream
+   *
+   * Output:
+   *        - frameLength       : Length of frame in packet (in samples)
+   *
+   */
+
+  int16_t WebRtcIsac_ReadFrameLen(
+      ISACStruct* ISAC_main_inst,
+      const uint8_t* encoded,
+      int16_t* frameLength);
+
+
+  /******************************************************************************
+   * WebRtcIsac_version(...)
+   *
+   * This function returns the version number.
+   *
+   * Output:
+   *        - version           : Pointer to character string
+   *
+   */
+
+  void WebRtcIsac_version(
+      char *version);
+
+
+  /******************************************************************************
+   * WebRtcIsac_GetErrorCode(...)
+   *
+   * This function can be used to check the error code of an iSAC instance.
+   * When a function returns -1, an error code is set for that instance. The
+   * function below extracts the code of the last error that occurred in the
+   * specified instance.
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance
+   *
+   * Return value               : Error code
+   */
+
+  int16_t WebRtcIsac_GetErrorCode(
+      ISACStruct* ISAC_main_inst);
+
+
+  /****************************************************************************
+   * WebRtcIsac_GetUplinkBw(...)
+   *
+   * This function outputs the target bottleneck of the codec. In
+   * channel-adaptive mode, the target bottleneck is specified through in-band
+   * signalling retrieved by the bandwidth estimator.
+   * In channel-independent mode, also called instantaneous mode, the target
+   * bottleneck is provided to the encoder by calling xxx_control(...). If
+   * xxx_control is never called, the default value is returned. The default
+   * value for the bottleneck at a 16 kHz encoder sampling rate is 32000
+   * bits/sec, and it is 56000 bits/sec for a 32 kHz sampling rate.
+   * Note that the output is the iSAC internal operating bottleneck, which
+   * might differ slightly from the one provided through xxx_control().
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC instance
+   *
+   * Output:
+   *        - *bottleneck       : bottleneck in bits/sec
+   *
+   * Return value               : -1 if an error happens
+   *                               0 bit-rates computed correctly.
+   */
+
+  int16_t WebRtcIsac_GetUplinkBw(
+      ISACStruct* ISAC_main_inst,
+      int32_t* bottleneck);
+
+
+  /******************************************************************************
+   * WebRtcIsac_SetMaxPayloadSize(...)
+   *
+   * This function sets a limit for the maximum payload size of iSAC. The same
+   * value is used both for 30 and 60 ms packets. If the encoder sampling rate
+   * is 16 kHz the maximum payload size is between 120 and 400 bytes. If the
+   * encoder sampling rate is 32 kHz the maximum payload size is between 120
+   * and 600 bytes.
+   *
+   * If an out-of-range limit is used, the function returns -1, but the
+   * closest valid value will be applied.
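+   *
+   * For example (illustrative only): with a 16 kHz encoder,
+   * WebRtcIsac_SetMaxPayloadSize(inst, 400) caps every packet at 400 bytes,
+   * while WebRtcIsac_SetMaxPayloadSize(inst, 1000) returns -1 and applies
+   * the closest valid limit, 400 bytes.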
+   *
+   * ---------------
+   * IMPORTANT NOTES
+   * ---------------
+   * The size of a packet is limited to the minimum of 'max-payload-size' and
+   * 'max-rate.' For instance, let's assume the max-payload-size is set to
+   * 170 bytes, and max-rate is set to 40 kbps. Note that a limit of 40 kbps
+   * translates to 150 bytes for a 30 ms frame-size & 300 bytes for a 60 ms
+   * frame-size. Then a packet with a frame-size of 30 ms is limited to 150,
+   * i.e. min(170, 150), and a packet with a 60 ms frame-size is limited to
+   * 170 bytes, i.e. min(170, 300).
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC instance
+   *        - maxPayloadBytes   : maximum size of the payload in bytes;
+   *                              valid values are between 120 and 400 bytes
+   *                              if the encoder sampling rate is 16 kHz. For
+   *                              a 32 kHz encoder sampling rate valid values
+   *                              are between 120 and 600 bytes.
+   *
+   * Return value               : 0 if successful
+   *                             -1 if an error happens
+   */
+
+  int16_t WebRtcIsac_SetMaxPayloadSize(
+      ISACStruct* ISAC_main_inst,
+      int16_t maxPayloadBytes);
+
+
+  /******************************************************************************
+   * WebRtcIsac_SetMaxRate(...)
+   *
+   * This function sets the maximum rate which the codec may not exceed for
+   * any signal packet. The maximum rate is defined as the payload size per
+   * frame-size, expressed in bits per second.
+   *
+   * The codec has a maximum rate of 53400 bits per second (200 bytes per 30
+   * ms) if the encoder sampling rate is 16 kHz, and 160 kbps (600 bytes/30
+   * ms) if the encoder sampling rate is 32 kHz.
+   *
+   * It is possible to set a maximum rate between 32000 and 53400 bits/sec
+   * in wideband mode, and 32000 to 160000 bits/sec in super-wideband mode.
+   *
+   * If an out-of-range limit is used, the function returns -1, but the
+   * closest valid value will be applied.
+   *
+   * ---------------
+   * IMPORTANT NOTES
+   * ---------------
+   * The size of a packet is limited to the minimum of 'max-payload-size' and
+   * 'max-rate.' For instance, let's assume the max-payload-size is set to
+   * 170 bytes, and max-rate is set to 40 kbps. Note that a limit of 40 kbps
+   * translates to 150 bytes for a 30 ms frame-size & 300 bytes for a 60 ms
+   * frame-size. Then a packet with a frame-size of 30 ms is limited to 150,
+   * i.e. min(170, 150), and a packet with a 60 ms frame-size is limited to
+   * 170 bytes, i.e. min(170, 300).
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC instance
+   *        - maxRate           : maximum rate in bits per second;
+   *                              valid values are 32000 to 53400 bits/sec in
+   *                              wideband mode, and 32000 to 160000 bits/sec
+   *                              in super-wideband mode.
+   *
+   * Return value               : 0 if successful
+   *                             -1 if an error happens
+   */
+
+  int16_t WebRtcIsac_SetMaxRate(
+      ISACStruct* ISAC_main_inst,
+      int32_t maxRate);
+
+
+  /******************************************************************************
+   * WebRtcIsac_DecSampRate()
+   * Return the sampling rate of the decoded audio.
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC instance
+   *
+   * Return value               : sampling frequency in Hertz.
+   *
+   */
+
+  uint16_t WebRtcIsac_DecSampRate(ISACStruct* ISAC_main_inst);
+
+
+  /******************************************************************************
+   * WebRtcIsac_EncSampRate()
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC instance
+   *
+   * Return value               : sampling rate in Hertz.
+   *
+   */
+
+  uint16_t WebRtcIsac_EncSampRate(ISACStruct* ISAC_main_inst);
+
+
+  /******************************************************************************
+   * WebRtcIsac_SetDecSampRate()
+   * Set the sampling rate of the decoder.
+   * Initialization of the decoder WILL NOT overwrite the sampling rate of
+   * the encoder. The default value is 16 kHz, which is set when the instance
+   * is created.
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC instance
+   *        - sampRate          : sampling rate in Hertz.
+   *
+   * Return value               : 0 if successful
+   *                             -1 if failed.
+   */
+
+  int16_t WebRtcIsac_SetDecSampRate(ISACStruct* ISAC_main_inst,
+                                    uint16_t samp_rate_hz);
+
+
+  /******************************************************************************
+   * WebRtcIsac_SetEncSampRate()
+   * Set the sampling rate of the encoder. Initialization of the encoder WILL
+   * NOT overwrite the sampling rate of the encoder. The default value is
+   * 16 kHz, which is set when the instance is created. The encoding-mode and
+   * the bottleneck remain unchanged by this call; however, the maximum rate
+   * and maximum payload-size will be reset to their default values.
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC instance
+   *        - sampRate          : sampling rate in Hertz.
+   *
+   * Return value               : 0 if successful
+   *                             -1 if failed.
+   */
+
+  int16_t WebRtcIsac_SetEncSampRate(ISACStruct* ISAC_main_inst,
+                                    uint16_t sample_rate_hz);
+
+
+
+  /******************************************************************************
+   * WebRtcIsac_GetNewBitStream(...)
+   *
+   * This function returns encoded data, with the received bwe-index in the
+   * stream. If the rate is set to a value less than the bottleneck of the
+   * codec, the new bitstream will be re-encoded with the given target rate.
+   * It should always return a complete packet, i.e. it is only called once
+   * even for 60 ms frames.
+   *
+   * NOTE 1! This function does not write to the ISACStruct; that is not
+   *         allowed.
+   * NOTE 2! Currently not implemented for SWB mode.
+   * NOTE 3! Rates larger than the bottleneck of the codec will be limited
+   *         to the current bottleneck.
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - bweIndex          : Index of bandwidth estimate to put in the
+   *                              new bitstream.
+   *        - rate              : target rate of the transcoder, in bits/sec.
+   *                              Valid values are the accepted rates in iSAC,
+   *                              i.e. 10000 to 56000.
+   *        - isRCU             : whether the new bit-stream is an RCU stream.
+   *                              Note that the rate parameter always
+   *                              indicates the target rate of the main
+   *                              payload, regardless of the 'isRCU' value.
+   *
+   * Output:
+   *        - encoded           : The encoded data vector
+   *
+   * Return value               : >0 - Length (in bytes) of coded data
+   *                              -1 - Error or called in SWB mode
+   *                                   NOTE! No error code is written to
+   *                                   the struct since it is only allowed to
+   *                                   read the struct.
+   */
+  int16_t WebRtcIsac_GetNewBitStream(
+      ISACStruct* ISAC_main_inst,
+      int16_t bweIndex,
+      int16_t jitterInfo,
+      int32_t rate,
+      uint8_t* encoded,
+      int16_t isRCU);
+
+
+
+  /****************************************************************************
+   * WebRtcIsac_GetDownLinkBwIndex(...)
+   *
+   * This function returns an index representing the bandwidth estimate from
+   * the other side to this side.
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC struct
+   *
+   * Output:
+   *        - bweIndex          : Bandwidth estimate to transmit to the other
+   *                              side.
+   *
+   */
+
+  int16_t WebRtcIsac_GetDownLinkBwIndex(
+      ISACStruct* ISAC_main_inst,
+      int16_t* bweIndex,
+      int16_t* jitterInfo);
+
+
+  /****************************************************************************
+   * WebRtcIsac_UpdateUplinkBw(...)
+   *
+   * This function takes an index representing the bandwidth estimate from
+   * this side to the other side and updates the BWE.
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC struct
+   *        - bweIndex          : Bandwidth estimate from the other side.
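+   *
+   * Illustrative round trip (the plumbing is hypothetical): each side
+   * obtains its indices with
+   * WebRtcIsac_GetDownLinkBwIndex(decoder_inst, &bweIndex, &jitterInfo),
+   * transmits them in-band, and the peer applies the received index with
+   * WebRtcIsac_UpdateUplinkBw(encoder_inst, bweIndex).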
+   *
+   */
+
+  int16_t WebRtcIsac_UpdateUplinkBw(
+      ISACStruct* ISAC_main_inst,
+      int16_t bweIndex);
+
+
+  /****************************************************************************
+   * WebRtcIsac_ReadBwIndex(...)
+   *
+   * This function returns the index of the bandwidth estimate from the
+   * bitstream.
+   *
+   * Input:
+   *        - encoded           : Encoded bitstream
+   *
+   * Output:
+   *        - frameLength       : Length of frame in packet (in samples)
+   *        - bweIndex          : Bandwidth estimate in bitstream
+   *
+   */
+
+  int16_t WebRtcIsac_ReadBwIndex(
+      const uint8_t* encoded,
+      int16_t* bweIndex);
+
+
+
+  /*******************************************************************************
+   * WebRtcIsac_GetNewFrameLen(...)
+   *
+   * This function returns the frame length (in samples) of the next packet.
+   * In channel-adaptive mode, iSAC decides on its frame length based on the
+   * estimated bottleneck; this allows a user to prepare for the next packet
+   * (at the encoder).
+   *
+   * The primary usage is in CE, to make iSAC work in channel-adaptive mode.
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC struct
+   *
+   * Return Value               : frame length in samples
+   *
+   */
+
+  int16_t WebRtcIsac_GetNewFrameLen(
+      ISACStruct* ISAC_main_inst);
+
+
+  /****************************************************************************
+   * WebRtcIsac_GetRedPayload(...)
+   *
+   * Populates "encoded" with the redundant payload of the recently encoded
+   * frame. This function has to be called once WebRtcIsac_Encode(...)
+   * returns a positive value. Regardless of the frame-size, this function
+   * will be called only once after encoding is completed.
+   *
+   * Input:
+   *        - ISAC_main_inst    : iSAC struct
+   *
+   * Output:
+   *        - encoded           : the encoded data vector
+   *
+   *
+   * Return value:
+   *                            : >0 - Length (in bytes) of coded data
+   *                            : -1 - Error
+   *
+   *
+   */
+  int16_t WebRtcIsac_GetRedPayload(
+      ISACStruct* ISAC_main_inst,
+      uint8_t* encoded);
+
+
+  /****************************************************************************
+   * WebRtcIsac_DecodeRcu(...)
+   *
+   * This function decodes a redundant (RCU) iSAC frame. The function is
+   * called in NetEq with a stored RCU payload in case of packet loss. The
+   * output speech length will be a multiple of 480 samples: 480 or 960
+   * samples, depending on the frame-size (30 or 60 ms).
+   *
+   * Input:
+   *        - ISAC_main_inst    : ISAC instance.
+   *        - encoded           : encoded ISAC RCU frame(s)
+   *        - len               : bytes in encoded vector
+   *
+   * Output:
+   *        - decoded           : The decoded vector
+   *
+   * Return value               : >0 - number of samples in decoded vector
+   *                              -1 - Error
+   */
+  int WebRtcIsac_DecodeRcu(
+      ISACStruct* ISAC_main_inst,
+      const uint8_t* encoded,
+      size_t len,
+      int16_t* decoded,
+      int16_t* speechType);
+
+  /* Fills in an IsacBandwidthInfo struct. |inst| should be a decoder. */
+  void WebRtcIsac_GetBandwidthInfo(ISACStruct* inst, IsacBandwidthInfo* bwinfo);
+
+  /* Uses the values from an IsacBandwidthInfo struct. |inst| should be an
+     encoder. */
+  void WebRtcIsac_SetBandwidthInfo(ISACStruct* inst,
+                                   const IsacBandwidthInfo* bwinfo);
+
+  /* If |inst| is a decoder but not an encoder: tell it what sample rate the
+     encoder is using, for bandwidth estimation purposes.
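+     For example (illustrative): a decoder receiving a 32 kHz-encoded stream
+     might call WebRtcIsac_SetEncSampRateInDecoder(inst, 32000) once, right
+     after WebRtcIsac_DecoderInit(inst).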
*/ + void WebRtcIsac_SetEncSampRateInDecoder(ISACStruct* inst, int sample_rate_hz); + +#if defined(__cplusplus) +} +#endif + + + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_INTERFACE_ISAC_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/arith_routines.c b/webrtc/modules/audio_coding/codecs/isac/main/source/arith_routines.c new file mode 100644 index 0000000..5c901bb --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/arith_routines.c @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "arith_routines.h" +#include "settings.h" + + +/* + * terminate and return byte stream; + * returns the number of bytes in the stream + */ +int WebRtcIsac_EncTerminate(Bitstr *streamdata) /* in-/output struct containing bitstream */ +{ + uint8_t *stream_ptr; + + + /* point to the right place in the stream buffer */ + stream_ptr = streamdata->stream + streamdata->stream_index; + + /* find minimum length (determined by current interval width) */ + if ( streamdata->W_upper > 0x01FFFFFF ) + { + streamdata->streamval += 0x01000000; + /* add carry to buffer */ + if (streamdata->streamval < 0x01000000) + { + /* propagate carry */ + while ( !(++(*--stream_ptr)) ); + /* put pointer back to the old value */ + stream_ptr = streamdata->stream + streamdata->stream_index; + } + /* write remaining data to bitstream */ + *stream_ptr++ = (uint8_t) (streamdata->streamval >> 24); + } + else + { + streamdata->streamval += 0x00010000; + /* add carry to buffer */ + if (streamdata->streamval < 0x00010000) + { + /* propagate carry */ + while ( !(++(*--stream_ptr)) ); + /* put pointer back to the old value */ + stream_ptr = streamdata->stream + streamdata->stream_index; + } + /* write remaining data to bitstream */ + *stream_ptr++ = (uint8_t) (streamdata->streamval >> 24); + *stream_ptr++ = (uint8_t) ((streamdata->streamval >> 16) & 0x00FF); + } + + /* calculate stream length */ + return (int)(stream_ptr - streamdata->stream); +} diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/arith_routines.h b/webrtc/modules/audio_coding/codecs/isac/main/source/arith_routines.h new file mode 100644 index 0000000..43ba40e --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/arith_routines.h @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * arith_routines.h + * + * Functions for arithmetic coding. 
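+ *
+ * Illustrative usage sketch (inferred from the declarations below and from
+ * WebRtcIsac_EncTerminate() in arith_routines.c; not a normative example):
+ * the encoder writes symbols into a Bitstr with WebRtcIsac_EncHistMulti()
+ * or WebRtcIsac_EncLogisticMulti2(), then calls
+ * WebRtcIsac_EncTerminate(&streamdata) to flush the stream and obtain its
+ * length in bytes.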
+ * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ARITH_ROUTINES_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ARITH_ROUTINES_H_ + +#include "structs.h" + + +int WebRtcIsac_EncLogisticMulti2( + Bitstr *streamdata, /* in-/output struct containing bitstream */ + int16_t *dataQ7, /* input: data vector */ + const uint16_t *env, /* input: side info vector defining the width of the pdf */ + const int N, /* input: data vector length */ + const int16_t isSWB12kHz); /* if the codec is working in 12kHz bandwidth */ + +/* returns the number of bytes in the stream */ +int WebRtcIsac_EncTerminate(Bitstr *streamdata); /* in-/output struct containing bitstream */ + +/* returns the number of bytes in the stream so far */ +int WebRtcIsac_DecLogisticMulti2( + int16_t *data, /* output: data vector */ + Bitstr *streamdata, /* in-/output struct containing bitstream */ + const uint16_t *env, /* input: side info vector defining the width of the pdf */ + const int16_t *dither, /* input: dither vector */ + const int N, /* input: data vector length */ + const int16_t isSWB12kHz); /* if the codec is working in 12kHz bandwidth */ + +void WebRtcIsac_EncHistMulti( + Bitstr *streamdata, /* in-/output struct containing bitstream */ + const int *data, /* input: data vector */ + const uint16_t **cdf, /* input: array of cdf arrays */ + const int N); /* input: data vector length */ + +int WebRtcIsac_DecHistBisectMulti( + int *data, /* output: data vector */ + Bitstr *streamdata, /* in-/output struct containing bitstream */ + const uint16_t **cdf, /* input: array of cdf arrays */ + const uint16_t *cdf_size, /* input: array of cdf table sizes+1 (power of two: 2^k) */ + const int N); /* input: data vector length */ + +int WebRtcIsac_DecHistOneStepMulti( + int *data, /* output: data vector */ + Bitstr *streamdata, /* in-/output struct containing bitstream */ + const uint16_t **cdf, /* input: array of cdf arrays */ + const uint16_t *init_index,/* input: vector of initial cdf table search entries */ + const int N); /* input: data vector length */ + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ARITH_ROUTINES_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/codec.h b/webrtc/modules/audio_coding/codecs/isac/main/source/codec.h new file mode 100644 index 0000000..7ef64b5 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/codec.h @@ -0,0 +1,233 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * codec.h + * + * This header file contains the calls to the internal encoder + * and decoder functions. 
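+ *
+ * Illustrative summary (inferred from the declarations below, not
+ * normative): a super-wideband encode first splits the target rate between
+ * the bands with WebRtcIsac_RateAllocation(), then calls
+ * WebRtcIsac_EncodeLb() for the lower band and WebRtcIsac_EncodeUb12() or
+ * WebRtcIsac_EncodeUb16() for the upper band, depending on whether the
+ * codec runs in 0-12 kHz or 0-16 kHz mode.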
+ * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_CODEC_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_CODEC_H_ + +#include "structs.h" + + +void WebRtcIsac_ResetBitstream(Bitstr* bit_stream); + +int WebRtcIsac_EstimateBandwidth(BwEstimatorstr* bwest_str, Bitstr* streamdata, + size_t packet_size, + uint16_t rtp_seq_number, + uint32_t send_ts, uint32_t arr_ts, + enum IsacSamplingRate encoderSampRate, + enum IsacSamplingRate decoderSampRate); + +int WebRtcIsac_DecodeLb(const TransformTables* transform_tables, + float* signal_out, + ISACLBDecStruct* ISACdec_obj, + int16_t* current_framesamples, + int16_t isRCUPayload); + +int WebRtcIsac_DecodeRcuLb(float* signal_out, ISACLBDecStruct* ISACdec_obj, + int16_t* current_framesamples); + +int WebRtcIsac_EncodeLb(const TransformTables* transform_tables, + float* in, + ISACLBEncStruct* ISACencLB_obj, + int16_t codingMode, + int16_t bottleneckIndex); + +int WebRtcIsac_EncodeStoredDataLb(const IsacSaveEncoderData* ISACSavedEnc_obj, + Bitstr* ISACBitStr_obj, int BWnumber, + float scale); + +int WebRtcIsac_EncodeStoredDataUb( + const ISACUBSaveEncDataStruct* ISACSavedEnc_obj, Bitstr* bitStream, + int32_t jitterInfo, float scale, enum ISACBandwidth bandwidth); + +int16_t WebRtcIsac_GetRedPayloadUb( + const ISACUBSaveEncDataStruct* ISACSavedEncObj, Bitstr* bitStreamObj, + enum ISACBandwidth bandwidth); + +/****************************************************************************** + * WebRtcIsac_RateAllocation() + * Internal function to perform a rate-allocation for upper and lower-band, + * given a total rate. + * + * Input: + * - inRateBitPerSec : a total bit-rate in bits/sec. + * + * Output: + * - rateLBBitPerSec : a bit-rate allocated to the lower-band + * in bits/sec. + * - rateUBBitPerSec : a bit-rate allocated to the upper-band + * in bits/sec. + * + * Return value : 0 if rate allocation has been successful. + * -1 if failed to allocate rates. + */ + +int16_t WebRtcIsac_RateAllocation(int32_t inRateBitPerSec, + double* rateLBBitPerSec, + double* rateUBBitPerSec, + enum ISACBandwidth* bandwidthKHz); + + +/****************************************************************************** + * WebRtcIsac_DecodeUb16() + * + * Decode the upper-band if the codec is in 0-16 kHz mode. + * + * Input/Output: + * -ISACdec_obj : pointer to the upper-band decoder object. The + * bit-stream is stored inside the decoder object. + * + * Output: + * -signal_out : decoded audio, 480 samples 30 ms. + * + * Return value : >0 number of decoded bytes. + * <0 if an error occurred. + */ +int WebRtcIsac_DecodeUb16(const TransformTables* transform_tables, + float* signal_out, + ISACUBDecStruct* ISACdec_obj, + int16_t isRCUPayload); + +/****************************************************************************** + * WebRtcIsac_DecodeUb12() + * + * Decode the upper-band if the codec is in 0-12 kHz mode. + * + * Input/Output: + * -ISACdec_obj : pointer to the upper-band decoder object. The + * bit-stream is stored inside the decoder object. + * + * Output: + * -signal_out : decoded audio, 480 samples 30 ms. + * + * Return value : >0 number of decoded bytes. + * <0 if an error occurred. + */ +int WebRtcIsac_DecodeUb12(const TransformTables* transform_tables, + float* signal_out, + ISACUBDecStruct* ISACdec_obj, + int16_t isRCUPayload); + +/****************************************************************************** + * WebRtcIsac_EncodeUb16() + * + * Encode the upper-band if the codec is in 0-16 kHz mode. 
+ *
+ * Input:
+ *      -in                 : upper-band audio, 160 samples (10 ms).
+ *
+ * Input/Output:
+ *      -ISACenc_obj        : pointer to the upper-band encoder object. The
+ *                            bit-stream is stored inside the encoder object.
+ *
+ * Return value             : >0 number of encoded bytes.
+ *                            <0 if an error occurred.
+ */
+int WebRtcIsac_EncodeUb16(const TransformTables* transform_tables,
+                          float* in,
+                          ISACUBEncStruct* ISACenc_obj,
+                          int32_t jitterInfo);
+
+/******************************************************************************
+ * WebRtcIsac_EncodeUb12()
+ *
+ * Encode the upper-band if the codec is in 0-12 kHz mode.
+ *
+ * Input:
+ *      -in                 : upper-band audio, 160 samples (10 ms).
+ *
+ * Input/Output:
+ *      -ISACenc_obj        : pointer to the upper-band encoder object. The
+ *                            bit-stream is stored inside the encoder object.
+ *
+ * Return value             : >0 number of encoded bytes.
+ *                            <0 if an error occurred.
+ */
+int WebRtcIsac_EncodeUb12(const TransformTables* transform_tables,
+                          float* in,
+                          ISACUBEncStruct* ISACenc_obj,
+                          int32_t jitterInfo);
+
+/************************** initialization functions *************************/
+
+void WebRtcIsac_InitMasking(MaskFiltstr* maskdata);
+
+void WebRtcIsac_InitPreFilterbank(PreFiltBankstr* prefiltdata);
+
+void WebRtcIsac_InitPostFilterbank(PostFiltBankstr* postfiltdata);
+
+void WebRtcIsac_InitPitchFilter(PitchFiltstr* pitchfiltdata);
+
+void WebRtcIsac_InitPitchAnalysis(PitchAnalysisStruct* State);
+
+
+/**************************** transform functions ****************************/
+
+void WebRtcIsac_InitTransform(TransformTables* tables);
+
+void WebRtcIsac_Time2Spec(const TransformTables* tables,
+                          double* inre1,
+                          double* inre2,
+                          int16_t* outre,
+                          int16_t* outim,
+                          FFTstr* fftstr_obj);
+
+void WebRtcIsac_Spec2time(const TransformTables* tables,
+                          double* inre,
+                          double* inim,
+                          double* outre1,
+                          double* outre2,
+                          FFTstr* fftstr_obj);
+
+/******************************* filter functions ****************************/
+
+void WebRtcIsac_AllPoleFilter(double* InOut, double* Coef, size_t lengthInOut,
+                              int orderCoef);
+
+void WebRtcIsac_AllZeroFilter(double* In, double* Coef, size_t lengthInOut,
+                              int orderCoef, double* Out);
+
+void WebRtcIsac_ZeroPoleFilter(double* In, double* ZeroCoef, double* PoleCoef,
+                               size_t lengthInOut, int orderCoef, double* Out);
+
+
+/***************************** filterbank functions **************************/
+
+void WebRtcIsac_SplitAndFilterFloat(float* in, float* LP, float* HP,
+                                    double* LP_la, double* HP_la,
+                                    PreFiltBankstr* prefiltdata);
+
+
+void WebRtcIsac_FilterAndCombineFloat(float* InLP, float* InHP, float* Out,
+                                      PostFiltBankstr* postfiltdata);
+
+
+/************************* normalized lattice filters ************************/
+
+void WebRtcIsac_NormLatticeFilterMa(int orderCoef, float* stateF, float* stateG,
+                                    float* lat_in, double* filtcoeflo,
+                                    double* lat_out);
+
+void WebRtcIsac_NormLatticeFilterAr(int orderCoef, float* stateF, float* stateG,
+                                    double* lat_in, double* lo_filt_coef,
+                                    float* lat_out);
+
+void WebRtcIsac_Dir2Lat(double* a, int orderCoef, float* sth, float* cth);
+
+void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);
+
+#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_CODEC_H_ */
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/encode_lpc_swb.c b/webrtc/modules/audio_coding/codecs/isac/main/source/encode_lpc_swb.c
new file mode 100644
index 0000000..d59f748
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/encode_lpc_swb.c
@@ -0,0 +1,708 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * encode_lpc_swb.c
+ *
+ * This file contains the definition of functions used to
+ * encode the LPC parameters (shape & gain) of the upper band.
+ *
+ */
+
+#include "encode_lpc_swb.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "lpc_gain_swb_tables.h"
+#include "lpc_shape_swb12_tables.h"
+#include "lpc_shape_swb16_tables.h"
+#include "settings.h"
+#include "webrtc/typedefs.h"
+
+/******************************************************************************
+ * WebRtcIsac_RemoveLarMean()
+ *
+ * Remove the means from LAR coefficients.
+ *
+ * Input:
+ *      -lar                : pointer to LAR vectors. The LAR vectors are
+ *                            concatenated.
+ *      -bandwidth          : indicates if the given LAR vectors belong
+ *                            to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ *      -lar                : pointer to the mean-removed LARs.
+ *
+ *
+ */
+int16_t
+WebRtcIsac_RemoveLarMean(
+    double* lar,
+    int16_t bandwidth)
+{
+  int16_t coeffCntr;
+  int16_t vecCntr;
+  int16_t numVec;
+  const double* meanLAR;
+  switch(bandwidth)
+  {
+    case isac12kHz:
+    {
+      numVec = UB_LPC_VEC_PER_FRAME;
+      meanLAR = WebRtcIsac_kMeanLarUb12;
+      break;
+    }
+    case isac16kHz:
+    {
+      numVec = UB16_LPC_VEC_PER_FRAME;
+      meanLAR = WebRtcIsac_kMeanLarUb16;
+      break;
+    }
+    default:
+      return -1;
+  }
+
+  for(vecCntr = 0; vecCntr < numVec; vecCntr++)
+  {
+    for(coeffCntr = 0; coeffCntr < UB_LPC_ORDER; coeffCntr++)
+    {
+      // REMOVE MEAN
+      *lar++ -= meanLAR[coeffCntr];
+    }
+  }
+  return 0;
+}
+
+/******************************************************************************
+ * WebRtcIsac_DecorrelateIntraVec()
+ *
+ * Remove the correlation among the components of the LAR vectors. If the LAR
+ * vectors of one frame are put in a matrix where each column is a LAR vector
+ * of a sub-frame, then this is equivalent to multiplying the LAR matrix by
+ * a decorrelating matrix from the left.
+ *
+ * Input:
+ *      -inLar              : pointer to mean-removed LAR vectors.
+ *      -bandwidth          : indicates if the given LAR vectors belong
+ *                            to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ *      -out                : decorrelated LAR vectors.
+ */
+int16_t
+WebRtcIsac_DecorrelateIntraVec(
+    const double* data,
+    double* out,
+    int16_t bandwidth)
+{
+  const double* ptrData;
+  const double* ptrRow;
+  int16_t rowCntr;
+  int16_t colCntr;
+  int16_t larVecCntr;
+  int16_t numVec;
+  const double* decorrMat;
+  switch(bandwidth)
+  {
+    case isac12kHz:
+    {
+      decorrMat = &WebRtcIsac_kIntraVecDecorrMatUb12[0][0];
+      numVec = UB_LPC_VEC_PER_FRAME;
+      break;
+    }
+    case isac16kHz:
+    {
+      decorrMat = &WebRtcIsac_kIintraVecDecorrMatUb16[0][0];
+      numVec = UB16_LPC_VEC_PER_FRAME;
+      break;
+    }
+    default:
+      return -1;
+  }
+
+  //
+  // decorrMat * data
+  //
+  // data is assumed to contain 'numVec' LAR
+  // vectors (mean removed), each of dimension 'UB_LPC_ORDER',
+  // concatenated one after the other.
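+  // In other words (comment added for clarity; it mirrors the loops below):
+  // for each of the 'numVec' sub-frame vectors v,
+  //   out[row] = sum over col of decorrMat[row][col] * v[col],
+  // an UB_LPC_ORDER x UB_LPC_ORDER matrix-vector product per sub-frame.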
+
+/******************************************************************************
+ * WebRtcIsac_DecorrelateIntraVec()
+ *
+ * Remove the correlation among the components of LAR vectors. If LAR vectors
+ * of one frame are put in a matrix where each column is a LAR vector of a
+ * sub-frame, then this is equivalent to multiplying the LAR matrix with
+ * a decorrelating matrix from the left.
+ *
+ * Input:
+ * -inLar : pointer to mean-removed LAR vectors.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : decorrelated LAR vectors.
+ */
+int16_t
+WebRtcIsac_DecorrelateIntraVec(
+ const double* data,
+ double* out,
+ int16_t bandwidth)
+{
+ const double* ptrData;
+ const double* ptrRow;
+ int16_t rowCntr;
+ int16_t colCntr;
+ int16_t larVecCntr;
+ int16_t numVec;
+ const double* decorrMat;
+ switch(bandwidth)
+ {
+ case isac12kHz:
+ {
+ decorrMat = &WebRtcIsac_kIntraVecDecorrMatUb12[0][0];
+ numVec = UB_LPC_VEC_PER_FRAME;
+ break;
+ }
+ case isac16kHz:
+ {
+ decorrMat = &WebRtcIsac_kIintraVecDecorrMatUb16[0][0];
+ numVec = UB16_LPC_VEC_PER_FRAME;
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ //
+ // decorrMat * data
+ //
+ // data is assumed to contain 'numVec' of LAR
+ // vectors (mean removed) each of dimension 'UB_LPC_ORDER'
+ // concatenated one after the other.
+ //
+
+ ptrData = data;
+ for(larVecCntr = 0; larVecCntr < numVec; larVecCntr++)
+ {
+ for(rowCntr = 0; rowCntr < UB_LPC_ORDER; rowCntr++)
+ {
+ ptrRow = &decorrMat[rowCntr * UB_LPC_ORDER];
+ *out = 0;
+ for(colCntr = 0; colCntr < UB_LPC_ORDER; colCntr++)
+ {
+ *out += ptrData[colCntr] * ptrRow[colCntr];
+ }
+ out++;
+ }
+ ptrData += UB_LPC_ORDER;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * WebRtcIsac_DecorrelateInterVec()
+ *
+ * Remove the correlation among mean-removed LAR vectors. If LAR vectors
+ * of one frame are put in a matrix where each column is a LAR vector of a
+ * sub-frame, then this is equivalent to multiplying the LAR matrix with
+ * a decorrelating matrix from the right.
+ *
+ * Input:
+ * -data : pointer to matrix of LAR vectors. The matrix
+ * is stored column-wise.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : decorrelated LAR vectors.
+ */
+int16_t
+WebRtcIsac_DecorrelateInterVec(
+ const double* data,
+ double* out,
+ int16_t bandwidth)
+{
+ int16_t coeffCntr;
+ int16_t rowCntr;
+ int16_t colCntr;
+ const double* decorrMat;
+ int16_t interVecDim;
+
+ switch(bandwidth)
+ {
+ case isac12kHz:
+ {
+ decorrMat = &WebRtcIsac_kInterVecDecorrMatUb12[0][0];
+ interVecDim = UB_LPC_VEC_PER_FRAME;
+ break;
+ }
+ case isac16kHz:
+ {
+ decorrMat = &WebRtcIsac_kInterVecDecorrMatUb16[0][0];
+ interVecDim = UB16_LPC_VEC_PER_FRAME;
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ //
+ // data * decorrMat
+ //
+ // data is of size 'interVecDim' * 'UB_LPC_ORDER'
+ // That is 'interVecDim' of LAR vectors (mean removed)
+ // in columns each of dimension 'UB_LPC_ORDER'.
+ // matrix is stored column-wise.
+ //
+
+ for(coeffCntr = 0; coeffCntr < UB_LPC_ORDER; coeffCntr++)
+ {
+ for(colCntr = 0; colCntr < interVecDim; colCntr++)
+ {
+ out[coeffCntr + colCntr * UB_LPC_ORDER] = 0;
+ for(rowCntr = 0; rowCntr < interVecDim; rowCntr++)
+ {
+ out[coeffCntr + colCntr * UB_LPC_ORDER] +=
+ data[coeffCntr + rowCntr * UB_LPC_ORDER] *
+ decorrMat[rowCntr * interVecDim + colCntr];
+ }
+ }
+ }
+ return 0;
+}
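
Both decorrelation steps are plain matrix products, and assuming the KLT matrices in the tables are orthonormal (as a KLT basis is, up to table precision), the inverse applied by WebRtcIsac_CorrelateIntraVec()/WebRtcIsac_CorrelateInterVec() further down is simply multiplication by the transpose. A standalone sketch with a hypothetical 2x2 orthonormal basis in place of the UB_LPC_ORDER-sized tables:

```c
#include <stdio.h>

/* Hypothetical 2x2 orthonormal basis (a 45-degree rotation). */
static const double kM[2][2] = { { 0.70710678, 0.70710678 },
                                 { -0.70710678, 0.70710678 } };

int main(void) {
  const double x[2] = { 1.0, 2.0 };
  double y[2], z[2];
  /* Decorrelate: y = M * x (the row-times-vector loop above). */
  for (int r = 0; r < 2; r++)
    y[r] = kM[r][0] * x[0] + kM[r][1] * x[1];
  /* Correlate (inverse): z = M^T * y, recovering x up to rounding. */
  for (int c = 0; c < 2; c++)
    z[c] = kM[0][c] * y[0] + kM[1][c] * y[1];
  printf("%f %f\n", z[0], z[1]); /* -> ~1.0 ~2.0 */
  return 0;
}
```
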
+
+/******************************************************************************
+ * WebRtcIsac_QuantizeUncorrLar()
+ *
+ * Quantize the uncorrelated parameters.
+ *
+ * Input:
+ * -data : uncorrelated LAR vectors.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -data : quantized version of the input.
+ * -idx : pointer to quantization indices.
+ */
+double
+WebRtcIsac_QuantizeUncorrLar(
+ double* data,
+ int* recIdx,
+ int16_t bandwidth)
+{
+ int16_t cntr;
+ int32_t idx;
+ int16_t interVecDim;
+ const double* leftRecPoint;
+ double quantizationStepSize;
+ const int16_t* numQuantCell;
+ switch(bandwidth)
+ {
+ case isac12kHz:
+ {
+ leftRecPoint = WebRtcIsac_kLpcShapeLeftRecPointUb12;
+ quantizationStepSize = WebRtcIsac_kLpcShapeQStepSizeUb12;
+ numQuantCell = WebRtcIsac_kLpcShapeNumRecPointUb12;
+ interVecDim = UB_LPC_VEC_PER_FRAME;
+ break;
+ }
+ case isac16kHz:
+ {
+ leftRecPoint = WebRtcIsac_kLpcShapeLeftRecPointUb16;
+ quantizationStepSize = WebRtcIsac_kLpcShapeQStepSizeUb16;
+ numQuantCell = WebRtcIsac_kLpcShapeNumRecPointUb16;
+ interVecDim = UB16_LPC_VEC_PER_FRAME;
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ //
+ // Quantize the parameters.
+ //
+ for(cntr = 0; cntr < UB_LPC_ORDER * interVecDim; cntr++)
+ {
+ idx = (int32_t)floor((*data - leftRecPoint[cntr]) /
+ quantizationStepSize + 0.5);
+ if(idx < 0)
+ {
+ idx = 0;
+ }
+ else if(idx >= numQuantCell[cntr])
+ {
+ idx = numQuantCell[cntr] - 1;
+ }
+
+ *data++ = leftRecPoint[cntr] + idx * quantizationStepSize;
+ *recIdx++ = idx;
+ }
+ return 0;
+}
+
+
+/******************************************************************************
+ * WebRtcIsac_DequantizeLpcParam()
+ *
+ * Get the quantized value of uncorrelated LARs given the quantization indices.
+ *
+ * Input:
+ * -idx : pointer to quantization indices.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : pointer to quantized values.
+ */
+int16_t
+WebRtcIsac_DequantizeLpcParam(
+ const int* idx,
+ double* out,
+ int16_t bandwidth)
+{
+ int16_t cntr;
+ int16_t interVecDim;
+ const double* leftRecPoint;
+ double quantizationStepSize;
+
+ switch(bandwidth)
+ {
+ case isac12kHz:
+ {
+ leftRecPoint = WebRtcIsac_kLpcShapeLeftRecPointUb12;
+ quantizationStepSize = WebRtcIsac_kLpcShapeQStepSizeUb12;
+ interVecDim = UB_LPC_VEC_PER_FRAME;
+ break;
+ }
+ case isac16kHz:
+ {
+ leftRecPoint = WebRtcIsac_kLpcShapeLeftRecPointUb16;
+ quantizationStepSize = WebRtcIsac_kLpcShapeQStepSizeUb16;
+ interVecDim = UB16_LPC_VEC_PER_FRAME;
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ //
+ // Dequantize given the quantization indices
+ //
+
+ for(cntr = 0; cntr < UB_LPC_ORDER * interVecDim; cntr++)
+ {
+ *out++ = leftRecPoint[cntr] + *idx++ * quantizationStepSize;
+ }
+ return 0;
+}
+
+
+/******************************************************************************
+ * WebRtcIsac_CorrelateIntraVec()
+ *
+ * This is the inverse of WebRtcIsac_DecorrelateIntraVec().
+ *
+ * Input:
+ * -data : uncorrelated parameters.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : correlated parameters.
+ */
+int16_t
+WebRtcIsac_CorrelateIntraVec(
+ const double* data,
+ double* out,
+ int16_t bandwidth)
+{
+ int16_t vecCntr;
+ int16_t rowCntr;
+ int16_t colCntr;
+ int16_t numVec;
+ const double* ptrData;
+ const double* intraVecDecorrMat;
+
+ switch(bandwidth)
+ {
+ case isac12kHz:
+ {
+ numVec = UB_LPC_VEC_PER_FRAME;
+ intraVecDecorrMat = &WebRtcIsac_kIntraVecDecorrMatUb12[0][0];
+ break;
+ }
+ case isac16kHz:
+ {
+ numVec = UB16_LPC_VEC_PER_FRAME;
+ intraVecDecorrMat = &WebRtcIsac_kIintraVecDecorrMatUb16[0][0];
+ break;
+ }
+ default:
+ return -1;
+ }
+
+
+ ptrData = data;
+ for(vecCntr = 0; vecCntr < numVec; vecCntr++)
+ {
+ for(colCntr = 0; colCntr < UB_LPC_ORDER; colCntr++)
+ {
+ *out = 0;
+ for(rowCntr = 0; rowCntr < UB_LPC_ORDER; rowCntr++)
+ {
+ *out += ptrData[rowCntr] *
+ intraVecDecorrMat[rowCntr * UB_LPC_ORDER + colCntr];
+ }
+ out++;
+ }
+ ptrData += UB_LPC_ORDER;
+ }
+ return 0;
+}
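
WebRtcIsac_QuantizeUncorrLar() above is a uniform quantizer with index clamping, and WebRtcIsac_DequantizeLpcParam() is its reconstruction rule. A standalone sketch, where leftRec, step, and numCells are hypothetical stand-ins for the WebRtcIsac_kLpcShape* tables:

```c
#include <math.h>
#include <stdio.h>

int main(void) {
  const double leftRec = -0.5, step = 0.1;
  const int numCells = 16;
  const double x = 0.237;
  /* Quantize: nearest reconstruction point, clamped to the codebook. */
  int idx = (int)floor((x - leftRec) / step + 0.5);
  if (idx < 0) idx = 0;
  else if (idx >= numCells) idx = numCells - 1;
  /* Reconstruction, identical to WebRtcIsac_DequantizeLpcParam(). */
  const double xq = leftRec + idx * step;
  printf("idx=%d xq=%g err=%g\n", idx, xq, x - xq); /* |err| <= step/2 */
  return 0;
}
```
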
+
+/******************************************************************************
+ * WebRtcIsac_CorrelateInterVec()
+ *
+ * This is the inverse of WebRtcIsac_DecorrelateInterVec().
+ *
+ * Input:
+ * -data
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : correlated parameters.
+ */
+int16_t
+WebRtcIsac_CorrelateInterVec(
+ const double* data,
+ double* out,
+ int16_t bandwidth)
+{
+ int16_t coeffCntr;
+ int16_t rowCntr;
+ int16_t colCntr;
+ int16_t interVecDim;
+ double myVec[UB16_LPC_VEC_PER_FRAME];
+ const double* interVecDecorrMat;
+
+ switch(bandwidth)
+ {
+ case isac12kHz:
+ {
+ interVecDim = UB_LPC_VEC_PER_FRAME;
+ interVecDecorrMat = &WebRtcIsac_kInterVecDecorrMatUb12[0][0];
+ break;
+ }
+ case isac16kHz:
+ {
+ interVecDim = UB16_LPC_VEC_PER_FRAME;
+ interVecDecorrMat = &WebRtcIsac_kInterVecDecorrMatUb16[0][0];
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ for(coeffCntr = 0; coeffCntr < UB_LPC_ORDER; coeffCntr++)
+ {
+ for(rowCntr = 0; rowCntr < interVecDim; rowCntr++)
+ {
+ myVec[rowCntr] = 0;
+ for(colCntr = 0; colCntr < interVecDim; colCntr++)
+ {
+ myVec[rowCntr] += data[coeffCntr + colCntr * UB_LPC_ORDER] * //*ptrData *
+ interVecDecorrMat[rowCntr * interVecDim + colCntr];
+ //ptrData += UB_LPC_ORDER;
+ }
+ }
+
+ for(rowCntr = 0; rowCntr < interVecDim; rowCntr++)
+ {
+ out[coeffCntr + rowCntr * UB_LPC_ORDER] = myVec[rowCntr];
+ }
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * WebRtcIsac_AddLarMean()
+ *
+ * This is the inverse of WebRtcIsac_RemoveLarMean()
+ *
+ * Input:
+ * -data : pointer to mean-removed LARs.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -data : pointer to LARs.
+ */
+int16_t
+WebRtcIsac_AddLarMean(
+ double* data,
+ int16_t bandwidth)
+{
+ int16_t coeffCntr;
+ int16_t vecCntr;
+ int16_t numVec;
+ const double* meanLAR;
+
+ switch(bandwidth)
+ {
+ case isac12kHz:
+ {
+ numVec = UB_LPC_VEC_PER_FRAME;
+ meanLAR = WebRtcIsac_kMeanLarUb12;
+ break;
+ }
+ case isac16kHz:
+ {
+ numVec = UB16_LPC_VEC_PER_FRAME;
+ meanLAR = WebRtcIsac_kMeanLarUb16;
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ for(vecCntr = 0; vecCntr < numVec; vecCntr++)
+ {
+ for(coeffCntr = 0; coeffCntr < UB_LPC_ORDER; coeffCntr++)
+ {
+ *data++ += meanLAR[coeffCntr];
+ }
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * WebRtcIsac_ToLogDomainRemoveMean()
+ *
+ * Transform the LPC gain to log domain then remove the mean value.
+ *
+ * Input:
+ * -lpcGain : pointer to LPC Gain, expecting 6 LPC gains
+ *
+ * Output:
+ * -lpcGain : mean-removed in log domain.
+ */
+int16_t
+WebRtcIsac_ToLogDomainRemoveMean(
+ double* data)
+{
+ int16_t coeffCntr;
+ for(coeffCntr = 0; coeffCntr < UB_LPC_GAIN_DIM; coeffCntr++)
+ {
+ data[coeffCntr] = log(data[coeffCntr]) - WebRtcIsac_kMeanLpcGain;
+ }
+ return 0;
+}
+
+
+/******************************************************************************
+ * WebRtcIsac_DecorrelateLPGain()
+ *
+ * Decorrelate LPC gains. There are 6 LPC Gains per frame. This is like
+ * multiplying the gain vector by a decorrelating matrix.
+ *
+ * Input:
+ * -data : LPC gain in log-domain with mean removed.
+ *
+ * Output:
+ * -out : decorrelated parameters.
+ */
+int16_t WebRtcIsac_DecorrelateLPGain(
+ const double* data,
+ double* out)
+{
+ int16_t rowCntr;
+ int16_t colCntr;
+
+ for(colCntr = 0; colCntr < UB_LPC_GAIN_DIM; colCntr++)
+ {
+ *out = 0;
+ for(rowCntr = 0; rowCntr < UB_LPC_GAIN_DIM; rowCntr++)
+ {
+ *out += data[rowCntr] * WebRtcIsac_kLpcGainDecorrMat[rowCntr][colCntr];
+ }
+ out++;
+ }
+ return 0;
+}
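
Working on log(gain) rather than the gain itself makes a uniform quantizer's error multiplicative: a fixed step in the log domain bounds the relative error in the linear domain regardless of the gain's magnitude. A standalone sketch of that property, with a hypothetical step size and gains:

```c
#include <math.h>
#include <stdio.h>

int main(void) {
  const double step = 0.05; /* hypothetical log-domain step size */
  const double gains[2] = { 0.01, 100.0 };
  for (int i = 0; i < 2; i++) {
    double lg = log(gains[i]);
    double lq = step * floor(lg / step + 0.5); /* uniform log-domain quantizer */
    double gq = exp(lq);
    /* Relative error is bounded by exp(step/2) - 1 for any gain magnitude. */
    printf("g=%g gq=%g rel.err=%.4f\n", gains[i], gq, fabs(gq / gains[i] - 1));
  }
  return 0;
}
```
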
+
+/******************************************************************************
+ * WebRtcIsac_QuantizeLpcGain()
+ *
+ * Quantize the decorrelated log-domain gains.
+ *
+ * Input:
+ * -lpcGain : uncorrelated LPC gains.
+ *
+ * Output:
+ * -idx : quantization indices
+ * -lpcGain : quantized value of the input.
+ */
+double WebRtcIsac_QuantizeLpcGain(
+ double* data,
+ int* idx)
+{
+ int16_t coeffCntr;
+ for(coeffCntr = 0; coeffCntr < UB_LPC_GAIN_DIM; coeffCntr++)
+ {
+ *idx = (int)floor((*data - WebRtcIsac_kLeftRecPointLpcGain[coeffCntr]) /
+ WebRtcIsac_kQSizeLpcGain + 0.5);
+
+ if(*idx < 0)
+ {
+ *idx = 0;
+ }
+ else if(*idx >= WebRtcIsac_kNumQCellLpcGain[coeffCntr])
+ {
+ *idx = WebRtcIsac_kNumQCellLpcGain[coeffCntr] - 1;
+ }
+ *data = WebRtcIsac_kLeftRecPointLpcGain[coeffCntr] + *idx *
+ WebRtcIsac_kQSizeLpcGain;
+
+ data++;
+ idx++;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * WebRtcIsac_DequantizeLpcGain()
+ *
+ * Get the quantized values given the quantization indices.
+ *
+ * Input:
+ * -idx : pointer to quantization indices.
+ *
+ * Output:
+ * -lpcGains : quantized values of the given parameters.
+ */
+int16_t WebRtcIsac_DequantizeLpcGain(
+ const int* idx,
+ double* out)
+{
+ int16_t coeffCntr;
+ for(coeffCntr = 0; coeffCntr < UB_LPC_GAIN_DIM; coeffCntr++)
+ {
+ *out = WebRtcIsac_kLeftRecPointLpcGain[coeffCntr] + *idx *
+ WebRtcIsac_kQSizeLpcGain;
+ out++;
+ idx++;
+ }
+ return 0;
+}
+
+/******************************************************************************
+ * WebRtcIsac_CorrelateLpcGain()
+ *
+ * This is the inverse of WebRtcIsac_DecorrelateLPGain().
+ *
+ * Input:
+ * -data : decorrelated parameters.
+ *
+ * Output:
+ * -out : correlated parameters.
+ */
+int16_t WebRtcIsac_CorrelateLpcGain(
+ const double* data,
+ double* out)
+{
+ int16_t rowCntr;
+ int16_t colCntr;
+
+ for(rowCntr = 0; rowCntr < UB_LPC_GAIN_DIM; rowCntr++)
+ {
+ *out = 0;
+ for(colCntr = 0; colCntr < UB_LPC_GAIN_DIM; colCntr++)
+ {
+ *out += WebRtcIsac_kLpcGainDecorrMat[rowCntr][colCntr] * data[colCntr];
+ }
+ out++;
+ }
+
+ return 0;
+}
+
+
+/******************************************************************************
+ * WebRtcIsac_AddMeanToLinearDomain()
+ *
+ * This is the inverse of WebRtcIsac_ToLogDomainRemoveMean().
+ *
+ * Input:
+ * -lpcGain : LPC gain in log-domain & mean removed
+ *
+ * Output:
+ * -lpcGain : LPC gain in normal domain.
+ */
+int16_t WebRtcIsac_AddMeanToLinearDomain(
+ double* lpcGains)
+{
+ int16_t coeffCntr;
+ for(coeffCntr = 0; coeffCntr < UB_LPC_GAIN_DIM; coeffCntr++)
+ {
+ lpcGains[coeffCntr] = exp(lpcGains[coeffCntr] + WebRtcIsac_kMeanLpcGain);
+ }
+ return 0;
+}
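
Taken together, the gain-side functions in this file form a fixed encode chain with an exact inverse; the order below is the same one in which WebRtcIsac_EncodeLpcGainUb() and WebRtcIsac_DecodeLpcGainUb() in entropy_coding.c call them. A hedged usage sketch, not a standalone program (error checks omitted; UB_LPC_GAIN_DIM is 6 per the comments above):

```c
double g[UB_LPC_GAIN_DIM]; /* fill with the six linear-domain LPC gains */
double u[UB_LPC_GAIN_DIM];
int idx[UB_LPC_GAIN_DIM];

/* Encoder side: log domain, decorrelate, quantize. */
WebRtcIsac_ToLogDomainRemoveMean(g);
WebRtcIsac_DecorrelateLPGain(g, u);
WebRtcIsac_QuantizeLpcGain(u, idx); /* idx is what gets entropy-coded */

/* Decoder side: invert each step in reverse order. */
WebRtcIsac_DequantizeLpcGain(idx, u);
WebRtcIsac_CorrelateLpcGain(u, g);
WebRtcIsac_AddMeanToLinearDomain(g); /* g now holds the quantized gains */
```
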
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/encode_lpc_swb.h b/webrtc/modules/audio_coding/codecs/isac/main/source/encode_lpc_swb.h
new file mode 100644
index 0000000..3dd2311
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/encode_lpc_swb.h
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * encode_lpc_swb.h
+ *
+ * This file contains the declaration of functions used to
+ * encode LPC parameters (shape & gain) of the upper band.
+ *
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ENCODE_LPC_SWB_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ENCODE_LPC_SWB_H_
+
+#include "settings.h"
+#include "structs.h"
+#include "webrtc/typedefs.h"
+
+/******************************************************************************
+ * WebRtcIsac_RemoveLarMean()
+ *
+ * Remove the means from LAR coefficients.
+ *
+ * Input:
+ * -lar : pointer to LAR vectors. LAR vectors are
+ * concatenated.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -lar : pointer to mean-removed LARs.
+ *
+ *
+ */
+int16_t WebRtcIsac_RemoveLarMean(
+ double* lar,
+ int16_t bandwidth);
+
+/******************************************************************************
+ * WebRtcIsac_DecorrelateIntraVec()
+ *
+ * Remove the correlation among the components of LAR vectors. If LAR vectors
+ * of one frame are put in a matrix where each column is a LAR vector of a
+ * sub-frame, then this is equivalent to multiplying the LAR matrix with
+ * a decorrelating matrix from the left.
+ *
+ * Input:
+ * -inLar : pointer to mean-removed LAR vectors.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : decorrelated LAR vectors.
+ */
+int16_t WebRtcIsac_DecorrelateIntraVec(
+ const double* inLAR,
+ double* out,
+ int16_t bandwidth);
+
+
+/******************************************************************************
+ * WebRtcIsac_DecorrelateInterVec()
+ *
+ * Remove the correlation among mean-removed LAR vectors. If LAR vectors
+ * of one frame are put in a matrix where each column is a LAR vector of a
+ * sub-frame, then this is equivalent to multiplying the LAR matrix with
+ * a decorrelating matrix from the right.
+ *
+ * Input:
+ * -data : pointer to matrix of LAR vectors. The matrix
+ * is stored column-wise.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : decorrelated LAR vectors.
+ */
+int16_t WebRtcIsac_DecorrelateInterVec(
+ const double* data,
+ double* out,
+ int16_t bandwidth);
+
+
+/******************************************************************************
+ * WebRtcIsac_QuantizeUncorrLar()
+ *
+ * Quantize the uncorrelated parameters.
+ *
+ * Input:
+ * -data : uncorrelated LAR vectors.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -data : quantized version of the input.
+ * -idx : pointer to quantization indices.
+ */
+double WebRtcIsac_QuantizeUncorrLar(
+ double* data,
+ int* idx,
+ int16_t bandwidth);
+
+
+/******************************************************************************
+ * WebRtcIsac_CorrelateIntraVec()
+ *
+ * This is the inverse of WebRtcIsac_DecorrelateIntraVec().
+ *
+ * Input:
+ * -data : uncorrelated parameters.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : correlated parameters.
+ */
+int16_t WebRtcIsac_CorrelateIntraVec(
+ const double* data,
+ double* out,
+ int16_t bandwidth);
+
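
The shape-side declarations above compose the same way as the gain-side ones; the sequence below mirrors WebRtcIsac_EncodeLpcUB() in entropy_coding.c, where the encoder also runs the inverse chain so that it continues with exactly the values the decoder will reconstruct. A hedged usage sketch for the 12 kHz mode (not a standalone program; error checks omitted):

```c
double lar[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME]; /* LARs for one frame */
double u[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME];
int idx[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME];

/* Encoder side. */
WebRtcIsac_RemoveLarMean(lar, isac12kHz);
WebRtcIsac_DecorrelateIntraVec(lar, u, isac12kHz);
WebRtcIsac_DecorrelateInterVec(u, lar, isac12kHz);
WebRtcIsac_QuantizeUncorrLar(lar, idx, isac12kHz); /* idx -> entropy coder */

/* Inverse chain; the decoder runs the same three calls after
 * WebRtcIsac_DequantizeLpcParam(idx, lar, isac12kHz). */
WebRtcIsac_CorrelateInterVec(lar, u, isac12kHz);
WebRtcIsac_CorrelateIntraVec(u, lar, isac12kHz);
WebRtcIsac_AddLarMean(lar, isac12kHz);
```
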
+
+/******************************************************************************
+ * WebRtcIsac_CorrelateInterVec()
+ *
+ * This is the inverse of WebRtcIsac_DecorrelateInterVec().
+ *
+ * Input:
+ * -data
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : correlated parameters.
+ */
+int16_t WebRtcIsac_CorrelateInterVec(
+ const double* data,
+ double* out,
+ int16_t bandwidth);
+
+
+/******************************************************************************
+ * WebRtcIsac_AddLarMean()
+ *
+ * This is the inverse of WebRtcIsac_RemoveLarMean()
+ *
+ * Input:
+ * -data : pointer to mean-removed LARs.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -data : pointer to LARs.
+ */
+int16_t WebRtcIsac_AddLarMean(
+ double* data,
+ int16_t bandwidth);
+
+
+/******************************************************************************
+ * WebRtcIsac_DequantizeLpcParam()
+ *
+ * Get the quantized value of uncorrelated LARs given the quantization indices.
+ *
+ * Input:
+ * -idx : pointer to quantization indices.
+ * -bandwidth : indicates if the given LAR vectors belong
+ * to SWB-12kHz or SWB-16kHz.
+ *
+ * Output:
+ * -out : pointer to quantized values.
+ */
+int16_t WebRtcIsac_DequantizeLpcParam(
+ const int* idx,
+ double* out,
+ int16_t bandwidth);
+
+
+/******************************************************************************
+ * WebRtcIsac_ToLogDomainRemoveMean()
+ *
+ * Transform the LPC gain to log domain then remove the mean value.
+ *
+ * Input:
+ * -lpcGain : pointer to LPC Gain, expecting 6 LPC gains
+ *
+ * Output:
+ * -lpcGain : mean-removed in log domain.
+ */
+int16_t WebRtcIsac_ToLogDomainRemoveMean(
+ double* lpGains);
+
+
+/******************************************************************************
+ * WebRtcIsac_DecorrelateLPGain()
+ *
+ * Decorrelate LPC gains. There are 6 LPC Gains per frame. This is like
+ * multiplying the gain vector by a decorrelating matrix.
+ *
+ * Input:
+ * -data : LPC gain in log-domain with mean removed.
+ *
+ * Output:
+ * -out : decorrelated parameters.
+ */
+int16_t WebRtcIsac_DecorrelateLPGain(
+ const double* data,
+ double* out);
+
+
+/******************************************************************************
+ * WebRtcIsac_QuantizeLpcGain()
+ *
+ * Quantize the decorrelated log-domain gains.
+ *
+ * Input:
+ * -lpcGain : uncorrelated LPC gains.
+ *
+ * Output:
+ * -idx : quantization indices
+ * -lpcGain : quantized value of the input.
+ */
+double WebRtcIsac_QuantizeLpcGain(
+ double* lpGains,
+ int* idx);
+
+
+/******************************************************************************
+ * WebRtcIsac_DequantizeLpcGain()
+ *
+ * Get the quantized values given the quantization indices.
+ *
+ * Input:
+ * -idx : pointer to quantization indices.
+ *
+ * Output:
+ * -lpcGains : quantized values of the given parameters.
+ */
+int16_t WebRtcIsac_DequantizeLpcGain(
+ const int* idx,
+ double* lpGains);
+
+
+/******************************************************************************
+ * WebRtcIsac_CorrelateLpcGain()
+ *
+ * This is the inverse of WebRtcIsac_DecorrelateLPGain().
+ *
+ * Input:
+ * -data : decorrelated parameters.
+ *
+ * Output:
+ * -out : correlated parameters.
+ */
+int16_t WebRtcIsac_CorrelateLpcGain(
+ const double* data,
+ double* out);
+
+
+/******************************************************************************
+ * WebRtcIsac_AddMeanToLinearDomain()
+ *
+ * This is the inverse of WebRtcIsac_ToLogDomainRemoveMean().
+ *
+ * Input:
+ * -lpcGain : LPC gain in log-domain & mean removed
+ *
+ * Output:
+ * -lpcGain : LPC gain in normal domain.
+ */
+int16_t WebRtcIsac_AddMeanToLinearDomain(
+ double* lpcGains);
+
+
+#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ENCODE_LPC_SWB_H_
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/entropy_coding.c b/webrtc/modules/audio_coding/codecs/isac/main/source/entropy_coding.c
new file mode 100644
index 0000000..139679f
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/entropy_coding.c
@@ -0,0 +1,2066 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * entropy_coding.c
+ *
+ * This file defines all of the functions used to arithmetically
+ * encode the iSAC bitstream.
+ *
+ */
+
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "entropy_coding.h"
+#include "settings.h"
+#include "arith_routines.h"
+#include "spectrum_ar_model_tables.h"
+#include "lpc_tables.h"
+#include "pitch_gain_tables.h"
+#include "pitch_lag_tables.h"
+#include "encode_lpc_swb.h"
+#include "lpc_shape_swb12_tables.h"
+#include "lpc_shape_swb16_tables.h"
+#include "lpc_gain_swb_tables.h"
+#include "os_specific_inline.h"
+
+#include <math.h>
+#include <string.h>
+
+static const uint16_t kLpcVecPerSegmentUb12 = 5;
+static const uint16_t kLpcVecPerSegmentUb16 = 4;
+
+/* CDF array for encoder bandwidth (12 vs 16 kHz) indicator. */
+static const uint16_t kOneBitEqualProbCdf[3] = {
+ 0, 32768, 65535 };
+
+/* Pointer to cdf array for encoder bandwidth (12 vs 16 kHz) indicator. */
+static const uint16_t* kOneBitEqualProbCdf_ptr[1] = {
+ kOneBitEqualProbCdf };
+
+/*
+ * Initial cdf index for decoder of encoded bandwidth
+ * (12 vs 16 kHz) indicator.
+ */ +static const uint16_t kOneBitEqualProbInitIndex[1] = { 1 }; + + +static const int kIsSWB12 = 1; + +/* compute correlation from power spectrum */ +static void FindCorrelation(int32_t* PSpecQ12, int32_t* CorrQ7) { + int32_t summ[FRAMESAMPLES / 8]; + int32_t diff[FRAMESAMPLES / 8]; + const int16_t* CS_ptrQ9; + int32_t sum; + int k, n; + + for (k = 0; k < FRAMESAMPLES / 8; k++) { + summ[k] = (PSpecQ12[k] + PSpecQ12[FRAMESAMPLES_QUARTER - 1 - k] + 16) >> 5; + diff[k] = (PSpecQ12[k] - PSpecQ12[FRAMESAMPLES_QUARTER - 1 - k] + 16) >> 5; + } + + sum = 2; + for (n = 0; n < FRAMESAMPLES / 8; n++) { + sum += summ[n]; + } + CorrQ7[0] = sum; + + for (k = 0; k < AR_ORDER; k += 2) { + sum = 0; + CS_ptrQ9 = WebRtcIsac_kCos[k]; + for (n = 0; n < FRAMESAMPLES / 8; n++) + sum += (CS_ptrQ9[n] * diff[n] + 256) >> 9; + CorrQ7[k + 1] = sum; + } + + for (k = 1; k < AR_ORDER; k += 2) { + sum = 0; + CS_ptrQ9 = WebRtcIsac_kCos[k]; + for (n = 0; n < FRAMESAMPLES / 8; n++) + sum += (CS_ptrQ9[n] * summ[n] + 256) >> 9; + CorrQ7[k + 1] = sum; + } +} + +/* compute inverse AR power spectrum */ +/* Changed to the function used in iSAC FIX for compatibility reasons */ +static void FindInvArSpec(const int16_t* ARCoefQ12, + const int32_t gainQ10, + int32_t* CurveQ16) { + int32_t CorrQ11[AR_ORDER + 1]; + int32_t sum, tmpGain; + int32_t diffQ16[FRAMESAMPLES / 8]; + const int16_t* CS_ptrQ9; + int k, n; + int16_t round, shftVal = 0, sh; + + sum = 0; + for (n = 0; n < AR_ORDER + 1; n++) { + sum += WEBRTC_SPL_MUL(ARCoefQ12[n], ARCoefQ12[n]); /* Q24 */ + } + sum = ((sum >> 6) * 65 + 32768) >> 16; /* Q8 */ + CorrQ11[0] = (sum * gainQ10 + 256) >> 9; + + /* To avoid overflow, we shift down gainQ10 if it is large. + * We will not lose any precision */ + if (gainQ10 > 400000) { + tmpGain = gainQ10 >> 3; + round = 32; + shftVal = 6; + } else { + tmpGain = gainQ10; + round = 256; + shftVal = 9; + } + + for (k = 1; k < AR_ORDER + 1; k++) { + sum = 16384; + for (n = k; n < AR_ORDER + 1; n++) + sum += WEBRTC_SPL_MUL(ARCoefQ12[n - k], ARCoefQ12[n]); /* Q24 */ + sum >>= 15; + CorrQ11[k] = (sum * tmpGain + round) >> shftVal; + } + sum = CorrQ11[0] << 7; + for (n = 0; n < FRAMESAMPLES / 8; n++) { + CurveQ16[n] = sum; + } + for (k = 1; k < AR_ORDER; k += 2) { + for (n = 0; n < FRAMESAMPLES / 8; n++) { + CurveQ16[n] += (WebRtcIsac_kCos[k][n] * CorrQ11[k + 1] + 2) >> 2; + } + } + + CS_ptrQ9 = WebRtcIsac_kCos[0]; + + /* If CorrQ11[1] too large we avoid getting overflow in the + * calculation by shifting */ + sh = WebRtcSpl_NormW32(CorrQ11[1]); + if (CorrQ11[1] == 0) { /* Use next correlation */ + sh = WebRtcSpl_NormW32(CorrQ11[2]); + } + if (sh < 9) { + shftVal = 9 - sh; + } else { + shftVal = 0; + } + for (n = 0; n < FRAMESAMPLES / 8; n++) { + diffQ16[n] = (CS_ptrQ9[n] * (CorrQ11[1] >> shftVal) + 2) >> 2; + } + for (k = 2; k < AR_ORDER; k += 2) { + CS_ptrQ9 = WebRtcIsac_kCos[k]; + for (n = 0; n < FRAMESAMPLES / 8; n++) { + diffQ16[n] += (CS_ptrQ9[n] * (CorrQ11[k + 1] >> shftVal) + 2) >> 2; + } + } + + for (k = 0; k < FRAMESAMPLES / 8; k++) { + CurveQ16[FRAMESAMPLES_QUARTER - 1 - k] = CurveQ16[k] - + (diffQ16[k] << shftVal); + CurveQ16[k] += diffQ16[k] << shftVal; + } +} + +/* Generate array of dither samples in Q7. */ +static void GenerateDitherQ7Lb(int16_t* bufQ7, uint32_t seed, + int length, int16_t AvgPitchGain_Q12) { + int k, shft; + int16_t dither1_Q7, dither2_Q7, dither_gain_Q14; + + /* This threshold should be equal to that in decode_spec(). 
*/
+ if (AvgPitchGain_Q12 < 614) {
+ for (k = 0; k < length - 2; k += 3) {
+ /* New random unsigned int. */
+ seed = (seed * 196314165) + 907633515;
+
+ /* Fixed-point dither sample between -64 and 64 (Q7). */
+ /* dither = seed * 128 / 4294967295 */
+ dither1_Q7 = (int16_t)(((int)seed + 16777216) >> 25);
+
+ /* New random unsigned int. */
+ seed = (seed * 196314165) + 907633515;
+
+ /* Fixed-point dither sample between -64 and 64. */
+ dither2_Q7 = (int16_t)(((int)seed + 16777216) >> 25);
+
+ shft = (seed >> 25) & 15;
+ if (shft < 5) {
+ bufQ7[k] = dither1_Q7;
+ bufQ7[k + 1] = dither2_Q7;
+ bufQ7[k + 2] = 0;
+ } else if (shft < 10) {
+ bufQ7[k] = dither1_Q7;
+ bufQ7[k + 1] = 0;
+ bufQ7[k + 2] = dither2_Q7;
+ } else {
+ bufQ7[k] = 0;
+ bufQ7[k + 1] = dither1_Q7;
+ bufQ7[k + 2] = dither2_Q7;
+ }
+ }
+ } else {
+ dither_gain_Q14 = (int16_t)(22528 - 10 * AvgPitchGain_Q12);
+
+ /* Dither on half of the coefficients. */
+ for (k = 0; k < length - 1; k += 2) {
+ /* New random unsigned int */
+ seed = (seed * 196314165) + 907633515;
+
+ /* Fixed-point dither sample between -64 and 64. */
+ dither1_Q7 = (int16_t)(((int)seed + 16777216) >> 25);
+
+ /* Dither sample is placed in either even or odd index. */
+ shft = (seed >> 25) & 1; /* Either 0 or 1 */
+
+ bufQ7[k + shft] = (((dither_gain_Q14 * dither1_Q7) + 8192) >> 14);
+ bufQ7[k + 1 - shft] = 0;
+ }
+ }
+}
+
+
+
+/******************************************************************************
+ * GenerateDitherQ7LbUB()
+ *
+ * Generate an array of dither samples in Q7. There are fewer zeros in the
+ * dither vector compared to GenerateDitherQ7Lb.
+ *
+ * A uniform random number generator with range [-64, 64] is employed, but
+ * the generated dither samples are scaled by 0.35, a heuristic scaling.
+ *
+ * Input:
+ * -seed : the initial seed for the random number generator.
+ * -length : the number of dither values to be generated.
+ *
+ * Output:
+ * -bufQ7 : pointer to a buffer where dithers are written to.
+ */
+static void GenerateDitherQ7LbUB(
+ int16_t* bufQ7,
+ uint32_t seed,
+ int length) {
+ int k;
+ for (k = 0; k < length; k++) {
+ /* new random unsigned int */
+ seed = (seed * 196314165) + 907633515;
+
+ /* Fixed-point dither sample between -64 and 64 (Q7). */
+ /* bufQ7 = seed * 128 / 4294967295 */
+ bufQ7[k] = (int16_t)(((int)seed + 16777216) >> 25);
+
+ /* Scale by 0.35. */
+ bufQ7[k] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT(bufQ7[k], 2048, 13);
+ }
+}
+
+/*
+ * Function to decode the complex spectrum from the bit stream;
+ * returns the total number of bytes in the stream.
+ */
+int WebRtcIsac_DecodeSpec(Bitstr* streamdata, int16_t AvgPitchGain_Q12,
+ enum ISACBand band, double* fr, double* fi) {
+ int16_t DitherQ7[FRAMESAMPLES];
+ int16_t data[FRAMESAMPLES];
+ int32_t invARSpec2_Q16[FRAMESAMPLES_QUARTER];
+ uint16_t invARSpecQ8[FRAMESAMPLES_QUARTER];
+ int16_t ARCoefQ12[AR_ORDER + 1];
+ int16_t RCQ15[AR_ORDER];
+ int16_t gainQ10;
+ int32_t gain2_Q10, res;
+ int32_t in_sqrt;
+ int32_t newRes;
+ int k, len, i;
+ int is_12khz = !kIsSWB12;
+ int num_dft_coeff = FRAMESAMPLES;
+ /* Create dither signal. */
+ if (band == kIsacLowerBand) {
+ GenerateDitherQ7Lb(DitherQ7, streamdata->W_upper, FRAMESAMPLES,
+ AvgPitchGain_Q12);
+ } else {
+ GenerateDitherQ7LbUB(DitherQ7, streamdata->W_upper, FRAMESAMPLES);
+ if (band == kIsacUpperBand12) {
+ is_12khz = kIsSWB12;
+ num_dft_coeff = FRAMESAMPLES_HALF;
+ }
+ }
+
+ /* Decode model parameters.
*/ + if (WebRtcIsac_DecodeRc(streamdata, RCQ15) < 0) + return -ISAC_RANGE_ERROR_DECODE_SPECTRUM; + + WebRtcSpl_ReflCoefToLpc(RCQ15, AR_ORDER, ARCoefQ12); + + if (WebRtcIsac_DecodeGain2(streamdata, &gain2_Q10) < 0) + return -ISAC_RANGE_ERROR_DECODE_SPECTRUM; + + /* Compute inverse AR power spectrum. */ + FindInvArSpec(ARCoefQ12, gain2_Q10, invARSpec2_Q16); + + /* Convert to magnitude spectrum, + * by doing square-roots (modified from SPLIB). */ + res = 1 << (WebRtcSpl_GetSizeInBits(invARSpec2_Q16[0]) >> 1); + for (k = 0; k < FRAMESAMPLES_QUARTER; k++) { + in_sqrt = invARSpec2_Q16[k]; + i = 10; + + /* Negative values make no sense for a real sqrt-function. */ + if (in_sqrt < 0) + in_sqrt = -in_sqrt; + + newRes = (in_sqrt / res + res) >> 1; + do { + res = newRes; + newRes = (in_sqrt / res + res) >> 1; + } while (newRes != res && i-- > 0); + + invARSpecQ8[k] = (int16_t)newRes; + } + + len = WebRtcIsac_DecLogisticMulti2(data, streamdata, invARSpecQ8, DitherQ7, + num_dft_coeff, is_12khz); + /* Arithmetic decoding of spectrum. */ + if (len < 1) { + return -ISAC_RANGE_ERROR_DECODE_SPECTRUM; + } + + switch (band) { + case kIsacLowerBand: { + /* Scale down spectral samples with low SNR. */ + int32_t p1; + int32_t p2; + if (AvgPitchGain_Q12 <= 614) { + p1 = 30 << 10; + p2 = 32768 + (33 << 16); + } else { + p1 = 36 << 10; + p2 = 32768 + (40 << 16); + } + for (k = 0; k < FRAMESAMPLES; k += 4) { + gainQ10 = WebRtcSpl_DivW32W16ResW16(p1, (int16_t)( + (invARSpec2_Q16[k >> 2] + p2) >> 16)); + *fr++ = (double)((data[ k ] * gainQ10 + 512) >> 10) / 128.0; + *fi++ = (double)((data[k + 1] * gainQ10 + 512) >> 10) / 128.0; + *fr++ = (double)((data[k + 2] * gainQ10 + 512) >> 10) / 128.0; + *fi++ = (double)((data[k + 3] * gainQ10 + 512) >> 10) / 128.0; + } + break; + } + case kIsacUpperBand12: { + for (k = 0, i = 0; k < FRAMESAMPLES_HALF; k += 4) { + fr[i] = (double)data[ k ] / 128.0; + fi[i] = (double)data[k + 1] / 128.0; + i++; + fr[i] = (double)data[k + 2] / 128.0; + fi[i] = (double)data[k + 3] / 128.0; + i++; + } + /* The second half of real and imaginary coefficients is zero. This is + * due to using the old FFT module which requires two signals as input + * while in 0-12 kHz mode we only have 8-12 kHz band, and the second + * signal is set to zero. */ + memset(&fr[FRAMESAMPLES_QUARTER], 0, FRAMESAMPLES_QUARTER * + sizeof(double)); + memset(&fi[FRAMESAMPLES_QUARTER], 0, FRAMESAMPLES_QUARTER * + sizeof(double)); + break; + } + case kIsacUpperBand16: { + for (i = 0, k = 0; k < FRAMESAMPLES; k += 4, i++) { + fr[i] = (double)data[ k ] / 128.0; + fi[i] = (double)data[k + 1] / 128.0; + fr[(FRAMESAMPLES_HALF) - 1 - i] = (double)data[k + 2] / 128.0; + fi[(FRAMESAMPLES_HALF) - 1 - i] = (double)data[k + 3] / 128.0; + } + break; + } + } + return len; +} + + +int WebRtcIsac_EncodeSpec(const int16_t* fr, const int16_t* fi, + int16_t AvgPitchGain_Q12, enum ISACBand band, + Bitstr* streamdata) { + int16_t ditherQ7[FRAMESAMPLES]; + int16_t dataQ7[FRAMESAMPLES]; + int32_t PSpec[FRAMESAMPLES_QUARTER]; + int32_t invARSpec2_Q16[FRAMESAMPLES_QUARTER]; + uint16_t invARSpecQ8[FRAMESAMPLES_QUARTER]; + int32_t CorrQ7[AR_ORDER + 1]; + int32_t CorrQ7_norm[AR_ORDER + 1]; + int16_t RCQ15[AR_ORDER]; + int16_t ARCoefQ12[AR_ORDER + 1]; + int32_t gain2_Q10; + int16_t val; + int32_t nrg, res; + uint32_t sum; + int32_t in_sqrt; + int32_t newRes; + int16_t err; + uint32_t nrg_u32; + int shift_var; + int k, n, j, i; + int is_12khz = !kIsSWB12; + int num_dft_coeff = FRAMESAMPLES; + + /* Create dither signal. 
*/
+ if (band == kIsacLowerBand) {
+ GenerateDitherQ7Lb(ditherQ7, streamdata->W_upper, FRAMESAMPLES,
+ AvgPitchGain_Q12);
+ } else {
+ GenerateDitherQ7LbUB(ditherQ7, streamdata->W_upper, FRAMESAMPLES);
+ if (band == kIsacUpperBand12) {
+ is_12khz = kIsSWB12;
+ num_dft_coeff = FRAMESAMPLES_HALF;
+ }
+ }
+
+ /* add dither and quantize, and compute power spectrum */
+ switch (band) {
+ case kIsacLowerBand: {
+ for (k = 0; k < FRAMESAMPLES; k += 4) {
+ val = ((*fr++ + ditherQ7[k] + 64) & 0xFF80) - ditherQ7[k];
+ dataQ7[k] = val;
+ sum = val * val;
+
+ val = ((*fi++ + ditherQ7[k + 1] + 64) & 0xFF80) - ditherQ7[k + 1];
+ dataQ7[k + 1] = val;
+ sum += val * val;
+
+ val = ((*fr++ + ditherQ7[k + 2] + 64) & 0xFF80) - ditherQ7[k + 2];
+ dataQ7[k + 2] = val;
+ sum += val * val;
+
+ val = ((*fi++ + ditherQ7[k + 3] + 64) & 0xFF80) - ditherQ7[k + 3];
+ dataQ7[k + 3] = val;
+ sum += val * val;
+
+ PSpec[k >> 2] = sum >> 2;
+ }
+ break;
+ }
+ case kIsacUpperBand12: {
+ for (k = 0, j = 0; k < FRAMESAMPLES_HALF; k += 4) {
+ val = ((*fr++ + ditherQ7[k] + 64) & 0xFF80) - ditherQ7[k];
+ dataQ7[k] = val;
+ sum = val * val;
+
+ val = ((*fi++ + ditherQ7[k + 1] + 64) & 0xFF80) - ditherQ7[k + 1];
+ dataQ7[k + 1] = val;
+ sum += val * val;
+
+ PSpec[j++] = sum >> 1;
+
+ val = ((*fr++ + ditherQ7[k + 2] + 64) & 0xFF80) - ditherQ7[k + 2];
+ dataQ7[k + 2] = val;
+ sum = val * val;
+
+ val = ((*fi++ + ditherQ7[k + 3] + 64) & 0xFF80) - ditherQ7[k + 3];
+ dataQ7[k + 3] = val;
+ sum += val * val;
+
+ PSpec[j++] = sum >> 1;
+ }
+ break;
+ }
+ case kIsacUpperBand16: {
+ for (j = 0, k = 0; k < FRAMESAMPLES; k += 4, j++) {
+ val = ((fr[j] + ditherQ7[k] + 64) & 0xFF80) - ditherQ7[k];
+ dataQ7[k] = val;
+ sum = val * val;
+
+ val = ((fi[j] + ditherQ7[k + 1] + 64) & 0xFF80) - ditherQ7[k + 1];
+ dataQ7[k + 1] = val;
+ sum += val * val;
+
+ val = ((fr[(FRAMESAMPLES_HALF) - 1 - j] + ditherQ7[k + 2] + 64) &
+ 0xFF80) - ditherQ7[k + 2];
+ dataQ7[k + 2] = val;
+ sum += val * val;
+
+ val = ((fi[(FRAMESAMPLES_HALF) - 1 - j] + ditherQ7[k + 3] + 64) &
+ 0xFF80) - ditherQ7[k + 3];
+ dataQ7[k + 3] = val;
+ sum += val * val;
+
+ PSpec[k >> 2] = sum >> 2;
+ }
+ break;
+ }
+ }
+
+ /* compute correlation from power spectrum */
+ FindCorrelation(PSpec, CorrQ7);
+
+ /* Find AR coefficients */
+ /* Number of bit shifts to 14-bit normalize CorrQ7[0]
+ * (leaving room for sign) */
+ shift_var = WebRtcSpl_NormW32(CorrQ7[0]) - 18;
+
+ if (shift_var > 0) {
+ for (k = 0; k < AR_ORDER + 1; k++) {
+ CorrQ7_norm[k] = CorrQ7[k] << shift_var;
+ }
+ } else {
+ for (k = 0; k < AR_ORDER + 1; k++) {
+ CorrQ7_norm[k] = CorrQ7[k] >> (-shift_var);
+ }
+ }
+
+ /* Find RC coefficients. */
+ WebRtcSpl_AutoCorrToReflCoef(CorrQ7_norm, AR_ORDER, RCQ15);
+
+ /* Quantize & code RC Coefficient. */
+ WebRtcIsac_EncodeRc(RCQ15, streamdata);
+
+ /* RC -> AR coefficients */
+ WebRtcSpl_ReflCoefToLpc(RCQ15, AR_ORDER, ARCoefQ12);
+
+ /* Compute ARCoef' * Corr * ARCoef in Q19. */
+ nrg = 0;
+ for (j = 0; j <= AR_ORDER; j++) {
+ for (n = 0; n <= j; n++) {
+ nrg += (ARCoefQ12[j] * ((CorrQ7_norm[j - n] * ARCoefQ12[n] + 256) >> 9) +
+ 4) >> 3;
+ }
+ for (n = j + 1; n <= AR_ORDER; n++) {
+ nrg += (ARCoefQ12[j] * ((CorrQ7_norm[n - j] * ARCoefQ12[n] + 256) >> 9) +
+ 4) >> 3;
+ }
+ }
+
+ nrg_u32 = (uint32_t)nrg;
+ if (shift_var > 0) {
+ nrg_u32 = nrg_u32 >> shift_var;
+ } else {
+ nrg_u32 = nrg_u32 << (-shift_var);
+ }
+ if (nrg_u32 > 0x7FFFFFFF) {
+ nrg = 0x7FFFFFFF;
+ } else {
+ nrg = (int32_t)nrg_u32;
+ }
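
The quantizer lines in the switch above rely on a bit trick: adding 64 and masking with 0xFF80 rounds a Q7 value to the nearest multiple of 128, the coarser grid on which the spectrum is entropy-coded, and the dither shifts that grid per sample before being subtracted again. A standalone check of the masking step with the dither set to zero; like the code above, it relies on two's-complement wrap-around when the result is stored back into an int16_t:

```c
#include <stdint.h>
#include <stdio.h>

/* Round a Q7 sample to the nearest multiple of 128 (one Q0 step). */
static int16_t RoundQ7(int16_t x) {
  return (int16_t)((x + 64) & 0xFF80);
}

int main(void) {
  printf("%d %d %d\n", RoundQ7(100), RoundQ7(-100), RoundQ7(63));
  /* -> 128 -128 0: each input lands on the nearest multiple of 128. */
  return 0;
}
```
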
+ /* Also shifts 31 bits to the left! */
+ gain2_Q10 = WebRtcSpl_DivResultInQ31(FRAMESAMPLES_QUARTER, nrg);
+
+ /* Quantize & code gain2_Q10. */
+ if (WebRtcIsac_EncodeGain2(&gain2_Q10, streamdata)) {
+ return -1;
+ }
+
+ /* Compute inverse AR power spectrum. */
+ FindInvArSpec(ARCoefQ12, gain2_Q10, invARSpec2_Q16);
+ /* Convert to magnitude spectrum, by doing square-roots
+ * (modified from SPLIB). */
+ res = 1 << (WebRtcSpl_GetSizeInBits(invARSpec2_Q16[0]) >> 1);
+ for (k = 0; k < FRAMESAMPLES_QUARTER; k++) {
+ in_sqrt = invARSpec2_Q16[k];
+ i = 10;
+ /* Negative values make no sense for a real sqrt-function. */
+ if (in_sqrt < 0) {
+ in_sqrt = -in_sqrt;
+ }
+ newRes = (in_sqrt / res + res) >> 1;
+ do {
+ res = newRes;
+ newRes = (in_sqrt / res + res) >> 1;
+ } while (newRes != res && i-- > 0);
+
+ invARSpecQ8[k] = (int16_t)newRes;
+ }
+ /* arithmetic coding of spectrum */
+ err = WebRtcIsac_EncLogisticMulti2(streamdata, dataQ7, invARSpecQ8,
+ num_dft_coeff, is_12khz);
+ if (err < 0) {
+ return (err);
+ }
+ return 0;
+}
+
+
+/* step-up */
+void WebRtcIsac_Rc2Poly(double* RC, int N, double* a) {
+ int m, k;
+ double tmp[MAX_AR_MODEL_ORDER];
+
+ a[0] = 1.0;
+ tmp[0] = 1.0;
+ for (m = 1; m <= N; m++) {
+ /* copy */
+ memcpy(&tmp[1], &a[1], (m - 1) * sizeof(double));
+ a[m] = RC[m - 1];
+ for (k = 1; k < m; k++) {
+ a[k] += RC[m - 1] * tmp[m - k];
+ }
+ }
+ return;
+}
+
+/* step-down */
+void WebRtcIsac_Poly2Rc(double* a, int N, double* RC) {
+ int m, k;
+ double tmp[MAX_AR_MODEL_ORDER];
+ double tmp_inv;
+
+ RC[N - 1] = a[N];
+ for (m = N - 1; m > 0; m--) {
+ tmp_inv = 1.0 / (1.0 - RC[m] * RC[m]);
+ for (k = 1; k <= m; k++) {
+ tmp[k] = (a[k] - RC[m] * a[m - k + 1]) * tmp_inv;
+ }
+
+ memcpy(&a[1], &tmp[1], (m - 1) * sizeof(double));
+ RC[m - 1] = tmp[m];
+ }
+ return;
+}
+
+
+#define MAX_ORDER 100
+
+/* Matlab's LAR definition */
+void WebRtcIsac_Rc2Lar(const double* refc, double* lar, int order) {
+ int k;
+ for (k = 0; k < order; k++) {
+ lar[k] = log((1 + refc[k]) / (1 - refc[k]));
+ }
+}
+
+
+void WebRtcIsac_Lar2Rc(const double* lar, double* refc, int order) {
+ int k;
+ double tmp;
+
+ for (k = 0; k < order; k++) {
+ tmp = exp(lar[k]);
+ refc[k] = (tmp - 1) / (tmp + 1);
+ }
+}
+
+void WebRtcIsac_Poly2Lar(double* lowband, int orderLo, double* hiband,
+ int orderHi, int Nsub, double* lars) {
+ int k;
+ double rc[MAX_ORDER], *inpl, *inph, *outp;
+
+ inpl = lowband;
+ inph = hiband;
+ outp = lars;
+ for (k = 0; k < Nsub; k++) {
+ /* gains */
+ outp[0] = inpl[0];
+ outp[1] = inph[0];
+ outp += 2;
+
+ /* Low band */
+ inpl[0] = 1.0;
+ WebRtcIsac_Poly2Rc(inpl, orderLo, rc);
+ WebRtcIsac_Rc2Lar(rc, outp, orderLo);
+ outp += orderLo;
+
+ /* High band */
+ inph[0] = 1.0;
+ WebRtcIsac_Poly2Rc(inph, orderHi, rc);
+ WebRtcIsac_Rc2Lar(rc, outp, orderHi);
+ outp += orderHi;
+
+ inpl += orderLo + 1;
+ inph += orderHi + 1;
+ }
+}
+
+
+int16_t WebRtcIsac_Poly2LarUB(double* lpcVecs, int16_t bandwidth) {
+ double poly[MAX_ORDER];
+ double rc[MAX_ORDER];
+ double* ptrIO;
+ int16_t vecCntr;
+ int16_t vecSize;
+ int16_t numVec;
+
+ vecSize = UB_LPC_ORDER;
+ switch (bandwidth) {
+ case isac12kHz: {
+ numVec = UB_LPC_VEC_PER_FRAME;
+ break;
+ }
+ case isac16kHz: {
+ numVec = UB16_LPC_VEC_PER_FRAME;
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ ptrIO = lpcVecs;
+ poly[0] = 1.0;
+ for (vecCntr = 0; vecCntr < numVec; vecCntr++) {
+ memcpy(&poly[1], ptrIO, sizeof(double) * vecSize);
+ WebRtcIsac_Poly2Rc(poly, vecSize, rc);
+ WebRtcIsac_Rc2Lar(rc, ptrIO, vecSize);
+ ptrIO += vecSize;
+ }
+ return 0;
+}
+
+
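
WebRtcIsac_Rc2Lar() and WebRtcIsac_Lar2Rc() above are exact inverses: lar = log((1+k)/(1-k)) equals 2*atanh(k), so k = tanh(lar/2). A standalone numerical check (the value of k is arbitrary):

```c
#include <math.h>
#include <stdio.h>

int main(void) {
  const double k = 0.75; /* a reflection coefficient, |k| < 1 */
  const double lar = log((1.0 + k) / (1.0 - k)); /* as in WebRtcIsac_Rc2Lar */
  const double back = (exp(lar) - 1.0) / (exp(lar) + 1.0); /* WebRtcIsac_Lar2Rc */
  printf("lar=%f back=%f tanh(lar/2)=%f\n", lar, back, tanh(lar / 2.0));
  return 0; /* back and tanh(lar/2) both print 0.750000 */
}
```
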
+void WebRtcIsac_Lar2Poly(double* lars, double* lowband, int orderLo,
+ double* hiband, int orderHi, int Nsub) {
+ int k, orderTot;
+ double rc[MAX_ORDER], *outpl, *outph, *inp;
+
+ orderTot = (orderLo + orderHi + 2);
+ outpl = lowband;
+ outph = hiband;
+ /* First two elements of 'inp' store gains */
+ inp = lars;
+ for (k = 0; k < Nsub; k++) {
+ /* Low band */
+ WebRtcIsac_Lar2Rc(&inp[2], rc, orderLo);
+ WebRtcIsac_Rc2Poly(rc, orderLo, outpl);
+
+ /* High band */
+ WebRtcIsac_Lar2Rc(&inp[orderLo + 2], rc, orderHi);
+ WebRtcIsac_Rc2Poly(rc, orderHi, outph);
+
+ /* gains */
+ outpl[0] = inp[0];
+ outph[0] = inp[1];
+
+ outpl += orderLo + 1;
+ outph += orderHi + 1;
+ inp += orderTot;
+ }
+}
+
+/*
+ * Assumes 2 LAR vectors and interpolates to 'numPolyVecs' A-polynomials.
+ * Note: 'numPolyVecs' includes the first and the last point of the interval
+ */
+void WebRtcIsac_Lar2PolyInterpolUB(double* larVecs, double* percepFilterParams,
+ int numPolyVecs) {
+ int polyCntr, coeffCntr;
+ double larInterpol[UB_LPC_ORDER];
+ double rc[UB_LPC_ORDER];
+ double delta[UB_LPC_ORDER];
+
+ /* calculate the step-size for linear interpolation coefficients */
+ for (coeffCntr = 0; coeffCntr < UB_LPC_ORDER; coeffCntr++) {
+ delta[coeffCntr] = (larVecs[UB_LPC_ORDER + coeffCntr] -
+ larVecs[coeffCntr]) / (numPolyVecs - 1);
+ }
+
+ for (polyCntr = 0; polyCntr < numPolyVecs; polyCntr++) {
+ for (coeffCntr = 0; coeffCntr < UB_LPC_ORDER; coeffCntr++) {
+ larInterpol[coeffCntr] = larVecs[coeffCntr] +
+ delta[coeffCntr] * polyCntr;
+ }
+ WebRtcIsac_Lar2Rc(larInterpol, rc, UB_LPC_ORDER);
+
+ /* Convert to an A-polynomial. The following function returns A[0] = 1,
+ * which is written where the gains have to be written; the gain itself
+ * is written afterwards, outside this function. This way we save a
+ * memcpy. */
+ WebRtcIsac_Rc2Poly(rc, UB_LPC_ORDER, percepFilterParams);
+ percepFilterParams += (UB_LPC_ORDER + 1);
+ }
+}
+
+int WebRtcIsac_DecodeLpc(Bitstr* streamdata, double* LPCCoef_lo,
+ double* LPCCoef_hi) {
+ double lars[KLT_ORDER_GAIN + KLT_ORDER_SHAPE];
+ int err;
+
+ err = WebRtcIsac_DecodeLpcCoef(streamdata, lars);
+ if (err < 0) {
+ return -ISAC_RANGE_ERROR_DECODE_LPC;
+ }
+ WebRtcIsac_Lar2Poly(lars, LPCCoef_lo, ORDERLO, LPCCoef_hi, ORDERHI,
+ SUBFRAMES);
+ return 0;
+}
+
+int16_t WebRtcIsac_DecodeInterpolLpcUb(Bitstr* streamdata,
+ double* percepFilterParams,
+ int16_t bandwidth) {
+ double lpcCoeff[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+ int err;
+ int interpolCntr;
+ int subframeCntr;
+ int16_t numSegments;
+ int16_t numVecPerSegment;
+ int16_t numGains;
+
+ double percepFilterGains[SUBFRAMES << 1];
+ double* ptrOutParam = percepFilterParams;
+
+ err = WebRtcIsac_DecodeLpcCoefUB(streamdata, lpcCoeff, percepFilterGains,
+ bandwidth);
+ if (err < 0) {
+ return -ISAC_RANGE_ERROR_DECODE_LPC;
+ }
+
+ switch (bandwidth) {
+ case isac12kHz: {
+ numGains = SUBFRAMES;
+ numSegments = UB_LPC_VEC_PER_FRAME - 1;
+ numVecPerSegment = kLpcVecPerSegmentUb12;
+ break;
+ }
+ case isac16kHz: {
+ numGains = SUBFRAMES << 1;
+ numSegments = UB16_LPC_VEC_PER_FRAME - 1;
+ numVecPerSegment = kLpcVecPerSegmentUb16;
+ break;
+ }
+ default:
+ return -1;
+ }
+
+ for (interpolCntr = 0; interpolCntr < numSegments; interpolCntr++) {
+ WebRtcIsac_Lar2PolyInterpolUB(&lpcCoeff[interpolCntr * UB_LPC_ORDER],
+ ptrOutParam, numVecPerSegment + 1);
+ ptrOutParam += (numVecPerSegment * (UB_LPC_ORDER + 1));
+ }
+
+ ptrOutParam = percepFilterParams;
+
+ if (bandwidth == isac16kHz) {
+ ptrOutParam += (1 + UB_LPC_ORDER);
+ }
+
+ for (subframeCntr = 0; subframeCntr < numGains; subframeCntr++) {
+ *ptrOutParam =
percepFilterGains[subframeCntr]; + ptrOutParam += (1 + UB_LPC_ORDER); + } + return 0; +} + + +/* decode & dequantize LPC Coef */ +int WebRtcIsac_DecodeLpcCoef(Bitstr* streamdata, double* LPCCoef) { + int j, k, n, pos, pos2, posg, poss, offsg, offss, offs2; + int index_g[KLT_ORDER_GAIN], index_s[KLT_ORDER_SHAPE]; + double tmpcoeffs_g[KLT_ORDER_GAIN], tmpcoeffs_s[KLT_ORDER_SHAPE]; + double tmpcoeffs2_g[KLT_ORDER_GAIN], tmpcoeffs2_s[KLT_ORDER_SHAPE]; + double sum; + int err; + int model = 1; + + /* entropy decoding of model number */ + /* We are keeping this for backward compatibility of bit-streams. */ + err = WebRtcIsac_DecHistOneStepMulti(&model, streamdata, + WebRtcIsac_kQKltModelCdfPtr, + WebRtcIsac_kQKltModelInitIndex, 1); + if (err < 0) { + return err; + } + /* Only accepted value of model is 0. It is kept in bit-stream for backward + * compatibility. */ + if (model != 0) { + return -ISAC_DISALLOWED_LPC_MODEL; + } + + /* entropy decoding of quantization indices */ + err = WebRtcIsac_DecHistOneStepMulti( + index_s, streamdata, WebRtcIsac_kQKltCdfPtrShape, + WebRtcIsac_kQKltInitIndexShape, KLT_ORDER_SHAPE); + if (err < 0) { + return err; + } + err = WebRtcIsac_DecHistOneStepMulti( + index_g, streamdata, WebRtcIsac_kQKltCdfPtrGain, + WebRtcIsac_kQKltInitIndexGain, KLT_ORDER_GAIN); + if (err < 0) { + return err; + } + + /* find quantization levels for coefficients */ + for (k = 0; k < KLT_ORDER_SHAPE; k++) { + tmpcoeffs_s[k] = + WebRtcIsac_kQKltLevelsShape[WebRtcIsac_kQKltOffsetShape[k] + + index_s[k]]; + } + for (k = 0; k < KLT_ORDER_GAIN; k++) { + tmpcoeffs_g[k] = WebRtcIsac_kQKltLevelsGain[WebRtcIsac_kQKltOffsetGain[k] + + index_g[k]]; + } + + /* Inverse KLT */ + + /* Left transform, transpose matrix! */ + offsg = 0; + offss = 0; + posg = 0; + poss = 0; + for (j = 0; j < SUBFRAMES; j++) { + offs2 = 0; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = offsg; + pos2 = offs2; + for (n = 0; n < LPC_GAIN_ORDER; n++) { + sum += tmpcoeffs_g[pos++] * WebRtcIsac_kKltT1Gain[pos2++]; + } + tmpcoeffs2_g[posg++] = sum; + offs2 += LPC_GAIN_ORDER; + } + offs2 = 0; + for (k = 0; k < LPC_SHAPE_ORDER; k++) { + sum = 0; + pos = offss; + pos2 = offs2; + for (n = 0; n < LPC_SHAPE_ORDER; n++) { + sum += tmpcoeffs_s[pos++] * WebRtcIsac_kKltT1Shape[pos2++]; + } + tmpcoeffs2_s[poss++] = sum; + offs2 += LPC_SHAPE_ORDER; + } + offsg += LPC_GAIN_ORDER; + offss += LPC_SHAPE_ORDER; + } + + /* Right transform, transpose matrix */ + offsg = 0; + offss = 0; + posg = 0; + poss = 0; + for (j = 0; j < SUBFRAMES; j++) { + posg = offsg; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = k; + pos2 = j; + for (n = 0; n < SUBFRAMES; n++) { + sum += tmpcoeffs2_g[pos] * WebRtcIsac_kKltT2Gain[pos2]; + pos += LPC_GAIN_ORDER; + pos2 += SUBFRAMES; + + } + tmpcoeffs_g[posg++] = sum; + } + poss = offss; + for (k = 0; k < LPC_SHAPE_ORDER; k++) { + sum = 0; + pos = k; + pos2 = j; + for (n = 0; n < SUBFRAMES; n++) { + sum += tmpcoeffs2_s[pos] * WebRtcIsac_kKltT2Shape[pos2]; + pos += LPC_SHAPE_ORDER; + pos2 += SUBFRAMES; + } + tmpcoeffs_s[poss++] = sum; + } + offsg += LPC_GAIN_ORDER; + offss += LPC_SHAPE_ORDER; + } + + /* scaling, mean addition, and gain restoration */ + posg = 0; + poss = 0; + pos = 0; + for (k = 0; k < SUBFRAMES; k++) { + /* log gains */ + LPCCoef[pos] = tmpcoeffs_g[posg] / LPC_GAIN_SCALE; + LPCCoef[pos] += WebRtcIsac_kLpcMeansGain[posg]; + LPCCoef[pos] = exp(LPCCoef[pos]); + pos++; + posg++; + LPCCoef[pos] = tmpcoeffs_g[posg] / LPC_GAIN_SCALE; + LPCCoef[pos] += WebRtcIsac_kLpcMeansGain[posg]; + 
LPCCoef[pos] = exp(LPCCoef[pos]);
+ pos++;
+ posg++;
+
+ /* Low-band LAR coefficients. */
+ for (n = 0; n < LPC_LOBAND_ORDER; n++, pos++, poss++) {
+ LPCCoef[pos] = tmpcoeffs_s[poss] / LPC_LOBAND_SCALE;
+ LPCCoef[pos] += WebRtcIsac_kLpcMeansShape[poss];
+ }
+
+ /* High-band LAR coefficients. */
+ for (n = 0; n < LPC_HIBAND_ORDER; n++, pos++, poss++) {
+ LPCCoef[pos] = tmpcoeffs_s[poss] / LPC_HIBAND_SCALE;
+ LPCCoef[pos] += WebRtcIsac_kLpcMeansShape[poss];
+ }
+ }
+ return 0;
+}
+
+/* Encode LPC in LAR domain. */
+void WebRtcIsac_EncodeLar(double* LPCCoef, Bitstr* streamdata,
+ IsacSaveEncoderData* encData) {
+ int j, k, n, pos, pos2, poss, offss, offs2;
+ int index_s[KLT_ORDER_SHAPE];
+ int index_ovr_s[KLT_ORDER_SHAPE];
+ double tmpcoeffs_s[KLT_ORDER_SHAPE];
+ double tmpcoeffs2_s[KLT_ORDER_SHAPE];
+ double sum;
+ const int kModel = 0;
+
+ /* Mean removal and scaling. */
+ poss = 0;
+ pos = 0;
+ for (k = 0; k < SUBFRAMES; k++) {
+ /* First two elements are gains, move over them. */
+ pos += 2;
+
+ /* Low-band LAR coefficients. */
+ for (n = 0; n < LPC_LOBAND_ORDER; n++, poss++, pos++) {
+ tmpcoeffs_s[poss] = LPCCoef[pos] - WebRtcIsac_kLpcMeansShape[poss];
+ tmpcoeffs_s[poss] *= LPC_LOBAND_SCALE;
+ }
+
+ /* High-band LAR coefficients. */
+ for (n = 0; n < LPC_HIBAND_ORDER; n++, poss++, pos++) {
+ tmpcoeffs_s[poss] = LPCCoef[pos] - WebRtcIsac_kLpcMeansShape[poss];
+ tmpcoeffs_s[poss] *= LPC_HIBAND_SCALE;
+ }
+ }
+
+ /* KLT */
+
+ /* Left transform. */
+ offss = 0;
+ for (j = 0; j < SUBFRAMES; j++) {
+ poss = offss;
+ for (k = 0; k < LPC_SHAPE_ORDER; k++) {
+ sum = 0;
+ pos = offss;
+ pos2 = k;
+ for (n = 0; n < LPC_SHAPE_ORDER; n++) {
+ sum += tmpcoeffs_s[pos++] * WebRtcIsac_kKltT1Shape[pos2];
+ pos2 += LPC_SHAPE_ORDER;
+ }
+ tmpcoeffs2_s[poss++] = sum;
+ }
+ offss += LPC_SHAPE_ORDER;
+ }
+
+ /* Right transform. */
+ offss = 0;
+ offs2 = 0;
+ for (j = 0; j < SUBFRAMES; j++) {
+ poss = offss;
+ for (k = 0; k < LPC_SHAPE_ORDER; k++) {
+ sum = 0;
+ pos = k;
+ pos2 = offs2;
+ for (n = 0; n < SUBFRAMES; n++) {
+ sum += tmpcoeffs2_s[pos] * WebRtcIsac_kKltT2Shape[pos2++];
+ pos += LPC_SHAPE_ORDER;
+ }
+ tmpcoeffs_s[poss++] = sum;
+ }
+ offs2 += SUBFRAMES;
+ offss += LPC_SHAPE_ORDER;
+ }
+
+ /* Quantize coefficients. */
+ for (k = 0; k < KLT_ORDER_SHAPE; k++) {
+ index_s[k] = (WebRtcIsac_lrint(tmpcoeffs_s[k] / KLT_STEPSIZE)) +
+ WebRtcIsac_kQKltQuantMinShape[k];
+ if (index_s[k] < 0) {
+ index_s[k] = 0;
+ } else if (index_s[k] > WebRtcIsac_kQKltMaxIndShape[k]) {
+ index_s[k] = WebRtcIsac_kQKltMaxIndShape[k];
+ }
+ index_ovr_s[k] = WebRtcIsac_kQKltOffsetShape[k] + index_s[k];
+ }
+
+
+ /* Only one model remains in this version of the code, kModel = 0. We
+ * keep it so that bit-streams remain backward compatible. */
+ /* entropy coding of model number */
+ WebRtcIsac_EncHistMulti(streamdata, &kModel, WebRtcIsac_kQKltModelCdfPtr, 1);
+
+ /* Save data for creation of multiple bit streams */
+ /* Entropy coding of quantization indices - shape only. */
+ WebRtcIsac_EncHistMulti(streamdata, index_s, WebRtcIsac_kQKltCdfPtrShape,
+ KLT_ORDER_SHAPE);
+
+ /* Save data for creation of multiple bit streams. */
+ for (k = 0; k < KLT_ORDER_SHAPE; k++) {
+ encData->LPCindex_s[KLT_ORDER_SHAPE * encData->startIdx + k] = index_s[k];
+ }
+
+ /* Find quantization levels for shape coefficients. */
+ for (k = 0; k < KLT_ORDER_SHAPE; k++) {
+ tmpcoeffs_s[k] = WebRtcIsac_kQKltLevelsShape[index_ovr_s[k]];
+ }
+ /* Inverse KLT. */
+ /* Left transform, transpose matrix!
*/ + offss = 0; + poss = 0; + for (j = 0; j < SUBFRAMES; j++) { + offs2 = 0; + for (k = 0; k < LPC_SHAPE_ORDER; k++) { + sum = 0; + pos = offss; + pos2 = offs2; + for (n = 0; n < LPC_SHAPE_ORDER; n++) { + sum += tmpcoeffs_s[pos++] * WebRtcIsac_kKltT1Shape[pos2++]; + } + tmpcoeffs2_s[poss++] = sum; + offs2 += LPC_SHAPE_ORDER; + } + offss += LPC_SHAPE_ORDER; + } + + /* Right transform, Transpose matrix */ + offss = 0; + poss = 0; + for (j = 0; j < SUBFRAMES; j++) { + poss = offss; + for (k = 0; k < LPC_SHAPE_ORDER; k++) { + sum = 0; + pos = k; + pos2 = j; + for (n = 0; n < SUBFRAMES; n++) { + sum += tmpcoeffs2_s[pos] * WebRtcIsac_kKltT2Shape[pos2]; + pos += LPC_SHAPE_ORDER; + pos2 += SUBFRAMES; + } + tmpcoeffs_s[poss++] = sum; + } + offss += LPC_SHAPE_ORDER; + } + + /* Scaling, mean addition, and gain restoration. */ + poss = 0; + pos = 0; + for (k = 0; k < SUBFRAMES; k++) { + /* Ignore gains. */ + pos += 2; + + /* Low band LAR coefficients. */ + for (n = 0; n < LPC_LOBAND_ORDER; n++, pos++, poss++) { + LPCCoef[pos] = tmpcoeffs_s[poss] / LPC_LOBAND_SCALE; + LPCCoef[pos] += WebRtcIsac_kLpcMeansShape[poss]; + } + + /* High band LAR coefficients. */ + for (n = 0; n < LPC_HIBAND_ORDER; n++, pos++, poss++) { + LPCCoef[pos] = tmpcoeffs_s[poss] / LPC_HIBAND_SCALE; + LPCCoef[pos] += WebRtcIsac_kLpcMeansShape[poss]; + } + } +} + + +void WebRtcIsac_EncodeLpcLb(double* LPCCoef_lo, double* LPCCoef_hi, + Bitstr* streamdata, IsacSaveEncoderData* encData) { + double lars[KLT_ORDER_GAIN + KLT_ORDER_SHAPE]; + int k; + + WebRtcIsac_Poly2Lar(LPCCoef_lo, ORDERLO, LPCCoef_hi, ORDERHI, SUBFRAMES, + lars); + WebRtcIsac_EncodeLar(lars, streamdata, encData); + WebRtcIsac_Lar2Poly(lars, LPCCoef_lo, ORDERLO, LPCCoef_hi, ORDERHI, + SUBFRAMES); + /* Save data for creation of multiple bit streams (and transcoding). */ + for (k = 0; k < (ORDERLO + 1)*SUBFRAMES; k++) { + encData->LPCcoeffs_lo[(ORDERLO + 1)*SUBFRAMES * encData->startIdx + k] = + LPCCoef_lo[k]; + } + for (k = 0; k < (ORDERHI + 1)*SUBFRAMES; k++) { + encData->LPCcoeffs_hi[(ORDERHI + 1)*SUBFRAMES * encData->startIdx + k] = + LPCCoef_hi[k]; + } +} + + +int16_t WebRtcIsac_EncodeLpcUB(double* lpcVecs, Bitstr* streamdata, + double* interpolLPCCoeff, + int16_t bandwidth, + ISACUBSaveEncDataStruct* encData) { + double U[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME]; + int idx[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME]; + int interpolCntr; + + WebRtcIsac_Poly2LarUB(lpcVecs, bandwidth); + WebRtcIsac_RemoveLarMean(lpcVecs, bandwidth); + WebRtcIsac_DecorrelateIntraVec(lpcVecs, U, bandwidth); + WebRtcIsac_DecorrelateInterVec(U, lpcVecs, bandwidth); + WebRtcIsac_QuantizeUncorrLar(lpcVecs, idx, bandwidth); + + WebRtcIsac_CorrelateInterVec(lpcVecs, U, bandwidth); + WebRtcIsac_CorrelateIntraVec(U, lpcVecs, bandwidth); + WebRtcIsac_AddLarMean(lpcVecs, bandwidth); + + switch (bandwidth) { + case isac12kHz: { + /* Store the indices to be used for multiple encoding. */ + memcpy(encData->indexLPCShape, idx, UB_LPC_ORDER * + UB_LPC_VEC_PER_FRAME * sizeof(int)); + WebRtcIsac_EncHistMulti(streamdata, idx, WebRtcIsac_kLpcShapeCdfMatUb12, + UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME); + for (interpolCntr = 0; interpolCntr < UB_INTERPOL_SEGMENTS; + interpolCntr++) { + WebRtcIsac_Lar2PolyInterpolUB(lpcVecs, interpolLPCCoeff, + kLpcVecPerSegmentUb12 + 1); + lpcVecs += UB_LPC_ORDER; + interpolLPCCoeff += (kLpcVecPerSegmentUb12 * (UB_LPC_ORDER + 1)); + } + break; + } + case isac16kHz: { + /* Store the indices to be used for multiple encoding. 
*/ + memcpy(encData->indexLPCShape, idx, UB_LPC_ORDER * + UB16_LPC_VEC_PER_FRAME * sizeof(int)); + WebRtcIsac_EncHistMulti(streamdata, idx, WebRtcIsac_kLpcShapeCdfMatUb16, + UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME); + for (interpolCntr = 0; interpolCntr < UB16_INTERPOL_SEGMENTS; + interpolCntr++) { + WebRtcIsac_Lar2PolyInterpolUB(lpcVecs, interpolLPCCoeff, + kLpcVecPerSegmentUb16 + 1); + lpcVecs += UB_LPC_ORDER; + interpolLPCCoeff += (kLpcVecPerSegmentUb16 * (UB_LPC_ORDER + 1)); + } + break; + } + default: + return -1; + } + return 0; +} + +void WebRtcIsac_EncodeLpcGainLb(double* LPCCoef_lo, double* LPCCoef_hi, + Bitstr* streamdata, + IsacSaveEncoderData* encData) { + int j, k, n, pos, pos2, posg, offsg, offs2; + int index_g[KLT_ORDER_GAIN]; + int index_ovr_g[KLT_ORDER_GAIN]; + double tmpcoeffs_g[KLT_ORDER_GAIN]; + double tmpcoeffs2_g[KLT_ORDER_GAIN]; + double sum; + /* log gains, mean removal and scaling */ + posg = 0; + for (k = 0; k < SUBFRAMES; k++) { + tmpcoeffs_g[posg] = log(LPCCoef_lo[(LPC_LOBAND_ORDER + 1) * k]); + tmpcoeffs_g[posg] -= WebRtcIsac_kLpcMeansGain[posg]; + tmpcoeffs_g[posg] *= LPC_GAIN_SCALE; + posg++; + tmpcoeffs_g[posg] = log(LPCCoef_hi[(LPC_HIBAND_ORDER + 1) * k]); + tmpcoeffs_g[posg] -= WebRtcIsac_kLpcMeansGain[posg]; + tmpcoeffs_g[posg] *= LPC_GAIN_SCALE; + posg++; + } + + /* KLT */ + + /* Left transform. */ + offsg = 0; + for (j = 0; j < SUBFRAMES; j++) { + posg = offsg; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = offsg; + pos2 = k; + for (n = 0; n < LPC_GAIN_ORDER; n++) { + sum += tmpcoeffs_g[pos++] * WebRtcIsac_kKltT1Gain[pos2]; + pos2 += LPC_GAIN_ORDER; + } + tmpcoeffs2_g[posg++] = sum; + } + offsg += LPC_GAIN_ORDER; + } + + /* Right transform. */ + offsg = 0; + offs2 = 0; + for (j = 0; j < SUBFRAMES; j++) { + posg = offsg; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = k; + pos2 = offs2; + for (n = 0; n < SUBFRAMES; n++) { + sum += tmpcoeffs2_g[pos] * WebRtcIsac_kKltT2Gain[pos2++]; + pos += LPC_GAIN_ORDER; + } + tmpcoeffs_g[posg++] = sum; + } + offs2 += SUBFRAMES; + offsg += LPC_GAIN_ORDER; + } + + /* Quantize coefficients. */ + for (k = 0; k < KLT_ORDER_GAIN; k++) { + /* Get index. */ + pos2 = WebRtcIsac_lrint(tmpcoeffs_g[k] / KLT_STEPSIZE); + index_g[k] = (pos2) + WebRtcIsac_kQKltQuantMinGain[k]; + if (index_g[k] < 0) { + index_g[k] = 0; + } else if (index_g[k] > WebRtcIsac_kQKltMaxIndGain[k]) { + index_g[k] = WebRtcIsac_kQKltMaxIndGain[k]; + } + index_ovr_g[k] = WebRtcIsac_kQKltOffsetGain[k] + index_g[k]; + + /* Find quantization levels for coefficients. */ + tmpcoeffs_g[k] = WebRtcIsac_kQKltLevelsGain[index_ovr_g[k]]; + + /* Save data for creation of multiple bit streams. */ + encData->LPCindex_g[KLT_ORDER_GAIN * encData->startIdx + k] = index_g[k]; + } + + /* Entropy coding of quantization indices - gain. */ + WebRtcIsac_EncHistMulti(streamdata, index_g, WebRtcIsac_kQKltCdfPtrGain, + KLT_ORDER_GAIN); + + /* Find quantization levels for coefficients. */ + /* Left transform. */ + offsg = 0; + posg = 0; + for (j = 0; j < SUBFRAMES; j++) { + offs2 = 0; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = offsg; + pos2 = offs2; + for (n = 0; n < LPC_GAIN_ORDER; n++) + sum += tmpcoeffs_g[pos++] * WebRtcIsac_kKltT1Gain[pos2++]; + tmpcoeffs2_g[posg++] = sum; + offs2 += LPC_GAIN_ORDER; + } + offsg += LPC_GAIN_ORDER; + } + + /* Right transform, transpose matrix. 
*/ + offsg = 0; + posg = 0; + for (j = 0; j < SUBFRAMES; j++) { + posg = offsg; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = k; + pos2 = j; + for (n = 0; n < SUBFRAMES; n++) { + sum += tmpcoeffs2_g[pos] * WebRtcIsac_kKltT2Gain[pos2]; + pos += LPC_GAIN_ORDER; + pos2 += SUBFRAMES; + } + tmpcoeffs_g[posg++] = sum; + } + offsg += LPC_GAIN_ORDER; + } + + + /* Scaling, mean addition, and gain restoration. */ + posg = 0; + for (k = 0; k < SUBFRAMES; k++) { + sum = tmpcoeffs_g[posg] / LPC_GAIN_SCALE; + sum += WebRtcIsac_kLpcMeansGain[posg]; + LPCCoef_lo[k * (LPC_LOBAND_ORDER + 1)] = exp(sum); + pos++; + posg++; + sum = tmpcoeffs_g[posg] / LPC_GAIN_SCALE; + sum += WebRtcIsac_kLpcMeansGain[posg]; + LPCCoef_hi[k * (LPC_HIBAND_ORDER + 1)] = exp(sum); + pos++; + posg++; + } + +} + +void WebRtcIsac_EncodeLpcGainUb(double* lpGains, Bitstr* streamdata, + int* lpcGainIndex) { + double U[UB_LPC_GAIN_DIM]; + int idx[UB_LPC_GAIN_DIM]; + WebRtcIsac_ToLogDomainRemoveMean(lpGains); + WebRtcIsac_DecorrelateLPGain(lpGains, U); + WebRtcIsac_QuantizeLpcGain(U, idx); + /* Store the index for re-encoding for FEC. */ + memcpy(lpcGainIndex, idx, UB_LPC_GAIN_DIM * sizeof(int)); + WebRtcIsac_CorrelateLpcGain(U, lpGains); + WebRtcIsac_AddMeanToLinearDomain(lpGains); + WebRtcIsac_EncHistMulti(streamdata, idx, WebRtcIsac_kLpcGainCdfMat, + UB_LPC_GAIN_DIM); +} + + +void WebRtcIsac_StoreLpcGainUb(double* lpGains, Bitstr* streamdata) { + double U[UB_LPC_GAIN_DIM]; + int idx[UB_LPC_GAIN_DIM]; + WebRtcIsac_ToLogDomainRemoveMean(lpGains); + WebRtcIsac_DecorrelateLPGain(lpGains, U); + WebRtcIsac_QuantizeLpcGain(U, idx); + WebRtcIsac_EncHistMulti(streamdata, idx, WebRtcIsac_kLpcGainCdfMat, + UB_LPC_GAIN_DIM); +} + + + +int16_t WebRtcIsac_DecodeLpcGainUb(double* lpGains, Bitstr* streamdata) { + double U[UB_LPC_GAIN_DIM]; + int idx[UB_LPC_GAIN_DIM]; + int err; + err = WebRtcIsac_DecHistOneStepMulti(idx, streamdata, + WebRtcIsac_kLpcGainCdfMat, + WebRtcIsac_kLpcGainEntropySearch, + UB_LPC_GAIN_DIM); + if (err < 0) { + return -1; + } + WebRtcIsac_DequantizeLpcGain(idx, U); + WebRtcIsac_CorrelateLpcGain(U, lpGains); + WebRtcIsac_AddMeanToLinearDomain(lpGains); + return 0; +} + + + +/* decode & dequantize RC */ +int WebRtcIsac_DecodeRc(Bitstr* streamdata, int16_t* RCQ15) { + int k, err; + int index[AR_ORDER]; + + /* entropy decoding of quantization indices */ + err = WebRtcIsac_DecHistOneStepMulti(index, streamdata, + WebRtcIsac_kQArRcCdfPtr, + WebRtcIsac_kQArRcInitIndex, AR_ORDER); + if (err < 0) + return err; + + /* find quantization levels for reflection coefficients */ + for (k = 0; k < AR_ORDER; k++) { + RCQ15[k] = *(WebRtcIsac_kQArRcLevelsPtr[k] + index[k]); + } + return 0; +} + + +/* quantize & code RC */ +void WebRtcIsac_EncodeRc(int16_t* RCQ15, Bitstr* streamdata) { + int k; + int index[AR_ORDER]; + + /* quantize reflection coefficients (add noise feedback?) */ + for (k = 0; k < AR_ORDER; k++) { + index[k] = WebRtcIsac_kQArRcInitIndex[k]; + // The safe-guards in following while conditions are to suppress gcc 4.8.3 + // warnings, Issue 2888. Otherwise, first and last elements of + // |WebRtcIsac_kQArBoundaryLevels| are such that the following search + // *never* cause an out-of-boundary read. 
+ if (RCQ15[k] > WebRtcIsac_kQArBoundaryLevels[index[k]]) { + while (index[k] + 1 < NUM_AR_RC_QUANT_BAUNDARY && + RCQ15[k] > WebRtcIsac_kQArBoundaryLevels[index[k] + 1]) { + index[k]++; + } + } else { + while (index[k] > 0 && + RCQ15[k] < WebRtcIsac_kQArBoundaryLevels[--index[k]]) ; + } + RCQ15[k] = *(WebRtcIsac_kQArRcLevelsPtr[k] + index[k]); + } + + /* entropy coding of quantization indices */ + WebRtcIsac_EncHistMulti(streamdata, index, WebRtcIsac_kQArRcCdfPtr, AR_ORDER); +} + + +/* decode & dequantize squared Gain */ +int WebRtcIsac_DecodeGain2(Bitstr* streamdata, int32_t* gainQ10) { + int index, err; + + /* entropy decoding of quantization index */ + err = WebRtcIsac_DecHistOneStepMulti(&index, streamdata, + WebRtcIsac_kQGainCdf_ptr, + WebRtcIsac_kQGainInitIndex, 1); + if (err < 0) { + return err; + } + /* find quantization level */ + *gainQ10 = WebRtcIsac_kQGain2Levels[index]; + return 0; +} + + +/* quantize & code squared Gain */ +int WebRtcIsac_EncodeGain2(int32_t* gainQ10, Bitstr* streamdata) { + int index; + + /* find quantization index */ + index = WebRtcIsac_kQGainInitIndex[0]; + if (*gainQ10 > WebRtcIsac_kQGain2BoundaryLevels[index]) { + while (*gainQ10 > WebRtcIsac_kQGain2BoundaryLevels[index + 1]) { + index++; + } + } else { + while (*gainQ10 < WebRtcIsac_kQGain2BoundaryLevels[--index]) ; + } + /* De-quantize */ + *gainQ10 = WebRtcIsac_kQGain2Levels[index]; + + /* entropy coding of quantization index */ + WebRtcIsac_EncHistMulti(streamdata, &index, WebRtcIsac_kQGainCdf_ptr, 1); + return 0; +} + + +/* code and decode Pitch Gains and Lags functions */ + +/* decode & dequantize Pitch Gains */ +int WebRtcIsac_DecodePitchGain(Bitstr* streamdata, + int16_t* PitchGains_Q12) { + int index_comb, err; + const uint16_t* WebRtcIsac_kQPitchGainCdf_ptr[1]; + + /* Entropy decoding of quantization indices */ + *WebRtcIsac_kQPitchGainCdf_ptr = WebRtcIsac_kQPitchGainCdf; + err = WebRtcIsac_DecHistBisectMulti(&index_comb, streamdata, + WebRtcIsac_kQPitchGainCdf_ptr, + WebRtcIsac_kQCdfTableSizeGain, 1); + /* Error check, Q_mean_Gain.. tables are of size 144 */ + if ((err < 0) || (index_comb < 0) || (index_comb >= 144)) { + return -ISAC_RANGE_ERROR_DECODE_PITCH_GAIN; + } + /* De-quantize back to pitch gains by table look-up. */ + PitchGains_Q12[0] = WebRtcIsac_kQMeanGain1Q12[index_comb]; + PitchGains_Q12[1] = WebRtcIsac_kQMeanGain2Q12[index_comb]; + PitchGains_Q12[2] = WebRtcIsac_kQMeanGain3Q12[index_comb]; + PitchGains_Q12[3] = WebRtcIsac_kQMeanGain4Q12[index_comb]; + return 0; +} + + +/* Quantize & code Pitch Gains. */ +void WebRtcIsac_EncodePitchGain(int16_t* PitchGains_Q12, + Bitstr* streamdata, + IsacSaveEncoderData* encData) { + int k, j; + double C; + double S[PITCH_SUBFRAMES]; + int index[3]; + int index_comb; + const uint16_t* WebRtcIsac_kQPitchGainCdf_ptr[1]; + double PitchGains[PITCH_SUBFRAMES] = {0, 0, 0, 0}; + + /* Take the asin. */ + for (k = 0; k < PITCH_SUBFRAMES; k++) { + PitchGains[k] = ((float)PitchGains_Q12[k]) / 4096; + S[k] = asin(PitchGains[k]); + } + + /* Find quantization index; only for the first three + * transform coefficients. */ + for (k = 0; k < 3; k++) { + /* transform */ + C = 0.0; + for (j = 0; j < PITCH_SUBFRAMES; j++) { + C += WebRtcIsac_kTransform[k][j] * S[j]; + } + /* Quantize */ + index[k] = WebRtcIsac_lrint(C / PITCH_GAIN_STEPSIZE); + + /* Check that the index is not outside the boundaries of the table. 
*/ + if (index[k] < WebRtcIsac_kIndexLowerLimitGain[k]) { + index[k] = WebRtcIsac_kIndexLowerLimitGain[k]; + } else if (index[k] > WebRtcIsac_kIndexUpperLimitGain[k]) { + index[k] = WebRtcIsac_kIndexUpperLimitGain[k]; + } + index[k] -= WebRtcIsac_kIndexLowerLimitGain[k]; + } + + /* Calculate unique overall index. */ + index_comb = WebRtcIsac_kIndexMultsGain[0] * index[0] + + WebRtcIsac_kIndexMultsGain[1] * index[1] + index[2]; + + /* unquantize back to pitch gains by table look-up */ + PitchGains_Q12[0] = WebRtcIsac_kQMeanGain1Q12[index_comb]; + PitchGains_Q12[1] = WebRtcIsac_kQMeanGain2Q12[index_comb]; + PitchGains_Q12[2] = WebRtcIsac_kQMeanGain3Q12[index_comb]; + PitchGains_Q12[3] = WebRtcIsac_kQMeanGain4Q12[index_comb]; + + /* entropy coding of quantization pitch gains */ + *WebRtcIsac_kQPitchGainCdf_ptr = WebRtcIsac_kQPitchGainCdf; + WebRtcIsac_EncHistMulti(streamdata, &index_comb, + WebRtcIsac_kQPitchGainCdf_ptr, 1); + encData->pitchGain_index[encData->startIdx] = index_comb; +} + + + +/* Pitch LAG */ +/* Decode & de-quantize Pitch Lags. */ +int WebRtcIsac_DecodePitchLag(Bitstr* streamdata, int16_t* PitchGain_Q12, + double* PitchLags) { + int k, err; + double StepSize; + double C; + int index[PITCH_SUBFRAMES]; + double mean_gain; + const double* mean_val2, *mean_val3, *mean_val4; + const int16_t* lower_limit; + const uint16_t* init_index; + const uint16_t* cdf_size; + const uint16_t** cdf; + double PitchGain[4] = {0, 0, 0, 0}; + + /* compute mean pitch gain */ + mean_gain = 0.0; + for (k = 0; k < 4; k++) { + PitchGain[k] = ((float)PitchGain_Q12[k]) / 4096; + mean_gain += PitchGain[k]; + } + mean_gain /= 4.0; + + /* voicing classification. */ + if (mean_gain < 0.2) { + StepSize = WebRtcIsac_kQPitchLagStepsizeLo; + cdf = WebRtcIsac_kQPitchLagCdfPtrLo; + cdf_size = WebRtcIsac_kQPitchLagCdfSizeLo; + mean_val2 = WebRtcIsac_kQMeanLag2Lo; + mean_val3 = WebRtcIsac_kQMeanLag3Lo; + mean_val4 = WebRtcIsac_kQMeanLag4Lo; + lower_limit = WebRtcIsac_kQIndexLowerLimitLagLo; + init_index = WebRtcIsac_kQInitIndexLagLo; + } else if (mean_gain < 0.4) { + StepSize = WebRtcIsac_kQPitchLagStepsizeMid; + cdf = WebRtcIsac_kQPitchLagCdfPtrMid; + cdf_size = WebRtcIsac_kQPitchLagCdfSizeMid; + mean_val2 = WebRtcIsac_kQMeanLag2Mid; + mean_val3 = WebRtcIsac_kQMeanLag3Mid; + mean_val4 = WebRtcIsac_kQMeanLag4Mid; + lower_limit = WebRtcIsac_kQIndexLowerLimitLagMid; + init_index = WebRtcIsac_kQInitIndexLagMid; + } else { + StepSize = WebRtcIsac_kQPitchLagStepsizeHi; + cdf = WebRtcIsac_kQPitchLagCdfPtrHi; + cdf_size = WebRtcIsac_kQPitchLagCdfSizeHi; + mean_val2 = WebRtcIsac_kQMeanLag2Hi; + mean_val3 = WebRtcIsac_kQMeanLag3Hi; + mean_val4 = WebRtcIsac_kQMeanLag4Hi; + lower_limit = WebRtcIsac_kQindexLowerLimitLagHi; + init_index = WebRtcIsac_kQInitIndexLagHi; + } + + /* Entropy decoding of quantization indices. */ + err = WebRtcIsac_DecHistBisectMulti(index, streamdata, cdf, cdf_size, 1); + if ((err < 0) || (index[0] < 0)) { + return -ISAC_RANGE_ERROR_DECODE_PITCH_LAG; + } + err = WebRtcIsac_DecHistOneStepMulti(index + 1, streamdata, cdf + 1, + init_index, 3); + if (err < 0) { + return -ISAC_RANGE_ERROR_DECODE_PITCH_LAG; + } + + /* Unquantize back to transform coefficients and do the inverse transform: + * S = T'*C. 
+   */
+  C = (index[0] + lower_limit[0]) * StepSize;
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] = WebRtcIsac_kTransformTranspose[k][0] * C;
+  }
+  C = mean_val2[index[1]];
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] += WebRtcIsac_kTransformTranspose[k][1] * C;
+  }
+  C = mean_val3[index[2]];
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] += WebRtcIsac_kTransformTranspose[k][2] * C;
+  }
+  C = mean_val4[index[3]];
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] += WebRtcIsac_kTransformTranspose[k][3] * C;
+  }
+  return 0;
+}
+
+
+
+/* Quantize & code pitch lags. */
+void WebRtcIsac_EncodePitchLag(double* PitchLags, int16_t* PitchGain_Q12,
+                               Bitstr* streamdata,
+                               IsacSaveEncoderData* encData) {
+  int k, j;
+  double StepSize;
+  double C;
+  int index[PITCH_SUBFRAMES];
+  double mean_gain;
+  const double* mean_val2, *mean_val3, *mean_val4;
+  const int16_t* lower_limit, *upper_limit;
+  const uint16_t** cdf;
+  double PitchGain[4] = {0, 0, 0, 0};
+
+  /* compute mean pitch gain */
+  mean_gain = 0.0;
+  for (k = 0; k < 4; k++) {
+    PitchGain[k] = ((float)PitchGain_Q12[k]) / 4096;
+    mean_gain += PitchGain[k];
+  }
+  mean_gain /= 4.0;
+
+  /* Save data for creation of multiple bit streams */
+  encData->meanGain[encData->startIdx] = mean_gain;
+
+  /* Voicing classification. */
+  if (mean_gain < 0.2) {
+    StepSize = WebRtcIsac_kQPitchLagStepsizeLo;
+    cdf = WebRtcIsac_kQPitchLagCdfPtrLo;
+    mean_val2 = WebRtcIsac_kQMeanLag2Lo;
+    mean_val3 = WebRtcIsac_kQMeanLag3Lo;
+    mean_val4 = WebRtcIsac_kQMeanLag4Lo;
+    lower_limit = WebRtcIsac_kQIndexLowerLimitLagLo;
+    upper_limit = WebRtcIsac_kQIndexUpperLimitLagLo;
+  } else if (mean_gain < 0.4) {
+    StepSize = WebRtcIsac_kQPitchLagStepsizeMid;
+    cdf = WebRtcIsac_kQPitchLagCdfPtrMid;
+    mean_val2 = WebRtcIsac_kQMeanLag2Mid;
+    mean_val3 = WebRtcIsac_kQMeanLag3Mid;
+    mean_val4 = WebRtcIsac_kQMeanLag4Mid;
+    lower_limit = WebRtcIsac_kQIndexLowerLimitLagMid;
+    upper_limit = WebRtcIsac_kQIndexUpperLimitLagMid;
+  } else {
+    StepSize = WebRtcIsac_kQPitchLagStepsizeHi;
+    cdf = WebRtcIsac_kQPitchLagCdfPtrHi;
+    mean_val2 = WebRtcIsac_kQMeanLag2Hi;
+    mean_val3 = WebRtcIsac_kQMeanLag3Hi;
+    mean_val4 = WebRtcIsac_kQMeanLag4Hi;
+    lower_limit = WebRtcIsac_kQindexLowerLimitLagHi;
+    upper_limit = WebRtcIsac_kQindexUpperLimitLagHi;
+  }
+
+  /* find quantization index */
+  for (k = 0; k < 4; k++) {
+    /* transform */
+    C = 0.0;
+    for (j = 0; j < PITCH_SUBFRAMES; j++) {
+      C += WebRtcIsac_kTransform[k][j] * PitchLags[j];
+    }
+    /* quantize */
+    index[k] = WebRtcIsac_lrint(C / StepSize);
+
+    /* check that the index is not outside the boundaries of the table */
+    if (index[k] < lower_limit[k]) {
+      index[k] = lower_limit[k];
+    } else if (index[k] > upper_limit[k]) {
+      index[k] = upper_limit[k];
+    }
+    index[k] -= lower_limit[k];
+    /* Save data for creation of multiple bit streams */
+    encData->pitchIndex[PITCH_SUBFRAMES * encData->startIdx + k] = index[k];
+  }
+
+  /* Un-quantize back to transform coefficients and do the inverse transform:
+   * S = T'*C */
+  C = (index[0] + lower_limit[0]) * StepSize;
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] = WebRtcIsac_kTransformTranspose[k][0] * C;
+  }
+  C = mean_val2[index[1]];
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] += WebRtcIsac_kTransformTranspose[k][1] * C;
+  }
+  C = mean_val3[index[2]];
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] += WebRtcIsac_kTransformTranspose[k][2] * C;
+  }
+  C = mean_val4[index[3]];
+  for (k = 0; k < PITCH_SUBFRAMES; k++) {
+    PitchLags[k] +=
WebRtcIsac_kTransformTranspose[k][3] * C; + } + /* entropy coding of quantization pitch lags */ + WebRtcIsac_EncHistMulti(streamdata, index, cdf, PITCH_SUBFRAMES); +} + + + +/* Routines for in-band signaling of bandwidth estimation */ +/* Histograms based on uniform distribution of indices */ +/* Move global variables later! */ + + +/* cdf array for frame length indicator */ +const uint16_t WebRtcIsac_kFrameLengthCdf[4] = { + 0, 21845, 43690, 65535 }; + +/* pointer to cdf array for frame length indicator */ +const uint16_t* WebRtcIsac_kFrameLengthCdf_ptr[1] = { + WebRtcIsac_kFrameLengthCdf }; + +/* initial cdf index for decoder of frame length indicator */ +const uint16_t WebRtcIsac_kFrameLengthInitIndex[1] = { 1 }; + + +int WebRtcIsac_DecodeFrameLen(Bitstr* streamdata, int16_t* framesamples) { + int frame_mode, err; + err = 0; + /* entropy decoding of frame length [1:30ms,2:60ms] */ + err = WebRtcIsac_DecHistOneStepMulti(&frame_mode, streamdata, + WebRtcIsac_kFrameLengthCdf_ptr, + WebRtcIsac_kFrameLengthInitIndex, 1); + if (err < 0) + return -ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH; + + switch (frame_mode) { + case 1: + *framesamples = 480; /* 30ms */ + break; + case 2: + *framesamples = 960; /* 60ms */ + break; + default: + err = -ISAC_DISALLOWED_FRAME_MODE_DECODER; + } + return err; +} + +int WebRtcIsac_EncodeFrameLen(int16_t framesamples, Bitstr* streamdata) { + int frame_mode, status; + + status = 0; + frame_mode = 0; + /* entropy coding of frame length [1:480 samples,2:960 samples] */ + switch (framesamples) { + case 480: + frame_mode = 1; + break; + case 960: + frame_mode = 2; + break; + default: + status = - ISAC_DISALLOWED_FRAME_MODE_ENCODER; + } + + if (status < 0) + return status; + + WebRtcIsac_EncHistMulti(streamdata, &frame_mode, + WebRtcIsac_kFrameLengthCdf_ptr, 1); + return status; +} + +/* cdf array for estimated bandwidth */ +static const uint16_t kBwCdf[25] = { + 0, 2731, 5461, 8192, 10923, 13653, 16384, 19114, 21845, 24576, 27306, 30037, + 32768, 35498, 38229, 40959, 43690, 46421, 49151, 51882, 54613, 57343, 60074, + 62804, 65535 }; + +/* pointer to cdf array for estimated bandwidth */ +static const uint16_t* kBwCdfPtr[1] = { kBwCdf }; + +/* initial cdf index for decoder of estimated bandwidth*/ +static const uint16_t kBwInitIndex[1] = { 7 }; + + +int WebRtcIsac_DecodeSendBW(Bitstr* streamdata, int16_t* BWno) { + int BWno32, err; + + /* entropy decoding of sender's BW estimation [0..23] */ + err = WebRtcIsac_DecHistOneStepMulti(&BWno32, streamdata, kBwCdfPtr, + kBwInitIndex, 1); + if (err < 0) { + return -ISAC_RANGE_ERROR_DECODE_BANDWIDTH; + } + *BWno = (int16_t)BWno32; + return err; +} + +void WebRtcIsac_EncodeReceiveBw(int* BWno, Bitstr* streamdata) { + /* entropy encoding of receiver's BW estimation [0..23] */ + WebRtcIsac_EncHistMulti(streamdata, BWno, kBwCdfPtr, 1); +} + + +/* estimate code length of LPC Coef */ +void WebRtcIsac_TranscodeLPCCoef(double* LPCCoef_lo, double* LPCCoef_hi, + int* index_g) { + int j, k, n, pos, pos2, posg, offsg, offs2; + int index_ovr_g[KLT_ORDER_GAIN]; + double tmpcoeffs_g[KLT_ORDER_GAIN]; + double tmpcoeffs2_g[KLT_ORDER_GAIN]; + double sum; + + /* log gains, mean removal and scaling */ + posg = 0; + for (k = 0; k < SUBFRAMES; k++) { + tmpcoeffs_g[posg] = log(LPCCoef_lo[(LPC_LOBAND_ORDER + 1) * k]); + tmpcoeffs_g[posg] -= WebRtcIsac_kLpcMeansGain[posg]; + tmpcoeffs_g[posg] *= LPC_GAIN_SCALE; + posg++; + tmpcoeffs_g[posg] = log(LPCCoef_hi[(LPC_HIBAND_ORDER + 1) * k]); + tmpcoeffs_g[posg] -= WebRtcIsac_kLpcMeansGain[posg]; + 
tmpcoeffs_g[posg] *= LPC_GAIN_SCALE; + posg++; + } + + /* KLT */ + + /* Left transform. */ + offsg = 0; + for (j = 0; j < SUBFRAMES; j++) { + posg = offsg; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = offsg; + pos2 = k; + for (n = 0; n < LPC_GAIN_ORDER; n++) { + sum += tmpcoeffs_g[pos++] * WebRtcIsac_kKltT1Gain[pos2]; + pos2 += LPC_GAIN_ORDER; + } + tmpcoeffs2_g[posg++] = sum; + } + offsg += LPC_GAIN_ORDER; + } + + /* Right transform. */ + offsg = 0; + offs2 = 0; + for (j = 0; j < SUBFRAMES; j++) { + posg = offsg; + for (k = 0; k < LPC_GAIN_ORDER; k++) { + sum = 0; + pos = k; + pos2 = offs2; + for (n = 0; n < SUBFRAMES; n++) { + sum += tmpcoeffs2_g[pos] * WebRtcIsac_kKltT2Gain[pos2++]; + pos += LPC_GAIN_ORDER; + } + tmpcoeffs_g[posg++] = sum; + } + offs2 += SUBFRAMES; + offsg += LPC_GAIN_ORDER; + } + + + /* quantize coefficients */ + for (k = 0; k < KLT_ORDER_GAIN; k++) { + /* Get index. */ + pos2 = WebRtcIsac_lrint(tmpcoeffs_g[k] / KLT_STEPSIZE); + index_g[k] = (pos2) + WebRtcIsac_kQKltQuantMinGain[k]; + if (index_g[k] < 0) { + index_g[k] = 0; + } else if (index_g[k] > WebRtcIsac_kQKltMaxIndGain[k]) { + index_g[k] = WebRtcIsac_kQKltMaxIndGain[k]; + } + index_ovr_g[k] = WebRtcIsac_kQKltOffsetGain[k] + index_g[k]; + + /* find quantization levels for coefficients */ + tmpcoeffs_g[k] = WebRtcIsac_kQKltLevelsGain[index_ovr_g[k]]; + } +} + + +/* Decode & de-quantize LPC Coefficients. */ +int WebRtcIsac_DecodeLpcCoefUB(Bitstr* streamdata, double* lpcVecs, + double* percepFilterGains, + int16_t bandwidth) { + int index_s[KLT_ORDER_SHAPE]; + + double U[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME]; + int err; + + /* Entropy decoding of quantization indices. */ + switch (bandwidth) { + case isac12kHz: { + err = WebRtcIsac_DecHistOneStepMulti( + index_s, streamdata, WebRtcIsac_kLpcShapeCdfMatUb12, + WebRtcIsac_kLpcShapeEntropySearchUb12, UB_LPC_ORDER * + UB_LPC_VEC_PER_FRAME); + break; + } + case isac16kHz: { + err = WebRtcIsac_DecHistOneStepMulti( + index_s, streamdata, WebRtcIsac_kLpcShapeCdfMatUb16, + WebRtcIsac_kLpcShapeEntropySearchUb16, UB_LPC_ORDER * + UB16_LPC_VEC_PER_FRAME); + break; + } + default: + return -1; + } + + if (err < 0) { + return err; + } + + WebRtcIsac_DequantizeLpcParam(index_s, lpcVecs, bandwidth); + WebRtcIsac_CorrelateInterVec(lpcVecs, U, bandwidth); + WebRtcIsac_CorrelateIntraVec(U, lpcVecs, bandwidth); + WebRtcIsac_AddLarMean(lpcVecs, bandwidth); + WebRtcIsac_DecodeLpcGainUb(percepFilterGains, streamdata); + + if (bandwidth == isac16kHz) { + /* Decode another set of Gains. 
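+     * At 16 kHz each frame carries two halves, each with SUBFRAMES gains,
+     * so a second gain set is decoded into &percepFilterGains[SUBFRAMES].
+     * As in the call above, the return value of the inner decode is not
+     * checked here.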
+     */
+    WebRtcIsac_DecodeLpcGainUb(&percepFilterGains[SUBFRAMES], streamdata);
+  }
+  return 0;
+}
+
+int16_t WebRtcIsac_EncodeBandwidth(enum ISACBandwidth bandwidth,
+                                   Bitstr* streamData) {
+  int bandwidthMode;
+  switch (bandwidth) {
+    case isac12kHz: {
+      bandwidthMode = 0;
+      break;
+    }
+    case isac16kHz: {
+      bandwidthMode = 1;
+      break;
+    }
+    default:
+      return -ISAC_DISALLOWED_ENCODER_BANDWIDTH;
+  }
+  WebRtcIsac_EncHistMulti(streamData, &bandwidthMode, kOneBitEqualProbCdf_ptr,
+                          1);
+  return 0;
+}
+
+int16_t WebRtcIsac_DecodeBandwidth(Bitstr* streamData,
+                                   enum ISACBandwidth* bandwidth) {
+  int bandwidthMode;
+  if (WebRtcIsac_DecHistOneStepMulti(&bandwidthMode, streamData,
+                                     kOneBitEqualProbCdf_ptr,
+                                     kOneBitEqualProbInitIndex, 1) < 0) {
+    return -ISAC_RANGE_ERROR_DECODE_BANDWITH;
+  }
+  switch (bandwidthMode) {
+    case 0: {
+      *bandwidth = isac12kHz;
+      break;
+    }
+    case 1: {
+      *bandwidth = isac16kHz;
+      break;
+    }
+    default:
+      return -ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER;
+  }
+  return 0;
+}
+
+int16_t WebRtcIsac_EncodeJitterInfo(int32_t jitterIndex,
+                                    Bitstr* streamData) {
+  /* This is to avoid a Linux warning until we change 'int' to 'Word32'. */
+  int intVar;
+
+  if ((jitterIndex < 0) || (jitterIndex > 1)) {
+    return -1;
+  }
+  intVar = (int)(jitterIndex);
+  /* Use the same CDF table as for bandwidth;
+   * both take two values with equal probability. */
+  WebRtcIsac_EncHistMulti(streamData, &intVar, kOneBitEqualProbCdf_ptr, 1);
+  return 0;
+}
+
+int16_t WebRtcIsac_DecodeJitterInfo(Bitstr* streamData,
+                                    int32_t* jitterInfo) {
+  int intVar;
+  /* Use the same CDF table as for bandwidth;
+   * both take two values with equal probability. */
+  if (WebRtcIsac_DecHistOneStepMulti(&intVar, streamData,
+                                     kOneBitEqualProbCdf_ptr,
+                                     kOneBitEqualProbInitIndex, 1) < 0) {
+    return -ISAC_RANGE_ERROR_DECODE_BANDWITH;
+  }
+  *jitterInfo = (int16_t)(intVar);
+  return 0;
+}
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/entropy_coding.h b/webrtc/modules/audio_coding/codecs/isac/main/source/entropy_coding.h
new file mode 100644
index 0000000..d715d86
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/entropy_coding.h
@@ -0,0 +1,343 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * entropy_coding.h
+ *
+ * This header file declares all of the functions used to arithmetically
+ * encode the iSAC bitstream.
+ *
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ENTROPY_CODING_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ENTROPY_CODING_H_
+
+#include "settings.h"
+#include "structs.h"
+
+/******************************************************************************
+ * WebRtcIsac_DecodeSpec()
+ * Decode real and imaginary part of the DFT coefficients, given a bit-stream.
+ * The decoded DFT coefficients can be transformed to the time domain by
+ * WebRtcIsac_Time2Spec().
+ *
+ * Input:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *  - AvgPitchGain_Q12  : average pitch-gain of the frame. This is only
+ *                        relevant for the 0-4 kHz band; the input value is
+ *                        not used in other bands.
+ *  - band              : specifies which band's DFT should be decoded.
+ *
+ * Output:
+ *  - *fr               : pointer to a buffer where the real part of DFT
+ *                        coefficients are written to.
+ *  - *fi               : pointer to a buffer where the imaginary part
+ *                        of DFT coefficients are written to.
+ *
+ * Return value         : < 0 if an error occurs
+ *                          0 if succeeded.
+ */
+int WebRtcIsac_DecodeSpec(Bitstr* streamdata, int16_t AvgPitchGain_Q12,
+                          enum ISACBand band, double* fr, double* fi);
+
+/******************************************************************************
+ * WebRtcIsac_EncodeSpec()
+ * Encode real and imaginary part of the DFT coefficients into the given
+ * bit-stream.
+ *
+ * Input:
+ *  - *fr               : pointer to a buffer where the real part of DFT
+ *                        coefficients is read from.
+ *  - *fi               : pointer to a buffer where the imaginary part
+ *                        of DFT coefficients is read from.
+ *  - AvgPitchGain_Q12  : average pitch-gain of the frame. This is only
+ *                        relevant for the 0-4 kHz band; the input value is
+ *                        not used in other bands.
+ *  - band              : specifies which band's DFT should be encoded.
+ *
+ * Output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Return value         : < 0 if an error occurs
+ *                          0 if succeeded.
+ */
+int WebRtcIsac_EncodeSpec(const int16_t* fr, const int16_t* fi,
+                          int16_t AvgPitchGain_Q12, enum ISACBand band,
+                          Bitstr* streamdata);
+
+/* Decode & dequantize LPC coefficients. */
+int WebRtcIsac_DecodeLpcCoef(Bitstr* streamdata, double* LPCCoef);
+int WebRtcIsac_DecodeLpcCoefUB(Bitstr* streamdata, double* lpcVecs,
+                               double* percepFilterGains,
+                               int16_t bandwidth);
+
+int WebRtcIsac_DecodeLpc(Bitstr* streamdata, double* LPCCoef_lo,
+                         double* LPCCoef_hi);
+
+/* Quantize & code LPC coefficients. */
+void WebRtcIsac_EncodeLpcLb(double* LPCCoef_lo, double* LPCCoef_hi,
+                            Bitstr* streamdata, IsacSaveEncoderData* encData);
+
+void WebRtcIsac_EncodeLpcGainLb(double* LPCCoef_lo, double* LPCCoef_hi,
+                                Bitstr* streamdata,
+                                IsacSaveEncoderData* encData);
+
+/******************************************************************************
+ * WebRtcIsac_EncodeLpcUB()
+ * Encode LPC parameters, given as an A-polynomial, of the upper band. The
+ * encoding is performed in the LAR domain.
+ * For the upper band, we compute and encode the LPC of some sub-frames; the
+ * LPC of the other sub-frames is obtained by linear interpolation, in the
+ * LAR domain. This function performs the interpolation and returns the LPC
+ * of all sub-frames.
+ *
+ * Inputs:
+ *  - lpcCoeff          : a buffer containing A-polynomials of sub-frames
+ *                        (excluding the first coefficient, which is 1).
+ *  - bandwidth         : specifies if the codec is operating in 0-12 kHz
+ *                        or 0-16 kHz mode.
+ *
+ * Input/output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Output:
+ *  - interpolLPCCoeff  : Decoded and interpolated LPC (A-polynomial)
+ *                        of all sub-frames.
+ *                        If the LP analysis is of order K and there are N
+ *                        sub-frames, then this is a buffer of size
+ *                        (K + 1) * N; each vector starts with the LPC gain
+ *                        of the corresponding sub-frame. The LPC gains
+ *                        are encoded and inserted after this function is
+ *                        called. The first A-coefficient, which is 1, is
+ *                        not included.
+ *
+ * Return value          : 0 if encoding is successful,
+ *                        <0 if failed to encode.
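+ *
+ * Example (an illustrative sketch; the buffer sizes follow the 16 kHz path
+ * of the implementation and are assumptions, not a documented contract):
+ *   double lars[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+ *   double interpol[UB16_INTERPOL_SEGMENTS *
+ *                   kLpcVecPerSegmentUb16 * (UB_LPC_ORDER + 1)];
+ *   int16_t ret = WebRtcIsac_EncodeLpcUB(lars, &streamdata, interpol,
+ *                                        isac16kHz, &encDataUb);
+ *   ret is 0 on success and -1 for an unsupported bandwidth value.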
+ */
+int16_t WebRtcIsac_EncodeLpcUB(double* lpcCoeff, Bitstr* streamdata,
+                               double* interpolLPCCoeff,
+                               int16_t bandwidth,
+                               ISACUBSaveEncDataStruct* encData);
+
+/******************************************************************************
+ * WebRtcIsac_DecodeInterpolLpcUb()
+ * Decode LPC coefficients and interpolate to get the coefficients for all
+ * sub-frames.
+ *
+ * Inputs:
+ *  - bandwidth         : specifies if the codec is in 0-12 kHz or
+ *                        0-16 kHz mode.
+ *
+ * Input/output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Output:
+ *  - percepFilterParam : Decoded and interpolated LPC (A-polynomial) of
+ *                        all sub-frames.
+ *                        If the LP analysis is of order K and there are N
+ *                        sub-frames, then this is a buffer of size
+ *                        (K + 1) * N; each vector starts with the LPC gain
+ *                        of the corresponding sub-frame. The LPC gains
+ *                        are encoded and inserted after this function is
+ *                        called. The first A-coefficient, which is 1, is
+ *                        not included.
+ *
+ * Return value          : 0 if decoding is successful,
+ *                        <0 if failed to decode.
+ */
+int16_t WebRtcIsac_DecodeInterpolLpcUb(Bitstr* streamdata,
+                                       double* percepFilterParam,
+                                       int16_t bandwidth);
+
+/* Decode & dequantize RC */
+int WebRtcIsac_DecodeRc(Bitstr* streamdata, int16_t* RCQ15);
+
+/* Quantize & code RC */
+void WebRtcIsac_EncodeRc(int16_t* RCQ15, Bitstr* streamdata);
+
+/* Decode & dequantize squared Gain */
+int WebRtcIsac_DecodeGain2(Bitstr* streamdata, int32_t* Gain2);
+
+/* Quantize & code squared Gain (input is squared gain) */
+int WebRtcIsac_EncodeGain2(int32_t* gain2, Bitstr* streamdata);
+
+void WebRtcIsac_EncodePitchGain(int16_t* PitchGains_Q12,
+                                Bitstr* streamdata,
+                                IsacSaveEncoderData* encData);
+
+void WebRtcIsac_EncodePitchLag(double* PitchLags,
+                               int16_t* PitchGain_Q12,
+                               Bitstr* streamdata,
+                               IsacSaveEncoderData* encData);
+
+int WebRtcIsac_DecodePitchGain(Bitstr* streamdata,
+                               int16_t* PitchGain_Q12);
+int WebRtcIsac_DecodePitchLag(Bitstr* streamdata, int16_t* PitchGain_Q12,
+                              double* PitchLag);
+
+int WebRtcIsac_DecodeFrameLen(Bitstr* streamdata, int16_t* framelength);
+int WebRtcIsac_EncodeFrameLen(int16_t framelength, Bitstr* streamdata);
+int WebRtcIsac_DecodeSendBW(Bitstr* streamdata, int16_t* BWno);
+void WebRtcIsac_EncodeReceiveBw(int* BWno, Bitstr* streamdata);
+
+/* Step-down */
+void WebRtcIsac_Poly2Rc(double* a, int N, double* RC);
+
+/* Step-up */
+void WebRtcIsac_Rc2Poly(double* RC, int N, double* a);
+
+void WebRtcIsac_TranscodeLPCCoef(double* LPCCoef_lo, double* LPCCoef_hi,
+                                 int* index_g);
+
+
+/******************************************************************************
+ * WebRtcIsac_EncodeLpcGainUb()
+ * Encode LPC gains of sub-frames.
+ *
+ * Input/outputs:
+ *  - lpGains           : a buffer which contains SUBFRAMES LP gains to be
+ *                        encoded. The input values are overwritten by the
+ *                        quantized values.
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Output:
+ *  - lpcGainIndex      : quantization indices for the LPC gains; these will
+ *                        be stored to be used for FEC.
+ */
+void WebRtcIsac_EncodeLpcGainUb(double* lpGains, Bitstr* streamdata,
+                                int* lpcGainIndex);
+
+
+/******************************************************************************
+ * WebRtcIsac_StoreLpcGainUb()
+ * Store LPC gains of sub-frames in 'streamdata'.
+ *
+ * Input:
+ *  - lpGains           : a buffer which contains SUBFRAMES LP gains to be
+ *                        encoded.
+ *
+ * Input/outputs:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ */
+void WebRtcIsac_StoreLpcGainUb(double* lpGains, Bitstr* streamdata);
+
+
+/******************************************************************************
+ * WebRtcIsac_DecodeLpcGainUb()
+ * Decode the LPC gain of sub-frames.
+ *
+ * Input/output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Output:
+ *  - lpGains           : a buffer where decoded LPC gains will be stored.
+ *
+ * Return value          : 0 if succeeded.
+ *                        <0 if failed.
+ */
+int16_t WebRtcIsac_DecodeLpcGainUb(double* lpGains, Bitstr* streamdata);
+
+
+/******************************************************************************
+ * WebRtcIsac_EncodeBandwidth()
+ * Encode if the bandwidth of encoded audio is 0-12 kHz or 0-16 kHz.
+ *
+ * Input:
+ *  - bandwidth         : an enumerator specifying if the codec is in
+ *                        0-12 kHz or 0-16 kHz mode.
+ *
+ * Input/output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Return value          : 0 if succeeded.
+ *                        <0 if failed.
+ */
+int16_t WebRtcIsac_EncodeBandwidth(enum ISACBandwidth bandwidth,
+                                   Bitstr* streamData);
+
+
+/******************************************************************************
+ * WebRtcIsac_DecodeBandwidth()
+ * Decode the bandwidth of the encoded audio, i.e. if the bandwidth is
+ * 0-12 kHz or 0-16 kHz.
+ *
+ * Input/output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Output:
+ *  - bandwidth         : an enumerator specifying if the codec is in
+ *                        0-12 kHz or 0-16 kHz mode.
+ *
+ * Return value          : 0 if succeeded.
+ *                        <0 if failed.
+ */
+int16_t WebRtcIsac_DecodeBandwidth(Bitstr* streamData,
+                                   enum ISACBandwidth* bandwidth);
+
+
+/******************************************************************************
+ * WebRtcIsac_EncodeJitterInfo()
+ * Encode the jitter information.
+ *
+ * Input/output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Input:
+ *  - jitterIndex       : one bit of info specifying if the channel is
+ *                        in high/low jitter. Zero indicates low jitter
+ *                        and one indicates high jitter.
+ *
+ * Return value          : 0 if succeeded.
+ *                        <0 if failed.
+ */
+int16_t WebRtcIsac_EncodeJitterInfo(int32_t jitterIndex,
+                                    Bitstr* streamData);
+
+
+/******************************************************************************
+ * WebRtcIsac_DecodeJitterInfo()
+ * Decode the jitter information.
+ *
+ * Input/output:
+ *  - streamdata        : pointer to a structure containing the encoded
+ *                        data and the parameters needed for entropy
+ *                        coding.
+ *
+ * Output:
+ *  - jitterInfo        : one bit of info specifying if the channel is
+ *                        in high/low jitter. Zero indicates low jitter
+ *                        and one indicates high jitter.
+ *
+ * Return value          : 0 if succeeded.
+ *                        <0 if failed.
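+ *
+ * Example (sketch only; the Bitstr is assumed to have been initialized by
+ * the decoder elsewhere):
+ *   int32_t jitterInfo;
+ *   if (WebRtcIsac_DecodeJitterInfo(&streamData, &jitterInfo) < 0)
+ *     the stream cannot be decoded;
+ *   on success, jitterInfo is 0 (low jitter) or 1 (high jitter).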
+ */
+int16_t WebRtcIsac_DecodeJitterInfo(Bitstr* streamData,
+                                    int32_t* jitterInfo);
+
+#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_ENTROPY_CODING_H_ */
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.c b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.c
new file mode 100644
index 0000000..60fc25b
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.c
@@ -0,0 +1,535 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS. All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "lpc_analysis.h"
+#include "settings.h"
+#include "codec.h"
+#include "entropy_coding.h"
+
+#include <math.h>
+#include <string.h>
+
+#define LEVINSON_EPS 1.0e-10
+
+
+/* window */
+/* Matlab generation code:
+ * t = (1:256)/257; r = 1-(1-t).^.45; w = sin(r*pi).^3; w = w/sum(w); plot((1:256)/8, w); grid;
+ * for k=1:16, fprintf(1, '%.8f, ', w(k*16 + (-15:0))); fprintf(1, '\n'); end
+ */
+static const double kLpcCorrWindow[WINLEN] = {
+  0.00000000, 0.00000001, 0.00000004, 0.00000010, 0.00000020,
+  0.00000035, 0.00000055, 0.00000083, 0.00000118, 0.00000163,
+  0.00000218, 0.00000283, 0.00000361, 0.00000453, 0.00000558, 0.00000679,
+  0.00000817, 0.00000973, 0.00001147, 0.00001342, 0.00001558,
+  0.00001796, 0.00002058, 0.00002344, 0.00002657, 0.00002997,
+  0.00003365, 0.00003762, 0.00004190, 0.00004651, 0.00005144, 0.00005673,
+  0.00006236, 0.00006837, 0.00007476, 0.00008155, 0.00008875,
+  0.00009636, 0.00010441, 0.00011290, 0.00012186, 0.00013128,
+  0.00014119, 0.00015160, 0.00016252, 0.00017396, 0.00018594, 0.00019846,
+  0.00021155, 0.00022521, 0.00023946, 0.00025432, 0.00026978,
+  0.00028587, 0.00030260, 0.00031998, 0.00033802, 0.00035674,
+  0.00037615, 0.00039626, 0.00041708, 0.00043863, 0.00046092, 0.00048396,
+  0.00050775, 0.00053233, 0.00055768, 0.00058384, 0.00061080,
+  0.00063858, 0.00066720, 0.00069665, 0.00072696, 0.00075813,
+  0.00079017, 0.00082310, 0.00085692, 0.00089164, 0.00092728, 0.00096384,
+  0.00100133, 0.00103976, 0.00107914, 0.00111947, 0.00116077,
+  0.00120304, 0.00124630, 0.00129053, 0.00133577, 0.00138200,
+  0.00142924, 0.00147749, 0.00152676, 0.00157705, 0.00162836, 0.00168070,
+  0.00173408, 0.00178850, 0.00184395, 0.00190045, 0.00195799,
+  0.00201658, 0.00207621, 0.00213688, 0.00219860, 0.00226137,
+  0.00232518, 0.00239003, 0.00245591, 0.00252284, 0.00259079, 0.00265977,
+  0.00272977, 0.00280078, 0.00287280, 0.00294582, 0.00301984,
+  0.00309484, 0.00317081, 0.00324774, 0.00332563, 0.00340446,
+  0.00348421, 0.00356488, 0.00364644, 0.00372889, 0.00381220, 0.00389636,
+  0.00398135, 0.00406715, 0.00415374, 0.00424109, 0.00432920,
+  0.00441802, 0.00450754, 0.00459773, 0.00468857, 0.00478001,
+  0.00487205, 0.00496464, 0.00505775, 0.00515136, 0.00524542, 0.00533990,
+  0.00543476, 0.00552997, 0.00562548, 0.00572125, 0.00581725,
+  0.00591342, 0.00600973, 0.00610612, 0.00620254, 0.00629895,
+  0.00639530, 0.00649153, 0.00658758, 0.00668341, 0.00677894, 0.00687413,
+  0.00696891, 0.00706322, 0.00715699, 0.00725016, 0.00734266,
+  0.00743441, 0.00752535, 0.00761540, 0.00770449, 0.00779254,
+  0.00787947, 0.00796519, 0.00804963, 0.00813270, 0.00821431, 0.00829437,
+  0.00837280, 0.00844949, 0.00852436, 0.00859730, 0.00866822,
+ 0.00873701, 0.00880358, 0.00886781, 0.00892960, 0.00898884, + 0.00904542, 0.00909923, 0.00915014, 0.00919805, 0.00924283, 0.00928436, + 0.00932252, 0.00935718, 0.00938821, 0.00941550, 0.00943890, + 0.00945828, 0.00947351, 0.00948446, 0.00949098, 0.00949294, + 0.00949020, 0.00948262, 0.00947005, 0.00945235, 0.00942938, 0.00940099, + 0.00936704, 0.00932738, 0.00928186, 0.00923034, 0.00917268, + 0.00910872, 0.00903832, 0.00896134, 0.00887763, 0.00878706, + 0.00868949, 0.00858478, 0.00847280, 0.00835343, 0.00822653, 0.00809199, + 0.00794970, 0.00779956, 0.00764145, 0.00747530, 0.00730103, + 0.00711857, 0.00692787, 0.00672888, 0.00652158, 0.00630597, + 0.00608208, 0.00584994, 0.00560962, 0.00536124, 0.00510493, 0.00484089, + 0.00456935, 0.00429062, 0.00400505, 0.00371310, 0.00341532, + 0.00311238, 0.00280511, 0.00249452, 0.00218184, 0.00186864, + 0.00155690, 0.00124918, 0.00094895, 0.00066112, 0.00039320, 0.00015881 +}; + +double WebRtcIsac_LevDurb(double *a, double *k, double *r, size_t order) +{ + + double sum, alpha; + size_t m, m_h, i; + alpha = 0; //warning -DH + a[0] = 1.0; + if (r[0] < LEVINSON_EPS) { /* if r[0] <= 0, set LPC coeff. to zero */ + for (i = 0; i < order; i++) { + k[i] = 0; + a[i+1] = 0; + } + } else { + a[1] = k[0] = -r[1]/r[0]; + alpha = r[0] + r[1] * k[0]; + for (m = 1; m < order; m++){ + sum = r[m + 1]; + for (i = 0; i < m; i++){ + sum += a[i+1] * r[m - i]; + } + k[m] = -sum / alpha; + alpha += k[m] * sum; + m_h = (m + 1) >> 1; + for (i = 0; i < m_h; i++){ + sum = a[i+1] + k[m] * a[m - i]; + a[m - i] += k[m] * a[i+1]; + a[i+1] = sum; + } + a[m+1] = k[m]; + } + } + return alpha; +} + + +//was static before, but didn't work with MEX file +void WebRtcIsac_GetVars(const double *input, const int16_t *pitchGains_Q12, + double *oldEnergy, double *varscale) +{ + double nrg[4], chng, pg; + int k; + + double pitchGains[4]={0,0,0,0};; + + /* Calculate energies of first and second frame halfs */ + nrg[0] = 0.0001; + for (k = QLOOKAHEAD/2; k < (FRAMESAMPLES_QUARTER + QLOOKAHEAD) / 2; k++) { + nrg[0] += input[k]*input[k]; + } + nrg[1] = 0.0001; + for ( ; k < (FRAMESAMPLES_HALF + QLOOKAHEAD) / 2; k++) { + nrg[1] += input[k]*input[k]; + } + nrg[2] = 0.0001; + for ( ; k < (FRAMESAMPLES*3/4 + QLOOKAHEAD) / 2; k++) { + nrg[2] += input[k]*input[k]; + } + nrg[3] = 0.0001; + for ( ; k < (FRAMESAMPLES + QLOOKAHEAD) / 2; k++) { + nrg[3] += input[k]*input[k]; + } + + /* Calculate average level change */ + chng = 0.25 * (fabs(10.0 * log10(nrg[3] / nrg[2])) + + fabs(10.0 * log10(nrg[2] / nrg[1])) + + fabs(10.0 * log10(nrg[1] / nrg[0])) + + fabs(10.0 * log10(nrg[0] / *oldEnergy))); + + + /* Find average pitch gain */ + pg = 0.0; + for (k=0; k<4; k++) + { + pitchGains[k] = ((float)pitchGains_Q12[k])/4096; + pg += pitchGains[k]; + } + pg *= 0.25; + + /* If pitch gain is low and energy constant - increase noise level*/ + /* Matlab code: + pg = 0:.01:.45; plot(pg, 0.0 + 1.0 * exp( -1.0 * exp(-200.0 * pg.*pg.*pg) / (1.0 + 0.4 * 0) )) + */ + *varscale = 0.0 + 1.0 * exp( -1.4 * exp(-200.0 * pg*pg*pg) / (1.0 + 0.4 * chng) ); + + *oldEnergy = nrg[3]; +} + +void +WebRtcIsac_GetVarsUB( + const double* input, + double* oldEnergy, + double* varscale) +{ + double nrg[4], chng; + int k; + + /* Calculate energies of first and second frame halfs */ + nrg[0] = 0.0001; + for (k = 0; k < (FRAMESAMPLES_QUARTER) / 2; k++) { + nrg[0] += input[k]*input[k]; + } + nrg[1] = 0.0001; + for ( ; k < (FRAMESAMPLES_HALF) / 2; k++) { + nrg[1] += input[k]*input[k]; + } + nrg[2] = 0.0001; + for ( ; k < (FRAMESAMPLES*3/4) / 2; k++) 
{ + nrg[2] += input[k]*input[k]; + } + nrg[3] = 0.0001; + for ( ; k < (FRAMESAMPLES) / 2; k++) { + nrg[3] += input[k]*input[k]; + } + + /* Calculate average level change */ + chng = 0.25 * (fabs(10.0 * log10(nrg[3] / nrg[2])) + + fabs(10.0 * log10(nrg[2] / nrg[1])) + + fabs(10.0 * log10(nrg[1] / nrg[0])) + + fabs(10.0 * log10(nrg[0] / *oldEnergy))); + + + /* If pitch gain is low and energy constant - increase noise level*/ + /* Matlab code: + pg = 0:.01:.45; plot(pg, 0.0 + 1.0 * exp( -1.0 * exp(-200.0 * pg.*pg.*pg) / (1.0 + 0.4 * 0) )) + */ + *varscale = exp( -1.4 / (1.0 + 0.4 * chng) ); + + *oldEnergy = nrg[3]; +} + +void WebRtcIsac_GetLpcCoefLb(double *inLo, double *inHi, MaskFiltstr *maskdata, + double signal_noise_ratio, const int16_t *pitchGains_Q12, + double *lo_coeff, double *hi_coeff) +{ + int k, n, j, pos1, pos2; + double varscale; + + double DataLo[WINLEN], DataHi[WINLEN]; + double corrlo[ORDERLO+2], corrlo2[ORDERLO+1]; + double corrhi[ORDERHI+1]; + double k_veclo[ORDERLO], k_vechi[ORDERHI]; + + double a_LO[ORDERLO+1], a_HI[ORDERHI+1]; + double tmp, res_nrg; + + double FwdA, FwdB; + + /* hearing threshold level in dB; higher value gives more noise */ + const double HearThresOffset = -28.0; + + /* bandwdith expansion factors for low- and high band */ + const double gammaLo = 0.9; + const double gammaHi = 0.8; + + /* less-noise-at-low-frequencies factor */ + double aa; + + + /* convert from dB to signal level */ + const double H_T_H = pow(10.0, 0.05 * HearThresOffset); + double S_N_R = pow(10.0, 0.05 * signal_noise_ratio) / 3.46; /* divide by sqrt(12) */ + + /* change quallevel depending on pitch gains and level fluctuations */ + WebRtcIsac_GetVars(inLo, pitchGains_Q12, &(maskdata->OldEnergy), &varscale); + + /* less-noise-at-low-frequencies factor */ + aa = 0.35 * (0.5 + 0.5 * varscale); + + /* replace data in buffer by new look-ahead data */ + for (pos1 = 0; pos1 < QLOOKAHEAD; pos1++) + maskdata->DataBufferLo[pos1 + WINLEN - QLOOKAHEAD] = inLo[pos1]; + + for (k = 0; k < SUBFRAMES; k++) { + + /* Update input buffer and multiply signal with window */ + for (pos1 = 0; pos1 < WINLEN - UPDATE/2; pos1++) { + maskdata->DataBufferLo[pos1] = maskdata->DataBufferLo[pos1 + UPDATE/2]; + maskdata->DataBufferHi[pos1] = maskdata->DataBufferHi[pos1 + UPDATE/2]; + DataLo[pos1] = maskdata->DataBufferLo[pos1] * kLpcCorrWindow[pos1]; + DataHi[pos1] = maskdata->DataBufferHi[pos1] * kLpcCorrWindow[pos1]; + } + pos2 = k * UPDATE/2; + for (n = 0; n < UPDATE/2; n++, pos1++) { + maskdata->DataBufferLo[pos1] = inLo[QLOOKAHEAD + pos2]; + maskdata->DataBufferHi[pos1] = inHi[pos2++]; + DataLo[pos1] = maskdata->DataBufferLo[pos1] * kLpcCorrWindow[pos1]; + DataHi[pos1] = maskdata->DataBufferHi[pos1] * kLpcCorrWindow[pos1]; + } + + /* Get correlation coefficients */ + WebRtcIsac_AutoCorr(corrlo, DataLo, WINLEN, ORDERLO+1); /* computing autocorrelation */ + WebRtcIsac_AutoCorr(corrhi, DataHi, WINLEN, ORDERHI); + + + /* less noise for lower frequencies, by filtering/scaling autocorrelation sequences */ + corrlo2[0] = (1.0+aa*aa) * corrlo[0] - 2.0*aa * corrlo[1]; + tmp = (1.0 + aa*aa); + for (n = 1; n <= ORDERLO; n++) { + corrlo2[n] = tmp * corrlo[n] - aa * (corrlo[n-1] + corrlo[n+1]); + } + tmp = (1.0+aa) * (1.0+aa); + for (n = 0; n <= ORDERHI; n++) { + corrhi[n] = tmp * corrhi[n]; + } + + /* add white noise floor */ + corrlo2[0] += 1e-6; + corrhi[0] += 1e-6; + + + FwdA = 0.01; + FwdB = 0.01; + + /* recursive filtering of correlation over subframes */ + for (n = 0; n <= ORDERLO; n++) { + maskdata->CorrBufLo[n] = 
FwdA * maskdata->CorrBufLo[n] + corrlo2[n]; + corrlo2[n] = ((1.0-FwdA)*FwdB) * maskdata->CorrBufLo[n] + (1.0-FwdB) * corrlo2[n]; + } + for (n = 0; n <= ORDERHI; n++) { + maskdata->CorrBufHi[n] = FwdA * maskdata->CorrBufHi[n] + corrhi[n]; + corrhi[n] = ((1.0-FwdA)*FwdB) * maskdata->CorrBufHi[n] + (1.0-FwdB) * corrhi[n]; + } + + /* compute prediction coefficients */ + WebRtcIsac_LevDurb(a_LO, k_veclo, corrlo2, ORDERLO); + WebRtcIsac_LevDurb(a_HI, k_vechi, corrhi, ORDERHI); + + /* bandwidth expansion */ + tmp = gammaLo; + for (n = 1; n <= ORDERLO; n++) { + a_LO[n] *= tmp; + tmp *= gammaLo; + } + + /* residual energy */ + res_nrg = 0.0; + for (j = 0; j <= ORDERLO; j++) { + for (n = 0; n <= j; n++) { + res_nrg += a_LO[j] * corrlo2[j-n] * a_LO[n]; + } + for (n = j+1; n <= ORDERLO; n++) { + res_nrg += a_LO[j] * corrlo2[n-j] * a_LO[n]; + } + } + + /* add hearing threshold and compute the gain */ + *lo_coeff++ = S_N_R / (sqrt(res_nrg) / varscale + H_T_H); + + /* copy coefficients to output array */ + for (n = 1; n <= ORDERLO; n++) { + *lo_coeff++ = a_LO[n]; + } + + + /* bandwidth expansion */ + tmp = gammaHi; + for (n = 1; n <= ORDERHI; n++) { + a_HI[n] *= tmp; + tmp *= gammaHi; + } + + /* residual energy */ + res_nrg = 0.0; + for (j = 0; j <= ORDERHI; j++) { + for (n = 0; n <= j; n++) { + res_nrg += a_HI[j] * corrhi[j-n] * a_HI[n]; + } + for (n = j+1; n <= ORDERHI; n++) { + res_nrg += a_HI[j] * corrhi[n-j] * a_HI[n]; + } + } + + /* add hearing threshold and compute of the gain */ + *hi_coeff++ = S_N_R / (sqrt(res_nrg) / varscale + H_T_H); + + /* copy coefficients to output array */ + for (n = 1; n <= ORDERHI; n++) { + *hi_coeff++ = a_HI[n]; + } + } +} + + + +/****************************************************************************** + * WebRtcIsac_GetLpcCoefUb() + * + * Compute LP coefficients and correlation coefficients. At 12 kHz LP + * coefficients of the first and the last sub-frame is computed. At 16 kHz + * LP coefficients of 4th, 8th and 12th sub-frames are computed. We always + * compute correlation coefficients of all sub-frames. + * + * Inputs: + * -inSignal : Input signal + * -maskdata : a structure keeping signal from previous frame. + * -bandwidth : specifies if the codec is in 0-16 kHz mode or + * 0-12 kHz mode. + * + * Outputs: + * -lpCoeff : pointer to a buffer where A-polynomials are + * written to (first coeff is 1 and it is not + * written) + * -corrMat : a matrix where correlation coefficients of each + * sub-frame are written to one row. + * -varscale : a scale used to compute LPC gains. 
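+ *
+ * A sketch of a 16 kHz call (buffer sizes are inferred from this file and
+ * are assumptions, not a documented contract):
+ *   double lpCoeff[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+ *   double corrMat[SUBFRAMES * 2][UB_LPC_ORDER + 1];
+ *   double varscale[2];
+ *   WebRtcIsac_GetLpcCoefUb(inSignal, &maskdata, lpCoeff, corrMat,
+ *                           varscale, isac16kHz);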
+ */ +void +WebRtcIsac_GetLpcCoefUb( + double* inSignal, + MaskFiltstr* maskdata, + double* lpCoeff, + double corrMat[][UB_LPC_ORDER + 1], + double* varscale, + int16_t bandwidth) +{ + int frameCntr, activeFrameCntr, n, pos1, pos2; + int16_t criterion1; + int16_t criterion2; + int16_t numSubFrames = SUBFRAMES * (1 + (bandwidth == isac16kHz)); + double data[WINLEN]; + double corrSubFrame[UB_LPC_ORDER+2]; + double reflecCoeff[UB_LPC_ORDER]; + + double aPolynom[UB_LPC_ORDER+1]; + double tmp; + + /* bandwdith expansion factors */ + const double gamma = 0.9; + + /* change quallevel depending on pitch gains and level fluctuations */ + WebRtcIsac_GetVarsUB(inSignal, &(maskdata->OldEnergy), varscale); + + /* replace data in buffer by new look-ahead data */ + for(frameCntr = 0, activeFrameCntr = 0; frameCntr < numSubFrames; + frameCntr++) + { + if(frameCntr == SUBFRAMES) + { + // we are in 16 kHz + varscale++; + WebRtcIsac_GetVarsUB(&inSignal[FRAMESAMPLES_HALF], + &(maskdata->OldEnergy), varscale); + } + /* Update input buffer and multiply signal with window */ + for(pos1 = 0; pos1 < WINLEN - UPDATE/2; pos1++) + { + maskdata->DataBufferLo[pos1] = maskdata->DataBufferLo[pos1 + + UPDATE/2]; + data[pos1] = maskdata->DataBufferLo[pos1] * kLpcCorrWindow[pos1]; + } + pos2 = frameCntr * UPDATE/2; + for(n = 0; n < UPDATE/2; n++, pos1++, pos2++) + { + maskdata->DataBufferLo[pos1] = inSignal[pos2]; + data[pos1] = maskdata->DataBufferLo[pos1] * kLpcCorrWindow[pos1]; + } + + /* Get correlation coefficients */ + /* computing autocorrelation */ + WebRtcIsac_AutoCorr(corrSubFrame, data, WINLEN, UB_LPC_ORDER+1); + memcpy(corrMat[frameCntr], corrSubFrame, + (UB_LPC_ORDER+1)*sizeof(double)); + + criterion1 = ((frameCntr == 0) || (frameCntr == (SUBFRAMES - 1))) && + (bandwidth == isac12kHz); + criterion2 = (((frameCntr+1) % 4) == 0) && + (bandwidth == isac16kHz); + if(criterion1 || criterion2) + { + /* add noise */ + corrSubFrame[0] += 1e-6; + /* compute prediction coefficients */ + WebRtcIsac_LevDurb(aPolynom, reflecCoeff, corrSubFrame, + UB_LPC_ORDER); + + /* bandwidth expansion */ + tmp = gamma; + for (n = 1; n <= UB_LPC_ORDER; n++) + { + *lpCoeff++ = aPolynom[n] * tmp; + tmp *= gamma; + } + activeFrameCntr++; + } + } +} + + + +/****************************************************************************** + * WebRtcIsac_GetLpcGain() + * + * Compute the LPC gains for each sub-frame, given the LPC of each sub-frame + * and the corresponding correlation coefficients. + * + * Inputs: + * -signal_noise_ratio : the desired SNR in dB. + * -numVecs : number of sub-frames + * -corrMat : a matrix of correlation coefficients where + * each row is a set of correlation coefficients of + * one sub-frame. + * -varscale : a scale computed when WebRtcIsac_GetLpcCoefUb() + * is called. + * + * Outputs: + * -gain : pointer to a buffer where LP gains are written. + * + */ +void +WebRtcIsac_GetLpcGain( + double signal_noise_ratio, + const double* filtCoeffVecs, + int numVecs, + double* gain, + double corrMat[][UB_LPC_ORDER + 1], + const double* varscale) +{ + int16_t j, n; + int16_t subFrameCntr; + double aPolynom[ORDERLO + 1]; + double res_nrg; + + const double HearThresOffset = -28.0; + const double H_T_H = pow(10.0, 0.05 * HearThresOffset); + /* divide by sqrt(12) = 3.46 */ + const double S_N_R = pow(10.0, 0.05 * signal_noise_ratio) / 3.46; + + aPolynom[0] = 1; + for(subFrameCntr = 0; subFrameCntr < numVecs; subFrameCntr++) + { + if(subFrameCntr == SUBFRAMES) + { + // we are in second half of a SWB frame. 
use new varscale + varscale++; + } + memcpy(&aPolynom[1], &filtCoeffVecs[(subFrameCntr * (UB_LPC_ORDER + 1)) + + 1], sizeof(double) * UB_LPC_ORDER); + + /* residual energy */ + res_nrg = 0.0; + for(j = 0; j <= UB_LPC_ORDER; j++) + { + for(n = 0; n <= j; n++) + { + res_nrg += aPolynom[j] * corrMat[subFrameCntr][j-n] * + aPolynom[n]; + } + for(n = j+1; n <= UB_LPC_ORDER; n++) + { + res_nrg += aPolynom[j] * corrMat[subFrameCntr][n-j] * + aPolynom[n]; + } + } + + /* add hearing threshold and compute the gain */ + gain[subFrameCntr] = S_N_R / (sqrt(res_nrg) / *varscale + H_T_H); + } +} diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h new file mode 100644 index 0000000..8dfe383 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * lpc_analysis.h + * + * LPC functions + * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_ANALYSIS_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_ANALYSIS_H_ + +#include "settings.h" +#include "structs.h" + +double WebRtcIsac_LevDurb(double *a, double *k, double *r, size_t order); + +void WebRtcIsac_GetVars(const double *input, const int16_t *pitchGains_Q12, + double *oldEnergy, double *varscale); + +void WebRtcIsac_GetLpcCoefLb(double *inLo, double *inHi, MaskFiltstr *maskdata, + double signal_noise_ratio, const int16_t *pitchGains_Q12, + double *lo_coeff, double *hi_coeff); + + +void WebRtcIsac_GetLpcGain( + double signal_noise_ratio, + const double* filtCoeffVecs, + int numVecs, + double* gain, + double corrLo[][UB_LPC_ORDER + 1], + const double* varscale); + +void WebRtcIsac_GetLpcCoefUb( + double* inSignal, + MaskFiltstr* maskdata, + double* lpCoeff, + double corr[][UB_LPC_ORDER + 1], + double* varscale, + int16_t bandwidth); + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_ANALYIS_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_gain_swb_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_gain_swb_tables.c new file mode 100644 index 0000000..5cc6c11 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_gain_swb_tables.c @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * SWB_KLT_Tables_LPCGain.c + * + * This file defines tables used for entropy coding of LPC Gain + * of upper-band. + * + */ + +#include "lpc_gain_swb_tables.h" +#include "settings.h" +#include "webrtc/typedefs.h" + +const double WebRtcIsac_kQSizeLpcGain = 0.100000; + +const double WebRtcIsac_kMeanLpcGain = -3.3822; + +/* +* The smallest reconstruction points for quantiztion of +* LPC gains. 
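+* (A sketch of the quantizer these tables imply: for sub-frame k, index i
+* would reconstruct to WebRtcIsac_kLeftRecPointLpcGain[k] +
+* i * WebRtcIsac_kQSizeLpcGain, for i in
+* [0, WebRtcIsac_kNumQCellLpcGain[k] - 1]; the exact use is defined by the
+* quantizer code, not by this comment.)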
+*/ +const double WebRtcIsac_kLeftRecPointLpcGain[SUBFRAMES] = +{ + -0.800000, -1.000000, -1.200000, -2.200000, -3.000000, -12.700000 +}; + +/* +* Number of reconstruction points of quantizers for LPC Gains. +*/ +const int16_t WebRtcIsac_kNumQCellLpcGain[SUBFRAMES] = +{ + 17, 20, 25, 45, 77, 170 +}; +/* +* Starting index for entropy decoder to search for the right interval, +* one entry per LAR coefficient +*/ +const uint16_t WebRtcIsac_kLpcGainEntropySearch[SUBFRAMES] = +{ + 8, 10, 12, 22, 38, 85 +}; + +/* +* The following 6 vectors define CDF of 6 decorrelated LPC +* gains. +*/ +const uint16_t WebRtcIsac_kLpcGainCdfVec0[18] = +{ + 0, 10, 27, 83, 234, 568, 1601, 4683, 16830, 57534, 63437, + 64767, 65229, 65408, 65483, 65514, 65527, 65535 +}; + +const uint16_t WebRtcIsac_kLpcGainCdfVec1[21] = +{ + 0, 15, 33, 84, 185, 385, 807, 1619, 3529, 7850, 19488, + 51365, 62437, 64548, 65088, 65304, 65409, 65484, 65507, 65522, 65535 +}; + +const uint16_t WebRtcIsac_kLpcGainCdfVec2[26] = +{ + 0, 15, 29, 54, 89, 145, 228, 380, 652, 1493, 4260, + 12359, 34133, 50749, 57224, 60814, 62927, 64078, 64742, 65103, 65311, 65418, + 65473, 65509, 65521, 65535 +}; + +const uint16_t WebRtcIsac_kLpcGainCdfVec3[46] = +{ + 0, 8, 12, 16, 26, 42, 56, 76, 111, 164, 247, + 366, 508, 693, 1000, 1442, 2155, 3188, 4854, 7387, 11249, 17617, + 30079, 46711, 56291, 60127, 62140, 63258, 63954, 64384, 64690, 64891, 65031, + 65139, 65227, 65293, 65351, 65399, 65438, 65467, 65492, 65504, 65510, 65518, + 65523, 65535 +}; + +const uint16_t WebRtcIsac_kLpcGainCdfVec4[78] = +{ + 0, 17, 29, 39, 51, 70, 104, 154, 234, 324, 443, + 590, 760, 971, 1202, 1494, 1845, 2274, 2797, 3366, 4088, 4905, + 5899, 7142, 8683, 10625, 12983, 16095, 20637, 28216, 38859, 47237, 51537, + 54150, 56066, 57583, 58756, 59685, 60458, 61103, 61659, 62144, 62550, 62886, + 63186, 63480, 63743, 63954, 64148, 64320, 64467, 64600, 64719, 64837, 64939, + 65014, 65098, 65160, 65211, 65250, 65290, 65325, 65344, 65366, 65391, 65410, + 65430, 65447, 65460, 65474, 65487, 65494, 65501, 65509, 65513, 65518, 65520, + 65535 +}; + +const uint16_t WebRtcIsac_kLpcGainCdfVec5[171] = +{ + 0, 10, 12, 14, 16, 18, 23, 29, 35, 42, 51, + 58, 65, 72, 78, 87, 96, 103, 111, 122, 134, 150, + 167, 184, 202, 223, 244, 265, 289, 315, 346, 379, 414, + 450, 491, 532, 572, 613, 656, 700, 751, 802, 853, 905, + 957, 1021, 1098, 1174, 1250, 1331, 1413, 1490, 1565, 1647, 1730, + 1821, 1913, 2004, 2100, 2207, 2314, 2420, 2532, 2652, 2783, 2921, + 3056, 3189, 3327, 3468, 3640, 3817, 3993, 4171, 4362, 4554, 4751, + 4948, 5142, 5346, 5566, 5799, 6044, 6301, 6565, 6852, 7150, 7470, + 7797, 8143, 8492, 8835, 9181, 9547, 9919, 10315, 10718, 11136, 11566, + 12015, 12482, 12967, 13458, 13953, 14432, 14903, 15416, 15936, 16452, 16967, + 17492, 18024, 18600, 19173, 19736, 20311, 20911, 21490, 22041, 22597, 23157, + 23768, 24405, 25034, 25660, 26280, 26899, 27614, 28331, 29015, 29702, 30403, + 31107, 31817, 32566, 33381, 34224, 35099, 36112, 37222, 38375, 39549, 40801, + 42074, 43350, 44626, 45982, 47354, 48860, 50361, 51845, 53312, 54739, 56026, + 57116, 58104, 58996, 59842, 60658, 61488, 62324, 63057, 63769, 64285, 64779, + 65076, 65344, 65430, 65500, 65517, 65535 +}; + +/* +* An array of pointers to CDFs of decorrelated LPC Gains +*/ +const uint16_t* WebRtcIsac_kLpcGainCdfMat[SUBFRAMES] = +{ + WebRtcIsac_kLpcGainCdfVec0, WebRtcIsac_kLpcGainCdfVec1, + WebRtcIsac_kLpcGainCdfVec2, WebRtcIsac_kLpcGainCdfVec3, + WebRtcIsac_kLpcGainCdfVec4, WebRtcIsac_kLpcGainCdfVec5 +}; + +/* +* A matrix to decorrellate LPC gains 
of subframes. +*/ +const double WebRtcIsac_kLpcGainDecorrMat[SUBFRAMES][SUBFRAMES] = +{ + {-0.150860, 0.327872, 0.367220, 0.504613, 0.559270, 0.409234}, + { 0.457128, -0.613591, -0.289283, -0.029734, 0.393760, 0.418240}, + {-0.626043, 0.136489, -0.439118, -0.448323, 0.135987, 0.420869}, + { 0.526617, 0.480187, 0.242552, -0.488754, -0.158713, 0.411331}, + {-0.302587, -0.494953, 0.588112, -0.063035, -0.404290, 0.387510}, + { 0.086378, 0.147714, -0.428875, 0.548300, -0.570121, 0.401391} +}; diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_gain_swb_tables.h b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_gain_swb_tables.h new file mode 100644 index 0000000..c163f4a --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_gain_swb_tables.h @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * SWB_KLT_Tables_LPCGain.h + * + * This file declares tables used for entropy coding of LPC Gain + * of upper-band. + * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_GAIN_SWB_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_GAIN_SWB_TABLES_H_ + +#include "settings.h" +#include "webrtc/typedefs.h" + +extern const double WebRtcIsac_kQSizeLpcGain; + +extern const double WebRtcIsac_kLeftRecPointLpcGain[SUBFRAMES]; + +extern const int16_t WebRtcIsac_kNumQCellLpcGain[SUBFRAMES]; + +extern const uint16_t WebRtcIsac_kLpcGainEntropySearch[SUBFRAMES]; + +extern const uint16_t WebRtcIsac_kLpcGainCdfVec0[18]; + +extern const uint16_t WebRtcIsac_kLpcGainCdfVec1[21]; + +extern const uint16_t WebRtcIsac_kLpcGainCdfVec2[26]; + +extern const uint16_t WebRtcIsac_kLpcGainCdfVec3[46]; + +extern const uint16_t WebRtcIsac_kLpcGainCdfVec4[78]; + +extern const uint16_t WebRtcIsac_kLpcGainCdfVec5[171]; + +extern const uint16_t* WebRtcIsac_kLpcGainCdfMat[SUBFRAMES]; + +extern const double WebRtcIsac_kLpcGainDecorrMat[SUBFRAMES][SUBFRAMES]; + +#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_GAIN_SWB_TABLES_H_ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.c new file mode 100644 index 0000000..599b89d --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.c @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * SWB_KLT_Tables.c + * + * This file defines tables used for entropy coding of LPC shape of + * upper-band signal if the bandwidth is 12 kHz. 
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.c
new file mode 100644
index 0000000..599b89d
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.c
@@ -0,0 +1,159 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * SWB_KLT_Tables.c
+ *
+ * This file defines tables used for entropy coding of LPC shape of
+ * upper-band signal if the bandwidth is 12 kHz.
+ *
+ */
+
+#include "lpc_shape_swb12_tables.h"
+#include "settings.h"
+#include "webrtc/typedefs.h"
+
+/*
+* Mean value of LAR
+*/
+const double WebRtcIsac_kMeanLarUb12[UB_LPC_ORDER] =
+{
+  0.03748928306641, 0.09453441192543, -0.01112522344398, 0.03800237516842
+};
+
+/*
+* A rotation matrix to decorrelate intra-vector correlation,
+* i.e. correlation among components of LAR vector.
+*/
+const double WebRtcIsac_kIntraVecDecorrMatUb12[UB_LPC_ORDER][UB_LPC_ORDER] =
+{
+  {-0.00075365493856, -0.05809964887743, -0.23397966154116,  0.97050367376411},
+  { 0.00625021257734, -0.17299965610679,  0.95977735920651,  0.22104179375008},
+  { 0.20543384258374, -0.96202143495696, -0.15301870801552, -0.09432375099565},
+  {-0.97865075648479, -0.20300322280841, -0.02581111653779, -0.01913568980258}
+};
+
+/*
+* A rotation matrix to remove correlation among LAR coefficients
+* of different LAR vectors. One might guess that decorrelation matrix
+* for the first component should differ from the second component
+* but we haven't observed a significant benefit of having different
+* decorrelation matrices for different components.
+*/
+const double WebRtcIsac_kInterVecDecorrMatUb12
+[UB_LPC_VEC_PER_FRAME][UB_LPC_VEC_PER_FRAME] =
+{
+  { 0.70650597970460, -0.70770707262373},
+  {-0.70770707262373, -0.70650597970460}
+};
+
+/*
+* LAR quantization step-size.
+*/
+const double WebRtcIsac_kLpcShapeQStepSizeUb12 = 0.150000;
+
+/*
+* The smallest reconstruction points for quantization of LAR coefficients.
+*/
+const double WebRtcIsac_kLpcShapeLeftRecPointUb12
+[UB_LPC_ORDER*UB_LPC_VEC_PER_FRAME] =
+{
+  -0.900000, -1.050000, -1.350000, -1.800000, -1.350000, -1.650000,
+  -2.250000, -3.450000
+};
+
+/*
+* Number of reconstruction points of quantizers for LAR coefficients.
+*/
+const int16_t WebRtcIsac_kLpcShapeNumRecPointUb12
+[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME] =
+{
+  13, 15, 19, 27, 19, 24, 32, 48
+};
+
+/*
+* Starting index for entropy decoder to search for the right interval,
+* one entry per LAR coefficient
+*/
+const uint16_t WebRtcIsac_kLpcShapeEntropySearchUb12
+[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME] =
+{
+  6, 7, 9, 13, 9, 12, 16, 24
+};
+
+/*
+* The following 8 vectors define CDF of 8 decorrelated LAR
+* coefficients.
+*/ +const uint16_t WebRtcIsac_kLpcShapeCdfVec0Ub12[14] = +{ + 0, 13, 95, 418, 1687, 6498, 21317, 44200, 59029, 63849, 65147, + 65449, 65525, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec1Ub12[16] = +{ + 0, 10, 59, 255, 858, 2667, 8200, 22609, 42988, 57202, 62947, + 64743, 65308, 65476, 65522, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec2Ub12[20] = +{ + 0, 18, 40, 118, 332, 857, 2017, 4822, 11321, 24330, 41279, + 54342, 60637, 63394, 64659, 65184, 65398, 65482, 65518, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec3Ub12[28] = +{ + 0, 21, 38, 90, 196, 398, 770, 1400, 2589, 4650, 8211, + 14933, 26044, 39592, 50814, 57452, 60971, 62884, 63995, 64621, 65019, 65273, + 65410, 65480, 65514, 65522, 65531, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec4Ub12[20] = +{ + 0, 7, 46, 141, 403, 969, 2132, 4649, 10633, 24902, 43254, + 54665, 59928, 62674, 64173, 64938, 65293, 65464, 65523, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec5Ub12[25] = +{ + 0, 7, 22, 72, 174, 411, 854, 1737, 3545, 6774, 13165, + 25221, 40980, 52821, 58714, 61706, 63472, 64437, 64989, 65287, 65430, 65503, + 65525, 65529, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec6Ub12[33] = +{ + 0, 11, 21, 36, 65, 128, 228, 401, 707, 1241, 2126, + 3589, 6060, 10517, 18853, 31114, 42477, 49770, 54271, 57467, 59838, 61569, + 62831, 63772, 64433, 64833, 65123, 65306, 65419, 65466, 65499, 65519, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec7Ub12[49] = +{ + 0, 14, 34, 67, 107, 167, 245, 326, 449, 645, 861, + 1155, 1508, 2003, 2669, 3544, 4592, 5961, 7583, 9887, 13256, 18765, + 26519, 34077, 40034, 44349, 47795, 50663, 53262, 55473, 57458, 59122, 60592, + 61742, 62690, 63391, 63997, 64463, 64794, 65045, 65207, 65309, 65394, 65443, + 65478, 65504, 65514, 65523, 65535 +}; + +/* +* An array of pointers to CDFs of decorrelated LARs +*/ +const uint16_t* WebRtcIsac_kLpcShapeCdfMatUb12 +[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME] = +{ + WebRtcIsac_kLpcShapeCdfVec0Ub12, WebRtcIsac_kLpcShapeCdfVec1Ub12, + WebRtcIsac_kLpcShapeCdfVec2Ub12, WebRtcIsac_kLpcShapeCdfVec3Ub12, + WebRtcIsac_kLpcShapeCdfVec4Ub12, WebRtcIsac_kLpcShapeCdfVec5Ub12, + WebRtcIsac_kLpcShapeCdfVec6Ub12, WebRtcIsac_kLpcShapeCdfVec7Ub12 +}; diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.h b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.h new file mode 100644 index 0000000..256f1d4 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb12_tables.h @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * lpc_shape_swb12_tables.h + * + * This file declares tables used for entropy coding of LPC shape of + * upper-band signal if the bandwidth is 12 kHz. 
+ * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_SHAPE_SWB12_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_SHAPE_SWB12_TABLES_H_ + +#include "settings.h" +#include "webrtc/typedefs.h" + +extern const double WebRtcIsac_kMeanLarUb12[UB_LPC_ORDER]; + +extern const double WebRtcIsac_kMeanLpcGain; + +extern const double WebRtcIsac_kIntraVecDecorrMatUb12[UB_LPC_ORDER][UB_LPC_ORDER]; + +extern const double WebRtcIsac_kInterVecDecorrMatUb12 +[UB_LPC_VEC_PER_FRAME][UB_LPC_VEC_PER_FRAME]; + +extern const double WebRtcIsac_kLpcShapeQStepSizeUb12; + +extern const double WebRtcIsac_kLpcShapeLeftRecPointUb12 +[UB_LPC_ORDER*UB_LPC_VEC_PER_FRAME]; + + +extern const int16_t WebRtcIsac_kLpcShapeNumRecPointUb12 +[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME]; + +extern const uint16_t WebRtcIsac_kLpcShapeEntropySearchUb12 +[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec0Ub12[14]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec1Ub12[16]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec2Ub12[20]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec3Ub12[28]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec4Ub12[20]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec5Ub12[25]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec6Ub12[33]; + +extern const uint16_t WebRtcIsac_kLpcShapeCdfVec7Ub12[49]; + +extern const uint16_t* WebRtcIsac_kLpcShapeCdfMatUb12 +[UB_LPC_ORDER * UB_LPC_VEC_PER_FRAME]; + +#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_SHAPE_SWB12_TABLES_H_ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb16_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb16_tables.c new file mode 100644 index 0000000..6176d2c --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb16_tables.c @@ -0,0 +1,248 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * SWB16_KLT_Tables.c + * + * This file defines tables used for entropy coding of LPC shape of + * upper-band signal if the bandwidth is 16 kHz. + * + */ + +#include "lpc_shape_swb16_tables.h" +#include "settings.h" +#include "webrtc/typedefs.h" + +/* +* Mean value of LAR +*/ +const double WebRtcIsac_kMeanLarUb16[UB_LPC_ORDER] = +{ +0.454978, 0.364747, 0.102999, 0.104523 +}; + +/* +* A rotation matrix to decorrelate intra-vector correlation, +* i.e. correlation among components of LAR vector. +*/ +const double WebRtcIsac_kIintraVecDecorrMatUb16[UB_LPC_ORDER][UB_LPC_ORDER] = +{ + {-0.020528, -0.085858, -0.002431, 0.996093}, + {-0.033155, 0.036102, 0.998786, 0.004866}, + { 0.202627, 0.974853, -0.028940, 0.088132}, + {-0.978479, 0.202454, -0.039785, -0.002811} +}; + +/* +* A rotation matrix to remove correlation among LAR coefficients +* of different LAR vectors. One might guess that decorrelation matrix +* for the first component should differ from the second component +* but we haven't observed a significant benefit of having different +* decorrelation matrices for different components. 
+*/ +const double WebRtcIsac_kInterVecDecorrMatUb16 +[UB16_LPC_VEC_PER_FRAME][UB16_LPC_VEC_PER_FRAME] = +{ + { 0.291675, -0.515786, 0.644927, 0.482658}, + {-0.647220, 0.479712, 0.289556, 0.516856}, + { 0.643084, 0.485489, -0.289307, 0.516763}, + {-0.287185, -0.517823, -0.645389, 0.482553} +}; + +/* +* The following 16 vectors define CDF of 16 decorrelated LAR +* coefficients. +*/ +const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub16[14] = +{ + 0, 2, 20, 159, 1034, 5688, 20892, 44653, + 59849, 64485, 65383, 65518, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec1Ub16[16] = +{ + 0, 1, 7, 43, 276, 1496, 6681, 21653, + 43891, 58859, 64022, 65248, 65489, 65529, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec2Ub16[18] = +{ + 0, 1, 9, 54, 238, 933, 3192, 9461, + 23226, 42146, 56138, 62413, 64623, 65300, 65473, 65521, + 65533, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec3Ub16[30] = +{ + 0, 2, 4, 8, 17, 36, 75, 155, + 329, 683, 1376, 2662, 5047, 9508, 17526, 29027, + 40363, 48997, 55096, 59180, 61789, 63407, 64400, 64967, + 65273, 65429, 65497, 65526, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec4Ub16[16] = +{ + 0, 1, 10, 63, 361, 1785, 7407, 22242, + 43337, 58125, 63729, 65181, 65472, 65527, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec5Ub16[17] = +{ + 0, 1, 7, 29, 134, 599, 2443, 8590, + 22962, 42635, 56911, 63060, 64940, 65408, 65513, 65531, + 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec6Ub16[21] = +{ + 0, 1, 5, 16, 57, 191, 611, 1808, + 4847, 11755, 24612, 40910, 53789, 60698, 63729, 64924, + 65346, 65486, 65523, 65532, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec7Ub16[36] = +{ + 0, 1, 4, 12, 25, 55, 104, 184, + 314, 539, 926, 1550, 2479, 3861, 5892, 8845, + 13281, 20018, 29019, 38029, 45581, 51557, 56057, 59284, + 61517, 63047, 64030, 64648, 65031, 65261, 65402, 65480, + 65518, 65530, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec8Ub16[21] = +{ + 0, 1, 2, 7, 26, 103, 351, 1149, + 3583, 10204, 23846, 41711, 55361, 61917, 64382, 65186, + 65433, 65506, 65528, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub160[21] = +{ + 0, 6, 19, 63, 205, 638, 1799, 4784, + 11721, 24494, 40803, 53805, 60886, 63822, 64931, 65333, + 65472, 65517, 65530, 65533, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub161[28] = +{ + 0, 1, 3, 11, 31, 86, 221, 506, + 1101, 2296, 4486, 8477, 15356, 26079, 38941, 49952, + 57165, 61257, 63426, 64549, 65097, 65351, 65463, 65510, + 65526, 65532, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub162[55] = +{ + 0, 3, 12, 23, 42, 65, 89, 115, + 150, 195, 248, 327, 430, 580, 784, 1099, + 1586, 2358, 3651, 5899, 9568, 14312, 19158, 23776, + 28267, 32663, 36991, 41153, 45098, 48680, 51870, 54729, + 57141, 59158, 60772, 62029, 63000, 63761, 64322, 64728, + 65000, 65192, 65321, 65411, 65463, 65496, 65514, 65523, + 65527, 65529, 65531, 65532, 65533, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub163[26] = +{ + 0, 2, 4, 10, 21, 48, 114, 280, + 701, 1765, 4555, 11270, 24267, 41213, 54285, 61003, + 63767, 64840, 65254, 65421, 65489, 65514, 65526, 65532, + 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub164[28] = +{ + 0, 1, 3, 6, 15, 36, 82, 196, + 453, 1087, 2557, 5923, 13016, 25366, 40449, 52582, + 59539, 62896, 64389, 65033, 65316, 65442, 65494, 65519, + 65529, 65533, 65534, 65535 +}; + +const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub165[34] = +{ + 0, 2, 4, 8, 18, 35, 73, 146, + 279, 524, 980, 1789, 3235, 5784, 10040, 
16998,
+  27070, 38543, 48499, 55421, 59712, 62257, 63748, 64591,
+  65041, 65278, 65410, 65474, 65508, 65522, 65530, 65533,
+  65534, 65535
+};
+
+const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub166[71] =
+{
+  0, 1, 2, 6, 13, 26, 55, 92,
+  141, 191, 242, 296, 355, 429, 522, 636,
+  777, 947, 1162, 1428, 1753, 2137, 2605, 3140,
+  3743, 4409, 5164, 6016, 6982, 8118, 9451, 10993,
+  12754, 14810, 17130, 19780, 22864, 26424, 30547, 35222,
+  40140, 44716, 48698, 52056, 54850, 57162, 59068, 60643,
+  61877, 62827, 63561, 64113, 64519, 64807, 65019, 65167,
+  65272, 65343, 65399, 65440, 65471, 65487, 65500, 65509,
+  65518, 65524, 65527, 65531, 65533, 65534, 65535
+};
+
+/*
+* An array of pointers to CDFs of decorrelated LARs
+*/
+const uint16_t* WebRtcIsac_kLpcShapeCdfMatUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME] = {
+  WebRtcIsac_kLpcShapeCdfVec01Ub16,
+  WebRtcIsac_kLpcShapeCdfVec1Ub16,
+  WebRtcIsac_kLpcShapeCdfVec2Ub16,
+  WebRtcIsac_kLpcShapeCdfVec3Ub16,
+  WebRtcIsac_kLpcShapeCdfVec4Ub16,
+  WebRtcIsac_kLpcShapeCdfVec5Ub16,
+  WebRtcIsac_kLpcShapeCdfVec6Ub16,
+  WebRtcIsac_kLpcShapeCdfVec7Ub16,
+  WebRtcIsac_kLpcShapeCdfVec8Ub16,
+  WebRtcIsac_kLpcShapeCdfVec01Ub160,
+  WebRtcIsac_kLpcShapeCdfVec01Ub161,
+  WebRtcIsac_kLpcShapeCdfVec01Ub162,
+  WebRtcIsac_kLpcShapeCdfVec01Ub163,
+  WebRtcIsac_kLpcShapeCdfVec01Ub164,
+  WebRtcIsac_kLpcShapeCdfVec01Ub165,
+  WebRtcIsac_kLpcShapeCdfVec01Ub166
+};
+
+/*
+* The smallest reconstruction points for quantization of LAR coefficients.
+*/
+const double WebRtcIsac_kLpcShapeLeftRecPointUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME] =
+{
+  -0.8250, -0.9750, -1.1250, -2.1750, -0.9750, -1.1250, -1.4250,
+  -2.6250, -1.4250, -1.2750, -1.8750, -3.6750, -1.7250, -1.8750,
+  -2.3250, -5.4750
+};
+
+/*
+* Number of reconstruction points of quantizers for LAR coefficients.
+*/
+const int16_t WebRtcIsac_kLpcShapeNumRecPointUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME] =
+{
+  13, 15, 17, 29, 15, 16, 20, 35, 20,
+  20, 27, 54, 25, 27, 33, 70
+};
+
+/*
+* Starting index for entropy decoder to search for the right interval,
+* one entry per LAR coefficient
+*/
+const uint16_t WebRtcIsac_kLpcShapeEntropySearchUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME] =
+{
+  6, 7, 8, 14, 7, 8, 10, 17, 10,
+  10, 13, 27, 12, 13, 16, 35
+};
+
+/*
+* LAR quantization step-size.
+*/
+const double WebRtcIsac_kLpcShapeQStepSizeUb16 = 0.150000;
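
The three tables just defined (leftmost reconstruction point, cell count, and the shared 0.15 step size) fully specify a uniform scalar quantizer per decorrelated LAR coefficient; for coefficient 0, for instance, the 13 levels run from -0.8250 up to -0.8250 + 12 * 0.15 = 0.9750. Below is a minimal sketch of that quantizer, assuming <math.h> is available; QuantizeLar is a hypothetical helper, not a function from the codec.

    /* Uniform scalar quantizer implied by the tables above (sketch only).
     * Reconstruction levels sit at left + k * step, k = 0 .. num_cells-1. */
    static int QuantizeLar(double lar, double left, double step,
                           int16_t num_cells, double* rec) {
      int k = (int)floor((lar - left) / step + 0.5);  /* nearest level */
      if (k < 0) k = 0;                    /* clamp into the codebook */
      if (k > num_cells - 1) k = num_cells - 1;
      *rec = left + k * step;              /* reconstructed coefficient */
      return k;  /* index subsequently entropy coded via the CDF tables */
    }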
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb16_tables.h b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb16_tables.h
new file mode 100644
index 0000000..3e1bdf7
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_shape_swb16_tables.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * lpc_shape_swb16_tables.h
+ *
+ * This file declares tables used for entropy coding of LPC shape of
+ * upper-band signal if the bandwidth is 16 kHz.
+ *
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_SHAPE_SWB16_TABLES_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_SHAPE_SWB16_TABLES_H_
+
+#include "settings.h"
+#include "webrtc/typedefs.h"
+
+extern const double WebRtcIsac_kMeanLarUb16[UB_LPC_ORDER];
+
+extern const double WebRtcIsac_kIintraVecDecorrMatUb16[UB_LPC_ORDER][UB_LPC_ORDER];
+
+extern const double WebRtcIsac_kInterVecDecorrMatUb16
+[UB16_LPC_VEC_PER_FRAME][UB16_LPC_VEC_PER_FRAME];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub16[14];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec1Ub16[16];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec2Ub16[18];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec3Ub16[30];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec4Ub16[16];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec5Ub16[17];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec6Ub16[21];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec7Ub16[36];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec8Ub16[21];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub160[21];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub161[28];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub162[55];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub163[26];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub164[28];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub165[34];
+
+extern const uint16_t WebRtcIsac_kLpcShapeCdfVec01Ub166[71];
+
+extern const uint16_t* WebRtcIsac_kLpcShapeCdfMatUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+
+extern const double WebRtcIsac_kLpcShapeLeftRecPointUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+
+extern const int16_t WebRtcIsac_kLpcShapeNumRecPointUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+
+extern const uint16_t WebRtcIsac_kLpcShapeEntropySearchUb16
+[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+
+extern const double WebRtcIsac_kLpcShapeQStepSizeUb16;
+
+#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_SHAPE_SWB16_TABLES_H_
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_tables.c
new file mode 100644
index 0000000..909809b
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_tables.c
@@ -0,0 +1,601 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */ + +/* coding tables for the KLT coefficients */ + +#include "lpc_tables.h" +#include "settings.h" + +/* cdf array for model indicator */ +const uint16_t WebRtcIsac_kQKltModelCdf[4] = { + 0, 15434, 37548, 65535 }; + +/* pointer to cdf array for model indicator */ +const uint16_t *WebRtcIsac_kQKltModelCdfPtr[1] = { + WebRtcIsac_kQKltModelCdf }; + +/* initial cdf index for decoder of model indicator */ +const uint16_t WebRtcIsac_kQKltModelInitIndex[1] = { 1 }; + +/* offset to go from rounded value to quantization index */ +const short WebRtcIsac_kQKltQuantMinGain[12] = { + 3, 6, 4, 6, 6, 9, 5, 16, 11, 34, 32, 47 }; + + +const short WebRtcIsac_kQKltQuantMinShape[108] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 2, 2, 2, 3, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 1, 2, 2, 3, 0, 0, 0, 0, + 1, 0, 1, 1, 1, 1, 1, 1, 1, 2, + 2, 4, 3, 5, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 2, 1, 2, 2, 3, 4, + 4, 7, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 2, 3, 2, 3, 4, 4, 5, 7, 13, + 0, 1, 1, 2, 3, 2, 2, 2, 4, 4, + 5, 6, 7, 11, 9, 13, 12, 26 }; + +/* maximum quantization index */ +const uint16_t WebRtcIsac_kQKltMaxIndGain[12] = { + 6, 12, 8, 14, 10, 19, 12, 31, 22, 56, 52, 138 }; + +const uint16_t WebRtcIsac_kQKltMaxIndShape[108] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 2, 2, 2, 2, 4, 4, 5, 6, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 1, 2, 2, + 2, 2, 3, 4, 5, 7, 0, 0, 0, 0, + 2, 0, 2, 2, 2, 2, 3, 2, 2, 4, + 4, 6, 6, 9, 0, 0, 0, 0, 2, 2, + 2, 2, 2, 2, 3, 2, 4, 4, 7, 7, + 9, 13, 0, 0, 2, 2, 2, 2, 2, 2, + 3, 4, 5, 4, 6, 8, 8, 10, 16, 25, + 0, 2, 2, 4, 5, 4, 4, 4, 7, 8, + 9, 10, 13, 19, 17, 23, 25, 49 }; + +/* index offset */ +const uint16_t WebRtcIsac_kQKltOffsetGain[12] = { + 0, 7, 20, 29, 44, 55, 75, 88, 120, 143, 200, 253 }; + +const uint16_t WebRtcIsac_kQKltOffsetShape[108] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, + 11, 14, 17, 20, 23, 28, 33, 39, 46, 47, + 48, 49, 50, 52, 53, 54, 55, 56, 58, 61, + 64, 67, 70, 74, 79, 85, 93, 94, 95, 96, + 97, 100, 101, 104, 107, 110, 113, 117, 120, 123, + 128, 133, 140, 147, 157, 158, 159, 160, 161, 164, + 167, 170, 173, 176, 179, 183, 186, 191, 196, 204, + 212, 222, 236, 237, 238, 241, 244, 247, 250, 253, + 256, 260, 265, 271, 276, 283, 292, 301, 312, 329, + 355, 356, 359, 362, 367, 373, 378, 383, 388, 396, + 405, 415, 426, 440, 460, 478, 502, 528 }; + +/* initial cdf index for KLT coefficients */ +const uint16_t WebRtcIsac_kQKltInitIndexGain[12] = { + 3, 6, 4, 7, 5, 10, 6, 16, 11, 28, 26, 69}; + +const uint16_t WebRtcIsac_kQKltInitIndexShape[108] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, + 1, 1, 1, 1, 2, 2, 3, 3, 0, 0, + 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, + 1, 1, 2, 2, 3, 4, 0, 0, 0, 0, + 1, 0, 1, 1, 1, 1, 2, 1, 1, 2, + 2, 3, 3, 5, 0, 0, 0, 0, 1, 1, + 1, 1, 1, 1, 2, 1, 2, 2, 4, 4, + 5, 7, 0, 0, 1, 1, 1, 1, 1, 1, + 2, 2, 3, 2, 3, 4, 4, 5, 8, 13, + 0, 1, 1, 2, 3, 2, 2, 2, 4, 4, + 5, 5, 7, 10, 9, 12, 13, 25 }; + + +/* quantizer representation levels */ +const double WebRtcIsac_kQKltLevelsGain[392] = { + -2.78127126, -1.76745590, -0.77913790, -0.00437329, 0.79961206, + 1.81775776, 2.81389782, -5.78753143, -4.88384084, -3.89320940, + -2.88133610, -1.92859977, -0.86347396, 0.02003888, 0.86140400, + 1.89667156, 2.97134967, 3.98781964, 4.91727277, 5.82865898, + -4.11195874, -2.80898424, -1.87547977, -0.80943825, -0.00679084, + 0.79573851, 1.83953397, 2.67586037, 3.76274082, -6.10933968, + -4.93034581, -3.89281296, -2.91530625, -1.89684163, -0.85319130, + -0.02275767, 0.86862017, 1.91578276, 2.96107339, 3.96543056, + 4.91369908, 5.91058154, 6.83848343, 8.07136925, -5.87470395, + -4.84703049, -3.84284597, -2.86168446, 
-1.89290192, -0.82798145, + -0.00080013, 0.82594974, 1.85754329, 2.88351798, 3.96172628, + -8.85684885, -7.87387461, -6.97811862, -5.93256270, -4.94301439, + -3.95513701, -2.96041544, -1.94031192, -0.87961478, -0.00456201, + 0.89911505, 1.91723376, 2.94011511, 3.93302540, 4.97990967, + 5.93133404, 7.02181199, 7.92407762, 8.80155440, 10.04665814, + -4.82396678, -3.85612158, -2.89482244, -1.89558408, -0.90036978, + -0.00677823, 0.90607989, 1.90937981, 2.91175777, 3.91637730, + 4.97565723, 5.84771228, 7.11145863, -16.07879840, -15.03776309, + -13.93905670, -12.95671800, -11.89171202, -10.95820934, -9.95923714, + -8.94357334, -7.99068299, -6.97481009, -5.94826231, -4.96673988, + -3.97490466, -2.97846970, -1.95130435, -0.94215262, -0.01444043, + 0.96770704, 1.95848598, 2.94107862, 3.95666119, 4.97253085, + 5.97191122, 6.93277360, 7.96608727, 8.87958779, 10.00264269, + 10.86560820, 12.07449071, 13.04491775, 13.97507061, 14.91845261, + -10.85696295, -9.83365357, -9.01245635, -7.95915145, -6.95625003, + -5.95362618, -4.93468444, -3.98760978, -2.95044407, -1.97041277, + -0.97701799, -0.00840234, 0.97834289, 1.98361415, 2.97802439, + 3.96415871, 4.95369042, 5.94101770, 6.92756798, 7.94063998, + 8.85951828, 9.97077022, 11.00068503, -33.92030406, -32.81426422, + -32.00000000, -31.13243639, -30.11886909, -29.06017570, -28.12598824, + -27.22045482, -25.81215858, -25.07849962, -23.93018013, -23.02097643, + -21.89529725, -20.99091085, -19.98889048, -18.94327044, -17.96562071, + -16.96126218, -15.95054062, -14.98516200, -13.97101012, -13.02106500, + -11.98438006, -11.03216748, -9.95930286, -8.97043946, -7.98085082, + -6.98360995, -5.98998802, -4.98668173, -4.00032906, -3.00420619, + -1.98701132, -0.99324682, -0.00609324, 0.98297834, 1.99483076, + 3.00305044, 3.97142097, 4.97525759, 5.98612258, 6.97448236, + 7.97575900, 9.01086211, 9.98665542, 11.00541438, 11.98078628, + 12.92352471, 14.06849675, 14.99949430, 15.94904834, 16.97440321, + 18.04040916, 18.88987609, 20.05312391, 21.00000000, 21.79443341, + -31.98578825, -31.00000000, -29.89060567, -28.98555686, -27.97114102, + -26.84935410, -26.02402230, -24.94195278, -23.92336849, -22.95552382, + -21.97932836, -20.96055470, -19.99649553, -19.03436122, -17.96706525, + -17.01139515, -16.01363516, -14.99154248, -14.00298333, -12.99630613, + -11.99955519, -10.99000421, -10.00819092, -8.99763648, -7.98431793, + -7.01769025, -5.99604690, -4.99980697, -3.99334671, -3.01748192, + -2.02051217, -1.00848371, -0.01942358, 1.00477757, 1.95477872, + 2.98593031, 3.98779079, 4.96862849, 6.02694771, 6.93983733, + 7.89874717, 8.99615862, 10.02367921, 10.96293452, 11.84351528, + 12.92207187, 13.85122329, 15.05146877, 15.99371264, 17.00000000, + 18.00000000, 19.00000000, 19.82763573, -47.00000000, -46.00000000, + -44.87138498, -44.00000000, -43.00000000, -42.00000000, -41.00000000, + -39.88966612, -38.98913239, -37.80306486, -37.23584325, -35.94200288, + -34.99881301, -34.11361858, -33.06507360, -32.13129135, -30.90891364, + -29.81511907, -28.99250380, -28.04535391, -26.99767800, -26.04418164, + -24.95687851, -24.04865595, -23.03392645, -21.89366707, -20.93517364, + -19.99388660, -18.91620943, -18.03749683, -16.99532379, -15.98683813, + -15.06421479, -13.99359211, -12.99714098, -11.97022520, -10.98500279, + -9.98834422, -8.95729330, -8.01232284, -7.00253661, -5.99681626, + -5.01207817, -3.95914904, -3.01232178, -1.96615919, -0.97687670, + 0.01228030, 0.98412288, 2.01753544, 3.00580570, 3.97783510, + 4.98846894, 6.01321400, 7.00867732, 8.00416375, 9.01771966, + 9.98637729, 
10.98255180, 11.99194163, 13.01807333, 14.00999545, + 15.00118556, 16.00089224, 17.00584148, 17.98251763, 18.99942091, + 19.96917690, 20.97839265, 21.98207297, 23.00171271, 23.99930737, + 24.99746061, 26.00936304, 26.98240132, 28.01126868, 29.01395915, + 29.98153507, 31.01376711, 31.99876818, 33.00475317, 33.99753994, + 34.99493913, 35.98933585, 36.95620160, 37.98428461, 38.99317544, + 40.01832073, 40.98048133, 41.95999283, 42.98232091, 43.96523612, + 44.99574268, 45.99524194, 47.05464025, 48.03821548, 48.99354366, + 49.96400411, 50.98017973, 51.95184408, 52.96291806, 54.00194392, + 54.96603783, 55.95623778, 57.03076595, 58.05889901, 58.99081551, + 59.97928121, 61.05071612, 62.03971580, 63.01286038, 64.01290338, + 65.02074503, 65.99454594, 67.00399425, 67.96571257, 68.95305727, + 69.92030664, 70.95594862, 71.98088567, 73.04764124, 74.00285480, + 75.02696330, 75.89837673, 76.93459997, 78.16266309, 78.83317543, + 80.00000000, 80.87251574, 82.09803524, 83.10671664, 84.00000000, + 84.77023523, 86.00000000, 87.00000000, 87.92946897, 88.69159118, + 90.00000000, 90.90535270 }; + +const double WebRtcIsac_kQKltLevelsShape[578] = { + 0.00032397, 0.00008053, -0.00061202, -0.00012620, 0.00030437, + 0.00054764, -0.00027902, 0.00069360, 0.00029449, -0.80219239, + 0.00091089, -0.74514927, -0.00094283, 0.64030631, -0.60509119, + 0.00035575, 0.61851665, -0.62129957, 0.00375219, 0.60054900, + -0.61554359, 0.00054977, 0.63362016, -1.73118727, -0.65422341, + 0.00524568, 0.66165298, 1.76785515, -1.83182018, -0.65997434, + -0.00011887, 0.67524299, 1.79933938, -1.76344480, -0.72547708, + -0.00133017, 0.73104704, 1.75305377, 2.85164534, -2.80423916, + -1.71959639, -0.75419722, -0.00329945, 0.77196760, 1.72211069, + 2.87339653, 0.00031089, -0.00015311, 0.00018201, -0.00035035, + -0.77357251, 0.00154647, -0.00047625, -0.00045299, 0.00086590, + 0.00044762, -0.83383829, 0.00024787, -0.68526258, -0.00122472, + 0.64643255, -0.60904942, -0.00448987, 0.62309184, -0.59626442, + -0.00574132, 0.62296546, -0.63222115, 0.00013441, 0.63609545, + -0.66911055, -0.00369971, 0.66346095, 2.07281301, -1.77184694, + -0.67640425, -0.00010145, 0.64818392, 1.74948973, -1.69420224, + -0.71943894, -0.00004680, 0.75303493, 1.81075983, 2.80610041, + -2.80005755, -1.79866753, -0.77409777, -0.00084220, 0.80141293, + 1.78291081, 2.73954236, 3.82994169, 0.00015140, -0.00012766, + -0.00034241, -0.00119125, -0.76113497, 0.00069246, 0.76722027, + 0.00132862, -0.69107530, 0.00010656, 0.77061578, -0.78012970, + 0.00095947, 0.77828502, -0.64787758, 0.00217168, 0.63050167, + -0.58601125, 0.00306596, 0.59466308, -0.58603410, 0.00059779, + 0.64257970, 1.76512766, -0.61193600, -0.00259517, 0.59767574, + -0.61026273, 0.00315811, 0.61725479, -1.69169719, -0.65816029, + 0.00067575, 0.65576890, 2.00000000, -1.72689193, -0.69780808, + -0.00040990, 0.70668487, 1.74198458, -3.79028154, -3.00000000, + -1.73194459, -0.70179341, -0.00106695, 0.71302629, 1.76849782, + -2.89332364, -1.78585007, -0.78731491, -0.00132610, 0.79692976, + 1.75247009, 2.97828682, -5.26238694, -3.69559829, -2.87286122, + -1.84908818, -0.84434577, -0.01167975, 0.84641753, 1.84087672, + 2.87628156, 3.83556679, -0.00190204, 0.00092642, 0.00354385, + -0.00012982, -0.67742785, 0.00229509, 0.64935672, -0.58444751, + 0.00470733, 0.57299534, -0.58456202, -0.00097715, 0.64593607, + -0.64060330, -0.00638534, 0.59680157, -0.59287537, 0.00490772, + 0.58919707, -0.60306173, -0.00417464, 0.60562100, -1.75218757, + -0.63018569, -0.00225922, 0.63863300, -0.63949939, -0.00126421, + 0.64268914, -1.75851182, 
-0.68318060, 0.00510418, 0.69049211, + 1.88178506, -1.71136148, -0.72710534, -0.00815559, 0.73412917, + 1.79996711, -2.77111145, -1.73940498, -0.78212945, 0.01074476, + 0.77688916, 1.76873972, 2.87281379, 3.77554698, -3.75832725, + -2.95463235, -1.80451491, -0.80017226, 0.00149902, 0.80729206, + 1.78265046, 2.89391793, -3.78236148, -2.83640598, -1.82532067, + -0.88844327, -0.00620952, 0.88208030, 1.85757631, 2.81712391, + 3.88430176, 5.16179367, -7.00000000, -5.93805408, -4.87172597, + -3.87524433, -2.89399744, -1.92359563, -0.92136341, -0.00172725, + 0.93087018, 1.90528280, 2.89809686, 3.88085708, 4.89147740, + 5.89078692, -0.00239502, 0.00312564, -1.00000000, 0.00178325, + 1.00000000, -0.62198029, 0.00143254, 0.65344051, -0.59851220, + -0.00676987, 0.61510140, -0.58894151, 0.00385055, 0.59794203, + -0.59808568, -0.00038214, 0.57625703, -0.63009713, -0.01107985, + 0.61278758, -0.64206758, -0.00154369, 0.65480598, 1.80604162, + -1.80909286, -0.67810514, 0.00205762, 0.68571097, 1.79453891, + -3.22682422, -1.73808453, -0.71870305, -0.00738594, 0.71486172, + 1.73005326, -1.66891897, -0.73689615, -0.00616203, 0.74262409, + 1.73807899, -2.92417482, -1.73866741, -0.78133871, 0.00764425, + 0.80027264, 1.78668732, 2.74992588, -4.00000000, -2.75578740, + -1.83697516, -0.83117035, -0.00355191, 0.83527172, 1.82814700, + 2.77377675, 3.80718693, -3.81667698, -2.83575471, -1.83372350, + -0.86579471, 0.00547578, 0.87582281, 1.82858793, 2.87265007, + 3.91405377, -4.87521600, -3.78999094, -2.86437014, -1.86964365, + -0.90618018, 0.00128243, 0.91497811, 1.87374952, 2.83199819, + 3.91519130, 4.76632822, -6.68713448, -6.01252467, -4.94587936, + -3.88795368, -2.91299088, -1.92592211, -0.95504570, -0.00089980, + 0.94565200, 1.93239633, 2.91832808, 3.91363475, 4.88920034, + 5.96471415, 6.83905252, 7.86195009, 8.81571018,-12.96141759, + -11.73039516,-10.96459719, -9.97382433, -9.04414433, -7.89460619, + -6.96628608, -5.93236595, -4.93337924, -3.95479990, -2.96451499, + -1.96635876, -0.97271229, -0.00402238, 0.98343930, 1.98348291, + 2.96641164, 3.95456471, 4.95517089, 5.98975714, 6.90322073, + 7.90468849, 8.85639467, 9.97255498, 10.79006309, 11.81988596, + 0.04950500, -1.00000000, -0.01226628, 1.00000000, -0.59479469, + -0.10438305, 0.59822144, -2.00000000, -0.67109149, -0.09256692, + 0.65171621, 2.00000000, -3.00000000, -1.68391999, -0.76681039, + -0.03354151, 0.71509146, 1.77615472, -2.00000000, -0.68661511, + -0.02497881, 0.66478398, 2.00000000, -2.00000000, -0.67032784, + -0.00920582, 0.64892756, 2.00000000, -2.00000000, -0.68561894, + 0.03641869, 0.73021611, 1.68293863, -4.00000000, -2.72024184, + -1.80096059, -0.81696185, 0.03604685, 0.79232033, 1.70070730, + 3.00000000, -4.00000000, -2.71795670, -1.80482986, -0.86001162, + 0.03764903, 0.87723968, 1.79970771, 2.72685932, 3.67589143, + -5.00000000, -4.00000000, -2.85492548, -1.78996365, -0.83250358, + -0.01376828, 0.84195506, 1.78161105, 2.76754458, 4.00000000, + -6.00000000, -5.00000000, -3.82268811, -2.77563624, -1.82608163, + -0.86486114, -0.02671886, 0.86693165, 1.88422879, 2.86248347, + 3.95632216, -7.00000000, -6.00000000, -5.00000000, -3.77533988, + -2.86391432, -1.87052039, -0.90513658, 0.06271236, 0.91083620, + 1.85734756, 2.86031688, 3.82019418, 4.94420394, 6.00000000, + -11.00000000,-10.00000000, -9.00000000, -8.00000000, -6.91952415, + -6.00000000, -4.92044374, -3.87845165, -2.87392362, -1.88413020, + -0.91915740, 0.00318517, 0.91602800, 1.89664838, 2.88925058, + 3.84123856, 4.78988651, 5.94526812, 6.81953917, 8.00000000, + -9.00000000, 
-8.00000000, -7.03319143, -5.94530963, -4.86669720, + -3.92438007, -2.88620396, -1.92848070, -0.94365985, 0.01671855, + 0.97349410, 1.93419878, 2.89740109, 3.89662823, 4.83235583, + 5.88106535, 6.80328232, 8.00000000,-13.00000000,-12.00000000, + -11.00000000,-10.00000000, -9.00000000, -7.86033489, -6.83344055, + -5.89844215, -4.90811454, -3.94841298, -2.95820490, -1.98627966, + -0.99161468, -0.02286136, 0.96055651, 1.95052433, 2.93969396, + 3.94304346, 4.88522624, 5.87434241, 6.78309433, 7.87244101, + 9.00000000, 10.00000000,-12.09117356,-11.00000000,-10.00000000, + -8.84766108, -7.86934236, -6.98544896, -5.94233429, -4.95583292, + -3.95575986, -2.97085529, -1.98955811, -0.99359873, -0.00485413, + 0.98298870, 1.98093258, 2.96430203, 3.95540216, 4.96915010, + 5.96775124, 6.99236918, 7.96503302, 8.99864542, 9.85857723, + 10.96541926, 11.91647197, 12.71060069,-26.00000000,-25.00000000, + -24.00585596,-23.11642573,-22.14271284,-20.89800711,-19.87815799, + -19.05036354,-17.88555651,-16.86471209,-15.97711073,-14.94012359, + -14.02661226,-12.98243228,-11.97489256,-10.97402777, -9.96425624, + -9.01085220, -7.97372506, -6.98795002, -5.97271328, -5.00191694, + -3.98055849, -2.98458048, -1.99470442, -0.99656768, -0.00825666, + 1.00272004, 1.99922218, 2.99357669, 4.01407905, 5.01003897, + 5.98115528, 7.00018958, 8.00338125, 8.98981046, 9.98990318, + 10.96341479, 11.96866930, 12.99175139, 13.94580443, 14.95745083, + 15.98992869, 16.97484646, 17.99630043, 18.93396897, 19.88347741, + 20.96532482, 21.92191032, 23.22314702 }; + + +/* cdf tables for quantizer indices */ +const uint16_t WebRtcIsac_kQKltCdfGain[404] = { + 0, 13, 301, 3730, 61784, 65167, 65489, 65535, 0, 17, + 142, 314, 929, 2466, 7678, 56450, 63463, 64740, 65204, 65426, + 65527, 65535, 0, 8, 100, 724, 6301, 60105, 65125, 65510, + 65531, 65535, 0, 13, 117, 368, 1068, 3010, 11928, 53603, + 61177, 63404, 64505, 65108, 65422, 65502, 65531, 65535, 0, 4, + 17, 96, 410, 1859, 12125, 54361, 64103, 65305, 65497, 65535, + 0, 4, 88, 230, 469, 950, 1746, 3228, 6092, 16592, + 44756, 56848, 61256, 63308, 64325, 64920, 65309, 65460, 65502, + 65522, 65535, 0, 88, 352, 1675, 6339, 20749, 46686, 59284, 63525, + 64949, 65359, 65502, 65527, 65535, 0, 13, 38, 63, 117, + 234, 381, 641, 929, 1407, 2043, 2809, 4032, 5753, 8792, + 14407, 24308, 38941, 48947, 55403, 59293, 61411, 62688, 63630, + 64329, 64840, 65188, 65376, 65472, 65506, 65527, 65531, 65535, + 0, 8, 29, 75, 222, 615, 1327, 2801, 5623, 9931, 16094, 24966, + 34419, 43458, 50676, 56186, 60055, 62500, 63936, 64765, 65225, + 65435, 65514, 65535, 0, 8, 13, 15, 17, 21, 33, 59, + 71, 92, 151, 243, 360, 456, 674, 934, 1223, 1583, + 1989, 2504, 3031, 3617, 4354, 5154, 6163, 7411, 8780, 10747, + 12874, 15591, 18974, 23027, 27436, 32020, 36948, 41830, 46205, + 49797, 53042, 56094, 58418, 60360, 61763, 62818, 63559, 64103, + 64509, 64798, 65045, 65162, 65288, 65363, 65447, 65506, 65522, + 65531, 65533, 65535, 0, 4, 6, 25, 38, 71, 138, 264, 519, 808, + 1227, 1825, 2516, 3408, 4279, 5560, 7092, 9197, 11420, 14108, + 16947, 20300, 23926, 27459, 31164, 34827, 38575, 42178, 45540, + 48747, 51444, 54090, 56426, 58460, 60080, 61595, 62734, 63668, + 64275, 64673, 64936, 65112, 65217, 65334, 65426, 65464, 65477, + 65489, 65518, 65527, 65529, 65531, 65533, 65535, 0, 2, 4, 8, 10, + 12, 14, 16, 21, 33, 50, 71, 84, 92, 105, 138, 180, 255, 318, + 377, 435, 473, 511, 590, 682, 758, 913, 1097, 1256, 1449, 1671, + 1884, 2169, 2445, 2772, 3157, 3563, 3944, 4375, 4848, 5334, 5820, + 6448, 7101, 7716, 8378, 9102, 9956, 10752, 11648, 
12707, 13670, + 14758, 15910, 17187, 18472, 19627, 20649, 21951, 23169, 24283, + 25552, 26862, 28227, 29391, 30764, 31882, 33213, 34432, 35600, + 36910, 38116, 39464, 40729, 41872, 43144, 44371, 45514, 46762, + 47813, 48968, 50069, 51032, 51974, 52908, 53737, 54603, 55445, + 56282, 56990, 57572, 58191, 58840, 59410, 59887, 60264, 60607, + 60946, 61269, 61516, 61771, 61960, 62198, 62408, 62558, 62776, + 62985, 63207, 63408, 63546, 63739, 63906, 64070, 64237, 64371, + 64551, 64677, 64836, 64999, 65095, 65213, 65284, 65338, 65380, + 65426, 65447, 65472, 65485, 65487, 65489, 65502, 65510, 65512, + 65514, 65516, 65518, 65522, 65531, 65533, 65535 }; + + +const uint16_t WebRtcIsac_kQKltCdfShape[686] = { + 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, + 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, 4, + 65535, 0, 8, 65514, 65535, 0, 29, 65481, 65535, 0, + 121, 65439, 65535, 0, 239, 65284, 65535, 0, 8, 779, + 64999, 65527, 65535, 0, 8, 888, 64693, 65522, 65535, 0, + 29, 2604, 62843, 65497, 65531, 65535, 0, 25, 176, 4576, + 61164, 65275, 65527, 65535, 0, 65535, 0, 65535, 0, 65535, + 0, 65535, 0, 4, 65535, 0, 65535, 0, 65535, 0, + 65535, 0, 65535, 0, 4, 65535, 0, 33, 65502, 65535, + 0, 54, 65481, 65535, 0, 251, 65309, 65535, 0, 611, + 65074, 65535, 0, 1273, 64292, 65527, 65535, 0, 4, 1809, + 63940, 65518, 65535, 0, 88, 4392, 60603, 65426, 65531, 65535, + 0, 25, 419, 7046, 57756, 64961, 65514, 65531, 65535, 0, + 65535, 0, 65535, 0, 65535, 0, 65535, 0, 4, 65531, + 65535, 0, 65535, 0, 8, 65531, 65535, 0, 4, 65527, + 65535, 0, 17, 65510, 65535, 0, 42, 65481, 65535, 0, + 197, 65342, 65531, 65535, 0, 385, 65154, 65535, 0, 1005, + 64522, 65535, 0, 8, 1985, 63469, 65533, 65535, 0, 38, + 3119, 61884, 65514, 65535, 0, 4, 6, 67, 4961, 60804, + 65472, 65535, 0, 17, 565, 9182, 56538, 65087, 65514, 65535, + 0, 8, 63, 327, 2118, 14490, 52774, 63839, 65376, 65522, + 65535, 0, 65535, 0, 65535, 0, 65535, 0, 65535, 0, + 17, 65522, 65535, 0, 59, 65489, 65535, 0, 50, 65522, + 65535, 0, 54, 65489, 65535, 0, 310, 65179, 65535, 0, + 615, 64836, 65535, 0, 4, 1503, 63965, 65535, 0, 2780, + 63383, 65535, 0, 21, 3919, 61051, 65527, 65535, 0, 84, + 6674, 59929, 65435, 65535, 0, 4, 255, 7976, 55784, 65150, + 65518, 65531, 65535, 0, 4, 8, 582, 10726, 53465, 64949, + 65518, 65535, 0, 29, 339, 3006, 17555, 49517, 62956, 65200, + 65497, 65531, 65535, 0, 2, 33, 138, 565, 2324, 7670, + 22089, 45966, 58949, 63479, 64966, 65380, 65518, 65535, 0, 65535, + 0, 65535, 0, 2, 65533, 65535, 0, 46, 65514, 65535, + 0, 414, 65091, 65535, 0, 540, 64911, 65535, 0, 419, + 65162, 65535, 0, 976, 64790, 65535, 0, 2977, 62495, 65531, + 65535, 0, 4, 3852, 61034, 65527, 65535, 0, 4, 29, + 6021, 60243, 65468, 65535, 0, 84, 6711, 58066, 65418, 65535, + 0, 13, 281, 9550, 54917, 65125, 65506, 65535, 0, 2, + 63, 984, 12108, 52644, 64342, 65435, 65527, 65535, 0, 29, + 251, 2014, 14871, 47553, 62881, 65229, 65518, 65535, 0, 13, + 142, 749, 4220, 18497, 45200, 60913, 64823, 65426, 65527, 65535, + 0, 13, 71, 264, 1176, 3789, 10500, 24480, 43488, 56324, + 62315, 64493, 65242, 65464, 65514, 65522, 65531, 65535, 0, 4, + 13, 38, 109, 205, 448, 850, 1708, 3429, 6276, 11371, + 19221, 29734, 40955, 49391, 55411, 59460, 62102, 63793, 64656, + 65150, 65401, 65485, 65522, 65531, 65535, 0, 65535, 0, 2, 65533, + 65535, 0, 1160, 65476, 65535, 0, 2, 6640, 64763, 65533, + 65535, 0, 2, 38, 9923, 61009, 65527, 65535, 0, 2, + 4949, 63092, 65533, 65535, 0, 2, 3090, 63398, 65533, 65535, + 0, 2, 2520, 58744, 65510, 65535, 0, 2, 13, 544, + 8784, 51403, 65148, 65533, 65535, 0, 2, 25, 
1017, 10412, + 43550, 63651, 65489, 65527, 65535, 0, 2, 4, 29, 783, + 13377, 52462, 64524, 65495, 65533, 65535, 0, 2, 4, 6, + 100, 1817, 18451, 52590, 63559, 65376, 65531, 65535, 0, 2, + 4, 6, 46, 385, 2562, 11225, 37416, 60488, 65026, 65487, + 65529, 65533, 65535, 0, 2, 4, 6, 8, 10, 12, + 42, 222, 971, 5221, 19811, 45048, 60312, 64486, 65294, 65474, + 65525, 65529, 65533, 65535, 0, 2, 4, 8, 71, 167, + 666, 2533, 7875, 19622, 38082, 54359, 62108, 64633, 65290, 65495, + 65529, 65533, 65535, 0, 2, 4, 6, 8, 10, 13, + 109, 586, 1930, 4949, 11600, 22641, 36125, 48312, 56899, 61495, + 63927, 64932, 65389, 65489, 65518, 65531, 65533, 65535, 0, 4, + 6, 8, 67, 209, 712, 1838, 4195, 8432, 14432, 22834, + 31723, 40523, 48139, 53929, 57865, 60657, 62403, 63584, 64363, + 64907, 65167, 65372, 65472, 65514, 65535, 0, 2, 4, 13, 25, + 42, 46, 50, 75, 113, 147, 281, 448, 657, 909, + 1185, 1591, 1976, 2600, 3676, 5317, 7398, 9914, 12941, 16169, + 19477, 22885, 26464, 29851, 33360, 37228, 41139, 44802, 48654, + 52058, 55181, 57676, 59581, 61022, 62190, 63107, 63676, 64199, + 64547, 64924, 65158, 65313, 65430, 65481, 65518, 65535 }; + + +/* pointers to cdf tables for quantizer indices */ +const uint16_t *WebRtcIsac_kQKltCdfPtrGain[12] = { + WebRtcIsac_kQKltCdfGain +0 +0, WebRtcIsac_kQKltCdfGain +0 +8, + WebRtcIsac_kQKltCdfGain +0 +22, WebRtcIsac_kQKltCdfGain +0 +32, + WebRtcIsac_kQKltCdfGain +0 +48, WebRtcIsac_kQKltCdfGain +0 +60, + WebRtcIsac_kQKltCdfGain +0 +81, WebRtcIsac_kQKltCdfGain +0 +95, + WebRtcIsac_kQKltCdfGain +0 +128, WebRtcIsac_kQKltCdfGain +0 +152, + WebRtcIsac_kQKltCdfGain +0 +210, WebRtcIsac_kQKltCdfGain +0 +264 }; + +const uint16_t *WebRtcIsac_kQKltCdfPtrShape[108] = { + WebRtcIsac_kQKltCdfShape +0 +0, WebRtcIsac_kQKltCdfShape +0 +2, + WebRtcIsac_kQKltCdfShape +0 +4, WebRtcIsac_kQKltCdfShape +0 +6, + WebRtcIsac_kQKltCdfShape +0 +8, WebRtcIsac_kQKltCdfShape +0 +10, + WebRtcIsac_kQKltCdfShape +0 +12, WebRtcIsac_kQKltCdfShape +0 +14, + WebRtcIsac_kQKltCdfShape +0 +16, WebRtcIsac_kQKltCdfShape +0 +18, + WebRtcIsac_kQKltCdfShape +0 +21, WebRtcIsac_kQKltCdfShape +0 +25, + WebRtcIsac_kQKltCdfShape +0 +29, WebRtcIsac_kQKltCdfShape +0 +33, + WebRtcIsac_kQKltCdfShape +0 +37, WebRtcIsac_kQKltCdfShape +0 +43, + WebRtcIsac_kQKltCdfShape +0 +49, WebRtcIsac_kQKltCdfShape +0 +56, + WebRtcIsac_kQKltCdfShape +0 +64, WebRtcIsac_kQKltCdfShape +0 +66, + WebRtcIsac_kQKltCdfShape +0 +68, WebRtcIsac_kQKltCdfShape +0 +70, + WebRtcIsac_kQKltCdfShape +0 +72, WebRtcIsac_kQKltCdfShape +0 +75, + WebRtcIsac_kQKltCdfShape +0 +77, WebRtcIsac_kQKltCdfShape +0 +79, + WebRtcIsac_kQKltCdfShape +0 +81, WebRtcIsac_kQKltCdfShape +0 +83, + WebRtcIsac_kQKltCdfShape +0 +86, WebRtcIsac_kQKltCdfShape +0 +90, + WebRtcIsac_kQKltCdfShape +0 +94, WebRtcIsac_kQKltCdfShape +0 +98, + WebRtcIsac_kQKltCdfShape +0 +102, WebRtcIsac_kQKltCdfShape +0 +107, + WebRtcIsac_kQKltCdfShape +0 +113, WebRtcIsac_kQKltCdfShape +0 +120, + WebRtcIsac_kQKltCdfShape +0 +129, WebRtcIsac_kQKltCdfShape +0 +131, + WebRtcIsac_kQKltCdfShape +0 +133, WebRtcIsac_kQKltCdfShape +0 +135, + WebRtcIsac_kQKltCdfShape +0 +137, WebRtcIsac_kQKltCdfShape +0 +141, + WebRtcIsac_kQKltCdfShape +0 +143, WebRtcIsac_kQKltCdfShape +0 +147, + WebRtcIsac_kQKltCdfShape +0 +151, WebRtcIsac_kQKltCdfShape +0 +155, + WebRtcIsac_kQKltCdfShape +0 +159, WebRtcIsac_kQKltCdfShape +0 +164, + WebRtcIsac_kQKltCdfShape +0 +168, WebRtcIsac_kQKltCdfShape +0 +172, + WebRtcIsac_kQKltCdfShape +0 +178, WebRtcIsac_kQKltCdfShape +0 +184, + WebRtcIsac_kQKltCdfShape +0 +192, WebRtcIsac_kQKltCdfShape +0 +200, + 
WebRtcIsac_kQKltCdfShape +0 +211, WebRtcIsac_kQKltCdfShape +0 +213, + WebRtcIsac_kQKltCdfShape +0 +215, WebRtcIsac_kQKltCdfShape +0 +217, + WebRtcIsac_kQKltCdfShape +0 +219, WebRtcIsac_kQKltCdfShape +0 +223, + WebRtcIsac_kQKltCdfShape +0 +227, WebRtcIsac_kQKltCdfShape +0 +231, + WebRtcIsac_kQKltCdfShape +0 +235, WebRtcIsac_kQKltCdfShape +0 +239, + WebRtcIsac_kQKltCdfShape +0 +243, WebRtcIsac_kQKltCdfShape +0 +248, + WebRtcIsac_kQKltCdfShape +0 +252, WebRtcIsac_kQKltCdfShape +0 +258, + WebRtcIsac_kQKltCdfShape +0 +264, WebRtcIsac_kQKltCdfShape +0 +273, + WebRtcIsac_kQKltCdfShape +0 +282, WebRtcIsac_kQKltCdfShape +0 +293, + WebRtcIsac_kQKltCdfShape +0 +308, WebRtcIsac_kQKltCdfShape +0 +310, + WebRtcIsac_kQKltCdfShape +0 +312, WebRtcIsac_kQKltCdfShape +0 +316, + WebRtcIsac_kQKltCdfShape +0 +320, WebRtcIsac_kQKltCdfShape +0 +324, + WebRtcIsac_kQKltCdfShape +0 +328, WebRtcIsac_kQKltCdfShape +0 +332, + WebRtcIsac_kQKltCdfShape +0 +336, WebRtcIsac_kQKltCdfShape +0 +341, + WebRtcIsac_kQKltCdfShape +0 +347, WebRtcIsac_kQKltCdfShape +0 +354, + WebRtcIsac_kQKltCdfShape +0 +360, WebRtcIsac_kQKltCdfShape +0 +368, + WebRtcIsac_kQKltCdfShape +0 +378, WebRtcIsac_kQKltCdfShape +0 +388, + WebRtcIsac_kQKltCdfShape +0 +400, WebRtcIsac_kQKltCdfShape +0 +418, + WebRtcIsac_kQKltCdfShape +0 +445, WebRtcIsac_kQKltCdfShape +0 +447, + WebRtcIsac_kQKltCdfShape +0 +451, WebRtcIsac_kQKltCdfShape +0 +455, + WebRtcIsac_kQKltCdfShape +0 +461, WebRtcIsac_kQKltCdfShape +0 +468, + WebRtcIsac_kQKltCdfShape +0 +474, WebRtcIsac_kQKltCdfShape +0 +480, + WebRtcIsac_kQKltCdfShape +0 +486, WebRtcIsac_kQKltCdfShape +0 +495, + WebRtcIsac_kQKltCdfShape +0 +505, WebRtcIsac_kQKltCdfShape +0 +516, + WebRtcIsac_kQKltCdfShape +0 +528, WebRtcIsac_kQKltCdfShape +0 +543, + WebRtcIsac_kQKltCdfShape +0 +564, WebRtcIsac_kQKltCdfShape +0 +583, + WebRtcIsac_kQKltCdfShape +0 +608, WebRtcIsac_kQKltCdfShape +0 +635 }; + + +/* left KLT transforms */ +const double WebRtcIsac_kKltT1Gain[4] = { + -0.79742827, 0.60341375, 0.60341375, 0.79742827 }; + +const double WebRtcIsac_kKltT1Shape[324] = { + 0.00159597, 0.00049320, 0.00513821, 0.00021066, 0.01338581, + -0.00422367, -0.00272072, 0.00935107, 0.02047622, 0.02691189, + 0.00478236, 0.03969702, 0.00886698, 0.04877604, -0.10898362, + -0.05930891, -0.03415047, 0.98889721, 0.00293558, -0.00035282, + 0.01156321, -0.00195341, -0.00937631, 0.01052213, -0.02551163, + 0.01644059, 0.03189927, 0.07754773, -0.08742313, -0.03026338, + 0.05136248, -0.14395974, 0.17725040, 0.22664856, 0.93380230, + 0.07076411, 0.00557890, -0.00222834, 0.01377569, 0.01466808, + 0.02847361, -0.00603178, 0.02382480, -0.01210452, 0.03797267, + -0.02371480, 0.11260335, -0.07366682, 0.00453436, -0.04136941, + -0.07912843, -0.95031418, 0.25295337, -0.05302216, -0.00617554, + -0.00044040, -0.00653778, 0.01097838, 0.01529174, 0.01374431, + -0.00748512, -0.00020034, 0.02432713, 0.11101570, -0.08556891, + 0.09282249, -0.01029446, 0.67556443, -0.67454300, 0.06910063, + 0.20866865, -0.10318050, 0.00932175, 0.00524058, 0.00803610, + -0.00594676, -0.01082578, 0.01069906, 0.00546768, 0.01565291, + 0.06816200, 0.10201227, 0.16812734, 0.22984074, 0.58213170, + -0.54138651, -0.51379962, 0.06847390, -0.01920037, -0.04592324, + -0.00467394, 0.00328858, 0.00377424, -0.00987448, 0.08222096, + -0.00377301, 0.04551941, -0.02592517, 0.16317082, 0.13077530, + 0.22702921, -0.31215289, -0.69645962, -0.38047101, -0.39339411, + 0.11124777, 0.02508035, -0.00708074, 0.00400344, 0.00040331, + 0.01142402, 0.01725406, 0.01635170, 0.14285366, 0.03949233, + -0.05905676, 
0.05877154, -0.17497577, -0.32479440, 0.80754464, + -0.38085603, -0.17055430, -0.03168622, -0.07531451, 0.02942002, + -0.02148095, -0.00754114, -0.00322372, 0.00567812, -0.01701521, + -0.12358320, 0.11473564, 0.09070136, 0.06533068, -0.22560802, + 0.19209022, 0.81605094, 0.36592275, -0.09919829, 0.16667122, + 0.16300725, 0.04803807, 0.06739263, -0.00156752, -0.01685302, + -0.00905240, -0.02297836, -0.00469939, 0.06310613, -0.16391930, + 0.10919511, 0.12529293, 0.85581322, -0.32145522, 0.24539076, + 0.07181839, 0.07289591, 0.14066759, 0.10406711, 0.05815518, + 0.01072680, -0.00759339, 0.00053486, -0.00044865, 0.03407361, + 0.01645348, 0.08758579, 0.27722240, 0.53665485, -0.74853376, + -0.01118192, -0.19805430, 0.06130619, -0.09675299, 0.08978480, + 0.03405255, -0.00706867, 0.05102045, 0.03250746, 0.01849966, + -0.01216314, -0.01184187, -0.01579288, 0.00114807, 0.11376166, + 0.88342114, -0.36425379, 0.13863190, 0.12524180, -0.13553892, + 0.04715856, -0.12341103, 0.04531568, 0.01899360, -0.00206897, + 0.00567768, -0.01444163, 0.00411946, -0.00855896, 0.00381663, + -0.01664861, -0.05534280, 0.21328278, 0.20161162, 0.72360394, + 0.59130708, -0.08043791, 0.08757349, -0.13893918, -0.05147377, + 0.02680690, -0.01144070, 0.00625162, -0.00634215, -0.01248947, + -0.00329455, -0.00609625, -0.00136305, -0.05097048, -0.01029851, + 0.25065384, -0.16856837, -0.07123372, 0.15992623, -0.39487617, + -0.79972301, 0.18118185, -0.04826639, -0.01805578, -0.02927253, + -0.16400618, 0.07472763, 0.10376449, 0.01705406, 0.01065801, + -0.01500498, 0.02039914, 0.37776349, -0.84484186, 0.10434286, + 0.15616990, 0.13474456, -0.00906238, -0.25238368, -0.03820885, + -0.10650905, -0.03880833, -0.03660028, -0.09640894, 0.00583314, + 0.01922097, 0.01489911, -0.02431117, -0.09372217, 0.39404721, + -0.84786223, -0.31277121, 0.03193850, 0.01974060, 0.01887901, + 0.00337911, -0.11359599, -0.02792521, -0.03220184, -0.01533311, + 0.00015962, -0.04225043, -0.00933965, 0.00675311, 0.00206060, + 0.15926771, 0.40199829, -0.80792558, -0.35591604, -0.17169764, + 0.02830436, 0.02459982, -0.03438589, 0.00718705, -0.01798329, + -0.01594508, -0.00702430, -0.00952419, -0.00962701, -0.01307212, + -0.01749740, 0.01299602, 0.00587270, -0.36103108, -0.82039266, + -0.43092844, -0.08500097, -0.04361674, -0.00333482, 0.01250434, + -0.02538295, -0.00921797, 0.01645071, -0.01400872, 0.00317607, + 0.00003277, -0.01617646, -0.00616863, -0.00882661, 0.00466157, + 0.00353237, 0.91803104, -0.39503305, -0.02048964, 0.00060125, + 0.01980634, 0.00300109, 0.00313880, 0.00657337, 0.00715163, + 0.00000261, 0.00854276, -0.00154825, -0.00516128, 0.00909527, + 0.00095609, 0.00701196, -0.00221867, -0.00156741 }; + +/* right KLT transforms */ +const double WebRtcIsac_kKltT2Gain[36] = { + 0.14572837, -0.45446306, 0.61990621, -0.52197033, 0.32145074, + -0.11026900, -0.20698282, 0.48962182, -0.27127933, -0.33627476, + 0.65094037, -0.32715751, 0.40262573, -0.47844405, -0.33876075, + 0.44130653, 0.37383966, -0.39964662, -0.51730480, 0.06611973, + 0.49030187, 0.47512886, -0.02141226, -0.51129451, -0.58578569, + -0.39132064, -0.13187771, 0.15649421, 0.40735596, 0.54396897, + 0.40381276, 0.40904942, 0.41179766, 0.41167576, 0.40840251, + 0.40468132 }; + +const double WebRtcIsac_kKltT2Shape[36] = { + 0.13427386, -0.35132558, 0.52506528, -0.59419077, 0.45075085, + -0.16312057, 0.29857439, -0.58660147, 0.34265431, 0.20879510, + -0.56063262, 0.30238345, 0.43308283, -0.41186999, -0.35288681, + 0.42768996, 0.36094634, -0.45284910, -0.47116680, 0.02893449, + 0.54326135, 0.45249040, 
-0.06264420, -0.52283830, 0.57137758, + 0.44298139, 0.12617554, -0.20819946, -0.42324603, -0.48876443, + 0.39597050, 0.40713935, 0.41389880, 0.41512486, 0.41130400, + 0.40575001 }; + +/* means of log gains and LAR coefficients*/ +const double WebRtcIsac_kLpcMeansGain[12] = { + -6.86881911, -5.35075273, -6.86792680, -5.36200897, -6.86401538, + -5.36921533, -6.86802969, -5.36893966, -6.86538097, -5.36315063, + -6.85535304, -5.35155315 }; + +const double WebRtcIsac_kLpcMeansShape[108] = { + -0.91232981, 0.26258634, -0.33716701, 0.08477430, -0.03378426, + 0.14423909, 0.07036185, 0.06155019, 0.01490385, 0.04138740, + 0.01427317, 0.01288970, 0.83872106, 0.25750199, 0.07988929, + -0.01957923, 0.00831390, 0.01770300, -0.90957164, 0.25732216, + -0.33385344, 0.08735740, -0.03715332, 0.14584917, 0.06998990, + 0.06131968, 0.01504379, 0.04067339, 0.01428039, 0.01406460, + 0.83846243, 0.26169862, 0.08109025, -0.01767055, 0.00970539, + 0.01954310, -0.90490803, 0.24656405, -0.33578607, 0.08843286, + -0.03749139, 0.14443959, 0.07214669, 0.06170993, 0.01449947, + 0.04134309, 0.01314762, 0.01413471, 0.83895203, 0.26748062, + 0.08197507, -0.01781298, 0.00885967, 0.01922394, -0.90922472, + 0.24495889, -0.33921540, 0.08877169, -0.03581332, 0.14199172, + 0.07444032, 0.06185940, 0.01502054, 0.04185113, 0.01276579, + 0.01355457, 0.83645358, 0.26631720, 0.08119697, -0.01835449, + 0.00788512, 0.01846446, -0.90482253, 0.24658310, -0.34019734, + 0.08281090, -0.03486038, 0.14359248, 0.07401336, 0.06001471, + 0.01528421, 0.04254560, 0.01321472, 0.01240799, 0.83857127, + 0.26281654, 0.08174380, -0.02099842, 0.00755176, 0.01699448, + -0.90132307, 0.25174308, -0.33838268, 0.07883863, -0.02877906, + 0.14105407, 0.07220290, 0.06000352, 0.01684879, 0.04226844, + 0.01331331, 0.01269244, 0.83832138, 0.25467485, 0.08118028, + -0.02120528, 0.00747832, 0.01567212 }; diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_tables.h b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_tables.h new file mode 100644 index 0000000..51f6316 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/lpc_tables.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+/*
+ * lpc_tables.h
+ *
+ * header file for coding tables for the LPC coefficients
+ *
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_TABLES_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_TABLES_H_
+
+#include "structs.h"
+
+#include "settings.h"
+
+#define KLT_STEPSIZE 1.00000000
+#define KLT_NUM_AVG_GAIN 0
+#define KLT_NUM_AVG_SHAPE 0
+#define KLT_NUM_MODELS 3
+#define LPC_GAIN_SCALE 4.000f
+#define LPC_LOBAND_SCALE 2.100f
+#define LPC_LOBAND_ORDER ORDERLO
+#define LPC_HIBAND_SCALE 0.450f
+#define LPC_HIBAND_ORDER ORDERHI
+#define LPC_GAIN_ORDER 2
+
+#define LPC_SHAPE_ORDER (LPC_LOBAND_ORDER + LPC_HIBAND_ORDER)
+
+#define KLT_ORDER_GAIN (LPC_GAIN_ORDER * SUBFRAMES)
+#define KLT_ORDER_SHAPE (LPC_SHAPE_ORDER * SUBFRAMES)
+
+/* cdf array for model indicator */
+extern const uint16_t WebRtcIsac_kQKltModelCdf[KLT_NUM_MODELS+1];
+
+/* pointer to cdf array for model indicator */
+extern const uint16_t *WebRtcIsac_kQKltModelCdfPtr[1];
+
+/* initial cdf index for decoder of model indicator */
+extern const uint16_t WebRtcIsac_kQKltModelInitIndex[1];
+
+/* offset to go from rounded value to quantization index */
+extern const short WebRtcIsac_kQKltQuantMinGain[12];
+
+extern const short WebRtcIsac_kQKltQuantMinShape[108];
+
+/* maximum quantization index */
+extern const uint16_t WebRtcIsac_kQKltMaxIndGain[12];
+
+extern const uint16_t WebRtcIsac_kQKltMaxIndShape[108];
+
+/* index offset */
+extern const uint16_t WebRtcIsac_kQKltOffsetGain[12];
+
+extern const uint16_t WebRtcIsac_kQKltOffsetShape[108];
+
+/* initial cdf index for KLT coefficients */
+extern const uint16_t WebRtcIsac_kQKltInitIndexGain[12];
+
+extern const uint16_t WebRtcIsac_kQKltInitIndexShape[108];
+
+/* quantizer representation levels */
+extern const double WebRtcIsac_kQKltLevelsGain[392];
+
+extern const double WebRtcIsac_kQKltLevelsShape[578];
+
+/* cdf tables for quantizer indices */
+extern const uint16_t WebRtcIsac_kQKltCdfGain[404];
+
+extern const uint16_t WebRtcIsac_kQKltCdfShape[686];
+
+/* pointers to cdf tables for quantizer indices */
+extern const uint16_t *WebRtcIsac_kQKltCdfPtrGain[12];
+
+extern const uint16_t *WebRtcIsac_kQKltCdfPtrShape[108];
+
+/* left KLT transforms */
+extern const double WebRtcIsac_kKltT1Gain[4];
+
+extern const double WebRtcIsac_kKltT1Shape[324];
+
+/* right KLT transforms */
+extern const double WebRtcIsac_kKltT2Gain[36];
+
+extern const double WebRtcIsac_kKltT2Shape[36];
+
+/* means of log gains and LAR coefficients */
+extern const double WebRtcIsac_kLpcMeansGain[12];
+
+extern const double WebRtcIsac_kLpcMeansShape[108];
+
+#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_LPC_TABLES_H_ */
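
With KLT_ORDER_GAIN = LPC_GAIN_ORDER * SUBFRAMES = 2 * 6 = 12, the gain tables above describe a separable two-dimensional KLT: a 2x2 "left" transform (WebRtcIsac_kKltT1Gain) across the two gain parameters per subframe and a 6x6 "right" transform (WebRtcIsac_kKltT2Gain) across the six subframes. The following is only a sketch of the forward transform under assumed conventions (row-major transform tables, gains laid out as [LPC_GAIN_ORDER][SUBFRAMES]); the encoder's actual data layout may differ, and KltGainTransform is a hypothetical name.

    /* y = T1^T * x * T2 over the mean-removed log gains (sketch only).
     * Both transforms are orthonormal, so the decoder can invert the
     * transform with the transposed products. */
    static void KltGainTransform(const double x[2][6], double y[2][6]) {
      double tmp[2][6];
      int i, j, k;
      for (i = 0; i < 2; i++)              /* tmp = T1^T * x */
        for (j = 0; j < 6; j++) {
          tmp[i][j] = 0.0;
          for (k = 0; k < 2; k++)
            tmp[i][j] += WebRtcIsac_kKltT1Gain[k * 2 + i] * x[k][j];
        }
      for (i = 0; i < 2; i++)              /* y = tmp * T2 */
        for (j = 0; j < 6; j++) {
          y[i][j] = 0.0;
          for (k = 0; k < 6; k++)
            y[i][j] += tmp[i][k] * WebRtcIsac_kKltT2Gain[k * 6 + j];
        }
    }

The transformed coefficients are what the quantizer-level and CDF tables above operate on, one table per decorrelated coefficient.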
+ */ + + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_ + +#include +#include "webrtc/typedefs.h" + +#if defined(WEBRTC_POSIX) +#define WebRtcIsac_lrint lrint +#elif (defined(WEBRTC_ARCH_X86) && defined(WIN32)) +static __inline long int WebRtcIsac_lrint(double x_dbl) { + long int x_int; + + __asm { + fld x_dbl + fistp x_int + }; + + return x_int; +} +#else // Do a slow but correct implementation of lrint + +static __inline long int WebRtcIsac_lrint(double x_dbl) { + long int x_int; + x_int = (long int)floor(x_dbl + 0.499999999999); + return x_int; +} + +#endif + +#endif // WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_OS_SPECIFIC_INLINE_H_ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c new file mode 100644 index 0000000..090b94c --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.c @@ -0,0 +1,623 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "pitch_estimator.h" + +#include +#include +#include +#ifdef WEBRTC_ANDROID +#include +#endif + +static const double kInterpolWin[8] = {-0.00067556028640, 0.02184247643159, -0.12203175715679, 0.60086484101160, + 0.60086484101160, -0.12203175715679, 0.02184247643159, -0.00067556028640}; + +/* interpolation filter */ +__inline static void IntrepolFilter(double *data_ptr, double *intrp) +{ + *intrp = kInterpolWin[0] * data_ptr[-3]; + *intrp += kInterpolWin[1] * data_ptr[-2]; + *intrp += kInterpolWin[2] * data_ptr[-1]; + *intrp += kInterpolWin[3] * data_ptr[0]; + *intrp += kInterpolWin[4] * data_ptr[1]; + *intrp += kInterpolWin[5] * data_ptr[2]; + *intrp += kInterpolWin[6] * data_ptr[3]; + *intrp += kInterpolWin[7] * data_ptr[4]; +} + + +/* 2D parabolic interpolation */ +/* probably some 0.5 factors can be eliminated, and the square-roots can be removed from the Cholesky fact. 
*/ +__inline static void Intrpol2D(double T[3][3], double *x, double *y, double *peak_val) +{ + double c, b[2], A[2][2]; + double t1, t2, d; + double delta1, delta2; + + + // double T[3][3] = {{-1.25, -.25,-.25}, {-.25, .75, .75}, {-.25, .75, .75}}; + // should result in: delta1 = 0.5; delta2 = 0.0; peak_val = 1.0 + + c = T[1][1]; + b[0] = 0.5 * (T[1][2] + T[2][1] - T[0][1] - T[1][0]); + b[1] = 0.5 * (T[1][0] + T[2][1] - T[0][1] - T[1][2]); + A[0][1] = -0.5 * (T[0][1] + T[2][1] - T[1][0] - T[1][2]); + t1 = 0.5 * (T[0][0] + T[2][2]) - c; + t2 = 0.5 * (T[2][0] + T[0][2]) - c; + d = (T[0][1] + T[1][2] + T[1][0] + T[2][1]) - 4.0 * c - t1 - t2; + A[0][0] = -t1 - 0.5 * d; + A[1][1] = -t2 - 0.5 * d; + + /* deal with singularities or ill-conditioned cases */ + if ( (A[0][0] < 1e-7) || ((A[0][0] * A[1][1] - A[0][1] * A[0][1]) < 1e-7) ) { + *peak_val = T[1][1]; + return; + } + + /* Cholesky decomposition: replace A by upper-triangular factor */ + A[0][0] = sqrt(A[0][0]); + A[0][1] = A[0][1] / A[0][0]; + A[1][1] = sqrt(A[1][1] - A[0][1] * A[0][1]); + + /* compute [x; y] = -0.5 * inv(A) * b */ + t1 = b[0] / A[0][0]; + t2 = (b[1] - t1 * A[0][1]) / A[1][1]; + delta2 = t2 / A[1][1]; + delta1 = 0.5 * (t1 - delta2 * A[0][1]) / A[0][0]; + delta2 *= 0.5; + + /* limit norm */ + t1 = delta1 * delta1 + delta2 * delta2; + if (t1 > 1.0) { + delta1 /= t1; + delta2 /= t1; + } + + *peak_val = 0.5 * (b[0] * delta1 + b[1] * delta2) + c; + + *x += delta1; + *y += delta2; +} + + +static void PCorr(const double *in, double *outcorr) +{ + double sum, ysum, prod; + const double *x, *inptr; + int k, n; + + //ysum = 1e-6; /* use this with float (i.s.o. double)! */ + ysum = 1e-13; + sum = 0.0; + x = in + PITCH_MAX_LAG/2 + 2; + for (n = 0; n < PITCH_CORR_LEN2; n++) { + ysum += in[n] * in[n]; + sum += x[n] * in[n]; + } + + outcorr += PITCH_LAG_SPAN2 - 1; /* index of last element in array */ + *outcorr = sum / sqrt(ysum); + + for (k = 1; k < PITCH_LAG_SPAN2; k++) { + ysum -= in[k-1] * in[k-1]; + ysum += in[PITCH_CORR_LEN2 + k - 1] * in[PITCH_CORR_LEN2 + k - 1]; + sum = 0.0; + inptr = &in[k]; + prod = x[0] * inptr[0]; + for (n = 1; n < PITCH_CORR_LEN2; n++) { + sum += prod; + prod = x[n] * inptr[n]; + } + sum += prod; + outcorr--; + *outcorr = sum / sqrt(ysum); + } +} + + +void WebRtcIsac_InitializePitch(const double *in, + const double old_lag, + const double old_gain, + PitchAnalysisStruct *State, + double *lags) +{ + double buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2]; + double ratio, log_lag, gain_bias; + double bias; + double corrvec1[PITCH_LAG_SPAN2]; + double corrvec2[PITCH_LAG_SPAN2]; + int m, k; + // Allocating 10 extra entries at the begining of the CorrSurf + double corrSurfBuff[10 + (2*PITCH_BW+3)*(PITCH_LAG_SPAN2+4)]; + double* CorrSurf[2*PITCH_BW+3]; + double *CorrSurfPtr1, *CorrSurfPtr2; + double LagWin[3] = {0.2, 0.5, 0.98}; + int ind1, ind2, peaks_ind, peak, max_ind; + int peaks[PITCH_MAX_NUM_PEAKS]; + double adj, gain_tmp; + double corr, corr_max; + double intrp_a, intrp_b, intrp_c, intrp_d; + double peak_vals[PITCH_MAX_NUM_PEAKS]; + double lags1[PITCH_MAX_NUM_PEAKS]; + double lags2[PITCH_MAX_NUM_PEAKS]; + double T[3][3]; + int row; + + for(k = 0; k < 2*PITCH_BW+3; k++) + { + CorrSurf[k] = &corrSurfBuff[10 + k * (PITCH_LAG_SPAN2+4)]; + } + /* reset CorrSurf matrix */ + memset(corrSurfBuff, 0, sizeof(double) * (10 + (2*PITCH_BW+3) * (PITCH_LAG_SPAN2+4))); + + //warnings -DH + max_ind = 0; + peak = 0; + + /* copy old values from state buffer */ + memcpy(buf_dec, State->dec_buffer, sizeof(double) * 
(PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); + + /* decimation; put result after the old values */ + WebRtcIsac_DecimateAllpass(in, State->decimator_state, PITCH_FRAME_LEN, + &buf_dec[PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2]); + + /* low-pass filtering */ + for (k = PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2; k < PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2+2; k++) + buf_dec[k] += 0.75 * buf_dec[k-1] - 0.25 * buf_dec[k-2]; + + /* copy end part back into state buffer */ + memcpy(State->dec_buffer, buf_dec+PITCH_FRAME_LEN/2, sizeof(double) * (PITCH_CORR_LEN2+PITCH_CORR_STEP2+PITCH_MAX_LAG/2-PITCH_FRAME_LEN/2+2)); + + /* compute correlation for first and second half of the frame */ + PCorr(buf_dec, corrvec1); + PCorr(buf_dec + PITCH_CORR_STEP2, corrvec2); + + /* bias towards pitch lag of previous frame */ + log_lag = log(0.5 * old_lag); + gain_bias = 4.0 * old_gain * old_gain; + if (gain_bias > 0.8) gain_bias = 0.8; + for (k = 0; k < PITCH_LAG_SPAN2; k++) + { + ratio = log((double) (k + (PITCH_MIN_LAG/2-2))) - log_lag; + bias = 1.0 + gain_bias * exp(-5.0 * ratio * ratio); + corrvec1[k] *= bias; + } + + /* taper correlation functions */ + for (k = 0; k < 3; k++) { + gain_tmp = LagWin[k]; + corrvec1[k] *= gain_tmp; + corrvec2[k] *= gain_tmp; + corrvec1[PITCH_LAG_SPAN2-1-k] *= gain_tmp; + corrvec2[PITCH_LAG_SPAN2-1-k] *= gain_tmp; + } + + corr_max = 0.0; + /* fill middle row of correlation surface */ + ind1 = 0; + ind2 = 0; + CorrSurfPtr1 = &CorrSurf[PITCH_BW][2]; + for (k = 0; k < PITCH_LAG_SPAN2; k++) { + corr = corrvec1[ind1++] + corrvec2[ind2++]; + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + } + /* fill first and last rows of correlation surface */ + ind1 = 0; + ind2 = PITCH_BW; + CorrSurfPtr1 = &CorrSurf[0][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW][PITCH_BW+2]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = 0.2 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + /* fill second and next to last rows of correlation surface */ + ind1 = 0; + ind2 = PITCH_BW-1; + CorrSurfPtr1 = &CorrSurf[1][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-1][PITCH_BW+1]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+1; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = 0.9 * ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + /* fill remainder of correlation surface */ + for (m = 2; m < PITCH_BW; m++) { + ind1 = 0; + ind2 = PITCH_BW - m; /* always larger than 
ind1 */ + CorrSurfPtr1 = &CorrSurf[m][2]; + CorrSurfPtr2 = &CorrSurf[2*PITCH_BW-m][PITCH_BW+2-m]; + for (k = 0; k < PITCH_LAG_SPAN2-PITCH_BW+m; k++) { + ratio = ((double) (ind1 + 12)) / ((double) (ind2 + 12)); + adj = ratio * (2.0 - ratio); /* adjustment factor; inverse parabola as a function of ratio */ + corr = adj * (corrvec1[ind1] + corrvec2[ind2]); + CorrSurfPtr1[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + } + corr = adj * (corrvec1[ind2++] + corrvec2[ind1++]); + CorrSurfPtr2[k] = corr; + if (corr > corr_max) { + corr_max = corr; /* update maximum */ + max_ind = (int)(&CorrSurfPtr2[k] - &CorrSurf[0][0]); + } + } + } + + /* threshold value to qualify as a peak */ + corr_max *= 0.6; + + peaks_ind = 0; + /* find peaks */ + for (m = 1; m < PITCH_BW+1; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + CorrSurfPtr1 = &CorrSurf[m][2]; + for (k = 2; k < PITCH_LAG_SPAN2-PITCH_BW-2+m; k++) { + corr = CorrSurfPtr1[k]; + if (corr > corr_max) { + if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { + if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + /* found a peak; store index into matrix */ + peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + } + } + } + } + } + for (m = PITCH_BW+1; m < 2*PITCH_BW; m++) { + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + CorrSurfPtr1 = &CorrSurf[m][2]; + for (k = 2+m-PITCH_BW; k < PITCH_LAG_SPAN2-2; k++) { + corr = CorrSurfPtr1[k]; + if (corr > corr_max) { + if ( (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+5)]) && (corr > CorrSurfPtr1[k - (PITCH_LAG_SPAN2+4)]) ) { + if ( (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+4)]) && (corr > CorrSurfPtr1[k + (PITCH_LAG_SPAN2+5)]) ) { + /* found a peak; store index into matrix */ + peaks[peaks_ind++] = (int)(&CorrSurfPtr1[k] - &CorrSurf[0][0]); + if (peaks_ind == PITCH_MAX_NUM_PEAKS) break; + } + } + } + } + } + + if (peaks_ind > 0) { + /* examine each peak */ + CorrSurfPtr1 = &CorrSurf[0][0]; + for (k = 0; k < peaks_ind; k++) { + peak = peaks[k]; + + /* compute four interpolated values around current peak */ + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)], &intrp_a); + IntrepolFilter(&CorrSurfPtr1[peak - 1 ], &intrp_b); + IntrepolFilter(&CorrSurfPtr1[peak ], &intrp_c); + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)], &intrp_d); + + /* determine maximum of the interpolated values */ + corr = CorrSurfPtr1[peak]; + corr_max = intrp_a; + if (intrp_b > corr_max) corr_max = intrp_b; + if (intrp_c > corr_max) corr_max = intrp_c; + if (intrp_d > corr_max) corr_max = intrp_d; + + /* determine where the peak sits and fill a 3x3 matrix around it */ + row = peak / (PITCH_LAG_SPAN2+4); + lags1[k] = (double) ((peak - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); + lags2[k] = (double) (lags1[k] + PITCH_BW - row); + if ( corr > corr_max ) { + T[0][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[2][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[1][1] = corr; + T[0][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + T[2][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + T[1][0] = intrp_a; + T[0][1] = intrp_b; + T[2][1] = intrp_c; + T[1][2] = intrp_d; + } else { + if (intrp_a == corr_max) { + lags1[k] -= 0.5; + lags2[k] += 0.5; + IntrepolFilter(&CorrSurfPtr1[peak - 2*(PITCH_LAG_SPAN2+5)], &T[0][0]); + IntrepolFilter(&CorrSurfPtr1[peak - 
(2*PITCH_LAG_SPAN2+9)], &T[2][0]); + T[1][1] = intrp_a; + T[0][2] = intrp_b; + T[2][2] = intrp_c; + T[1][0] = CorrSurfPtr1[peak - (2*PITCH_LAG_SPAN2+9)]; + T[0][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[2][1] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[1][2] = corr; + } else if (intrp_b == corr_max) { + lags1[k] -= 0.5; + lags2[k] -= 0.5; + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+6)], &T[0][0]); + T[2][0] = intrp_a; + T[1][1] = intrp_b; + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+3)], &T[0][2]); + T[2][2] = intrp_d; + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+5)]; + T[0][1] = CorrSurfPtr1[peak - 1]; + T[2][1] = corr; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + } else if (intrp_c == corr_max) { + lags1[k] += 0.5; + lags2[k] += 0.5; + T[0][0] = intrp_a; + IntrepolFilter(&CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)], &T[2][0]); + T[1][1] = intrp_c; + T[0][2] = intrp_d; + IntrepolFilter(&CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)], &T[2][2]); + T[1][0] = CorrSurfPtr1[peak - (PITCH_LAG_SPAN2+4)]; + T[0][1] = corr; + T[2][1] = CorrSurfPtr1[peak + 1]; + T[1][2] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + } else { + lags1[k] += 0.5; + lags2[k] -= 0.5; + T[0][0] = intrp_b; + T[2][0] = intrp_c; + T[1][1] = intrp_d; + IntrepolFilter(&CorrSurfPtr1[peak + 2*(PITCH_LAG_SPAN2+4)], &T[0][2]); + IntrepolFilter(&CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)], &T[2][2]); + T[1][0] = corr; + T[0][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+4)]; + T[2][1] = CorrSurfPtr1[peak + (PITCH_LAG_SPAN2+5)]; + T[1][2] = CorrSurfPtr1[peak + (2*PITCH_LAG_SPAN2+9)]; + } + } + + /* 2D parabolic interpolation gives more accurate lags and peak value */ + Intrpol2D(T, &lags1[k], &lags2[k], &peak_vals[k]); + } + + /* determine the highest peak, after applying a bias towards short lags */ + corr_max = 0.0; + for (k = 0; k < peaks_ind; k++) { + corr = peak_vals[k] * pow(PITCH_PEAK_DECAY, log(lags1[k] + lags2[k])); + if (corr > corr_max) { + corr_max = corr; + peak = k; + } + } + + lags1[peak] *= 2.0; + lags2[peak] *= 2.0; + + if (lags1[peak] < (double) PITCH_MIN_LAG) lags1[peak] = (double) PITCH_MIN_LAG; + if (lags2[peak] < (double) PITCH_MIN_LAG) lags2[peak] = (double) PITCH_MIN_LAG; + if (lags1[peak] > (double) PITCH_MAX_LAG) lags1[peak] = (double) PITCH_MAX_LAG; + if (lags2[peak] > (double) PITCH_MAX_LAG) lags2[peak] = (double) PITCH_MAX_LAG; + + /* store lags of highest peak in output array */ + lags[0] = lags1[peak]; + lags[1] = lags1[peak]; + lags[2] = lags2[peak]; + lags[3] = lags2[peak]; + } + else + { + row = max_ind / (PITCH_LAG_SPAN2+4); + lags1[0] = (double) ((max_ind - row * (PITCH_LAG_SPAN2+4)) + PITCH_MIN_LAG/2 - 4); + lags2[0] = (double) (lags1[0] + PITCH_BW - row); + + if (lags1[0] < (double) PITCH_MIN_LAG) lags1[0] = (double) PITCH_MIN_LAG; + if (lags2[0] < (double) PITCH_MIN_LAG) lags2[0] = (double) PITCH_MIN_LAG; + if (lags1[0] > (double) PITCH_MAX_LAG) lags1[0] = (double) PITCH_MAX_LAG; + if (lags2[0] > (double) PITCH_MAX_LAG) lags2[0] = (double) PITCH_MAX_LAG; + + /* store lags of highest peak in output array */ + lags[0] = lags1[0]; + lags[1] = lags1[0]; + lags[2] = lags2[0]; + lags[3] = lags2[0]; + } +} + + + +/* create weighting matrix by orthogonalizing a basis of polynomials of increasing order + * t = (0:4)'; + * A = [t.^0, t.^1, t.^2, t.^3, t.^4]; + * [Q, dummy] = qr(A); + * P.Weight = Q * diag([0, .1, .5, 1, 1]) * Q'; */ +static const double kWeight[5][5] = { + { 0.29714285714286, -0.30857142857143, -0.05714285714286, 0.05142857142857, 0.01714285714286}, + 
  {-0.30857142857143,  0.67428571428571, -0.27142857142857, -0.14571428571429,  0.05142857142857},
+  {-0.05714285714286, -0.27142857142857,  0.65714285714286, -0.27142857142857, -0.05714285714286},
+  { 0.05142857142857, -0.14571428571429, -0.27142857142857,  0.67428571428571, -0.30857142857143},
+  { 0.01714285714286,  0.05142857142857, -0.05714285714286, -0.30857142857143,  0.29714285714286}
+};
+
+
+void WebRtcIsac_PitchAnalysis(const double *in,  /* PITCH_FRAME_LEN samples */
+                              double *out,       /* PITCH_FRAME_LEN+QLOOKAHEAD samples */
+                              PitchAnalysisStruct *State,
+                              double *lags,
+                              double *gains)
+{
+  double HPin[PITCH_FRAME_LEN];
+  double Weighted[PITCH_FRAME_LEN];
+  double Whitened[PITCH_FRAME_LEN + QLOOKAHEAD];
+  double inbuf[PITCH_FRAME_LEN + QLOOKAHEAD];
+  double out_G[PITCH_FRAME_LEN + QLOOKAHEAD];  // could be removed by using out instead
+  double out_dG[4][PITCH_FRAME_LEN + QLOOKAHEAD];
+  double old_lag, old_gain;
+  double nrg_wht, tmp;
+  double Wnrg, Wfluct, Wgain;
+  double H[4][4];
+  double grad[4];
+  double dG[4];
+  int k, m, n, iter;
+
+  /* high pass filtering using second order pole-zero filter */
+  WebRtcIsac_Highpass(in, HPin, State->hp_state, PITCH_FRAME_LEN);
+
+  /* copy from state into buffer */
+  memcpy(Whitened, State->whitened_buf, sizeof(double) * QLOOKAHEAD);
+
+  /* compute weighted and whitened signals */
+  WebRtcIsac_WeightingFilter(HPin, &Weighted[0], &Whitened[QLOOKAHEAD], &(State->Wghtstr));
+
+  /* copy from buffer into state */
+  memcpy(State->whitened_buf, Whitened+PITCH_FRAME_LEN, sizeof(double) * QLOOKAHEAD);
+
+  old_lag = State->PFstr_wght.oldlagp[0];
+  old_gain = State->PFstr_wght.oldgainp[0];
+
+  /* initial pitch estimate */
+  WebRtcIsac_InitializePitch(Weighted, old_lag, old_gain, State, lags);
+
+
+  /* Iterative optimization of lags - to be done */
+
+  /* compute energy of whitened signal */
+  nrg_wht = 0.0;
+  for (k = 0; k < PITCH_FRAME_LEN + QLOOKAHEAD; k++)
+    nrg_wht += Whitened[k] * Whitened[k];
+
+
+  /* Iterative optimization of gains */
+
+  /* set weights for energy, gain fluctuation, and spectral gain penalty functions */
+  Wnrg = 1.0 / nrg_wht;
+  Wgain = 0.005;
+  Wfluct = 3.0;
+
+  /* set initial gains */
+  for (k = 0; k < 4; k++)
+    gains[k] = PITCH_MAX_GAIN_06;
+
+  /* two iterations should be enough */
+  for (iter = 0; iter < 2; iter++) {
+    /* compute Jacobian of pre-filter output towards gains */
+    WebRtcIsac_PitchfilterPre_gains(Whitened, out_G, out_dG, &(State->PFstr_wght), lags, gains);
+
+    /* gradient and approximate Hessian (lower triangle) for minimizing the filter's output power */
+    for (k = 0; k < 4; k++) {
+      tmp = 0.0;
+      for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
+        tmp += out_G[n] * out_dG[k][n];
+      grad[k] = tmp * Wnrg;
+    }
+    for (k = 0; k < 4; k++) {
+      for (m = 0; m <= k; m++) {
+        tmp = 0.0;
+        for (n = 0; n < PITCH_FRAME_LEN + QLOOKAHEAD; n++)
+          tmp += out_dG[m][n] * out_dG[k][n];
+        H[k][m] = tmp * Wnrg;
+      }
+    }
+
+    /* add gradient and Hessian (lower triangle) for damping fast gain changes */
+    for (k = 0; k < 4; k++) {
+      tmp = kWeight[k+1][0] * old_gain;
+      for (m = 0; m < 4; m++)
+        tmp += kWeight[k+1][m+1] * gains[m];
+      grad[k] += tmp * Wfluct;
+    }
+    for (k = 0; k < 4; k++) {
+      for (m = 0; m <= k; m++) {
+        H[k][m] += kWeight[k+1][m+1] * Wfluct;
+      }
+    }
+
+    /* add gradient and Hessian for damping the gain */
+    for (k = 0; k < 3; k++) {
+      tmp = 1.0 / (1 - gains[k]);
+      grad[k] += tmp * tmp * Wgain;
+      H[k][k] += 2.0 * tmp * (tmp * tmp * Wgain);
+    }
+    tmp = 1.0 / (1 - gains[3]);
+    grad[3] += 1.33 * (tmp * tmp * Wgain);
+    H[3][3] += 2.66 * tmp * (tmp * tmp * Wgain);
+
+
+    /* compute Cholesky factorization of Hessian
+     * by overwriting the upper triangle; scale factors on diagonal
+     * (for non-PC platforms, store the inverse of the diagonals separately to minimize divisions) */
+    H[0][1] = H[1][0] / H[0][0];
+    H[0][2] = H[2][0] / H[0][0];
+    H[0][3] = H[3][0] / H[0][0];
+    H[1][1] -= H[0][0] * H[0][1] * H[0][1];
+    H[1][2] = (H[2][1] - H[0][1] * H[2][0]) / H[1][1];
+    H[1][3] = (H[3][1] - H[0][1] * H[3][0]) / H[1][1];
+    H[2][2] -= H[0][0] * H[0][2] * H[0][2] + H[1][1] * H[1][2] * H[1][2];
+    H[2][3] = (H[3][2] - H[0][2] * H[3][0] - H[1][2] * H[1][1] * H[1][3]) / H[2][2];
+    H[3][3] -= H[0][0] * H[0][3] * H[0][3] + H[1][1] * H[1][3] * H[1][3] + H[2][2] * H[2][3] * H[2][3];
+
+    /* Compute update as delta_gains = -inv(H) * grad */
+    /* copy and negate */
+    for (k = 0; k < 4; k++)
+      dG[k] = -grad[k];
+    /* back substitution */
+    dG[1] -= dG[0] * H[0][1];
+    dG[2] -= dG[0] * H[0][2] + dG[1] * H[1][2];
+    dG[3] -= dG[0] * H[0][3] + dG[1] * H[1][3] + dG[2] * H[2][3];
+    /* scale */
+    for (k = 0; k < 4; k++)
+      dG[k] /= H[k][k];
+    /* back substitution */
+    dG[2] -= dG[3] * H[2][3];
+    dG[1] -= dG[3] * H[1][3] + dG[2] * H[1][2];
+    dG[0] -= dG[3] * H[0][3] + dG[2] * H[0][2] + dG[1] * H[0][1];
+
+    /* update gains and check range */
+    for (k = 0; k < 4; k++) {
+      gains[k] += dG[k];
+      if (gains[k] > PITCH_MAX_GAIN)
+        gains[k] = PITCH_MAX_GAIN;
+      else if (gains[k] < 0.0)
+        gains[k] = 0.0;
+    }
+  }
+
+  /* update state for next frame */
+  WebRtcIsac_PitchfilterPre(Whitened, out, &(State->PFstr_wght), lags, gains);
+
+  /* concatenate previous input's end and current input */
+  memcpy(inbuf, State->inbuf, sizeof(double) * QLOOKAHEAD);
+  memcpy(inbuf+QLOOKAHEAD, in, sizeof(double) * PITCH_FRAME_LEN);
+
+  /* lookahead pitch filtering for masking analysis */
+  WebRtcIsac_PitchfilterPre_la(inbuf, out, &(State->PFstr), lags, gains);
+
+  /* store last part of input */
+  for (k = 0; k < QLOOKAHEAD; k++)
+    State->inbuf[k] = inbuf[k + PITCH_FRAME_LEN];
+}
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h
new file mode 100644
index 0000000..6fb02b3
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
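+ */
+
+/*
+ * Editorial note (not part of the upstream file): WebRtcIsac_PitchAnalysis()
+ * below is the entry point. Per pitch_estimator.c, it high-pass filters and
+ * whitens the input, obtains initial lag estimates via
+ * WebRtcIsac_InitializePitch(), refines the four subframe gains with two
+ * damped Newton iterations, and finally runs the pitch pre-filters whose
+ * prototypes are declared here.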
+ */ + +/* + * pitch_estimator.h + * + * Pitch functions + * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ + +#include "structs.h" + + + +void WebRtcIsac_PitchAnalysis(const double *in, /* PITCH_FRAME_LEN samples */ + double *out, /* PITCH_FRAME_LEN+QLOOKAHEAD samples */ + PitchAnalysisStruct *State, + double *lags, + double *gains); + +void WebRtcIsac_InitializePitch(const double *in, + const double old_lag, + const double old_gain, + PitchAnalysisStruct *State, + double *lags); + +void WebRtcIsac_PitchfilterPre(double *indat, + double *outdat, + PitchFiltstr *pfp, + double *lags, + double *gains); + +void WebRtcIsac_PitchfilterPost(double *indat, + double *outdat, + PitchFiltstr *pfp, + double *lags, + double *gains); + +void WebRtcIsac_PitchfilterPre_la(double *indat, + double *outdat, + PitchFiltstr *pfp, + double *lags, + double *gains); + +void WebRtcIsac_PitchfilterPre_gains(double *indat, + double *outdat, + double out_dG[][PITCH_FRAME_LEN + QLOOKAHEAD], + PitchFiltstr *pfp, + double *lags, + double *gains); + +void WebRtcIsac_WeightingFilter(const double *in, double *weiout, double *whiout, WeightFiltstr *wfdata); + +void WebRtcIsac_Highpass(const double *in, + double *out, + double *state, + size_t N); + +void WebRtcIsac_DecimateAllpass(const double *in, + double *state_in, /* array of size: + * 2*ALLPASSSECTIONS+1 */ + size_t N, /* number of input samples */ + double *out); /* array of size N/2 */ + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_ESTIMATOR_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_gain_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_gain_tables.c new file mode 100644 index 0000000..947d3e7 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_gain_tables.c @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
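+ */
+
+/*
+ * Editorial note (not part of the upstream file): the Q12 mean-gain tables
+ * below store gains scaled by 2^12, so a floating-point gain is recovered as
+ *
+ *   double gain = WebRtcIsac_kQMeanGain1Q12[i] / 4096.0;
+ *
+ * The saturation value 1843 corresponds to round(0.45 * 4096), i.e.
+ * PITCH_MAX_GAIN (PITCH_MAX_GAIN_Q12) from settings.h.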
+ */ + +#include "pitch_gain_tables.h" + +#include "settings.h" + +/* header file for coding tables for the pitch filter side-info in the entropy coder */ +/********************* Pitch Filter Gain Coefficient Tables ************************/ +/* cdf for quantized pitch filter gains */ +const uint16_t WebRtcIsac_kQPitchGainCdf[255] = { + 0, 2, 4, 6, 64, 901, 903, 905, 16954, 16956, + 16961, 17360, 17362, 17364, 17366, 17368, 17370, 17372, 17374, 17411, + 17514, 17516, 17583, 18790, 18796, 18802, 20760, 20777, 20782, 21722, + 21724, 21728, 21738, 21740, 21742, 21744, 21746, 21748, 22224, 22227, + 22230, 23214, 23229, 23239, 25086, 25108, 25120, 26088, 26094, 26098, + 26175, 26177, 26179, 26181, 26183, 26185, 26484, 26507, 26522, 27705, + 27731, 27750, 29767, 29799, 29817, 30866, 30883, 30885, 31025, 31029, + 31031, 31033, 31035, 31037, 31114, 31126, 31134, 32687, 32722, 32767, + 35718, 35742, 35757, 36943, 36952, 36954, 37115, 37128, 37130, 37132, + 37134, 37136, 37143, 37145, 37152, 38843, 38863, 38897, 47458, 47467, + 47474, 49040, 49061, 49063, 49145, 49157, 49159, 49161, 49163, 49165, + 49167, 49169, 49171, 49757, 49770, 49782, 61333, 61344, 61346, 62860, + 62883, 62885, 62887, 62889, 62891, 62893, 62895, 62897, 62899, 62901, + 62903, 62905, 62907, 62909, 65496, 65498, 65500, 65521, 65523, 65525, + 65527, 65529, 65531, 65533, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535}; + +/* index limits and ranges */ +const int16_t WebRtcIsac_kIndexLowerLimitGain[3] = { + -7, -2, -1}; + +const int16_t WebRtcIsac_kIndexUpperLimitGain[3] = { + 0, 3, 1}; + +const uint16_t WebRtcIsac_kIndexMultsGain[2] = { + 18, 3}; + +/* size of cdf table */ +const uint16_t WebRtcIsac_kQCdfTableSizeGain[1] = { + 256}; + +///////////////////////////FIXED POINT +/* mean values of pitch filter gains in FIXED point */ +const int16_t WebRtcIsac_kQMeanGain1Q12[144] = { + 843, 1092, 1336, 1222, 1405, 1656, 1500, 1815, 1843, 1838, 1839, 1843, 1843, 1843, 1843, 1843, + 1843, 1843, 814, 846, 1092, 1013, 1174, 1383, 1391, 1511, 1584, 1734, 1753, 1843, 1843, 1843, + 1843, 1843, 1843, 1843, 524, 689, 777, 845, 947, 1069, 1090, 1263, 1380, 1447, 1559, 1676, + 1645, 1749, 1843, 1843, 1843, 1843, 81, 477, 563, 611, 706, 806, 849, 1012, 1192, 1128, + 1330, 1489, 1425, 1576, 1826, 1741, 1843, 1843, 0, 290, 305, 356, 488, 575, 602, 741, + 890, 835, 1079, 1196, 1182, 1376, 1519, 1506, 1680, 1843, 0, 47, 97, 69, 289, 381, + 385, 474, 617, 664, 803, 1079, 935, 1160, 1269, 1265, 1506, 1741, 0, 0, 0, 0, + 112, 120, 190, 283, 442, 343, 526, 809, 684, 935, 1134, 1020, 1265, 1506, 0, 0, + 0, 0, 0, 0, 0, 111, 256, 87, 373, 597, 430, 684, 935, 770, 1020, 1265}; + +const int16_t WebRtcIsac_kQMeanGain2Q12[144] = { + 1760, 1525, 1285, 1747, 1671, 1393, 1843, 1826, 1555, 1843, 1784, 1606, 1843, 1843, 1711, 1843, + 
1843, 1814, 1389, 1275, 1040, 1564, 1414, 1252, 1610, 1495, 1343, 1753, 1592, 1405, 1804, 1720, + 1475, 1843, 1814, 1581, 1208, 1061, 856, 1349, 1148, 994, 1390, 1253, 1111, 1495, 1343, 1178, + 1770, 1465, 1234, 1814, 1581, 1342, 1040, 793, 713, 1053, 895, 737, 1128, 1003, 861, 1277, + 1094, 981, 1475, 1192, 1019, 1581, 1342, 1098, 855, 570, 483, 833, 648, 540, 948, 744, + 572, 1009, 844, 636, 1234, 934, 685, 1342, 1217, 984, 537, 318, 124, 603, 423, 350, + 687, 479, 322, 791, 581, 430, 987, 671, 488, 1098, 849, 597, 283, 27, 0, 397, + 222, 38, 513, 271, 124, 624, 325, 157, 737, 484, 233, 849, 597, 343, 27, 0, + 0, 141, 0, 0, 256, 69, 0, 370, 87, 0, 484, 229, 0, 597, 343, 87}; + +const int16_t WebRtcIsac_kQMeanGain3Q12[144] = { + 1843, 1843, 1711, 1843, 1818, 1606, 1843, 1827, 1511, 1814, 1639, 1393, 1760, 1525, 1285, 1656, + 1419, 1176, 1835, 1718, 1475, 1841, 1650, 1387, 1648, 1498, 1287, 1600, 1411, 1176, 1522, 1299, + 1040, 1419, 1176, 928, 1773, 1461, 1128, 1532, 1355, 1202, 1429, 1260, 1115, 1398, 1151, 1025, + 1172, 1080, 790, 1176, 928, 677, 1475, 1147, 1019, 1276, 1096, 922, 1214, 1010, 901, 1057, + 893, 800, 1040, 796, 734, 928, 677, 424, 1137, 897, 753, 1120, 830, 710, 875, 751, + 601, 795, 642, 583, 790, 544, 475, 677, 474, 140, 987, 750, 482, 697, 573, 450, + 691, 487, 303, 661, 394, 332, 537, 303, 220, 424, 168, 0, 737, 484, 229, 624, + 348, 153, 441, 261, 136, 397, 166, 51, 283, 27, 0, 168, 0, 0, 484, 229, + 0, 370, 57, 0, 256, 43, 0, 141, 0, 0, 27, 0, 0, 0, 0, 0}; + + +const int16_t WebRtcIsac_kQMeanGain4Q12[144] = { + 1843, 1843, 1843, 1843, 1841, 1843, 1500, 1821, 1843, 1222, 1434, 1656, 843, 1092, 1336, 504, + 757, 1007, 1843, 1843, 1843, 1838, 1791, 1843, 1265, 1505, 1599, 965, 1219, 1425, 730, 821, + 1092, 249, 504, 757, 1783, 1819, 1843, 1351, 1567, 1727, 1096, 1268, 1409, 805, 961, 1131, + 444, 670, 843, 0, 249, 504, 1425, 1655, 1743, 1096, 1324, 1448, 822, 1019, 1199, 490, + 704, 867, 81, 450, 555, 0, 0, 249, 1247, 1428, 1530, 881, 1073, 1283, 610, 759, + 939, 278, 464, 645, 0, 200, 270, 0, 0, 0, 935, 1163, 1410, 528, 790, 1068, + 377, 499, 717, 173, 240, 274, 0, 43, 62, 0, 0, 0, 684, 935, 1182, 343, + 551, 735, 161, 262, 423, 0, 55, 27, 0, 0, 0, 0, 0, 0, 430, 684, + 935, 87, 377, 597, 0, 46, 256, 0, 0, 0, 0, 0, 0, 0, 0, 0}; diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_gain_tables.h b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_gain_tables.h new file mode 100644 index 0000000..8d708ce --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_gain_tables.h @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * pitch_gain_tables.h + * + * This file contains tables for the pitch filter side-info in the entropy coder. 
+ * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_GAIN_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_GAIN_TABLES_H_ + +#include "webrtc/typedefs.h" + +/* header file for coding tables for the pitch filter side-info in the entropy coder */ +/********************* Pitch Filter Gain Coefficient Tables ************************/ +/* cdf for quantized pitch filter gains */ +extern const uint16_t WebRtcIsac_kQPitchGainCdf[255]; + +/* index limits and ranges */ +extern const int16_t WebRtcIsac_kIndexLowerLimitGain[3]; + +extern const int16_t WebRtcIsac_kIndexUpperLimitGain[3]; +extern const uint16_t WebRtcIsac_kIndexMultsGain[2]; + +/* mean values of pitch filter gains */ +//(Y) +extern const int16_t WebRtcIsac_kQMeanGain1Q12[144]; +extern const int16_t WebRtcIsac_kQMeanGain2Q12[144]; +extern const int16_t WebRtcIsac_kQMeanGain3Q12[144]; +extern const int16_t WebRtcIsac_kQMeanGain4Q12[144]; +//(Y) + +/* size of cdf table */ +extern const uint16_t WebRtcIsac_kQCdfTableSizeGain[1]; + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_GAIN_TABLES_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_lag_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_lag_tables.c new file mode 100644 index 0000000..f845a22 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_lag_tables.c @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
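+ */
+
+/*
+ * Editorial note (not part of the upstream file): three sets of lag tables
+ * follow, selected by the magnitude of the quantized pitch gain. The lag
+ * quantizer gets finer as the gain grows: stepsize 2.0 samples for small
+ * gains (Lo), 1.0 for medium (Mid), and 0.5 for large (Hi), matching the
+ * WebRtcIsac_kQPitchLagStepsize{Lo,Mid,Hi} constants defined below.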
+ */ + +#include "pitch_lag_tables.h" +#include "settings.h" + +/* header file for coding tables for the pitch filter side-info in the entropy coder */ +/********************* Pitch Filter Gain Coefficient Tables ************************/ + +/* tables for use with small pitch gain */ + +/* cdf for quantized pitch filter lags */ +const uint16_t WebRtcIsac_kQPitchLagCdf1Lo[127] = { + 0, 134, 336, 549, 778, 998, 1264, 1512, 1777, 2070, + 2423, 2794, 3051, 3361, 3708, 3979, 4315, 4610, 4933, 5269, + 5575, 5896, 6155, 6480, 6816, 7129, 7477, 7764, 8061, 8358, + 8718, 9020, 9390, 9783, 10177, 10543, 10885, 11342, 11795, 12213, + 12680, 13096, 13524, 13919, 14436, 14903, 15349, 15795, 16267, 16734, + 17266, 17697, 18130, 18632, 19080, 19447, 19884, 20315, 20735, 21288, + 21764, 22264, 22723, 23193, 23680, 24111, 24557, 25022, 25537, 26082, + 26543, 27090, 27620, 28139, 28652, 29149, 29634, 30175, 30692, 31273, + 31866, 32506, 33059, 33650, 34296, 34955, 35629, 36295, 36967, 37726, + 38559, 39458, 40364, 41293, 42256, 43215, 44231, 45253, 46274, 47359, + 48482, 49678, 50810, 51853, 53016, 54148, 55235, 56263, 57282, 58363, + 59288, 60179, 61076, 61806, 62474, 63129, 63656, 64160, 64533, 64856, + 65152, 65535, 65535, 65535, 65535, 65535, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf2Lo[20] = { + 0, 429, 3558, 5861, 8558, 11639, 15210, 19502, 24773, 31983, + 42602, 48567, 52601, 55676, 58160, 60172, 61889, 63235, 65383, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf3Lo[2] = { + 0, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf4Lo[10] = { + 0, 2966, 6368, 11182, 19431, 37793, 48532, 55353, 60626, 65535}; + +const uint16_t *WebRtcIsac_kQPitchLagCdfPtrLo[4] = {WebRtcIsac_kQPitchLagCdf1Lo, WebRtcIsac_kQPitchLagCdf2Lo, WebRtcIsac_kQPitchLagCdf3Lo, WebRtcIsac_kQPitchLagCdf4Lo}; + +/* size of first cdf table */ +const uint16_t WebRtcIsac_kQPitchLagCdfSizeLo[1] = {128}; + +/* index limits and ranges */ +const int16_t WebRtcIsac_kQIndexLowerLimitLagLo[4] = { +-140, -9, 0, -4}; + +const int16_t WebRtcIsac_kQIndexUpperLimitLagLo[4] = { +-20, 9, 0, 4}; + +/* initial index for arithmetic decoder */ +const uint16_t WebRtcIsac_kQInitIndexLagLo[3] = { + 10, 1, 5}; + +/* mean values of pitch filter lags */ +const double WebRtcIsac_kQMeanLag2Lo[19] = { +-17.21385070, -15.82678944, -14.07123081, -12.03003877, -10.01311864, -8.00794627, -5.91162987, -3.89231876, -1.90220980, -0.01879275, + 1.89144232, 3.88123171, 5.92146992, 7.96435361, 9.98923648, 11.98266347, 13.96101002, 15.74855713, 17.10976611}; + +const double WebRtcIsac_kQMeanLag3Lo[1] = { + 0.00000000}; + +const double WebRtcIsac_kQMeanLag4Lo[9] = { +-7.76246496, -5.92083980, -3.94095226, -1.89502305, 0.03724681, 1.93054221, 3.96443467, 5.91726366, 7.78434291}; + +const double WebRtcIsac_kQPitchLagStepsizeLo = 2.000000; + + +/* tables for use with medium pitch gain */ + +/* cdf for quantized pitch filter lags */ +const uint16_t WebRtcIsac_kQPitchLagCdf1Mid[255] = { + 0, 28, 61, 88, 121, 149, 233, 331, 475, 559, + 624, 661, 689, 712, 745, 791, 815, 843, 866, 922, + 959, 1024, 1061, 1117, 1178, 1238, 1280, 1350, 1453, 1513, + 1564, 1625, 1671, 1741, 1788, 1904, 2072, 2421, 2626, 2770, + 2840, 2900, 2942, 3012, 3068, 3115, 3147, 3194, 3254, 3319, + 3366, 3520, 3678, 3780, 3850, 3911, 3957, 4032, 4106, 4185, + 4292, 4474, 4683, 4842, 5019, 5191, 5321, 5428, 5540, 5675, + 5763, 5847, 5959, 6127, 6304, 6564, 6839, 7090, 7263, 7421, + 7556, 7728, 7872, 7984, 8142, 8361, 8580, 8743, 8938, 9227, + 9409, 9539, 9674, 9795, 9930, 10060, 10177, 10382, 10614, 
10861, + 11038, 11271, 11415, 11629, 11792, 12044, 12193, 12416, 12574, 12821, + 13007, 13235, 13445, 13654, 13901, 14134, 14488, 15000, 15703, 16285, + 16504, 16797, 17086, 17328, 17579, 17807, 17998, 18268, 18538, 18836, + 19087, 19274, 19474, 19716, 19935, 20270, 20833, 21303, 21532, 21741, + 21978, 22207, 22523, 22770, 23054, 23613, 23943, 24204, 24399, 24651, + 24832, 25074, 25270, 25549, 25759, 26015, 26150, 26424, 26713, 27048, + 27342, 27504, 27681, 27854, 28021, 28207, 28412, 28664, 28859, 29064, + 29278, 29548, 29748, 30107, 30377, 30656, 30856, 31164, 31452, 31755, + 32011, 32328, 32626, 32919, 33319, 33789, 34329, 34925, 35396, 35973, + 36443, 36964, 37551, 38156, 38724, 39357, 40023, 40908, 41587, 42602, + 43924, 45037, 45810, 46597, 47421, 48291, 49092, 50051, 51448, 52719, + 53440, 54241, 54944, 55977, 56676, 57299, 57872, 58389, 59059, 59688, + 60237, 60782, 61094, 61573, 61890, 62290, 62658, 63030, 63217, 63454, + 63622, 63882, 64003, 64273, 64427, 64529, 64581, 64697, 64758, 64902, + 65414, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf2Mid[36] = { + 0, 71, 335, 581, 836, 1039, 1323, 1795, 2258, 2608, + 3005, 3591, 4243, 5344, 7163, 10583, 16848, 28078, 49448, 57007, + 60357, 61850, 62837, 63437, 63872, 64188, 64377, 64614, 64774, 64949, + 65039, 65115, 65223, 65360, 65474, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf3Mid[2] = { + 0, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf4Mid[20] = { + 0, 28, 246, 459, 667, 1045, 1523, 2337, 4337, 11347, + 44231, 56709, 60781, 62243, 63161, 63969, 64608, 65062, 65502, 65535}; + +const uint16_t *WebRtcIsac_kQPitchLagCdfPtrMid[4] = {WebRtcIsac_kQPitchLagCdf1Mid, WebRtcIsac_kQPitchLagCdf2Mid, WebRtcIsac_kQPitchLagCdf3Mid, WebRtcIsac_kQPitchLagCdf4Mid}; + +/* size of first cdf table */ +const uint16_t WebRtcIsac_kQPitchLagCdfSizeMid[1] = {256}; + +/* index limits and ranges */ +const int16_t WebRtcIsac_kQIndexLowerLimitLagMid[4] = { +-280, -17, 0, -9}; + +const int16_t WebRtcIsac_kQIndexUpperLimitLagMid[4] = { +-40, 17, 0, 9}; + +/* initial index for arithmetic decoder */ +const uint16_t WebRtcIsac_kQInitIndexLagMid[3] = { + 18, 1, 10}; + +/* mean values of pitch filter lags */ +const double WebRtcIsac_kQMeanLag2Mid[35] = { +-16.89183900, -15.86949778, -15.05476653, -14.00664348, -13.02793036, -12.07324237, -11.00542532, -10.11250602, -8.90792971, -8.02474753, +-7.00426767, -5.94055287, -4.98251338, -3.91053158, -2.98820425, -1.93524245, -0.92978085, -0.01722509, 0.91317387, 1.92973955, + 2.96908851, 3.93728974, 4.96308471, 5.92244151, 7.08673497, 8.00993708, 9.04656316, 9.98538742, 10.97851694, 11.94772884, + 13.02426166, 14.00039951, 15.01347042, 15.80758023, 16.94086895}; + +const double WebRtcIsac_kQMeanLag3Mid[1] = { + 0.00000000}; + +const double WebRtcIsac_kQMeanLag4Mid[19] = { +-8.60409403, -7.89198395, -7.03450280, -5.86260421, -4.93822322, -3.93078706, -2.91302322, -1.91824007, -0.87003282, 0.02822649, + 0.89951758, 1.87495484, 2.91802604, 3.96874074, 5.06571703, 5.93618227, 7.00520185, 7.88497726, 8.64160364}; + +const double WebRtcIsac_kQPitchLagStepsizeMid = 1.000000; + + +/* tables for use with large pitch gain */ + +/* cdf for quantized pitch filter lags */ +const uint16_t WebRtcIsac_kQPitchLagCdf1Hi[511] = { + 0, 7, 18, 33, 69, 105, 156, 228, 315, 612, + 680, 691, 709, 724, 735, 738, 742, 746, 749, 753, + 756, 760, 764, 774, 782, 785, 789, 796, 800, 803, + 807, 814, 818, 822, 829, 832, 847, 854, 858, 869, + 876, 
883, 898, 908, 934, 977, 1010, 1050, 1060, 1064, + 1075, 1078, 1086, 1089, 1093, 1104, 1111, 1122, 1133, 1136, + 1151, 1162, 1183, 1209, 1252, 1281, 1339, 1364, 1386, 1401, + 1411, 1415, 1426, 1430, 1433, 1440, 1448, 1455, 1462, 1477, + 1487, 1495, 1502, 1506, 1509, 1516, 1524, 1531, 1535, 1542, + 1553, 1556, 1578, 1589, 1611, 1625, 1639, 1643, 1654, 1665, + 1672, 1687, 1694, 1705, 1708, 1719, 1730, 1744, 1752, 1759, + 1791, 1795, 1820, 1867, 1886, 1915, 1936, 1943, 1965, 1987, + 2041, 2099, 2161, 2175, 2200, 2211, 2226, 2233, 2244, 2251, + 2266, 2280, 2287, 2298, 2309, 2316, 2331, 2342, 2356, 2378, + 2403, 2418, 2447, 2497, 2544, 2602, 2863, 2895, 2903, 2935, + 2950, 2971, 3004, 3011, 3018, 3029, 3040, 3062, 3087, 3127, + 3152, 3170, 3199, 3243, 3293, 3322, 3340, 3377, 3402, 3427, + 3474, 3518, 3543, 3579, 3601, 3637, 3659, 3706, 3731, 3760, + 3818, 3847, 3869, 3901, 3920, 3952, 4068, 4169, 4220, 4271, + 4524, 4571, 4604, 4632, 4672, 4730, 4777, 4806, 4857, 4904, + 4951, 5002, 5031, 5060, 5107, 5150, 5212, 5266, 5331, 5382, + 5432, 5490, 5544, 5610, 5700, 5762, 5812, 5874, 5972, 6022, + 6091, 6163, 6232, 6305, 6402, 6540, 6685, 6880, 7090, 7271, + 7379, 7452, 7542, 7625, 7687, 7770, 7843, 7911, 7966, 8024, + 8096, 8190, 8252, 8320, 8411, 8501, 8585, 8639, 8751, 8842, + 8918, 8986, 9066, 9127, 9203, 9269, 9345, 9406, 9464, 9536, + 9612, 9667, 9735, 9844, 9931, 10036, 10119, 10199, 10260, 10358, + 10441, 10514, 10666, 10734, 10872, 10951, 11053, 11125, 11223, 11324, + 11516, 11664, 11737, 11816, 11892, 12008, 12120, 12200, 12280, 12392, + 12490, 12576, 12685, 12812, 12917, 13003, 13108, 13210, 13300, 13384, + 13470, 13579, 13673, 13771, 13879, 13999, 14136, 14201, 14368, 14614, + 14759, 14867, 14958, 15030, 15121, 15189, 15280, 15385, 15461, 15555, + 15653, 15768, 15884, 15971, 16069, 16145, 16210, 16279, 16380, 16463, + 16539, 16615, 16688, 16818, 16919, 17017, 18041, 18338, 18523, 18649, + 18790, 18917, 19047, 19167, 19315, 19460, 19601, 19731, 19858, 20068, + 20173, 20318, 20466, 20625, 20741, 20911, 21045, 21201, 21396, 21588, + 21816, 22022, 22305, 22547, 22786, 23072, 23322, 23600, 23879, 24168, + 24433, 24769, 25120, 25511, 25895, 26289, 26792, 27219, 27683, 28077, + 28566, 29094, 29546, 29977, 30491, 30991, 31573, 32105, 32594, 33173, + 33788, 34497, 35181, 35833, 36488, 37255, 37921, 38645, 39275, 39894, + 40505, 41167, 41790, 42431, 43096, 43723, 44385, 45134, 45858, 46607, + 47349, 48091, 48768, 49405, 49955, 50555, 51167, 51985, 52611, 53078, + 53494, 53965, 54435, 54996, 55601, 56125, 56563, 56838, 57244, 57566, + 57967, 58297, 58771, 59093, 59419, 59647, 59886, 60143, 60461, 60693, + 60917, 61170, 61416, 61634, 61891, 62122, 62310, 62455, 62632, 62839, + 63103, 63436, 63639, 63805, 63906, 64015, 64192, 64355, 64475, 64558, + 64663, 64742, 64811, 64865, 64916, 64956, 64981, 65025, 65068, 65115, + 65195, 65314, 65419, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, 65535, + 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf2Hi[68] = { + 0, 7, 11, 22, 37, 52, 56, 59, 81, 85, + 89, 96, 115, 130, 137, 152, 170, 181, 193, 200, + 207, 233, 237, 259, 289, 318, 363, 433, 592, 992, + 1607, 3062, 6149, 12206, 25522, 48368, 58223, 61918, 63640, 64584, + 64943, 65098, 65206, 65268, 65294, 65335, 65350, 65372, 65387, 65402, + 65413, 65420, 65428, 65435, 65439, 65450, 65454, 65468, 
65472, 65476, + 65483, 65491, 65498, 65505, 65516, 65520, 65528, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf3Hi[2] = { + 0, 65535}; + +const uint16_t WebRtcIsac_kQPitchLagCdf4Hi[35] = { + 0, 7, 19, 30, 41, 48, 63, 74, 82, 96, + 122, 152, 215, 330, 701, 2611, 10931, 48106, 61177, 64341, + 65112, 65238, 65309, 65338, 65364, 65379, 65401, 65427, 65453, 65465, + 65476, 65490, 65509, 65528, 65535}; + +const uint16_t *WebRtcIsac_kQPitchLagCdfPtrHi[4] = {WebRtcIsac_kQPitchLagCdf1Hi, WebRtcIsac_kQPitchLagCdf2Hi, WebRtcIsac_kQPitchLagCdf3Hi, WebRtcIsac_kQPitchLagCdf4Hi}; + +/* size of first cdf table */ +const uint16_t WebRtcIsac_kQPitchLagCdfSizeHi[1] = {512}; + +/* index limits and ranges */ +const int16_t WebRtcIsac_kQindexLowerLimitLagHi[4] = { +-552, -34, 0, -16}; + +const int16_t WebRtcIsac_kQindexUpperLimitLagHi[4] = { +-80, 32, 0, 17}; + +/* initial index for arithmetic decoder */ +const uint16_t WebRtcIsac_kQInitIndexLagHi[3] = { + 34, 1, 18}; + +/* mean values of pitch filter lags */ +const double WebRtcIsac_kQMeanLag2Hi[67] = { +-17.07263295, -16.50000000, -15.83966081, -15.55613708, -14.96948007, -14.50000000, -14.00000000, -13.48377986, -13.00000000, -12.50000000, +-11.93199636, -11.44530414, -11.04197641, -10.39910301, -10.15202337, -9.51322461, -8.93357741, -8.46456632, -8.10270672, -7.53751847, +-6.98686404, -6.50000000, -6.08463150, -5.46872991, -5.00864717, -4.50163760, -4.01382410, -3.43856708, -2.96898001, -2.46554810, +-1.96861004, -1.47106701, -0.97197237, -0.46561654, -0.00531409, 0.45767857, 0.96777907, 1.47507903, 1.97740425, 2.46695420, + 3.00695774, 3.47167185, 4.02712538, 4.49280007, 5.01087640, 5.48191963, 6.04916550, 6.51511058, 6.97297819, 7.46565499, + 8.01489405, 8.39912001, 8.91819757, 9.50000000, 10.11654065, 10.50000000, 11.03712583, 11.50000000, 12.00000000, 12.38964346, + 12.89466127, 13.43657881, 13.96013840, 14.46279912, 15.00000000, 15.39412269, 15.96662441}; + +const double WebRtcIsac_kQMeanLag3Hi[1] = { + 0.00000000}; + +const double WebRtcIsac_kQMeanLag4Hi[34] = { +-7.98331221, -7.47988769, -7.03626557, -6.52708003, -6.06982173, -5.51856292, -5.05827033, -4.45909878, -3.99125864, -3.45308135, +-3.02328139, -2.47297273, -1.94341995, -1.44699056, -0.93612243, -0.43012406, 0.01120357, 0.44054812, 0.93199883, 1.45669587, + 1.97218322, 2.50187419, 2.98748690, 3.49343202, 4.01660147, 4.50984306, 5.01402683, 5.58936797, 5.91787793, 6.59998900, + 6.85034315, 7.53503316, 7.87711194, 8.53631648}; + +const double WebRtcIsac_kQPitchLagStepsizeHi = 0.500000; + +/* transform matrix */ +const double WebRtcIsac_kTransform[4][4] = { +{-0.50000000, -0.50000000, -0.50000000, -0.50000000}, +{ 0.67082039, 0.22360680, -0.22360680, -0.67082039}, +{ 0.50000000, -0.50000000, -0.50000000, 0.50000000}, +{ 0.22360680, -0.67082039, 0.67082039, -0.22360680}}; + +/* transpose transform matrix */ +const double WebRtcIsac_kTransformTranspose[4][4] = { +{-0.50000000, 0.67082039, 0.50000000, 0.22360680}, +{-0.50000000, 0.22360680, -0.50000000, -0.67082039}, +{-0.50000000, -0.22360680, -0.50000000, 0.67082039}, +{-0.50000000, -0.67082039, 0.50000000, -0.22360680}}; + diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_lag_tables.h b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_lag_tables.h new file mode 100644 index 0000000..01989f0 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/pitch_lag_tables.h @@ -0,0 +1,114 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * pitch_lag_tables.h + * + * This file contains tables for the pitch filter side-info in the entropy coder. + * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_LAG_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_LAG_TABLES_H_ + +#include "webrtc/typedefs.h" +/* header file for coding tables for the pitch filter side-info in the entropy coder */ +/********************* Pitch Filter Lag Coefficient Tables ************************/ + +/* tables for use with small pitch gain */ + +/* cdfs for quantized pitch lags */ +extern const uint16_t WebRtcIsac_kQPitchLagCdf1Lo[127]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf2Lo[20]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf3Lo[2]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf4Lo[10]; + +extern const uint16_t *WebRtcIsac_kQPitchLagCdfPtrLo[4]; + +/* size of first cdf table */ +extern const uint16_t WebRtcIsac_kQPitchLagCdfSizeLo[1]; + +/* index limits and ranges */ +extern const int16_t WebRtcIsac_kQIndexLowerLimitLagLo[4]; +extern const int16_t WebRtcIsac_kQIndexUpperLimitLagLo[4]; + +/* initial index for arithmetic decoder */ +extern const uint16_t WebRtcIsac_kQInitIndexLagLo[3]; + +/* mean values of pitch filter lags */ +extern const double WebRtcIsac_kQMeanLag2Lo[19]; +extern const double WebRtcIsac_kQMeanLag3Lo[1]; +extern const double WebRtcIsac_kQMeanLag4Lo[9]; + +extern const double WebRtcIsac_kQPitchLagStepsizeLo; + + +/* tables for use with medium pitch gain */ + +/* cdfs for quantized pitch lags */ +extern const uint16_t WebRtcIsac_kQPitchLagCdf1Mid[255]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf2Mid[36]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf3Mid[2]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf4Mid[20]; + +extern const uint16_t *WebRtcIsac_kQPitchLagCdfPtrMid[4]; + +/* size of first cdf table */ +extern const uint16_t WebRtcIsac_kQPitchLagCdfSizeMid[1]; + +/* index limits and ranges */ +extern const int16_t WebRtcIsac_kQIndexLowerLimitLagMid[4]; +extern const int16_t WebRtcIsac_kQIndexUpperLimitLagMid[4]; + +/* initial index for arithmetic decoder */ +extern const uint16_t WebRtcIsac_kQInitIndexLagMid[3]; + +/* mean values of pitch filter lags */ +extern const double WebRtcIsac_kQMeanLag2Mid[35]; +extern const double WebRtcIsac_kQMeanLag3Mid[1]; +extern const double WebRtcIsac_kQMeanLag4Mid[19]; + +extern const double WebRtcIsac_kQPitchLagStepsizeMid; + + +/* tables for use with large pitch gain */ + +/* cdfs for quantized pitch lags */ +extern const uint16_t WebRtcIsac_kQPitchLagCdf1Hi[511]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf2Hi[68]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf3Hi[2]; +extern const uint16_t WebRtcIsac_kQPitchLagCdf4Hi[35]; + +extern const uint16_t *WebRtcIsac_kQPitchLagCdfPtrHi[4]; + +/* size of first cdf table */ +extern const uint16_t WebRtcIsac_kQPitchLagCdfSizeHi[1]; + +/* index limits and ranges */ +extern const int16_t WebRtcIsac_kQindexLowerLimitLagHi[4]; +extern const int16_t WebRtcIsac_kQindexUpperLimitLagHi[4]; + +/* initial index for arithmetic decoder */ +extern const uint16_t WebRtcIsac_kQInitIndexLagHi[3]; + +/* mean values of pitch filter lags */ +extern const double 
WebRtcIsac_kQMeanLag2Hi[67];
+extern const double WebRtcIsac_kQMeanLag3Hi[1];
+extern const double WebRtcIsac_kQMeanLag4Hi[34];
+
+extern const double WebRtcIsac_kQPitchLagStepsizeHi;
+
+/* transform matrix */
+extern const double WebRtcIsac_kTransform[4][4];
+
+/* transpose transform matrix */
+extern const double WebRtcIsac_kTransformTranspose[4][4];
+
+#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_PITCH_LAG_TABLES_H_ */
diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/settings.h b/webrtc/modules/audio_coding/codecs/isac/main/source/settings.h
new file mode 100644
index 0000000..5562c35
--- /dev/null
+++ b/webrtc/modules/audio_coding/codecs/isac/main/source/settings.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * settings.h
+ *
+ * Declaration of #defines used in the iSAC codec
+ *
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_
+
+/* sampling frequency (Hz) */
+#define FS 16000
+
+/* number of samples per frame (either 320 (20ms), 480 (30ms) or 960 (60ms)) */
+#define INITIAL_FRAMESAMPLES 960
+
+
+#define MAXFFTSIZE 2048
+#define NFACTOR 11
+
+
+
+/* do not modify the following; this will have to be modified if we
+ * have a 20ms framesize option */
+/**********************************************************************/
+/* milliseconds */
+#define FRAMESIZE 30
+/* number of samples per frame processed in the encoder, 480 */
+#define FRAMESAMPLES 480 /* ((FRAMESIZE*FS)/1000) */
+#define FRAMESAMPLES_HALF 240
+#define FRAMESAMPLES_QUARTER 120
+/**********************************************************************/
+
+
+
+/* max number of samples per frame (= 60 ms frame) */
+#define MAX_FRAMESAMPLES 960
+#define MAX_SWBFRAMESAMPLES (MAX_FRAMESAMPLES * 2)
+/* number of samples per 10ms frame */
+#define FRAMESAMPLES_10ms ((10*FS)/1000)
+#define SWBFRAMESAMPLES_10ms (FRAMESAMPLES_10ms * 2)
+/* number of samples in 30 ms frame */
+#define FRAMESAMPLES_30ms 480
+/* number of subframes */
+#define SUBFRAMES 6
+/* length of a subframe */
+#define UPDATE 80
+/* length of half a subframe (low/high band) */
+#define HALF_SUBFRAMELEN (UPDATE/2)
+/* samples of look ahead (in a half-band, so actually
+ * half the samples of look ahead @ FS) */
+#define QLOOKAHEAD 24 /* 3 ms */
+/* order of AR model in spectral entropy coder */
+#define AR_ORDER 6
+/* order of LP model in spectral entropy coder */
+#define LP_ORDER 0
+
+/* window length (masking analysis) */
+#define WINLEN 256
+/* order of low-band pole filter used to approximate masking curve */
+#define ORDERLO 12
+/* order of hi-band pole filter used to approximate masking curve */
+#define ORDERHI 6
+
+#define UB_LPC_ORDER 4
+#define UB_LPC_VEC_PER_FRAME 2
+#define UB16_LPC_VEC_PER_FRAME 4
+#define UB_ACTIVE_SUBFRAMES 2
+#define UB_MAX_LPC_ORDER 6
+#define UB_INTERPOL_SEGMENTS 1
+#define UB16_INTERPOL_SEGMENTS 3
+#define LB_TOTAL_DELAY_SAMPLES 48
+enum ISACBandwidth {isac8kHz = 8, isac12kHz = 12, isac16kHz = 16};
+enum ISACBand {kIsacLowerBand = 0, kIsacUpperBand12 = 1, kIsacUpperBand16 = 2};
+enum IsacSamplingRate
{kIsacWideband = 16, kIsacSuperWideband = 32}; +#define UB_LPC_GAIN_DIM SUBFRAMES +#define FB_STATE_SIZE_WORD32 6 + + +/* order for post_filter_bank */ +#define POSTQORDER 3 +/* order for pre-filterbank */ +#define QORDER 3 +/* another order */ +#define QORDER_ALL (POSTQORDER+QORDER-1) +/* for decimator */ +#define ALLPASSSECTIONS 2 + + +/* array size for byte stream in number of bytes. */ +/* The old maximum size still needed for the decoding */ +#define STREAM_SIZE_MAX 600 +#define STREAM_SIZE_MAX_30 200 /* 200 bytes=53.4 kbps @ 30 ms.framelength */ +#define STREAM_SIZE_MAX_60 400 /* 400 bytes=53.4 kbps @ 60 ms.framelength */ + +/* storage size for bit counts */ +#define BIT_COUNTER_SIZE 30 +/* maximum order of any AR model or filter */ +#define MAX_AR_MODEL_ORDER 12//50 + + +/* For pitch analysis */ +#define PITCH_FRAME_LEN (FRAMESAMPLES_HALF) /* 30 ms */ +#define PITCH_MAX_LAG 140 /* 57 Hz */ +#define PITCH_MIN_LAG 20 /* 400 Hz */ +#define PITCH_MAX_GAIN 0.45 +#define PITCH_MAX_GAIN_06 0.27 /* PITCH_MAX_GAIN*0.6 */ +#define PITCH_MAX_GAIN_Q12 1843 +#define PITCH_LAG_SPAN2 (PITCH_MAX_LAG/2-PITCH_MIN_LAG/2+5) +#define PITCH_CORR_LEN2 60 /* 15 ms */ +#define PITCH_CORR_STEP2 (PITCH_FRAME_LEN/4) +#define PITCH_BW 11 /* half the band width of correlation surface */ +#define PITCH_SUBFRAMES 4 +#define PITCH_GRAN_PER_SUBFRAME 5 +#define PITCH_SUBFRAME_LEN (PITCH_FRAME_LEN/PITCH_SUBFRAMES) +#define PITCH_UPDATE (PITCH_SUBFRAME_LEN/PITCH_GRAN_PER_SUBFRAME) +/* maximum number of peaks to be examined in correlation surface */ +#define PITCH_MAX_NUM_PEAKS 10 +#define PITCH_PEAK_DECAY 0.85 +/* For weighting filter */ +#define PITCH_WLPCORDER 6 +#define PITCH_WLPCWINLEN PITCH_FRAME_LEN +#define PITCH_WLPCASYM 0.3 /* asymmetry parameter */ +#define PITCH_WLPCBUFLEN PITCH_WLPCWINLEN +/* For pitch filter */ +/* Extra 50 for fraction and LP filters */ +#define PITCH_BUFFSIZE (PITCH_MAX_LAG + 50) +#define PITCH_INTBUFFSIZE (PITCH_FRAME_LEN+PITCH_BUFFSIZE) +/* Max rel. step for interpolation */ +#define PITCH_UPSTEP 1.5 +/* Max rel. 
step for interpolation */ +#define PITCH_DOWNSTEP 0.67 +#define PITCH_FRACS 8 +#define PITCH_FRACORDER 9 +#define PITCH_DAMPORDER 5 +#define PITCH_FILTDELAY 1.5f +/* stepsize for quantization of the pitch Gain */ +#define PITCH_GAIN_STEPSIZE 0.125 + + + +/* Order of high pass filter */ +#define HPORDER 2 + +/* some mathematical constants */ +/* log2(exp) */ +#define LOG2EXP 1.44269504088896 +#define PI 3.14159265358979 + +/* Maximum number of iterations allowed to limit payload size */ +#define MAX_PAYLOAD_LIMIT_ITERATION 5 + +/* Redundant Coding */ +#define RCU_BOTTLENECK_BPS 16000 +#define RCU_TRANSCODING_SCALE 0.40f +#define RCU_TRANSCODING_SCALE_INVERSE 2.5f + +#define RCU_TRANSCODING_SCALE_UB 0.50f +#define RCU_TRANSCODING_SCALE_UB_INVERSE 2.0f + +#define SIZE_RESAMPLER_STATE 6 + +/* Define Error codes */ +/* 6000 General */ +#define ISAC_MEMORY_ALLOCATION_FAILED 6010 +#define ISAC_MODE_MISMATCH 6020 +#define ISAC_DISALLOWED_BOTTLENECK 6030 +#define ISAC_DISALLOWED_FRAME_LENGTH 6040 +#define ISAC_UNSUPPORTED_SAMPLING_FREQUENCY 6050 + +/* 6200 Bandwidth estimator */ +#define ISAC_RANGE_ERROR_BW_ESTIMATOR 6240 +/* 6400 Encoder */ +#define ISAC_ENCODER_NOT_INITIATED 6410 +#define ISAC_DISALLOWED_CODING_MODE 6420 +#define ISAC_DISALLOWED_FRAME_MODE_ENCODER 6430 +#define ISAC_DISALLOWED_BITSTREAM_LENGTH 6440 +#define ISAC_PAYLOAD_LARGER_THAN_LIMIT 6450 +#define ISAC_DISALLOWED_ENCODER_BANDWIDTH 6460 +/* 6600 Decoder */ +#define ISAC_DECODER_NOT_INITIATED 6610 +#define ISAC_EMPTY_PACKET 6620 +#define ISAC_DISALLOWED_FRAME_MODE_DECODER 6630 +#define ISAC_RANGE_ERROR_DECODE_FRAME_LENGTH 6640 +#define ISAC_RANGE_ERROR_DECODE_BANDWIDTH 6650 +#define ISAC_RANGE_ERROR_DECODE_PITCH_GAIN 6660 +#define ISAC_RANGE_ERROR_DECODE_PITCH_LAG 6670 +#define ISAC_RANGE_ERROR_DECODE_LPC 6680 +#define ISAC_RANGE_ERROR_DECODE_SPECTRUM 6690 +#define ISAC_LENGTH_MISMATCH 6730 +#define ISAC_RANGE_ERROR_DECODE_BANDWITH 6740 +#define ISAC_DISALLOWED_BANDWIDTH_MODE_DECODER 6750 +#define ISAC_DISALLOWED_LPC_MODEL 6760 +/* 6800 Call setup formats */ +#define ISAC_INCOMPATIBLE_FORMATS 6810 + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SETTINGS_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/spectrum_ar_model_tables.c b/webrtc/modules/audio_coding/codecs/isac/main/source/spectrum_ar_model_tables.c new file mode 100644 index 0000000..0f6d889 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/spectrum_ar_model_tables.c @@ -0,0 +1,139 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
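+ */
+
+/*
+ * Editorial note (not part of the upstream file): each AR reflection
+ * coefficient is quantized by locating its cell among the shared boundary
+ * levels, roughly as in this illustrative sketch (not the actual encoder
+ * code):
+ *
+ *   int i = 0;  // rc: reflection coefficient as int16_t
+ *   while (rc > WebRtcIsac_kQArBoundaryLevels[i + 1]) i++;
+ *
+ * The resulting index is entropy-coded using the per-coefficient cdf
+ * (WebRtcIsac_kQArRcCdfPtr) and mapped back to a representation level
+ * (WebRtcIsac_kQArRcLevelsPtr) in the decoder.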
+ */ + +#include "spectrum_ar_model_tables.h" +#include "settings.h" + +/********************* AR Coefficient Tables ************************/ +/* cdf for quantized reflection coefficient 1 */ +const uint16_t WebRtcIsac_kQArRc1Cdf[NUM_AR_RC_QUANT_BAUNDARY] = { + 0, 2, 4, 129, 7707, 57485, 65495, 65527, 65529, 65531, + 65533, 65535}; + +/* cdf for quantized reflection coefficient 2 */ +const uint16_t WebRtcIsac_kQArRc2Cdf[NUM_AR_RC_QUANT_BAUNDARY] = { + 0, 2, 4, 7, 531, 25298, 64525, 65526, 65529, 65531, + 65533, 65535}; + +/* cdf for quantized reflection coefficient 3 */ +const uint16_t WebRtcIsac_kQArRc3Cdf[NUM_AR_RC_QUANT_BAUNDARY] = { + 0, 2, 4, 6, 620, 22898, 64843, 65527, 65529, 65531, + 65533, 65535}; + +/* cdf for quantized reflection coefficient 4 */ +const uint16_t WebRtcIsac_kQArRc4Cdf[NUM_AR_RC_QUANT_BAUNDARY] = { + 0, 2, 4, 6, 35, 10034, 60733, 65506, 65529, 65531, + 65533, 65535}; + +/* cdf for quantized reflection coefficient 5 */ +const uint16_t WebRtcIsac_kQArRc5Cdf[NUM_AR_RC_QUANT_BAUNDARY] = { + 0, 2, 4, 6, 36, 7567, 56727, 65385, 65529, 65531, + 65533, 65535}; + +/* cdf for quantized reflection coefficient 6 */ +const uint16_t WebRtcIsac_kQArRc6Cdf[NUM_AR_RC_QUANT_BAUNDARY] = { + 0, 2, 4, 6, 14, 6579, 57360, 65409, 65529, 65531, + 65533, 65535}; + +/* representation levels for quantized reflection coefficient 1 */ +const int16_t WebRtcIsac_kQArRc1Levels[NUM_AR_RC_QUANT_BAUNDARY - 1] = { + -32104, -29007, -23202, -15496, -9279, -2577, 5934, 17535, 24512, 29503, 32104 +}; + +/* representation levels for quantized reflection coefficient 2 */ +const int16_t WebRtcIsac_kQArRc2Levels[NUM_AR_RC_QUANT_BAUNDARY - 1] = { + -32104, -29503, -23494, -15261, -7309, -1399, 6158, 16381, 24512, 29503, 32104 +}; + +/* representation levels for quantized reflection coefficient 3 */ +const int16_t WebRtcIsac_kQArRc3Levels[NUM_AR_RC_QUANT_BAUNDARY - 1] = { +-32104, -29503, -23157, -15186, -7347, -1359, 5829, 17535, 24512, 29503, 32104 +}; + +/* representation levels for quantized reflection coefficient 4 */ +const int16_t WebRtcIsac_kQArRc4Levels[NUM_AR_RC_QUANT_BAUNDARY - 1] = { +-32104, -29503, -24512, -15362, -6665, -342, 6596, 14585, 24512, 29503, 32104 +}; + +/* representation levels for quantized reflection coefficient 5 */ +const int16_t WebRtcIsac_kQArRc5Levels[NUM_AR_RC_QUANT_BAUNDARY - 1] = { +-32104, -29503, -24512, -15005, -6564, -106, 7123, 14920, 24512, 29503, 32104 +}; + +/* representation levels for quantized reflection coefficient 6 */ +const int16_t WebRtcIsac_kQArRc6Levels[NUM_AR_RC_QUANT_BAUNDARY - 1] = { +-32104, -29503, -24512, -15096, -6656, -37, 7036, 14847, 24512, 29503, 32104 +}; + +/* quantization boundary levels for reflection coefficients */ +const int16_t WebRtcIsac_kQArBoundaryLevels[NUM_AR_RC_QUANT_BAUNDARY] = { +-32768, -31441, -27566, -21458, -13612, -4663, 4663, 13612, 21458, 27566, 31441, +32767 +}; + +/* initial index for AR reflection coefficient quantizer and cdf table search */ +const uint16_t WebRtcIsac_kQArRcInitIndex[6] = { + 5, 5, 5, 5, 5, 5}; + +/* pointers to AR cdf tables */ +const uint16_t *WebRtcIsac_kQArRcCdfPtr[AR_ORDER] = { + WebRtcIsac_kQArRc1Cdf, WebRtcIsac_kQArRc2Cdf, WebRtcIsac_kQArRc3Cdf, + WebRtcIsac_kQArRc4Cdf, WebRtcIsac_kQArRc5Cdf, WebRtcIsac_kQArRc6Cdf +}; + +/* pointers to AR representation levels tables */ +const int16_t *WebRtcIsac_kQArRcLevelsPtr[AR_ORDER] = { + WebRtcIsac_kQArRc1Levels, WebRtcIsac_kQArRc2Levels, WebRtcIsac_kQArRc3Levels, + WebRtcIsac_kQArRc4Levels, WebRtcIsac_kQArRc5Levels, WebRtcIsac_kQArRc6Levels +}; + + 
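
Illustration (not part of the patch): the CDF tables above are cumulative probabilities on a 16-bit scale, so the probability of reflection-coefficient symbol i is (cdf[i + 1] - cdf[i]) / 65535, and WebRtcIsac_kQArRcInitIndex seeds the table search near the mode of each distribution. As a minimal sketch of how a decoder-side lookup over such a table could work (FindRcSymbol is a hypothetical name and a simplification of iSAC's actual recursive arithmetic decoder):

#include <stdint.h>

/* Hypothetical helper: find i such that cdf[i] <= target < cdf[i + 1],
 * starting from the same index as WebRtcIsac_kQArRcInitIndex (5). */
static int FindRcSymbol(const uint16_t cdf[12 /* NUM_AR_RC_QUANT_BAUNDARY */],
                        uint16_t target) {
  int i = 5;  /* initial search index */
  while (i < 10 && cdf[i + 1] <= target)
    ++i;
  while (i > 0 && cdf[i] > target)
    --i;
  return i;  /* symbol probability: (cdf[i + 1] - cdf[i]) / 65535.0 */
}
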
+/******************** GAIN Coefficient Tables ***********************/ +/* cdf for Gain coefficient */ +const uint16_t WebRtcIsac_kQGainCdf[19] = { + 0, 2, 4, 6, 8, 10, 12, 14, 16, 1172, + 11119, 29411, 51699, 64445, 65527, 65529, 65531, 65533, 65535}; + +/* representation levels for quantized squared Gain coefficient */ +const int32_t WebRtcIsac_kQGain2Levels[18] = { +// 17, 28, 46, 76, 128, 215, 364, 709, 1268, 1960, 3405, 6078, 11286, 17827, 51918, 134498, 487432, 2048000}; + 128, 128, 128, 128, 128, 215, 364, 709, 1268, 1960, 3405, 6078, 11286, 17827, 51918, 134498, 487432, 2048000}; +/* quantization boundary levels for squared Gain coefficient */ +const int32_t WebRtcIsac_kQGain2BoundaryLevels[19] = { +0, 21, 35, 59, 99, 166, 280, 475, 815, 1414, 2495, 4505, 8397, 16405, 34431, 81359, 240497, 921600, 0x7FFFFFFF}; + +/* pointers to Gain cdf table */ +const uint16_t *WebRtcIsac_kQGainCdf_ptr[1] = {WebRtcIsac_kQGainCdf}; + +/* Gain initial index for gain quantizer and cdf table search */ +const uint16_t WebRtcIsac_kQGainInitIndex[1] = {11}; + +/************************* Cosine Tables ****************************/ +/* Cosine table */ +const int16_t WebRtcIsac_kCos[6][60] = { +{512, 512, 511, 510, 508, 507, 505, 502, 499, 496, 493, 489, 485, 480, 476, 470, 465, 459, 453, 447, +440, 433, 426, 418, 410, 402, 394, 385, 376, 367, 357, 348, 338, 327, 317, 306, 295, 284, 273, 262, +250, 238, 226, 214, 202, 190, 177, 165, 152, 139, 126, 113, 100, 87, 73, 60, 47, 33, 20, 7}, +{512, 510, 508, 503, 498, 491, 483, 473, 462, 450, 437, 422, 406, 389, 371, 352, 333, 312, 290, 268, +244, 220, 196, 171, 145, 120, 93, 67, 40, 13, -13, -40, -67, -93, -120, -145, -171, -196, -220, -244, +-268, -290, -312, -333, -352, -371, -389, -406, -422, -437, -450, -462, -473, -483, -491, -498, -503, -508, -510, -512}, +{512, 508, 502, 493, 480, 465, 447, 426, 402, 376, 348, 317, 284, 250, 214, 177, 139, 100, 60, 20, +-20, -60, -100, -139, -177, -214, -250, -284, -317, -348, -376, -402, -426, -447, -465, -480, -493, -502, -508, -512, +-512, -508, -502, -493, -480, -465, -447, -426, -402, -376, -348, -317, -284, -250, -214, -177, -139, -100, -60, -20}, +{511, 506, 495, 478, 456, 429, 398, 362, 322, 279, 232, 183, 133, 80, 27, -27, -80, -133, -183, -232, +-279, -322, -362, -398, -429, -456, -478, -495, -506, -511, -511, -506, -495, -478, -456, -429, -398, -362, -322, -279, +-232, -183, -133, -80, -27, 27, 80, 133, 183, 232, 279, 322, 362, 398, 429, 456, 478, 495, 506, 511}, +{511, 502, 485, 459, 426, 385, 338, 284, 226, 165, 100, 33, -33, -100, -165, -226, -284, -338, -385, -426, +-459, -485, -502, -511, -511, -502, -485, -459, -426, -385, -338, -284, -226, -165, -100, -33, 33, 100, 165, 226, +284, 338, 385, 426, 459, 485, 502, 511, 511, 502, 485, 459, 426, 385, 338, 284, 226, 165, 100, 33}, +{510, 498, 473, 437, 389, 333, 268, 196, 120, 40, -40, -120, -196, -268, -333, -389, -437, -473, -498, -510, +-510, -498, -473, -437, -389, -333, -268, -196, -120, -40, 40, 120, 196, 268, 333, 389, 437, 473, 498, 510, +510, 498, 473, 437, 389, 333, 268, 196, 120, 40, -40, -120, -196, -268, -333, -389, -437, -473, -498, -510} +}; diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/spectrum_ar_model_tables.h b/webrtc/modules/audio_coding/codecs/isac/main/source/spectrum_ar_model_tables.h new file mode 100644 index 0000000..989cb36 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/spectrum_ar_model_tables.h @@ -0,0 +1,78 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * spectrum_ar_model_tables.h + * + * This file contains definitions of tables with AR coefficients, + * Gain coefficients and cosine tables. + * + */ + +#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SPECTRUM_AR_MODEL_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SPECTRUM_AR_MODEL_TABLES_H_ + +#include "structs.h" + +#define NUM_AR_RC_QUANT_BAUNDARY 12 + +/********************* AR Coefficient Tables ************************/ +/* cdf for quantized reflection coefficient 1 */ +extern const uint16_t WebRtcIsac_kQArRc1Cdf[NUM_AR_RC_QUANT_BAUNDARY]; + +/* cdf for quantized reflection coefficient 2 */ +extern const uint16_t WebRtcIsac_kQArRc2Cdf[NUM_AR_RC_QUANT_BAUNDARY]; + +/* cdf for quantized reflection coefficient 3 */ +extern const uint16_t WebRtcIsac_kQArRc3Cdf[NUM_AR_RC_QUANT_BAUNDARY]; + +/* cdf for quantized reflection coefficient 4 */ +extern const uint16_t WebRtcIsac_kQArRc4Cdf[NUM_AR_RC_QUANT_BAUNDARY]; + +/* cdf for quantized reflection coefficient 5 */ +extern const uint16_t WebRtcIsac_kQArRc5Cdf[NUM_AR_RC_QUANT_BAUNDARY]; + +/* cdf for quantized reflection coefficient 6 */ +extern const uint16_t WebRtcIsac_kQArRc6Cdf[NUM_AR_RC_QUANT_BAUNDARY]; + +/* quantization boundary levels for reflection coefficients */ +extern const int16_t WebRtcIsac_kQArBoundaryLevels[NUM_AR_RC_QUANT_BAUNDARY]; + +/* initial indices for AR reflection coefficient quantizer and cdf table search */ +extern const uint16_t WebRtcIsac_kQArRcInitIndex[AR_ORDER]; + +/* pointers to AR cdf tables */ +extern const uint16_t *WebRtcIsac_kQArRcCdfPtr[AR_ORDER]; + +/* pointers to AR representation levels tables */ +extern const int16_t *WebRtcIsac_kQArRcLevelsPtr[AR_ORDER]; + + +/******************** GAIN Coefficient Tables ***********************/ +/* cdf for Gain coefficient */ +extern const uint16_t WebRtcIsac_kQGainCdf[19]; + +/* representation levels for quantized Gain coefficient */ +extern const int32_t WebRtcIsac_kQGain2Levels[18]; + +/* squared quantization boundary levels for Gain coefficient */ +extern const int32_t WebRtcIsac_kQGain2BoundaryLevels[19]; + +/* pointer to Gain cdf table */ +extern const uint16_t *WebRtcIsac_kQGainCdf_ptr[1]; + +/* Gain initial index for gain quantizer and cdf table search */ +extern const uint16_t WebRtcIsac_kQGainInitIndex[1]; + +/************************* Cosine Tables ****************************/ +/* Cosine table */ +extern const int16_t WebRtcIsac_kCos[6][60]; + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_SPECTRUM_AR_MODEL_TABLES_H_ */ diff --git a/webrtc/modules/audio_coding/codecs/isac/main/source/structs.h b/webrtc/modules/audio_coding/codecs/isac/main/source/structs.h new file mode 100644 index 0000000..8442878 --- /dev/null +++ b/webrtc/modules/audio_coding/codecs/isac/main/source/structs.h @@ -0,0 +1,498 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+/*
+ * structs.h
+ *
+ * This header file contains all the structs used in the iSAC codec.
+ *
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
+#define WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_
+
+#include "webrtc/modules/audio_coding/codecs/isac/bandwidth_info.h"
+#include "webrtc/modules/audio_coding/codecs/isac/main/interface/isac.h"
+#include "webrtc/modules/audio_coding/codecs/isac/main/source/settings.h"
+#include "webrtc/typedefs.h"
+
+typedef struct Bitstreamstruct {
+
+  uint8_t stream[STREAM_SIZE_MAX];
+  uint32_t W_upper;
+  uint32_t streamval;
+  uint32_t stream_index;
+
+} Bitstr;
+
+typedef struct {
+
+  double DataBufferLo[WINLEN];
+  double DataBufferHi[WINLEN];
+
+  double CorrBufLo[ORDERLO+1];
+  double CorrBufHi[ORDERHI+1];
+
+  float PreStateLoF[ORDERLO+1];
+  float PreStateLoG[ORDERLO+1];
+  float PreStateHiF[ORDERHI+1];
+  float PreStateHiG[ORDERHI+1];
+  float PostStateLoF[ORDERLO+1];
+  float PostStateLoG[ORDERLO+1];
+  float PostStateHiF[ORDERHI+1];
+  float PostStateHiG[ORDERHI+1];
+
+  double OldEnergy;
+
+} MaskFiltstr;
+
+
+typedef struct {
+
+  // state vectors for each of the two analysis filters
+  double INSTAT1[2*(QORDER-1)];
+  double INSTAT2[2*(QORDER-1)];
+  double INSTATLA1[2*(QORDER-1)];
+  double INSTATLA2[2*(QORDER-1)];
+  double INLABUF1[QLOOKAHEAD];
+  double INLABUF2[QLOOKAHEAD];
+
+  float INSTAT1_float[2*(QORDER-1)];
+  float INSTAT2_float[2*(QORDER-1)];
+  float INSTATLA1_float[2*(QORDER-1)];
+  float INSTATLA2_float[2*(QORDER-1)];
+  float INLABUF1_float[QLOOKAHEAD];
+  float INLABUF2_float[QLOOKAHEAD];
+
+  /* High pass filter */
+  double HPstates[HPORDER];
+  float HPstates_float[HPORDER];
+
+} PreFiltBankstr;
+
+
+typedef struct {
+
+  // state vectors for each of the two analysis filters
+  double STATE_0_LOWER[2*POSTQORDER];
+  double STATE_0_UPPER[2*POSTQORDER];
+
+  /* High pass filter */
+  double HPstates1[HPORDER];
+  double HPstates2[HPORDER];
+
+  float STATE_0_LOWER_float[2*POSTQORDER];
+  float STATE_0_UPPER_float[2*POSTQORDER];
+
+  float HPstates1_float[HPORDER];
+  float HPstates2_float[HPORDER];
+
+} PostFiltBankstr;
+
+typedef struct {
+
+  // data buffer for pitch filter
+  double ubuf[PITCH_BUFFSIZE];
+
+  // low pass state vector
+  double ystate[PITCH_DAMPORDER];
+
+  // old lag and gain
+  double oldlagp[1];
+  double oldgainp[1];
+
+} PitchFiltstr;
+
+typedef struct {
+
+  // data buffer
+  double buffer[PITCH_WLPCBUFLEN];
+
+  // state vectors
+  double istate[PITCH_WLPCORDER];
+  double weostate[PITCH_WLPCORDER];
+  double whostate[PITCH_WLPCORDER];
+
+  // LPC window -> should be a global array because it is constant
+  double window[PITCH_WLPCWINLEN];
+
+} WeightFiltstr;
+
+typedef struct {
+
+  // for the initial estimator
+  double dec_buffer[PITCH_CORR_LEN2 + PITCH_CORR_STEP2 +
+                    PITCH_MAX_LAG/2 - PITCH_FRAME_LEN/2+2];
+  double decimator_state[2*ALLPASSSECTIONS+1];
+  double hp_state[2];
+
+  double whitened_buf[QLOOKAHEAD];
+
+  double inbuf[QLOOKAHEAD];
+
+  PitchFiltstr PFstr_wght;
+  PitchFiltstr PFstr;
+  WeightFiltstr Wghtstr;
+
+} PitchAnalysisStruct;
+
+
+
+/* An instance of this struct is kept together with the other iSAC structs. */
+typedef struct {
+
+  /* Previous frame length (in ms) */
+  int32_t prev_frame_length;
+
+  /* Previous RTP timestamp from received
+     packet (in samples, relative to the beginning) */
+  int32_t prev_rec_rtp_number;
+
+  /* Send timestamp for previous packet (in ms using timeGetTime()) */
+  uint32_t prev_rec_send_ts;
+
+  /* Arrival time for previous packet (in ms using timeGetTime()) */
+  uint32_t prev_rec_arr_ts;
+
+  /* rate of previous packet, derived from RTP timestamps (in bits/s) */
+  float prev_rec_rtp_rate;
+
+  /* Time since the last update of the bottleneck estimate (in ms) */
+  uint32_t last_update_ts;
+
+  /* Time since the last reduction (in ms) */
+  uint32_t last_reduction_ts;
+
+  /* How many times the estimate was updated in the beginning */
+  int32_t count_tot_updates_rec;
+
+  /* The estimated bottleneck rate from there to here (in bits/s) */
+  int32_t rec_bw;
+  float rec_bw_inv;
+  float rec_bw_avg;
+  float rec_bw_avg_Q;
+
+  /* The estimated mean absolute jitter value,
+     as seen on this side (in ms) */
+  float rec_jitter;
+  float rec_jitter_short_term;
+  float rec_jitter_short_term_abs;
+  float rec_max_delay;
+  float rec_max_delay_avg_Q;
+
+  /* (assumed) bitrate for headers (bps) */
+  float rec_header_rate;
+
+  /* The estimated bottleneck rate from here to there (in bits/s) */
+  float send_bw_avg;
+
+  /* The estimated mean absolute jitter value, as seen on
+     the other side (in ms) */
+  float send_max_delay_avg;
+
+  // number of packets received since last update
+  int num_pkts_rec;
+
+  int num_consec_rec_pkts_over_30k;
+
+  // flag for marking that a high speed network has been
+  // detected downstream
+  int hsn_detect_rec;
+
+  int num_consec_snt_pkts_over_30k;
+
+  // flag for marking that a high speed network has
+  // been detected upstream
+  int hsn_detect_snd;
+
+  uint32_t start_wait_period;
+
+  int in_wait_period;
+
+  int change_to_WB;
+
+  uint32_t senderTimestamp;
+  uint32_t receiverTimestamp;
+  //enum IsacSamplingRate incomingStreamSampFreq;
+  uint16_t numConsecLatePkts;
+  float consecLatency;
+  int16_t inWaitLatePkts;
+
+  IsacBandwidthInfo external_bw_info;
+} BwEstimatorstr;
+
+
+typedef struct {
+
+  /* boolean, flags if the previous packet exceeded the bottleneck */
+  int PrevExceed;
+  /* ms */
+  int ExceedAgo;
+  /* packets left to send in current burst */
+  int BurstCounter;
+  /* packets */
+  int InitCounter;
+  /* ms remaining in buffer when next packet will be sent */
+  double StillBuffered;
+
+} RateModel;
+
+
+typedef struct {
+
+  unsigned int SpaceAlloced;
+  unsigned int MaxPermAlloced;
+  double Tmp0[MAXFFTSIZE];
+  double Tmp1[MAXFFTSIZE];
+  double Tmp2[MAXFFTSIZE];
+  double Tmp3[MAXFFTSIZE];
+  int Perm[MAXFFTSIZE];
+  int factor [NFACTOR];
+
+} FFTstr;
+
+
+/* The following struct is used to store data from encoding, to make it
+   fast and easy to construct a new bitstream with a different bandwidth
+   estimate. All values (except framelength and minBytes) are double size to
+   handle 60 ms of data.
+*/
+typedef struct {
+
+  /* Used to keep track of whether it is the first or second part of a
+     60 msec packet */
+  int startIdx;
+
+  /* Frame length in samples */
+  int16_t framelength;
+
+  /* Pitch Gain */
+  int pitchGain_index[2];
+
+  /* Pitch Lag */
+  double meanGain[2];
+  int pitchIndex[PITCH_SUBFRAMES*2];
+
+  /* LPC */
+  int LPCindex_s[108*2]; /* KLT_ORDER_SHAPE = 108 */
+  int LPCindex_g[12*2]; /* KLT_ORDER_GAIN = 12 */
+  double LPCcoeffs_lo[(ORDERLO+1)*SUBFRAMES*2];
+  double LPCcoeffs_hi[(ORDERHI+1)*SUBFRAMES*2];
+
+  /* Encode Spec */
+  int16_t fre[FRAMESAMPLES];
+  int16_t fim[FRAMESAMPLES];
+  int16_t AvgPitchGain[2];
+
+  /* Used in adaptive mode only */
+  int minBytes;
+
+} IsacSaveEncoderData;
+
+
+typedef struct {
+
+  int indexLPCShape[UB_LPC_ORDER * UB16_LPC_VEC_PER_FRAME];
+  double lpcGain[SUBFRAMES<<1];
+  int lpcGainIndex[SUBFRAMES<<1];
+
+  Bitstr bitStreamObj;
+
+  int16_t realFFT[FRAMESAMPLES_HALF];
+  int16_t imagFFT[FRAMESAMPLES_HALF];
+} ISACUBSaveEncDataStruct;
+
+
+
+typedef struct {
+
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PreFiltBankstr prefiltbankstr_obj;
+  PitchFiltstr pitchfiltstr_obj;
+  PitchAnalysisStruct pitchanalysisstr_obj;
+  FFTstr fftstr_obj;
+  IsacSaveEncoderData SaveEnc_obj;
+
+  int buffer_index;
+  int16_t current_framesamples;
+
+  float data_buffer_float[FRAMESAMPLES_30ms];
+
+  int frame_nb;
+  double bottleneck;
+  int16_t new_framelength;
+  double s2nr;
+
+  /* Maximum allowed number of bits for a 30 msec packet */
+  int16_t payloadLimitBytes30;
+  /* Maximum allowed number of bits for a 60 msec packet */
+  int16_t payloadLimitBytes60;
+  /* Maximum allowed number of bits for both 30 and 60 msec packet */
+  int16_t maxPayloadBytes;
+  /* Maximum allowed rate in bytes per 30 msec packet */
+  int16_t maxRateInBytes;
+
+  /*---
+    If set to 1, iSAC will not adapt the frame-size, if used in
+    channel-adaptive mode. The initial value will be used for all rates.
+  ---*/
+  int16_t enforceFrameSize;
+
+  /*-----
+    This records the BWE index the encoder injected into the bit-stream.
+    It will be used in RCU; the redundant payload carries the same BWE index
+    as the main payload. We cannot retrieve it from the BWE because doing so
+    is a recursive procedure (WebRtcIsac_GetDownlinkBwJitIndexImpl) that has
+    to be called only once per encode.
+  -----*/
+  int16_t lastBWIdx;
+} ISACLBEncStruct;
+
+typedef struct {
+
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PreFiltBankstr prefiltbankstr_obj;
+  FFTstr fftstr_obj;
+  ISACUBSaveEncDataStruct SaveEnc_obj;
+
+  int buffer_index;
+  float data_buffer_float[MAX_FRAMESAMPLES +
+                          LB_TOTAL_DELAY_SAMPLES];
+  double bottleneck;
+  /* Maximum allowed number of bits for a 30 msec packet */
+  //int16_t payloadLimitBytes30;
+  /* Maximum allowed number of bits for both 30 and 60 msec packet */
+  //int16_t maxPayloadBytes;
+  int16_t maxPayloadSizeBytes;
+
+  double lastLPCVec[UB_LPC_ORDER];
+  int16_t numBytesUsed;
+  int16_t lastJitterInfo;
+} ISACUBEncStruct;
+
+
+
+typedef struct {
+
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PostFiltBankstr postfiltbankstr_obj;
+  PitchFiltstr pitchfiltstr_obj;
+  FFTstr fftstr_obj;
+
+} ISACLBDecStruct;
+
+typedef struct {
+
+  Bitstr bitstr_obj;
+  MaskFiltstr maskfiltstr_obj;
+  PostFiltBankstr postfiltbankstr_obj;
+  FFTstr fftstr_obj;
+
+} ISACUBDecStruct;
+
+
+
+typedef struct {
+
+  ISACLBEncStruct ISACencLB_obj;
+  ISACLBDecStruct ISACdecLB_obj;
+} ISACLBStruct;
+
+
+typedef struct {
+
+  ISACUBEncStruct ISACencUB_obj;
+  ISACUBDecStruct ISACdecUB_obj;
+} ISACUBStruct;
+
+/*
+  This struct is used to take a snapshot of the entropy coder and LPC gains
+  right before encoding LPC gains. This allows us to go back to that state
+  if we need to limit the payload size.
+*/
+typedef struct {
+  /* 6 lower-band & 6 upper-band */
+  double loFiltGain[SUBFRAMES];
+  double hiFiltGain[SUBFRAMES];
+  /* Upper boundary of interval W */
+  uint32_t W_upper;
+  uint32_t streamval;
+  /* Index to the current position in bytestream */
+  uint32_t stream_index;
+  uint8_t stream[3];
+} transcode_obj;
+
+typedef struct {
+  // TODO(kwiberg): The size of these tables could be reduced by storing floats
+  // instead of doubles, and by making use of the identity cos(x) =
+  // sin(x+pi/2). They could also be made global constants that we fill in at
+  // compile time.
+  double costab1[FRAMESAMPLES_HALF];
+  double sintab1[FRAMESAMPLES_HALF];
+  double costab2[FRAMESAMPLES_QUARTER];
+  double sintab2[FRAMESAMPLES_QUARTER];
+} TransformTables;
+
+typedef struct {
+  // lower-band codec instance
+  ISACLBStruct instLB;
+  // upper-band codec instance
+  ISACUBStruct instUB;
+
+  // Bandwidth Estimator and model for the rate.
+  BwEstimatorstr bwestimator_obj;
+  RateModel rate_data_obj;
+  double MaxDelay;
+
+  /* 0 = adaptive; 1 = instantaneous */
+  int16_t codingMode;
+
+  // overall bottleneck of the codec
+  int32_t bottleneck;
+
+  // QMF Filter state
+  int32_t analysisFBState1[FB_STATE_SIZE_WORD32];
+  int32_t analysisFBState2[FB_STATE_SIZE_WORD32];
+  int32_t synthesisFBState1[FB_STATE_SIZE_WORD32];
+  int32_t synthesisFBState2[FB_STATE_SIZE_WORD32];
+
+  // Error Code
+  int16_t errorCode;
+
+  // bandwidth of the encoded audio 8, 12 or 16 kHz
+  enum ISACBandwidth bandwidthKHz;
+  // Sampling rate of audio, encoder and decoder, 8 or 16 kHz
+  enum IsacSamplingRate encoderSamplingRateKHz;
+  enum IsacSamplingRate decoderSamplingRateKHz;
+  // Flag to keep track of initializations, lower & upper-band
+  // encoder and decoder.
+  int16_t initFlag;
+
+  // Flag to indicate signal bandwidth switch
+  int16_t resetFlag_8kHz;
+
+  // Maximum allowed rate, measured in Bytes per 30 ms.
+  int16_t maxRateBytesPer30Ms;
+  // Maximum allowed payload-size, measured in Bytes.
+  int16_t maxPayloadSizeBytes;
+  /* The expected sampling rate of the input signal. Valid values are 16000,
+   * 32000 and 48000.
This is not the operation sampling rate of the codec. + * Input signals at 48 kHz are resampled to 32 kHz, then encoded. */ + uint16_t in_sample_rate_hz; + /* State for the input-resampler. It is only used for 48 kHz input signals. */ + int16_t state_in_resampler[SIZE_RESAMPLER_STATE]; + + // Trig tables for WebRtcIsac_Time2Spec and WebRtcIsac_Spec2time. + TransformTables transform_tables; +} ISACMainStruct; + +#endif /* WEBRTC_MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_STRUCTS_H_ */ diff --git a/webrtc/modules/audio_processing/BUILD.gn b/webrtc/modules/audio_processing/BUILD.gn new file mode 100644 index 0000000..9a45cec --- /dev/null +++ b/webrtc/modules/audio_processing/BUILD.gn @@ -0,0 +1,284 @@ +# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +# +# Use of this source code is governed by a BSD-style license +# that can be found in the LICENSE file in the root of the source +# tree. An additional intellectual property rights grant can be found +# in the file PATENTS. All contributing project authors may +# be found in the AUTHORS file in the root of the source tree. + +import("//build/config/arm.gni") +import("//third_party/protobuf/proto_library.gni") +import("../../build/webrtc.gni") + +declare_args() { + # Outputs some low-level debug files. + aec_debug_dump = false + + # Disables the usual mode where we trust the reported system delay + # values the AEC receives. The corresponding define is set appropriately + # in the code, but it can be force-enabled here for testing. + aec_untrusted_delay_for_testing = false +} + +source_set("audio_processing") { + sources = [ + "aec/aec_core.c", + "aec/aec_core.h", + "aec/aec_core_internal.h", + "aec/aec_rdft.c", + "aec/aec_rdft.h", + "aec/aec_resampler.c", + "aec/aec_resampler.h", + "aec/echo_cancellation.c", + "aec/echo_cancellation_internal.h", + "aec/include/echo_cancellation.h", + "aecm/aecm_core.c", + "aecm/aecm_core.h", + "aecm/echo_control_mobile.c", + "aecm/include/echo_control_mobile.h", + "agc/agc.cc", + "agc/agc.h", + "agc/agc_manager_direct.cc", + "agc/agc_manager_direct.h", + "agc/gain_map_internal.h", + "agc/histogram.cc", + "agc/histogram.h", + "agc/legacy/analog_agc.c", + "agc/legacy/analog_agc.h", + "agc/legacy/digital_agc.c", + "agc/legacy/digital_agc.h", + "agc/legacy/gain_control.h", + "agc/utility.cc", + "agc/utility.h", + "audio_buffer.cc", + "audio_buffer.h", + "audio_processing_impl.cc", + "audio_processing_impl.h", + "beamformer/beamformer.h", + "beamformer/complex_matrix.h", + "beamformer/covariance_matrix_generator.cc", + "beamformer/covariance_matrix_generator.h", + "beamformer/matrix.h", + "beamformer/nonlinear_beamformer.cc", + "beamformer/nonlinear_beamformer.h", + "common.h", + "echo_cancellation_impl.cc", + "echo_cancellation_impl.h", + "echo_control_mobile_impl.cc", + "echo_control_mobile_impl.h", + "gain_control_impl.cc", + "gain_control_impl.h", + "high_pass_filter_impl.cc", + "high_pass_filter_impl.h", + "include/audio_processing.h", + "intelligibility/intelligibility_enhancer.cc", + "intelligibility/intelligibility_enhancer.h", + "intelligibility/intelligibility_utils.cc", + "intelligibility/intelligibility_utils.h", + "level_estimator_impl.cc", + "level_estimator_impl.h", + "logging/aec_logging.h", + "logging/aec_logging_file_handling.cc", + "logging/aec_logging_file_handling.h", + "noise_suppression_impl.cc", + "noise_suppression_impl.h", + "processing_component.cc", + "processing_component.h", + "rms_level.cc", + "rms_level.h", + "splitting_filter.cc", + "splitting_filter.h", + 
"three_band_filter_bank.cc", + "three_band_filter_bank.h", + "transient/common.h", + "transient/daubechies_8_wavelet_coeffs.h", + "transient/dyadic_decimator.h", + "transient/moving_moments.cc", + "transient/moving_moments.h", + "transient/transient_detector.cc", + "transient/transient_detector.h", + "transient/transient_suppressor.cc", + "transient/transient_suppressor.h", + "transient/wpd_node.cc", + "transient/wpd_node.h", + "transient/wpd_tree.cc", + "transient/wpd_tree.h", + "typing_detection.cc", + "typing_detection.h", + "utility/delay_estimator.c", + "utility/delay_estimator.h", + "utility/delay_estimator_internal.h", + "utility/delay_estimator_wrapper.c", + "utility/delay_estimator_wrapper.h", + "vad/common.h", + "vad/gmm.cc", + "vad/gmm.h", + "vad/noise_gmm_tables.h", + "vad/pitch_based_vad.cc", + "vad/pitch_based_vad.h", + "vad/pitch_internal.cc", + "vad/pitch_internal.h", + "vad/pole_zero_filter.cc", + "vad/pole_zero_filter.h", + "vad/standalone_vad.cc", + "vad/standalone_vad.h", + "vad/vad_audio_proc.cc", + "vad/vad_audio_proc.h", + "vad/vad_audio_proc_internal.h", + "vad/vad_circular_buffer.cc", + "vad/vad_circular_buffer.h", + "vad/voice_activity_detector.cc", + "vad/voice_activity_detector.h", + "vad/voice_gmm_tables.h", + "voice_detection_impl.cc", + "voice_detection_impl.h", + ] + + configs += [ "../..:common_config" ] + public_configs = [ "../..:common_inherited_config" ] + + defines = [] + deps = [ + "../..:webrtc_common", + "../audio_coding:isac", + ] + + if (aec_debug_dump) { + defines += [ "WEBRTC_AEC_DEBUG_DUMP" ] + } + + if (aec_untrusted_delay_for_testing) { + defines += [ "WEBRTC_UNTRUSTED_DELAY" ] + } + + if (rtc_enable_protobuf) { + defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ] + deps += [ ":audioproc_debug_proto" ] + } + + if (rtc_prefer_fixed_point) { + defines += [ "WEBRTC_NS_FIXED" ] + sources += [ + "ns/include/noise_suppression_x.h", + "ns/noise_suppression_x.c", + "ns/nsx_core.c", + "ns/nsx_core.h", + "ns/nsx_defines.h", + ] + if (current_cpu == "mipsel") { + sources += [ "ns/nsx_core_mips.c" ] + } else { + sources += [ "ns/nsx_core_c.c" ] + } + } else { + defines += [ "WEBRTC_NS_FLOAT" ] + sources += [ + "ns/defines.h", + "ns/include/noise_suppression.h", + "ns/noise_suppression.c", + "ns/ns_core.c", + "ns/ns_core.h", + "ns/windows_private.h", + ] + } + + if (current_cpu == "x86" || current_cpu == "x64") { + deps += [ ":audio_processing_sse2" ] + } + + if (rtc_build_with_neon) { + deps += [ ":audio_processing_neon" ] + } + + if (current_cpu == "mipsel") { + sources += [ "aecm/aecm_core_mips.c" ] + if (mips_float_abi == "hard") { + sources += [ + "aec/aec_core_mips.c", + "aec/aec_rdft_mips.c", + ] + } + } else { + sources += [ "aecm/aecm_core_c.c" ] + } + + if (is_win) { + cflags = [ + # TODO(jschuh): Bug 1348: fix this warning. + "/wd4267", # size_t to int truncations + ] + } + + if (is_clang) { + # Suppress warnings from Chrome's Clang plugins. + # See http://code.google.com/p/webrtc/issues/detail?id=163 for details. 
+ configs -= [ "//build/config/clang:find_bad_constructs" ] + } + + deps += [ + "../../base:rtc_base_approved", + "../../common_audio", + "../../system_wrappers", + ] +} + +if (rtc_enable_protobuf) { + proto_library("audioproc_debug_proto") { + sources = [ + "debug.proto", + ] + + proto_out_dir = "webrtc/audio_processing" + } +} + +if (current_cpu == "x86" || current_cpu == "x64") { + source_set("audio_processing_sse2") { + sources = [ + "aec/aec_core_sse2.c", + "aec/aec_rdft_sse2.c", + ] + + if (is_posix) { + cflags = [ "-msse2" ] + } + + configs += [ "../..:common_config" ] + public_configs = [ "../..:common_inherited_config" ] + } +} + +if (rtc_build_with_neon) { + source_set("audio_processing_neon") { + sources = [ + "aec/aec_core_neon.c", + "aec/aec_rdft_neon.c", + "aecm/aecm_core_neon.c", + "ns/nsx_core_neon.c", + ] + + if (current_cpu != "arm64") { + # Enable compilation for the NEON instruction set. This is needed + # since //build/config/arm.gni only enables NEON for iOS, not Android. + # This provides the same functionality as webrtc/build/arm_neon.gypi. + configs -= [ "//build/config/compiler:compiler_arm_fpu" ] + cflags = [ "-mfpu=neon" ] + } + + # Disable LTO on NEON targets due to compiler bug. + # TODO(fdegans): Enable this. See crbug.com/408997. + if (rtc_use_lto) { + cflags -= [ + "-flto", + "-ffat-lto-objects", + ] + } + + configs += [ "../..:common_config" ] + public_configs = [ "../..:common_inherited_config" ] + + deps = [ + "../../common_audio", + ] + } +} diff --git a/webrtc/modules/audio_processing/Makefile.am b/webrtc/modules/audio_processing/Makefile.am index bd76942..a39fd68 100644 --- a/webrtc/modules/audio_processing/Makefile.am +++ b/webrtc/modules/audio_processing/Makefile.am @@ -1,26 +1,104 @@ -SUBDIRS = utility ns aec aecm agc lib_LTLIBRARIES = libwebrtc_audio_processing.la -if NS_FIXED -COMMON_CXXFLAGS += -DWEBRTC_NS_FIXED=1 -NS_LIB = libns_fix -else -COMMON_CXXFLAGS += -DWEBRTC_NS_FLOAT=1 -NS_LIB = libns -endif - webrtcincludedir = $(includedir)/webrtc_audio_processing -webrtcinclude_HEADERS = $(top_srcdir)/src/typedefs.h \ - $(top_srcdir)/src/modules/interface/module.h \ - interface/audio_processing.h \ - $(top_srcdir)/src/common_types.h \ - $(top_srcdir)/src/modules/interface/module_common_types.h +webrtcinclude_HEADERS = $(top_srcdir)/webrtc/base/arraysize.h \ + $(top_srcdir)/webrtc/base/platform_file.h \ + $(top_srcdir)/webrtc/common.h \ + $(top_srcdir)/webrtc/typedefs.h \ + $(top_srcdir)/webrtc/modules/audio_processing/beamformer/array_util.h \ + include/audio_processing.h -libwebrtc_audio_processing_la_SOURCES = interface/audio_processing.h \ +libwebrtc_audio_processing_la_SOURCES = include/audio_processing.h \ + aec/include/echo_cancellation.h \ + aec/aec_common.h \ + aec/aec_core.c \ + aec/aec_core.h \ + aec/aec_core_internal.h \ + aec/aec_core_sse2.c \ + aec/aec_rdft.c \ + aec/aec_rdft.h \ + aec/aec_rdft_sse2.c \ + aec/aec_resampler.c \ + aec/aec_resampler.h \ + aec/echo_cancellation.c \ + aec/echo_cancellation_internal.h \ + aecm/include/echo_control_mobile.h \ + aecm/echo_control_mobile.c \ + aecm/aecm_core.c \ + aecm/aecm_core.h \ + aecm/aecm_core_c.c \ + agc/legacy/analog_agc.c \ + agc/legacy/analog_agc.h \ + agc/legacy/gain_control.h \ + agc/legacy/digital_agc.c \ + agc/legacy/digital_agc.h \ + agc/agc.cc \ + agc/agc.h \ + agc/agc_manager_direct.cc \ + agc/agc_manager_direct.h \ + agc/gain_map_internal.h \ + agc/histogram.cc \ + agc/histogram.h \ + agc/utility.cc \ + agc/utility.h \ + beamformer/array_util.h \ + beamformer/beamformer.h \ + 
beamformer/complex_matrix.h \
+	beamformer/covariance_matrix_generator.h \
+	beamformer/matrix.h \
+	beamformer/matrix_test_helpers.h \
+	beamformer/nonlinear_beamformer.h \
+	beamformer/covariance_matrix_generator.cc \
+	beamformer/nonlinear_beamformer.cc \
+	logging/aec_logging.h \
+	logging/aec_logging_file_handling.h \
+	logging/aec_logging_file_handling.cc \
+	transient/common.h \
+	transient/daubechies_8_wavelet_coeffs.h \
+	transient/dyadic_decimator.h \
+	transient/file_utils.h \
+	transient/moving_moments.h \
+	transient/transient_detector.h \
+	transient/transient_suppressor.h \
+	transient/wpd_node.h \
+	transient/wpd_tree.h \
+	transient/click_annotate.cc \
+	transient/file_utils.cc \
+	transient/moving_moments.cc \
+	transient/transient_detector.cc \
+	transient/transient_suppressor.cc \
+	transient/wpd_node.cc \
+	transient/wpd_tree.cc \
+	utility/delay_estimator.c \
+	utility/delay_estimator.h \
+	utility/delay_estimator_internal.h \
+	utility/delay_estimator_wrapper.c \
+	utility/delay_estimator_wrapper.h \
+	vad/common.h \
+	vad/gmm.h \
+	vad/noise_gmm_tables.h \
+	vad/pitch_based_vad.h \
+	vad/pitch_internal.h \
+	vad/pole_zero_filter.h \
+	vad/standalone_vad.h \
+	vad/vad_audio_proc.h \
+	vad/vad_audio_proc_internal.h \
+	vad/vad_circular_buffer.h \
+	vad/voice_activity_detector.h \
+	vad/voice_gmm_tables.h \
+	vad/gmm.cc \
+	vad/pitch_based_vad.cc \
+	vad/pitch_internal.cc \
+	vad/pole_zero_filter.cc \
+	vad/standalone_vad.cc \
+	vad/vad_audio_proc.cc \
+	vad/vad_circular_buffer.cc \
+	vad/voice_activity_detector.cc \
 	audio_buffer.cc \
 	audio_buffer.h \
 	audio_processing_impl.cc \
 	audio_processing_impl.h \
+	common.h \
 	echo_cancellation_impl.cc \
 	echo_cancellation_impl.h \
 	echo_control_mobile_impl.cc \
@@ -33,27 +111,56 @@ libwebrtc_audio_processing_la_SOURCES = interface/audio_processing.h \
 	level_estimator_impl.h \
 	noise_suppression_impl.cc \
 	noise_suppression_impl.h \
+	rms_level.cc \
+	rms_level.h \
 	splitting_filter.cc \
 	splitting_filter.h \
 	processing_component.cc \
 	processing_component.h \
+	three_band_filter_bank.cc \
+	three_band_filter_bank.h \
+	typing_detection.cc \
+	typing_detection.h \
 	voice_detection_impl.cc \
 	voice_detection_impl.h
-libwebrtc_audio_processing_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS) \
-	-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-	-I$(top_srcdir)/src/common_audio/vad/main/interface \
-	-I$(top_srcdir)/src/system_wrappers/interface \
-	-I$(top_srcdir)/src/modules/audio_processing/utility \
-	-I$(top_srcdir)/src/modules/audio_processing/ns/interface \
-	-I$(top_srcdir)/src/modules/audio_processing/aec/interface \
-	-I$(top_srcdir)/src/modules/audio_processing/aecm/interface \
-	-I$(top_srcdir)/src/modules/audio_processing/agc/interface
-libwebrtc_audio_processing_la_LIBADD = $(top_builddir)/src/system_wrappers/libsystem_wrappers.la \
-	$(top_builddir)/src/common_audio/signal_processing_library/libspl.la \
-	$(top_builddir)/src/common_audio/vad/libvad.la \
-	$(top_builddir)/src/modules/audio_processing/utility/libapm_util.la \
-	$(top_builddir)/src/modules/audio_processing/ns/$(NS_LIB).la \
-	$(top_builddir)/src/modules/audio_processing/aec/libaec.la \
-	$(top_builddir)/src/modules/audio_processing/aecm/libaecm.la \
-	$(top_builddir)/src/modules/audio_processing/agc/libagc.la
+
+if NS_FIXED
+COMMON_CXXFLAGS += -DWEBRTC_NS_FIXED=1
+libwebrtc_audio_processing_la_SOURCES += \
+	ns/include/noise_suppression_x.h \
+	ns/noise_suppression_x.c \
+	ns/nsx_defines.h \
+	ns/nsx_core.c \
+	ns/nsx_core.h \
+	ns/nsx_core_c.c
+else
+COMMON_CXXFLAGS += -DWEBRTC_NS_FLOAT=1
+libwebrtc_audio_processing_la_SOURCES += \
+	ns/include/noise_suppression.h \
+	ns/noise_suppression.c \
+	ns/defines.h \
+	ns/ns_core.c \
+	ns/ns_core.h \
+	ns/windows_private.h
+endif
+
+libwebrtc_audio_processing_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS)
+libwebrtc_audio_processing_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS)
+
+libwebrtc_audio_processing_la_LIBADD = $(top_builddir)/webrtc/base/libbase.la \
+	$(top_builddir)/webrtc/system_wrappers/libsystem_wrappers.la \
+	$(top_builddir)/webrtc/common_audio/libcommon_audio.la \
+	$(top_builddir)/webrtc/modules/audio_coding/libaudio_coding.la
 libwebrtc_audio_processing_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(LIBWEBRTC_AUDIO_PROCESSING_VERSION_INFO)
+
+# FIXME:
+# x86:  aec/aec_core_sse2.c
+#       aec/aec_rdft_sse2.c
+# NEON: aec/aec_core_neon.c
+#       aec/aec_rdft_neon.c
+#       aecm/aecm_core_neon.c
+#       ns/nsx_core_neon.c
+# MIPS: aec/aec_core_mips.c
+#       aec/aec_rdft_mips.c
+#       aecm/aecm_core_mips.c
+#       ns/nsx_core_mips.c
diff --git a/webrtc/modules/audio_processing/OWNERS b/webrtc/modules/audio_processing/OWNERS
deleted file mode 100644
index 5a25634..0000000
--- a/webrtc/modules/audio_processing/OWNERS
+++ /dev/null
@@ -1,2 +0,0 @@
-andrew@webrtc.org
-bjornv@webrtc.org
diff --git a/webrtc/modules/audio_processing/aec/Makefile.am b/webrtc/modules/audio_processing/aec/Makefile.am
deleted file mode 100644
index fa77479..0000000
--- a/webrtc/modules/audio_processing/aec/Makefile.am
+++ /dev/null
@@ -1,16 +0,0 @@
-noinst_LTLIBRARIES = libaec.la
-
-libaec_la_SOURCES = interface/echo_cancellation.h \
-	echo_cancellation.c \
-	aec_core.h \
-	aec_core.c \
-	aec_core_sse2.c \
-	aec_rdft.h \
-	aec_rdft.c \
-	aec_rdft_sse2.c \
-	resampler.h \
-	resampler.c
-libaec_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-	-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-	-I$(top_srcdir)/src/system_wrappers/interface \
-	-I$(top_srcdir)/src/modules/audio_processing/utility
diff --git a/webrtc/modules/audio_processing/aec/aec.gypi b/webrtc/modules/audio_processing/aec/aec.gypi
deleted file mode 100644
index 8a99f47..0000000
--- a/webrtc/modules/audio_processing/aec/aec.gypi
+++ /dev/null
@@ -1,40 +0,0 @@
-# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
-#
-# Use of this source code is governed by a BSD-style license
-# that can be found in the LICENSE file in the root of the source
-# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-{
-  'targets': [
-    {
-      'target_name': 'aec',
-      'type': '<(library)',
-      'dependencies': [
-        '<(webrtc_root)/common_audio/common_audio.gyp:spl',
-        'apm_util'
-      ],
-      'include_dirs': [
-        'interface',
-      ],
-      'direct_dependent_settings': {
-        'include_dirs': [
-          'interface',
-        ],
-      },
-      'sources': [
-        'interface/echo_cancellation.h',
-        'echo_cancellation.c',
-        'aec_core.h',
-        'aec_core.c',
-        'aec_core_sse2.c',
-        'aec_rdft.h',
-        'aec_rdft.c',
-        'aec_rdft_sse2.c',
-        'resampler.h',
-        'resampler.c',
-      ],
-    },
-  ],
-}
diff --git a/webrtc/modules/audio_processing/aec/aec_common.h b/webrtc/modules/audio_processing/aec/aec_common.h
new file mode 100644
index 0000000..1e24ca9
--- /dev/null
+++ b/webrtc/modules/audio_processing/aec/aec_common.h
@@ -0,0 +1,32 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
+#include "webrtc/typedefs.h"
+
+#ifdef _MSC_VER /* visual c++ */
+#define ALIGN16_BEG __declspec(align(16))
+#define ALIGN16_END
+#else /* gcc or icc */
+#define ALIGN16_BEG
+#define ALIGN16_END __attribute__((aligned(16)))
+#endif
+
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
+extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];
+extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
+extern const float WebRtcAec_kMinFarendPSD;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
+
diff --git a/webrtc/modules/audio_processing/aec/aec_core.c b/webrtc/modules/audio_processing/aec/aec_core.c
index e01728f..b2162ac 100644
--- a/webrtc/modules/audio_processing/aec/aec_core.c
+++ b/webrtc/modules/audio_processing/aec/aec_core.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  * Use of this source code is governed by a BSD-style license
  * that can be found in the LICENSE file in the root of the source
@@ -12,277 +12,227 @@
  * The core AEC algorithm, which is presented with time-aligned signals.
  */
 
-#include "aec_core.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+#include <stdio.h>
+#endif
+
+#include <assert.h>
 #include <math.h>
+#include <stddef.h>  // size_t
 #include <stdlib.h>
 #include <string.h>
 
-#include "aec_rdft.h"
-#include "delay_estimator_float.h"
-#include "ring_buffer.h"
-#include "system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+#include "webrtc/modules/audio_processing/logging/aec_logging.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
 
-// Noise suppression
-static const int converged = 250;
+
+// Buffer size (samples)
+static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.
 
 // Metrics
 static const int subCountLen = 4;
 static const int countLen = 50;
+static const int kDelayMetricsAggregationWindow = 1250;  // 5 seconds at 16 kHz.
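
Illustration (not part of the patch): both new constants are counted in 64-sample blocks (PART_LEN), the AEC's internal processing unit, so at 16 kHz one block spans 64 / 16000 = 4 ms; kBufSizePartitions = 250 blocks = 1 s and kDelayMetricsAggregationWindow = 1250 blocks = 5 s, matching the inline comments. A minimal sketch of that conversion, where BlocksToMs is a hypothetical helper:

/* Hypothetical helper: convert a count of 64-sample (PART_LEN) blocks
 * to milliseconds at the AEC's 16 kHz processing rate. */
static int BlocksToMs(int num_blocks) {
  return num_blocks * 64 * 1000 / 16000;  /* 250 -> 1000 ms, 1250 -> 5000 ms */
}
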
// Quantities to control H band scaling for SWB input -static const int flagHbandCn = 1; // flag for adding comfort noise in H band -static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band +static const int flagHbandCn = 1; // flag for adding comfort noise in H band +static const float cnScaleHband = + (float)0.4; // scale for comfort noise in H band // Initial bin for averaging nlp gain in low band static const int freqAvgIc = PART_LEN / 2; // Matlab code to produce table: // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); -static const float sqrtHanning[65] = { - 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, - 0.07356456359967f, 0.09801714032956f, 0.12241067519922f, - 0.14673047445536f, 0.17096188876030f, 0.19509032201613f, - 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, - 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, - 0.35989503653499f, 0.38268343236509f, 0.40524131400499f, - 0.42755509343028f, 0.44961132965461f, 0.47139673682600f, - 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, - 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, - 0.61523159058063f, 0.63439328416365f, 0.65317284295378f, - 0.67155895484702f, 0.68954054473707f, 0.70710678118655f, - 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, - 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, - 0.81758481315158f, 0.83146961230255f, 0.84485356524971f, - 0.85772861000027f, 0.87008699110871f, 0.88192126434835f, - 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, - 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, - 0.94952818059304f, 0.95694033573221f, 0.96377606579544f, - 0.97003125319454f, 0.97570213003853f, 0.98078528040323f, - 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, - 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, - 0.99969881869620f, 1.00000000000000f -}; +ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { + 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, + 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, + 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, + 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, + 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, + 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, + 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, + 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, + 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, + 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, + 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, + 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, + 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, + 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, + 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, + 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, + 1.00000000000000f}; // Matlab code to produce table: // weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1]; // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve); -const float WebRtcAec_weightCurve[65] = { - 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, - 0.1845f, 0.1926f, 
0.2000f, 0.2069f, 0.2134f, 0.2195f, - 0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f, - 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, - 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, - 0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f, - 0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f, - 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, - 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, - 0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f, - 0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f -}; +ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = { + 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f, + 0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f, + 0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, + 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f, + 0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f, + 0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, + 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f, + 0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f, + 0.4000f}; // Matlab code to produce table: // overDriveCurve = [sqrt(linspace(0,1,65))' + 1]; // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve); -const float WebRtcAec_overDriveCurve[65] = { - 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, - 1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f, - 1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f, - 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, - 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, - 1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f, - 1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f, - 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, - 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, - 1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f, - 1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f +ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = { + 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f, + 1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f, + 1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, + 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f, + 1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f, + 1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, + 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f, + 1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f, + 2.0000f}; + +// Delay Agnostic AEC parameters, still under development and may change. +static const float kDelayQualityThresholdMax = 0.07f; +static const float kDelayQualityThresholdMin = 0.01f; +static const int kInitialShiftOffset = 5; +#if !defined(WEBRTC_ANDROID) +static const int kDelayCorrectionStart = 1500; // 10 ms chunks +#endif + +// Target suppression levels for nlp modes. +// log{0.001, 0.00001, 0.00000001} +static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f}; + +// Two sets of parameters, one for the extended filter mode. 
+static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f}; +static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f}; +const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.92f, 0.08f}}; +const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, + {0.93f, 0.07f}}; + +// Number of partitions forming the NLP's "preferred" bands. +enum { + kPrefBandSize = 24 }; -// "Private" function prototypes. -static void ProcessBlock(aec_t *aec, const short *farend, - const short *nearend, const short *nearendH, - short *out, short *outH); +#ifdef WEBRTC_AEC_DEBUG_DUMP +extern int webrtc_aec_instance_count; +#endif -static void BufferFar(aec_t *aec, const short *farend, int farLen); -static void FetchFar(aec_t *aec, short *farend, int farLen, int knownDelay); +WebRtcAecFilterFar WebRtcAec_FilterFar; +WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; +WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; +WebRtcAecComfortNoise WebRtcAec_ComfortNoise; +WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; -static void NonLinearProcessing(aec_t *aec, short *output, short *outputH); - -static void GetHighbandGain(const float *lambda, float *nlpGainHband); - -// Comfort_noise also computes noise for H band returned in comfortNoiseHband -static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1], - complex_t *comfortNoiseHband, - const float *noisePow, const float *lambda); - -static void WebRtcAec_InitLevel(power_level_t *level); -static void WebRtcAec_InitStats(stats_t *stats); -static void UpdateLevel(power_level_t *level, const short *in); -static void UpdateMetrics(aec_t *aec); - -__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) -{ - return aRe * bRe - aIm * bIm; +__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { + return aRe * bRe - aIm * bIm; } -__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) -{ - return aRe * bIm + aIm * bRe; +__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { + return aRe * bIm + aIm * bRe; } -static int CmpFloat(const void *a, const void *b) -{ - const float *da = (const float *)a; - const float *db = (const float *)b; +static int CmpFloat(const void* a, const void* b) { + const float* da = (const float*)a; + const float* db = (const float*)b; - return (*da > *db) - (*da < *db); + return (*da > *db) - (*da < *db); } -int WebRtcAec_CreateAec(aec_t **aecInst) -{ - aec_t *aec = malloc(sizeof(aec_t)); - *aecInst = aec; - if (aec == NULL) { - return -1; - } - - if (WebRtcApm_CreateBuffer(&aec->farFrBuf, FRAME_LEN + PART_LEN) == -1) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - - if (WebRtcApm_CreateBuffer(&aec->nearFrBuf, FRAME_LEN + PART_LEN) == -1) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - - if (WebRtcApm_CreateBuffer(&aec->outFrBuf, FRAME_LEN + PART_LEN) == -1) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - - if (WebRtcApm_CreateBuffer(&aec->nearFrBufH, FRAME_LEN + PART_LEN) == -1) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - - if (WebRtcApm_CreateBuffer(&aec->outFrBufH, FRAME_LEN + PART_LEN) == -1) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - - if (WebRtc_CreateDelayEstimatorFloat(&aec->delay_estimator, - PART_LEN1, - kMaxDelay, - 0) == -1) { - WebRtcAec_FreeAec(aec); - aec = NULL; - return -1; - } - - return 0; -} - -int WebRtcAec_FreeAec(aec_t *aec) -{ - if (aec == NULL) { - return -1; - 
}
-
-  WebRtcApm_FreeBuffer(aec->farFrBuf);
-  WebRtcApm_FreeBuffer(aec->nearFrBuf);
-  WebRtcApm_FreeBuffer(aec->outFrBuf);
-
-  WebRtcApm_FreeBuffer(aec->nearFrBufH);
-  WebRtcApm_FreeBuffer(aec->outFrBufH);
-
-  WebRtc_FreeDelayEstimatorFloat(aec->delay_estimator);
-
-  free(aec);
-  return 0;
-}
-
-static void FilterFar(aec_t *aec, float yf[2][PART_LEN1])
-{
+static void FilterFar(AecCore* aec, float yf[2][PART_LEN1]) {
   int i;
-  for (i = 0; i < NR_PART; i++) {
+  for (i = 0; i < aec->num_partitions; i++) {
     int j;
     int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
     int pos = i * PART_LEN1;
 
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= NR_PART) {
-      xPos -= NR_PART*(PART_LEN1);
+    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
+      xPos -= aec->num_partitions * (PART_LEN1);
     }
 
     for (j = 0; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
-      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
+      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
     }
   }
 }
 
-static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
-{
+static void ScaleErrorSignal(AecCore* aec, float ef[2][PART_LEN1]) {
+  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+  const float error_threshold = aec->extended_filter_enabled
+                                    ? kExtendedErrorThreshold
+                                    : aec->normal_error_threshold;
   int i;
-  float absEf;
+  float abs_ef;
 
   for (i = 0; i < (PART_LEN1); i++) {
     ef[0][i] /= (aec->xPow[i] + 1e-10f);
     ef[1][i] /= (aec->xPow[i] + 1e-10f);
-    absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
 
-    if (absEf > aec->errThresh) {
-      absEf = aec->errThresh / (absEf + 1e-10f);
-      ef[0][i] *= absEf;
-      ef[1][i] *= absEf;
+    if (abs_ef > error_threshold) {
+      abs_ef = error_threshold / (abs_ef + 1e-10f);
+      ef[0][i] *= abs_ef;
+      ef[1][i] *= abs_ef;
     }
 
     // Stepsize factor
-    ef[0][i] *= aec->mu;
-    ef[1][i] *= aec->mu;
+    ef[0][i] *= mu;
+    ef[1][i] *= mu;
   }
 }
 
 // Time-unconstrained filter adaptation.
 // TODO(andrew): consider for a low-complexity mode.
-//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft, +// static void FilterAdaptationUnconstrained(AecCore* aec, float *fft, // float ef[2][PART_LEN1]) { // int i, j; -// for (i = 0; i < NR_PART; i++) { +// for (i = 0; i < aec->num_partitions; i++) { // int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); // int pos; // // Check for wrap -// if (i + aec->xfBufBlockPos >= NR_PART) { -// xPos -= NR_PART * PART_LEN1; +// if (i + aec->xfBufBlockPos >= aec->num_partitions) { +// xPos -= aec->num_partitions * PART_LEN1; // } // // pos = i * PART_LEN1; // // for (j = 0; j < PART_LEN1; j++) { -// aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0], -// -aec->xfBuf[xPos + j][1], -// ef[j][0], ef[j][1]); -// aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0], -// -aec->xfBuf[xPos + j][1], -// ef[j][0], ef[j][1]); +// aec->wfBuf[0][pos + j] += MulRe(aec->xfBuf[0][xPos + j], +// -aec->xfBuf[1][xPos + j], +// ef[0][j], ef[1][j]); +// aec->wfBuf[1][pos + j] += MulIm(aec->xfBuf[0][xPos + j], +// -aec->xfBuf[1][xPos + j], +// ef[0][j], ef[1][j]); // } // } //} -static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { +static void FilterAdaptation(AecCore* aec, float* fft, float ef[2][PART_LEN1]) { int i, j; - for (i = 0; i < NR_PART; i++) { - int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + for (i = 0; i < aec->num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); int pos; // Check for wrap - if (i + aec->xfBufBlockPos >= NR_PART) { - xPos -= NR_PART * PART_LEN1; + if (i + aec->xfBufBlockPos >= aec->num_partitions) { + xPos -= aec->num_partitions * PART_LEN1; } pos = i * PART_LEN1; @@ -291,14 +241,17 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j], -aec->xfBuf[1][xPos + j], - ef[0][j], ef[1][j]); + ef[0][j], + ef[1][j]); fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j], -aec->xfBuf[1][xPos + j], - ef[0][j], ef[1][j]); + ef[0][j], + ef[1][j]); } fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], ef[1][PART_LEN]); + ef[0][PART_LEN], + ef[1][PART_LEN]); aec_rdft_inverse_128(fft); memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); @@ -322,7 +275,8 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { } } -static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1], +static void OverdriveAndSuppress(AecCore* aec, + float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]) { int i; @@ -330,7 +284,7 @@ static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1], // Weight subbands if (hNl[i] > hNlFb) { hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + - (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; } hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); @@ -344,1123 +298,1632 @@ static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1], } } -WebRtcAec_FilterFar_t WebRtcAec_FilterFar; -WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal; -WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation; -WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress; +static int PartitionDelay(const AecCore* aec) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? 
+ float wfEnMax = 0; + int i; + int delay = 0; -int WebRtcAec_InitAec(aec_t *aec, int sampFreq) -{ - int i; - - aec->sampFreq = sampFreq; - - if (sampFreq == 8000) { - aec->mu = 0.6f; - aec->errThresh = 2e-6f; - } - else { - aec->mu = 0.5f; - aec->errThresh = 1.5e-6f; + for (i = 0; i < aec->num_partitions; i++) { + int j; + int pos = i * PART_LEN1; + float wfEn = 0; + for (j = 0; j < PART_LEN1; j++) { + wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + + aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; } - if (WebRtcApm_InitBuffer(aec->farFrBuf) == -1) { - return -1; + if (wfEn > wfEnMax) { + wfEnMax = wfEn; + delay = i; } - - if (WebRtcApm_InitBuffer(aec->nearFrBuf) == -1) { - return -1; - } - - if (WebRtcApm_InitBuffer(aec->outFrBuf) == -1) { - return -1; - } - - if (WebRtcApm_InitBuffer(aec->nearFrBufH) == -1) { - return -1; - } - - if (WebRtcApm_InitBuffer(aec->outFrBufH) == -1) { - return -1; - } - - if (WebRtc_InitDelayEstimatorFloat(aec->delay_estimator) != 0) { - return -1; - } - aec->delay_logging_enabled = 0; - memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); - - // Default target suppression level - aec->targetSupp = -11.5; - aec->minOverDrive = 2.0; - - // Sampling frequency multiplier - // SWB is processed as 160 frame size - if (aec->sampFreq == 32000) { - aec->mult = (short)aec->sampFreq / 16000; - } - else { - aec->mult = (short)aec->sampFreq / 8000; - } - - aec->farBufWritePos = 0; - aec->farBufReadPos = 0; - - aec->inSamples = 0; - aec->outSamples = 0; - aec->knownDelay = 0; - - // Initialize buffers - memset(aec->farBuf, 0, sizeof(aec->farBuf)); - memset(aec->xBuf, 0, sizeof(aec->xBuf)); - memset(aec->dBuf, 0, sizeof(aec->dBuf)); - memset(aec->eBuf, 0, sizeof(aec->eBuf)); - // For H band - memset(aec->dBufH, 0, sizeof(aec->dBufH)); - - memset(aec->xPow, 0, sizeof(aec->xPow)); - memset(aec->dPow, 0, sizeof(aec->dPow)); - memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); - aec->noisePow = aec->dInitMinPow; - aec->noiseEstCtr = 0; - - // Initial comfort noise power - for (i = 0; i < PART_LEN1; i++) { - aec->dMinPow[i] = 1.0e6f; - } - - // Holds the last block written to - aec->xfBufBlockPos = 0; - // TODO: Investigate need for these initializations. Deleting them doesn't - // change the output at all and yields 0.4% overall speedup. - memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1); - memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1); - memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); - memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); - memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1); - memset(aec->se, 0, sizeof(float) * PART_LEN1); - - // To prevent numerical instability in the first block. 
- for (i = 0; i < PART_LEN1; i++) { - aec->sd[i] = 1; - } - for (i = 0; i < PART_LEN1; i++) { - aec->sx[i] = 1; - } - - memset(aec->hNs, 0, sizeof(aec->hNs)); - memset(aec->outBuf, 0, sizeof(float) * PART_LEN); - - aec->hNlFbMin = 1; - aec->hNlFbLocalMin = 1; - aec->hNlXdAvgMin = 1; - aec->hNlNewMin = 0; - aec->hNlMinCtr = 0; - aec->overDrive = 2; - aec->overDriveSm = 2; - aec->delayIdx = 0; - aec->stNearState = 0; - aec->echoState = 0; - aec->divergeState = 0; - - aec->seed = 777; - aec->delayEstCtr = 0; - - // Metrics disabled by default - aec->metricsMode = 0; - WebRtcAec_InitMetrics(aec); - - // Assembly optimization - WebRtcAec_FilterFar = FilterFar; - WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; - WebRtcAec_FilterAdaptation = FilterAdaptation; - WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; - if (WebRtc_GetCPUInfo(kSSE2)) { -#if defined(WEBRTC_USE_SSE2) - WebRtcAec_InitAec_SSE2(); -#endif - } - aec_rdft_init(); - - return 0; + } + return delay; } -void WebRtcAec_InitMetrics(aec_t *aec) -{ - aec->stateCounter = 0; - WebRtcAec_InitLevel(&aec->farlevel); - WebRtcAec_InitLevel(&aec->nearlevel); - WebRtcAec_InitLevel(&aec->linoutlevel); - WebRtcAec_InitLevel(&aec->nlpoutlevel); +// Threshold to protect against the ill-effects of a zero far-end. +const float WebRtcAec_kMinFarendPSD = 15; - WebRtcAec_InitStats(&aec->erl); - WebRtcAec_InitStats(&aec->erle); - WebRtcAec_InitStats(&aec->aNlp); - WebRtcAec_InitStats(&aec->rerl); +// Updates the following smoothed Power Spectral Densities (PSD): +// - sd : near-end +// - se : residual echo +// - sx : far-end +// - sde : cross-PSD of near-end and residual echo +// - sxd : cross-PSD of near-end and far-end +// +// In addition to updating the PSDs, also the filter diverge state is determined +// upon actions are taken. +static void SmoothedPSD(AecCore* aec, + float efw[2][PART_LEN1], + float dfw[2][PART_LEN1], + float xfw[2][PART_LEN1]) { + // Power estimate smoothing coefficients. + const float* ptrGCoh = aec->extended_filter_enabled + ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] + : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; + int i; + float sdSum = 0, seSum = 0; + + for (i = 0; i < PART_LEN1; i++) { + aec->sd[i] = ptrGCoh[0] * aec->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + aec->se[i] = ptrGCoh[0] * aec->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO(bjornv): investigate further why this is so sensitive. + aec->sx[i] = + ptrGCoh[0] * aec->sx[i] + + ptrGCoh[1] * WEBRTC_SPL_MAX( + xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + aec->sde[i][0] = + ptrGCoh[0] * aec->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + aec->sde[i][1] = + ptrGCoh[0] * aec->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + aec->sxd[i][0] = + ptrGCoh[0] * aec->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + aec->sxd[i][1] = + ptrGCoh[0] * aec->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += aec->sd[i]; + seSum += aec->se[i]; + } + + // Divergent filter safeguard. + aec->divergeState = (aec->divergeState ? 
1.05f : 1.0f) * seSum > sdSum; + + if (aec->divergeState) + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + + // Reset if error is significantly larger than nearend (13 dB). + if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); } - -void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend, - const short *nearend, const short *nearendH, - short *out, short *outH, - int knownDelay) -{ - short farBl[PART_LEN], nearBl[PART_LEN], outBl[PART_LEN]; - short farFr[FRAME_LEN]; - // For H band - short nearBlH[PART_LEN], outBlH[PART_LEN]; - - int size = 0; - - // initialize: only used for SWB - memset(nearBlH, 0, sizeof(nearBlH)); - memset(outBlH, 0, sizeof(outBlH)); - - // Buffer the current frame. - // Fetch an older one corresponding to the delay. - BufferFar(aec, farend, FRAME_LEN); - FetchFar(aec, farFr, FRAME_LEN, knownDelay); - - // Buffer the synchronized far and near frames, - // to pass the smaller blocks individually. - WebRtcApm_WriteBuffer(aec->farFrBuf, farFr, FRAME_LEN); - WebRtcApm_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN); - // For H band - if (aec->sampFreq == 32000) { - WebRtcApm_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN); - } - - // Process as many blocks as possible. - while (WebRtcApm_get_buffer_size(aec->farFrBuf) >= PART_LEN) { - - WebRtcApm_ReadBuffer(aec->farFrBuf, farBl, PART_LEN); - WebRtcApm_ReadBuffer(aec->nearFrBuf, nearBl, PART_LEN); - - // For H band - if (aec->sampFreq == 32000) { - WebRtcApm_ReadBuffer(aec->nearFrBufH, nearBlH, PART_LEN); - } - - ProcessBlock(aec, farBl, nearBl, nearBlH, outBl, outBlH); - - WebRtcApm_WriteBuffer(aec->outFrBuf, outBl, PART_LEN); - // For H band - if (aec->sampFreq == 32000) { - WebRtcApm_WriteBuffer(aec->outFrBufH, outBlH, PART_LEN); - } - } - - // Stuff the out buffer if we have less than a frame to output. - // This should only happen for the first frame. - size = WebRtcApm_get_buffer_size(aec->outFrBuf); - if (size < FRAME_LEN) { - WebRtcApm_StuffBuffer(aec->outFrBuf, FRAME_LEN - size); - if (aec->sampFreq == 32000) { - WebRtcApm_StuffBuffer(aec->outFrBufH, FRAME_LEN - size); - } - } - - // Obtain an output frame. - WebRtcApm_ReadBuffer(aec->outFrBuf, out, FRAME_LEN); - // For H band - if (aec->sampFreq == 32000) { - WebRtcApm_ReadBuffer(aec->outFrBufH, outH, FRAME_LEN); - } +// Window time domain data to be used by the fft. +__inline static void WindowData(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i++) { + x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i]; + x_windowed[PART_LEN + i] = + x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; + } } -static void ProcessBlock(aec_t *aec, const short *farend, - const short *nearend, const short *nearendH, - short *output, short *outputH) -{ - int i; - float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN]; - short eInt16[PART_LEN]; - float scale; - - float fft[PART_LEN2]; - float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; - complex_t df[PART_LEN1]; - float far_spectrum = 0.0f; - float near_spectrum = 0.0f; - float abs_far_spectrum[PART_LEN1]; - float abs_near_spectrum[PART_LEN1]; - - const float gPow[2] = {0.9f, 0.1f}; - - // Noise estimate constants. 
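A note on the new SmoothedPSD above (the deleted ProcessBlock continues below): every auto- and cross-spectrum is tracked with the same first-order recursion s[i] = g0 * s[i] + g1 * p[i], where p[i] is the instantaneous (cross-)power of the current windowed block and (g0, g1) sum to one; the far-end power is floored at WebRtcAec_kMinFarendPSD first. The divergence safeguard then compares the summed error PSD against the summed near-end PSD with a 1.05 hysteresis factor and, when the filter is deemed divergent, copies the near-end spectrum over the error. A tiny sketch of the recursion, using the (0.9, 0.1) pair that the deleted gCoh table used at 8 kHz (illustrative values):

    #include <stdio.h>

    /* One step of the leaky-integrator smoothing used for the PSDs:
       smoothed = g0 * smoothed + g1 * instant, with g0 + g1 == 1. */
    static float SmoothPsdStep(float smoothed, float instant,
                               float g0, float g1) {
      return g0 * smoothed + g1 * instant;
    }

    int main(void) {
      float s = 0.0f;
      int n;
      for (n = 0; n < 20; n++) {
        s = SmoothPsdStep(s, 4.0f, 0.9f, 0.1f);
      }
      printf("smoothed power after 20 blocks: %f\n", s); /* approaches 4 */
      return 0;
    }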
- const int noiseInitBlocks = 500 * aec->mult; - const float step = 0.1f; - const float ramp = 1.0002f; - const float gInitNoise[2] = {0.999f, 0.001f}; - -#ifdef AEC_DEBUG - fwrite(farend, sizeof(short), PART_LEN, aec->farFile); - fwrite(nearend, sizeof(short), PART_LEN, aec->nearFile); -#endif - - memset(dH, 0, sizeof(dH)); - - // ---------- Ooura fft ---------- - // Concatenate old and new farend blocks. - for (i = 0; i < PART_LEN; i++) { - aec->xBuf[i + PART_LEN] = (float)farend[i]; - d[i] = (float)nearend[i]; - } - - if (aec->sampFreq == 32000) { - for (i = 0; i < PART_LEN; i++) { - dH[i] = (float)nearendH[i]; - } - } - - memcpy(fft, aec->xBuf, sizeof(float) * PART_LEN2); - memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN); - // For H band - if (aec->sampFreq == 32000) { - memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN); - } - - aec_rdft_forward_128(fft); - - // Far fft - xf[1][0] = 0; - xf[1][PART_LEN] = 0; - xf[0][0] = fft[0]; - xf[0][PART_LEN] = fft[1]; - - for (i = 1; i < PART_LEN; i++) { - xf[0][i] = fft[2 * i]; - xf[1][i] = fft[2 * i + 1]; - } - - // Near fft - memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); - aec_rdft_forward_128(fft); - df[0][1] = 0; - df[PART_LEN][1] = 0; - df[0][0] = fft[0]; - df[PART_LEN][0] = fft[1]; - - for (i = 1; i < PART_LEN; i++) { - df[i][0] = fft[2 * i]; - df[i][1] = fft[2 * i + 1]; - } - - // Power smoothing - for (i = 0; i < PART_LEN1; i++) { - far_spectrum = xf[0][i] * xf[0][i] + xf[1][i] * xf[1][i]; - aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum; - // Calculate absolute spectra - abs_far_spectrum[i] = sqrtf(far_spectrum); - - near_spectrum = df[i][0] * df[i][0] + df[i][1] * df[i][1]; - aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; - // Calculate absolute spectra - abs_near_spectrum[i] = sqrtf(near_spectrum); - } - - // Estimate noise power. Wait until dPow is more stable. - if (aec->noiseEstCtr > 50) { - for (i = 0; i < PART_LEN1; i++) { - if (aec->dPow[i] < aec->dMinPow[i]) { - aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] - - aec->dPow[i])) * ramp; - } - else { - aec->dMinPow[i] *= ramp; - } - } - } - - // Smooth increasing noise power from zero at the start, - // to avoid a sudden burst of comfort noise. - if (aec->noiseEstCtr < noiseInitBlocks) { - aec->noiseEstCtr++; - for (i = 0; i < PART_LEN1; i++) { - if (aec->dMinPow[i] > aec->dInitMinPow[i]) { - aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + - gInitNoise[1] * aec->dMinPow[i]; - } - else { - aec->dInitMinPow[i] = aec->dMinPow[i]; - } - } - aec->noisePow = aec->dInitMinPow; - } - else { - aec->noisePow = aec->dMinPow; - } - - // Block wise delay estimation used for logging - if (aec->delay_logging_enabled) { - int delay_estimate = 0; - // Estimate the delay - delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator, - abs_far_spectrum, - abs_near_spectrum, - PART_LEN1, - aec->echoState); - if (delay_estimate >= 0) { - // Update delay estimate buffer - aec->delay_histogram[delay_estimate]++; - } - } - - // Update the xfBuf block position. 
- aec->xfBufBlockPos--; - if (aec->xfBufBlockPos == -1) { - aec->xfBufBlockPos = NR_PART - 1; - } - - // Buffer xf - memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf[0], - sizeof(float) * PART_LEN1); - memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, xf[1], - sizeof(float) * PART_LEN1); - - memset(yf[0], 0, sizeof(float) * (PART_LEN1 * 2)); - - // Filter far - WebRtcAec_FilterFar(aec, yf); - - // Inverse fft to obtain echo estimate and error. - fft[0] = yf[0][0]; - fft[1] = yf[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2 * i] = yf[0][i]; - fft[2 * i + 1] = yf[1][i]; - } - aec_rdft_inverse_128(fft); - - scale = 2.0f / PART_LEN2; - for (i = 0; i < PART_LEN; i++) { - y[i] = fft[PART_LEN + i] * scale; // fft scaling - } - - for (i = 0; i < PART_LEN; i++) { - e[i] = d[i] - y[i]; - } - - // Error fft - memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); - memset(fft, 0, sizeof(float) * PART_LEN); - memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); - aec_rdft_forward_128(fft); - - ef[1][0] = 0; - ef[1][PART_LEN] = 0; - ef[0][0] = fft[0]; - ef[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - ef[0][i] = fft[2 * i]; - ef[1][i] = fft[2 * i + 1]; - } - - // Scale error signal inversely with far power. - WebRtcAec_ScaleErrorSignal(aec, ef); - WebRtcAec_FilterAdaptation(aec, fft, ef); - NonLinearProcessing(aec, output, outputH); - -#ifdef AEC_DEBUG - for (i = 0; i < PART_LEN; i++) { - eInt16[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i], - WEBRTC_SPL_WORD16_MIN); - } -#endif - - if (aec->metricsMode == 1) { - for (i = 0; i < PART_LEN; i++) { - eInt16[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i], - WEBRTC_SPL_WORD16_MIN); - } - - // Update power levels and echo metrics - UpdateLevel(&aec->farlevel, farend); - UpdateLevel(&aec->nearlevel, nearend); - UpdateLevel(&aec->linoutlevel, eInt16); - UpdateLevel(&aec->nlpoutlevel, output); - UpdateMetrics(aec); - } - -#ifdef AEC_DEBUG - fwrite(eInt16, sizeof(short), PART_LEN, aec->outLpFile); - fwrite(output, sizeof(short), PART_LEN, aec->outFile); -#endif +// Puts fft output data into a complex valued array. +__inline static void StoreAsComplex(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + data_complex[0][0] = data[0]; + data_complex[1][0] = 0; + for (i = 1; i < PART_LEN; i++) { + data_complex[0][i] = data[2 * i]; + data_complex[1][i] = data[2 * i + 1]; + } + data_complex[0][PART_LEN] = data[1]; + data_complex[1][PART_LEN] = 0; } -static void NonLinearProcessing(aec_t *aec, short *output, short *outputH) -{ - float efw[2][PART_LEN1], dfw[2][PART_LEN1]; - complex_t xfw[PART_LEN1]; - complex_t comfortNoiseHband[PART_LEN1]; - float fft[PART_LEN2]; - float scale, dtmp; - float nlpGainHband; - int i, j, pos; +static void SubbandCoherence(AecCore* aec, + float efw[2][PART_LEN1], + float xfw[2][PART_LEN1], + float* fft, + float* cohde, + float* cohxd) { + float dfw[2][PART_LEN1]; + int i; - // Coherence and non-linear filter - float cohde[PART_LEN1], cohxd[PART_LEN1]; - float hNlDeAvg, hNlXdAvg; - float hNl[PART_LEN1]; - float hNlPref[PREF_BAND_SIZE]; - float hNlFb = 0, hNlFbLow = 0; - const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; - const int prefBandSize = PREF_BAND_SIZE / aec->mult; - const int minPrefBand = 4 / aec->mult; + if (aec->delayEstCtr == 0) + aec->delayIdx = PartitionDelay(aec); - // Near and error power sums - float sdSum = 0, seSum = 0; + // Use delayed far. 
+ memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); - // Power estimate smoothing coefficients - const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}}; - const float *ptrGCoh = gCoh[aec->mult - 1]; + // Windowed near fft + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); - // Filter energey - float wfEnMax = 0, wfEn = 0; - const int delayEstInterval = 10 * aec->mult; + // Windowed error fft + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); - aec->delayEstCtr++; - if (aec->delayEstCtr == delayEstInterval) { - aec->delayEstCtr = 0; - } + SmoothedPSD(aec, efw, dfw, xfw); - // initialize comfort noise for H band - memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); - nlpGainHband = (float)0.0; - dtmp = (float)0.0; - - // Measure energy in each filter partition to determine delay. - // TODO: Spread by computing one partition per block? - if (aec->delayEstCtr == 0) { - wfEnMax = 0; - aec->delayIdx = 0; - for (i = 0; i < NR_PART; i++) { - pos = i * PART_LEN1; - wfEn = 0; - for (j = 0; j < PART_LEN1; j++) { - wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + - aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; - } - - if (wfEn > wfEnMax) { - wfEnMax = wfEn; - aec->delayIdx = i; - } - } - } - - // NLP - // Windowed far fft - for (i = 0; i < PART_LEN; i++) { - fft[i] = aec->xBuf[i] * sqrtHanning[i]; - fft[PART_LEN + i] = aec->xBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; - } - aec_rdft_forward_128(fft); - - xfw[0][1] = 0; - xfw[PART_LEN][1] = 0; - xfw[0][0] = fft[0]; - xfw[PART_LEN][0] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - xfw[i][0] = fft[2 * i]; - xfw[i][1] = fft[2 * i + 1]; - } - - // Buffer far. - memcpy(aec->xfwBuf, xfw, sizeof(xfw)); - - // Use delayed far. - memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw)); - - // Windowed near fft - for (i = 0; i < PART_LEN; i++) { - fft[i] = aec->dBuf[i] * sqrtHanning[i]; - fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; - } - aec_rdft_forward_128(fft); - - dfw[1][0] = 0; - dfw[1][PART_LEN] = 0; - dfw[0][0] = fft[0]; - dfw[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - dfw[0][i] = fft[2 * i]; - dfw[1][i] = fft[2 * i + 1]; - } - - // Windowed error fft - for (i = 0; i < PART_LEN; i++) { - fft[i] = aec->eBuf[i] * sqrtHanning[i]; - fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i]; - } - aec_rdft_forward_128(fft); - efw[1][0] = 0; - efw[1][PART_LEN] = 0; - efw[0][0] = fft[0]; - efw[0][PART_LEN] = fft[1]; - for (i = 1; i < PART_LEN; i++) { - efw[0][i] = fft[2 * i]; - efw[1][i] = fft[2 * i + 1]; - } - - // Smoothed PSD - for (i = 0; i < PART_LEN1; i++) { - aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] * - (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); - aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] * - (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); - // We threshold here to protect against the ill-effects of a zero farend. - // The threshold is not arbitrarily chosen, but balances protection and - // adverse interaction with the algorithm's tuning. - // TODO: investigate further why this is so sensitive. 
- aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] * - WEBRTC_SPL_MAX(xfw[i][0] * xfw[i][0] + xfw[i][1] * xfw[i][1], 15); - - aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] * - (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); - aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] * - (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); - - aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] * - (dfw[0][i] * xfw[i][0] + dfw[1][i] * xfw[i][1]); - aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] * - (dfw[0][i] * xfw[i][1] - dfw[1][i] * xfw[i][0]); - - sdSum += aec->sd[i]; - seSum += aec->se[i]; - } - - // Divergent filter safeguard. - if (aec->divergeState == 0) { - if (seSum > sdSum) { - aec->divergeState = 1; - } - } - else { - if (seSum * 1.05f < sdSum) { - aec->divergeState = 0; - } - } - - if (aec->divergeState == 1) { - memcpy(efw, dfw, sizeof(efw)); - } - - // Reset if error is significantly larger than nearend (13 dB). - if (seSum > (19.95f * sdSum)) { - memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); - } - - // Subband coherence - for (i = 0; i < PART_LEN1; i++) { - cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / - (aec->sd[i] * aec->se[i] + 1e-10f); - cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / - (aec->sx[i] * aec->sd[i] + 1e-10f); - } - - hNlXdAvg = 0; - for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { - hNlXdAvg += cohxd[i]; - } - hNlXdAvg /= prefBandSize; - hNlXdAvg = 1 - hNlXdAvg; - - hNlDeAvg = 0; - for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { - hNlDeAvg += cohde[i]; - } - hNlDeAvg /= prefBandSize; - - if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { - aec->hNlXdAvgMin = hNlXdAvg; - } - - if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { - aec->stNearState = 1; - } - else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { - aec->stNearState = 0; - } - - if (aec->hNlXdAvgMin == 1) { - aec->echoState = 0; - aec->overDrive = aec->minOverDrive; - - if (aec->stNearState == 1) { - memcpy(hNl, cohde, sizeof(hNl)); - hNlFb = hNlDeAvg; - hNlFbLow = hNlDeAvg; - } - else { - for (i = 0; i < PART_LEN1; i++) { - hNl[i] = 1 - cohxd[i]; - } - hNlFb = hNlXdAvg; - hNlFbLow = hNlXdAvg; - } - } - else { - - if (aec->stNearState == 1) { - aec->echoState = 0; - memcpy(hNl, cohde, sizeof(hNl)); - hNlFb = hNlDeAvg; - hNlFbLow = hNlDeAvg; - } - else { - aec->echoState = 1; - for (i = 0; i < PART_LEN1; i++) { - hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); - } - - // Select an order statistic from the preferred bands. - // TODO: Using quicksort now, but a selection algorithm may be preferred. - memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); - qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); - hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; - hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; - } - } - - // Track the local filter minimum to determine suppression overdrive. 
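For readers tracing the old NonLinearProcessing being deleted here (its replacement is the SubbandCoherence/SmoothedPSD pair introduced earlier): the suppression decision is driven by two magnitude-squared coherences per bin, cohde = |S_de|^2 / (S_d * S_e + 1e-10) between near-end and error, and cohxd = |S_xd|^2 / (S_x * S_d + 1e-10) between far-end and near-end. High cohde means the linear filter already explains the near-end, so little suppression is needed; high cohxd means the near-end is dominated by far-end, i.e. echo, and the gain is taken from 1 - cohxd (or the bin-wise minimum of the two). A one-bin illustration (made-up inputs):

    #include <stdio.h>

    /* Magnitude-squared coherence for one bin from smoothed spectra:
       c_re/c_im: cross-PSD; p1/p2: the two auto-PSDs. */
    static float MscBin(float c_re, float c_im, float p1, float p2) {
      return (c_re * c_re + c_im * c_im) / (p1 * p2 + 1e-10f);
    }

    int main(void) {
      /* Fully coherent: |cross|^2 equals the product of the powers. */
      printf("cohde = %f\n", MscBin(2.0f, 0.0f, 2.0f, 2.0f)); /* ~1.00 */
      printf("cohxd = %f\n", MscBin(0.1f, 0.0f, 2.0f, 2.0f)); /* ~0.0025 */
      return 0;
    }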
- if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { - aec->hNlFbLocalMin = hNlFbLow; - aec->hNlFbMin = hNlFbLow; - aec->hNlNewMin = 1; - aec->hNlMinCtr = 0; - } - aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); - aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); - - if (aec->hNlNewMin == 1) { - aec->hNlMinCtr++; - } - if (aec->hNlMinCtr == 2) { - aec->hNlNewMin = 0; - aec->hNlMinCtr = 0; - aec->overDrive = WEBRTC_SPL_MAX(aec->targetSupp / - ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), aec->minOverDrive); - } - - // Smooth the overdrive. - if (aec->overDrive < aec->overDriveSm) { - aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; - } - else { - aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; - } - - WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); - - // Add comfort noise. - ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); - - // Inverse error fft. - fft[0] = efw[0][0]; - fft[1] = efw[0][PART_LEN]; - for (i = 1; i < PART_LEN; i++) { - fft[2*i] = efw[0][i]; - // Sign change required by Ooura fft. - fft[2*i + 1] = -efw[1][i]; - } - aec_rdft_inverse_128(fft); - - // Overlap and add to obtain output. - scale = 2.0f / PART_LEN2; - for (i = 0; i < PART_LEN; i++) { - fft[i] *= scale; // fft scaling - fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i]; - - // Saturation protection - output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i], - WEBRTC_SPL_WORD16_MIN); - - fft[PART_LEN + i] *= scale; // fft scaling - aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i]; - } - - // For H band - if (aec->sampFreq == 32000) { - - // H band gain - // average nlp over low band: average over second half of freq spectrum - // (4->8khz) - GetHighbandGain(hNl, &nlpGainHband); - - // Inverse comfort_noise - if (flagHbandCn == 1) { - fft[0] = comfortNoiseHband[0][0]; - fft[1] = comfortNoiseHband[PART_LEN][0]; - for (i = 1; i < PART_LEN; i++) { - fft[2*i] = comfortNoiseHband[i][0]; - fft[2*i + 1] = comfortNoiseHband[i][1]; - } - aec_rdft_inverse_128(fft); - scale = 2.0f / PART_LEN2; - } - - // compute gain factor - for (i = 0; i < PART_LEN; i++) { - dtmp = (float)aec->dBufH[i]; - dtmp = (float)dtmp * nlpGainHband; // for variable gain - - // add some comfort noise where Hband is attenuated - if (flagHbandCn == 1) { - fft[i] *= scale; // fft scaling - dtmp += cnScaleHband * fft[i]; - } - - // Saturation protection - outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp, - WEBRTC_SPL_WORD16_MIN); - } - } - - // Copy the current block to the old position. 
- memcpy(aec->xBuf, aec->xBuf + PART_LEN, sizeof(float) * PART_LEN); - memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); - memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); - - // Copy the current block to the old position for H band - if (aec->sampFreq == 32000) { - memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN); - } - - memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) - - sizeof(complex_t) * PART_LEN1); + // Subband coherence + for (i = 0; i < PART_LEN1; i++) { + cohde[i] = + (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / + (aec->sd[i] * aec->se[i] + 1e-10f); + cohxd[i] = + (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / + (aec->sx[i] * aec->sd[i] + 1e-10f); + } } -static void GetHighbandGain(const float *lambda, float *nlpGainHband) -{ - int i; +static void GetHighbandGain(const float* lambda, float* nlpGainHband) { + int i; - nlpGainHband[0] = (float)0.0; - for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { - nlpGainHband[0] += lambda[i]; - } - nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); + nlpGainHband[0] = (float)0.0; + for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { + nlpGainHband[0] += lambda[i]; + } + nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc); } -static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1], - complex_t *comfortNoiseHband, const float *noisePow, const float *lambda) -{ - int i, num; - float rand[PART_LEN]; - float noise, noiseAvg, tmp, tmpAvg; - WebRtc_Word16 randW16[PART_LEN]; - complex_t u[PART_LEN1]; +static void ComfortNoise(AecCore* aec, + float efw[2][PART_LEN1], + complex_t* comfortNoiseHband, + const float* noisePow, + const float* lambda) { + int i, num; + float rand[PART_LEN]; + float noise, noiseAvg, tmp, tmpAvg; + int16_t randW16[PART_LEN]; + complex_t u[PART_LEN1]; - const float pi2 = 6.28318530717959f; + const float pi2 = 6.28318530717959f; - // Generate a uniform random array on [0 1] - WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); - for (i = 0; i < PART_LEN; i++) { - rand[i] = ((float)randW16[i]) / 32768; + // Generate a uniform random array on [0 1] + WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); + for (i = 0; i < PART_LEN; i++) { + rand[i] = ((float)randW16[i]) / 32768; + } + + // Reject LF noise + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i++) { + tmp = pi2 * rand[i - 1]; + + noise = sqrtf(noisePow[i]); + u[i][0] = noise * cosf(tmp); + u[i][1] = -noise * sinf(tmp); + } + u[PART_LEN][1] = 0; + + for (i = 0; i < PART_LEN1; i++) { + // This is the proper weighting to match the background noise power + tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + // tmp = 1 - lambda[i]; + efw[0][i] += tmp * u[i][0]; + efw[1][i] += tmp * u[i][1]; + } + + // For H band comfort noise + // TODO: don't compute noise and "tmp" twice. Use the previous results. + noiseAvg = 0.0; + tmpAvg = 0.0; + num = 0; + if (aec->num_bands > 1 && flagHbandCn == 1) { + + // average noise scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + noiseAvg += sqrtf(noisePow[i]); } + noiseAvg /= (float)num; + // average nlp scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. 
+ num = 0; + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + } + tmpAvg /= (float)num; + + // Use average noise for H band + // TODO: we should probably have a new random vector here. // Reject LF noise u[0][0] = 0; u[0][1] = 0; for (i = 1; i < PART_LEN1; i++) { - tmp = pi2 * rand[i - 1]; + tmp = pi2 * rand[i - 1]; - noise = sqrtf(noisePow[i]); - u[i][0] = noise * (float)cos(tmp); - u[i][1] = -noise * (float)sin(tmp); + // Use average noise for H band + u[i][0] = noiseAvg * (float)cos(tmp); + u[i][1] = -noiseAvg * (float)sin(tmp); } u[PART_LEN][1] = 0; for (i = 0; i < PART_LEN1; i++) { - // This is the proper weighting to match the background noise power - tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); - //tmp = 1 - lambda[i]; - efw[0][i] += tmp * u[i][0]; - efw[1][i] += tmp * u[i][1]; - } - - // For H band comfort noise - // TODO: don't compute noise and "tmp" twice. Use the previous results. - noiseAvg = 0.0; - tmpAvg = 0.0; - num = 0; - if (aec->sampFreq == 32000 && flagHbandCn == 1) { - - // average noise scale - // average over second half of freq spectrum (i.e., 4->8khz) - // TODO: we shouldn't need num. We know how many elements we're summing. - for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { - num++; - noiseAvg += sqrtf(noisePow[i]); - } - noiseAvg /= (float)num; - - // average nlp scale - // average over second half of freq spectrum (i.e., 4->8khz) - // TODO: we shouldn't need num. We know how many elements we're summing. - num = 0; - for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { - num++; - tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); - } - tmpAvg /= (float)num; - - // Use average noise for H band - // TODO: we should probably have a new random vector here. - // Reject LF noise - u[0][0] = 0; - u[0][1] = 0; - for (i = 1; i < PART_LEN1; i++) { - tmp = pi2 * rand[i - 1]; - - // Use average noise for H band - u[i][0] = noiseAvg * (float)cos(tmp); - u[i][1] = -noiseAvg * (float)sin(tmp); - } - u[PART_LEN][1] = 0; - - for (i = 0; i < PART_LEN1; i++) { - // Use average NLP weight for H band - comfortNoiseHband[i][0] = tmpAvg * u[i][0]; - comfortNoiseHband[i][1] = tmpAvg * u[i][1]; - } + // Use average NLP weight for H band + comfortNoiseHband[i][0] = tmpAvg * u[i][0]; + comfortNoiseHband[i][1] = tmpAvg * u[i][1]; } + } } -// Buffer the farend to account for knownDelay -static void BufferFar(aec_t *aec, const short *farend, int farLen) -{ - int writeLen = farLen, writePos = 0; +static void InitLevel(PowerLevel* level) { + const float kBigFloat = 1E17f; - // Check if the write position must be wrapped. - while (aec->farBufWritePos + writeLen > FAR_BUF_LEN) { - - // Write to remaining buffer space before wrapping. - writeLen = FAR_BUF_LEN - aec->farBufWritePos; - memcpy(aec->farBuf + aec->farBufWritePos, farend + writePos, - sizeof(short) * writeLen); - aec->farBufWritePos = 0; - writePos = writeLen; - writeLen = farLen - writeLen; - } - - memcpy(aec->farBuf + aec->farBufWritePos, farend + writePos, - sizeof(short) * writeLen); - aec->farBufWritePos += writeLen; + level->averagelevel = 0; + level->framelevel = 0; + level->minlevel = kBigFloat; + level->frsum = 0; + level->sfrsum = 0; + level->frcounter = 0; + level->sfrcounter = 0; } -static void FetchFar(aec_t *aec, short *farend, int farLen, int knownDelay) -{ - int readLen = farLen, readPos = 0, delayChange = knownDelay - aec->knownDelay; - - aec->farBufReadPos -= delayChange; - - // Check if delay forces a read position wrap. 
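Backing up to ComfortNoise, which closes just above: each bin of the fill noise gets a uniformly random phase and a magnitude matched to the estimated noise floor, and is mixed in with weight sqrt(max(1 - lambda^2, 0)) so that suppressed speech plus noise preserves the background power (lambda is the suppression gain hNl for that bin). A standalone sketch of one bin, using rand() in place of WebRtcSpl_RandUArray (illustrative):

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>

    int main(void) {
      const float pi2 = 6.28318530717959f;
      float noise_pow = 1e4f; /* estimated noise floor power in this bin */
      float lambda = 0.3f;    /* suppression gain for this bin */
      float phase = pi2 * (float)rand() / (float)RAND_MAX;
      float mag = sqrtf(noise_pow);
      float u_re = mag * cosf(phase);
      float u_im = -mag * sinf(phase);
      /* Weight so that (suppressed signal + noise) keeps the floor power. */
      float w = sqrtf(fmaxf(1.0f - lambda * lambda, 0.0f));
      printf("noise bin: %f %+fi (weight %f)\n", w * u_re, w * u_im, w);
      return 0;
    }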
- while(aec->farBufReadPos < 0) { - aec->farBufReadPos += FAR_BUF_LEN; - } - while(aec->farBufReadPos > FAR_BUF_LEN - 1) { - aec->farBufReadPos -= FAR_BUF_LEN; - } - - aec->knownDelay = knownDelay; - - // Check if read position must be wrapped. - while (aec->farBufReadPos + readLen > FAR_BUF_LEN) { - - // Read from remaining buffer space before wrapping. - readLen = FAR_BUF_LEN - aec->farBufReadPos; - memcpy(farend + readPos, aec->farBuf + aec->farBufReadPos, - sizeof(short) * readLen); - aec->farBufReadPos = 0; - readPos = readLen; - readLen = farLen - readLen; - } - memcpy(farend + readPos, aec->farBuf + aec->farBufReadPos, - sizeof(short) * readLen); - aec->farBufReadPos += readLen; +static void InitStats(Stats* stats) { + stats->instant = kOffsetLevel; + stats->average = kOffsetLevel; + stats->max = kOffsetLevel; + stats->min = kOffsetLevel * (-1); + stats->sum = 0; + stats->hisum = 0; + stats->himean = kOffsetLevel; + stats->counter = 0; + stats->hicounter = 0; } -static void WebRtcAec_InitLevel(power_level_t *level) -{ - const float bigFloat = 1E17f; +static void InitMetrics(AecCore* self) { + self->stateCounter = 0; + InitLevel(&self->farlevel); + InitLevel(&self->nearlevel); + InitLevel(&self->linoutlevel); + InitLevel(&self->nlpoutlevel); - level->averagelevel = 0; - level->framelevel = 0; - level->minlevel = bigFloat; - level->frsum = 0; + InitStats(&self->erl); + InitStats(&self->erle); + InitStats(&self->aNlp); + InitStats(&self->rerl); +} + +static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) { + // Do the energy calculation in the frequency domain. The FFT is performed on + // a segment of PART_LEN2 samples due to overlap, but we only want the energy + // of half that data (the last PART_LEN samples). Parseval's relation states + // that the energy is preserved according to + // + // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2 + // = ENERGY, + // + // where N = PART_LEN2. Since we are only interested in calculating the energy + // for the last PART_LEN samples we approximate by calculating ENERGY and + // divide by 2, + // + // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2 + // + // Since we deal with real valued time domain signals we only store frequency + // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we + // need to add the contribution from the missing part in + // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical + // with the values in [1, PART_LEN-1], hence multiply those values by 2. This + // is the values in the for loop below, but multiplication by 2 and division + // by 2 cancel. + + // TODO(bjornv): Investigate reusing energy calculations performed at other + // places in the code. + int k = 1; + // Imaginary parts are zero at end points and left out of the calculation. + float energy = (in[0][0] * in[0][0]) / 2; + energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2; + + for (k = 1; k < PART_LEN; k++) { + energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]); + } + energy /= PART_LEN2; + + level->sfrsum += energy; + level->sfrcounter++; + + if (level->sfrcounter > subCountLen) { + level->framelevel = level->sfrsum / (subCountLen * PART_LEN); level->sfrsum = 0; - level->frcounter = 0; level->sfrcounter = 0; + if (level->framelevel > 0) { + if (level->framelevel < level->minlevel) { + level->minlevel = level->framelevel; // New minimum. + } else { + level->minlevel *= (1 + 0.001f); // Small increase. 
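For reference, the derivation compressed into the UpdateLevel comment above (the function's accumulation tail follows below), written out with N = PART_LEN2 and X the one-sided spectrum kept in in[0]/in[1]:

    % Parseval for the length-N FFT of a real signal:
    \sum_{n=0}^{N-1} x(n)^2
      = \frac{1}{N} \sum_{k=0}^{N-1} |X(k)|^2
      = \frac{1}{N} \Bigl( |X(0)|^2 + |X(N/2)|^2
          + 2 \sum_{k=1}^{N/2-1} |X(k)|^2 \Bigr),
    % and the energy of the last N/2 samples is taken as half of that:
    \sum_{n=N/2}^{N-1} x(n)^2 \approx \frac{1}{2N} \sum_{k=0}^{N-1} |X(k)|^2.

The factor 2 on the interior bins and the leading 1/2 cancel, which is why the loop adds bins 1..PART_LEN-1 unweighted, halves only the two purely real endpoint bins, and divides by PART_LEN2.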
+ } + } + level->frcounter++; + level->frsum += level->framelevel; + if (level->frcounter > countLen) { + level->averagelevel = level->frsum / countLen; + level->frsum = 0; + level->frcounter = 0; + } + } } -static void WebRtcAec_InitStats(stats_t *stats) -{ - stats->instant = offsetLevel; - stats->average = offsetLevel; - stats->max = offsetLevel; - stats->min = offsetLevel * (-1); - stats->sum = 0; - stats->hisum = 0; - stats->himean = offsetLevel; - stats->counter = 0; - stats->hicounter = 0; +static void UpdateMetrics(AecCore* aec) { + float dtmp, dtmp2; + + const float actThresholdNoisy = 8.0f; + const float actThresholdClean = 40.0f; + const float safety = 0.99995f; + const float noisyPower = 300000.0f; + + float actThreshold; + float echo, suppressedEcho; + + if (aec->echoState) { // Check if echo is likely present + aec->stateCounter++; + } + + if (aec->farlevel.frcounter == 0) { + + if (aec->farlevel.minlevel < noisyPower) { + actThreshold = actThresholdClean; + } else { + actThreshold = actThresholdNoisy; + } + + if ((aec->stateCounter > (0.5f * countLen * subCountLen)) && + (aec->farlevel.sfrcounter == 0) + + // Estimate in active far-end segments only + && + (aec->farlevel.averagelevel > + (actThreshold * aec->farlevel.minlevel))) { + + // Subtract noise power + echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; + + // ERL + dtmp = 10 * (float)log10(aec->farlevel.averagelevel / + aec->nearlevel.averagelevel + + 1e-10f); + dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); + + aec->erl.instant = dtmp; + if (dtmp > aec->erl.max) { + aec->erl.max = dtmp; + } + + if (dtmp < aec->erl.min) { + aec->erl.min = dtmp; + } + + aec->erl.counter++; + aec->erl.sum += dtmp; + aec->erl.average = aec->erl.sum / aec->erl.counter; + + // Upper mean + if (dtmp > aec->erl.average) { + aec->erl.hicounter++; + aec->erl.hisum += dtmp; + aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; + } + + // A_NLP + dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / + (2 * aec->linoutlevel.averagelevel) + + 1e-10f); + + // subtract noise power + suppressedEcho = 2 * (aec->linoutlevel.averagelevel - + safety * aec->linoutlevel.minlevel); + + dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); + + aec->aNlp.instant = dtmp2; + if (dtmp > aec->aNlp.max) { + aec->aNlp.max = dtmp; + } + + if (dtmp < aec->aNlp.min) { + aec->aNlp.min = dtmp; + } + + aec->aNlp.counter++; + aec->aNlp.sum += dtmp; + aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; + + // Upper mean + if (dtmp > aec->aNlp.average) { + aec->aNlp.hicounter++; + aec->aNlp.hisum += dtmp; + aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; + } + + // ERLE + + // subtract noise power + suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel - + safety * aec->nlpoutlevel.minlevel); + + dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / + (2 * aec->nlpoutlevel.averagelevel) + + 1e-10f); + dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); + + dtmp = dtmp2; + aec->erle.instant = dtmp; + if (dtmp > aec->erle.max) { + aec->erle.max = dtmp; + } + + if (dtmp < aec->erle.min) { + aec->erle.min = dtmp; + } + + aec->erle.counter++; + aec->erle.sum += dtmp; + aec->erle.average = aec->erle.sum / aec->erle.counter; + + // Upper mean + if (dtmp > aec->erle.average) { + aec->erle.hicounter++; + aec->erle.hisum += dtmp; + aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; + } + } + + aec->stateCounter = 0; + } } -static void UpdateLevel(power_level_t *level, const short *in) -{ - int k; +static 
void UpdateDelayMetrics(AecCore* self) { + int i = 0; + int delay_values = 0; + int median = 0; + int lookahead = WebRtc_lookahead(self->delay_estimator); + const int kMsPerBlock = PART_LEN / (self->mult * 8); + int64_t l1_norm = 0; - for (k = 0; k < PART_LEN; k++) { - level->sfrsum += in[k] * in[k]; + if (self->num_delay_values == 0) { + // We have no new delay value data. Even though -1 is a valid |median| in + // the sense that we allow negative values, it will practically never be + // used since multiples of |kMsPerBlock| will always be returned. + // We therefore use -1 to indicate in the logs that the delay estimator was + // not able to estimate the delay. + self->delay_median = -1; + self->delay_std = -1; + self->fraction_poor_delays = -1; + return; + } + + // Start value for median count down. + delay_values = self->num_delay_values >> 1; + // Get median of delay values since last update. + for (i = 0; i < kHistorySizeBlocks; i++) { + delay_values -= self->delay_histogram[i]; + if (delay_values < 0) { + median = i; + break; } - level->sfrcounter++; + } + // Account for lookahead. + self->delay_median = (median - lookahead) * kMsPerBlock; - if (level->sfrcounter > subCountLen) { - level->framelevel = level->sfrsum / (subCountLen * PART_LEN); - level->sfrsum = 0; - level->sfrcounter = 0; - - if (level->framelevel > 0) { - if (level->framelevel < level->minlevel) { - level->minlevel = level->framelevel; // New minimum - } else { - level->minlevel *= (1 + 0.001f); // Small increase - } - } - level->frcounter++; - level->frsum += level->framelevel; - - if (level->frcounter > countLen) { - level->averagelevel = level->frsum / countLen; - level->frsum = 0; - level->frcounter = 0; - } + // Calculate the L1 norm, with median value as central moment. + for (i = 0; i < kHistorySizeBlocks; i++) { + l1_norm += abs(i - median) * self->delay_histogram[i]; + } + self->delay_std = (int)((l1_norm + self->num_delay_values / 2) / + self->num_delay_values) * kMsPerBlock; + // Determine fraction of delays that are out of bounds, that is, either + // negative (anti-causal system) or larger than the AEC filter length. + { + int num_delays_out_of_bounds = self->num_delay_values; + const int histogram_length = sizeof(self->delay_histogram) / + sizeof(self->delay_histogram[0]); + for (i = lookahead; i < lookahead + self->num_partitions; ++i) { + if (i < histogram_length) + num_delays_out_of_bounds -= self->delay_histogram[i]; } + self->fraction_poor_delays = (float)num_delays_out_of_bounds / + self->num_delay_values; + } + + // Reset histogram. + memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); + self->num_delay_values = 0; + + return; } -static void UpdateMetrics(aec_t *aec) -{ - float dtmp, dtmp2; +static void TimeToFrequency(float time_data[PART_LEN2], + float freq_data[2][PART_LEN1], + int window) { + int i = 0; - const float actThresholdNoisy = 8.0f; - const float actThresholdClean = 40.0f; - const float safety = 0.99995f; - const float noisyPower = 300000.0f; - - float actThreshold; - float echo, suppressedEcho; - - if (aec->echoState) { // Check if echo is likely present - aec->stateCounter++; + // TODO(bjornv): Should we have a different function/wrapper for windowed FFT? 
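On UpdateDelayMetrics, completed just above: the median is found by a countdown of half the observation count through the histogram, and the reported delay_std is really an L1 (mean absolute) deviation around that median, rounded via the + num_delay_values / 2 term and scaled by kMsPerBlock = PART_LEN / (mult * 8) to milliseconds. The histogram-median step, reduced to a standalone form (illustrative data):

    #include <stdio.h>

    /* Median from a histogram: walk the bins until half of the
       observations have been consumed (cf. the countdown above). */
    static int HistogramMedian(const int* hist, int num_bins,
                               int num_values) {
      int remaining = num_values >> 1;
      int i;
      for (i = 0; i < num_bins; i++) {
        remaining -= hist[i];
        if (remaining < 0) {
          return i;
        }
      }
      return num_bins - 1; /* Unreached when the counts are consistent. */
    }

    int main(void) {
      int hist[8] = {0, 1, 4, 7, 3, 1, 0, 0}; /* 16 observations */
      printf("median bin = %d\n", HistogramMedian(hist, 8, 16)); /* -> 3 */
      return 0;
    }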
+ if (window) { + for (i = 0; i < PART_LEN; i++) { + time_data[i] *= WebRtcAec_sqrtHanning[i]; + time_data[PART_LEN + i] *= WebRtcAec_sqrtHanning[PART_LEN - i]; } + } - if (aec->farlevel.frcounter == countLen) { - - if (aec->farlevel.minlevel < noisyPower) { - actThreshold = actThresholdClean; - } - else { - actThreshold = actThresholdNoisy; - } - - if ((aec->stateCounter > (0.5f * countLen * subCountLen)) - && (aec->farlevel.sfrcounter == 0) - - // Estimate in active far-end segments only - && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel)) - ) { - - // Subtract noise power - echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; - - // ERL - dtmp = 10 * (float)log10(aec->farlevel.averagelevel / - aec->nearlevel.averagelevel + 1e-10f); - dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); - - aec->erl.instant = dtmp; - if (dtmp > aec->erl.max) { - aec->erl.max = dtmp; - } - - if (dtmp < aec->erl.min) { - aec->erl.min = dtmp; - } - - aec->erl.counter++; - aec->erl.sum += dtmp; - aec->erl.average = aec->erl.sum / aec->erl.counter; - - // Upper mean - if (dtmp > aec->erl.average) { - aec->erl.hicounter++; - aec->erl.hisum += dtmp; - aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; - } - - // A_NLP - dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / - aec->linoutlevel.averagelevel + 1e-10f); - - // subtract noise power - suppressedEcho = aec->linoutlevel.averagelevel - safety * aec->linoutlevel.minlevel; - - dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); - - aec->aNlp.instant = dtmp2; - if (dtmp > aec->aNlp.max) { - aec->aNlp.max = dtmp; - } - - if (dtmp < aec->aNlp.min) { - aec->aNlp.min = dtmp; - } - - aec->aNlp.counter++; - aec->aNlp.sum += dtmp; - aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; - - // Upper mean - if (dtmp > aec->aNlp.average) { - aec->aNlp.hicounter++; - aec->aNlp.hisum += dtmp; - aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; - } - - // ERLE - - // subtract noise power - suppressedEcho = aec->nlpoutlevel.averagelevel - safety * aec->nlpoutlevel.minlevel; - - dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / - aec->nlpoutlevel.averagelevel + 1e-10f); - dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); - - dtmp = dtmp2; - aec->erle.instant = dtmp; - if (dtmp > aec->erle.max) { - aec->erle.max = dtmp; - } - - if (dtmp < aec->erle.min) { - aec->erle.min = dtmp; - } - - aec->erle.counter++; - aec->erle.sum += dtmp; - aec->erle.average = aec->erle.sum / aec->erle.counter; - - // Upper mean - if (dtmp > aec->erle.average) { - aec->erle.hicounter++; - aec->erle.hisum += dtmp; - aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; - } - } - - aec->stateCounter = 0; - } + aec_rdft_forward_128(time_data); + // Reorder. 
+ freq_data[1][0] = 0; + freq_data[1][PART_LEN] = 0; + freq_data[0][0] = time_data[0]; + freq_data[0][PART_LEN] = time_data[1]; + for (i = 1; i < PART_LEN; i++) { + freq_data[0][i] = time_data[2 * i]; + freq_data[1][i] = time_data[2 * i + 1]; + } } +static int MoveFarReadPtrWithoutSystemDelayUpdate(AecCore* self, int elements) { + WebRtc_MoveReadPtr(self->far_buf_windowed, elements); +#ifdef WEBRTC_AEC_DEBUG_DUMP + WebRtc_MoveReadPtr(self->far_time_buf, elements); +#endif + return WebRtc_MoveReadPtr(self->far_buf, elements); +} + +static int SignalBasedDelayCorrection(AecCore* self) { + int delay_correction = 0; + int last_delay = -2; + assert(self != NULL); +#if !defined(WEBRTC_ANDROID) + // On desktops, turn on correction after |kDelayCorrectionStart| frames. This + // is to let the delay estimation get a chance to converge. Also, if the + // playout audio volume is low (or even muted) the delay estimation can return + // a very large delay, which will break the AEC if it is applied. + if (self->frame_count < kDelayCorrectionStart) { + return 0; + } +#endif + + // 1. Check for non-negative delay estimate. Note that the estimates we get + // from the delay estimation are not compensated for lookahead. Hence, a + // negative |last_delay| is an invalid one. + // 2. Verify that there is a delay change. In addition, only allow a change + // if the delay is outside a certain region taking the AEC filter length + // into account. + // TODO(bjornv): Investigate if we can remove the non-zero delay change check. + // 3. Only allow delay correction if the delay estimation quality exceeds + // |delay_quality_threshold|. + // 4. Finally, verify that the proposed |delay_correction| is feasible by + // comparing with the size of the far-end buffer. + last_delay = WebRtc_last_delay(self->delay_estimator); + if ((last_delay >= 0) && + (last_delay != self->previous_delay) && + (WebRtc_last_delay_quality(self->delay_estimator) > + self->delay_quality_threshold)) { + int delay = last_delay - WebRtc_lookahead(self->delay_estimator); + // Allow for a slack in the actual delay, defined by a |lower_bound| and an + // |upper_bound|. The adaptive echo cancellation filter is currently + // |num_partitions| (of 64 samples) long. If the delay estimate is negative + // or at least 3/4 of the filter length we open up for correction. + const int lower_bound = 0; + const int upper_bound = self->num_partitions * 3 / 4; + const int do_correction = delay <= lower_bound || delay > upper_bound; + if (do_correction == 1) { + int available_read = (int)WebRtc_available_read(self->far_buf); + // With |shift_offset| we gradually rely on the delay estimates. For + // positive delays we reduce the correction by |shift_offset| to lower the + // risk of pushing the AEC into a non causal state. For negative delays + // we rely on the values up to a rounding error, hence compensate by 1 + // element to make sure to push the delay into the causal region. + delay_correction = -delay; + delay_correction += delay > self->shift_offset ? self->shift_offset : 1; + self->shift_offset--; + self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset); + if (delay_correction > available_read - self->mult - 1) { + // There is not enough data in the buffer to perform this shift. Hence, + // we do not rely on the delay estimate and do nothing. 
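The gating in SignalBasedDelayCorrection above (its buffer-shift tail continues below) only fires when the estimate is trustworthy and the delay sits badly relative to the adaptive filter: negative, or past three quarters of the num_partitions-block filter span. Positive corrections are additionally softened by shift_offset to avoid pushing the AEC non-causal, while negative ones are compensated fully plus one block. The bounds test on its own (illustrative numbers):

    #include <stdio.h>

    /* Correct only when the estimated delay (in 64-sample blocks) lies
       outside the comfortable region of the filter: (0, 3/4 * partitions]. */
    static int NeedsCorrection(int delay, int num_partitions) {
      const int lower_bound = 0;
      const int upper_bound = num_partitions * 3 / 4;
      return delay <= lower_bound || delay > upper_bound;
    }

    int main(void) {
      printf("%d %d %d\n",
             NeedsCorrection(-2, 12),  /* 1: anti-causal, must shift */
             NeedsCorrection(5, 12),   /* 0: well inside the filter */
             NeedsCorrection(11, 12)); /* 1: too close to the filter end */
      return 0;
    }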
+ delay_correction = 0; + } else { + self->previous_delay = last_delay; + ++self->delay_correction_count; + } + } + } + // Update the |delay_quality_threshold| once we have our first delay + // correction. + if (self->delay_correction_count > 0) { + float delay_quality = WebRtc_last_delay_quality(self->delay_estimator); + delay_quality = (delay_quality > kDelayQualityThresholdMax ? + kDelayQualityThresholdMax : delay_quality); + self->delay_quality_threshold = + (delay_quality > self->delay_quality_threshold ? delay_quality : + self->delay_quality_threshold); + } + return delay_correction; +} + +static void NonLinearProcessing(AecCore* aec, + float* output, + float* const* outputH) { + float efw[2][PART_LEN1], xfw[2][PART_LEN1]; + complex_t comfortNoiseHband[PART_LEN1]; + float fft[PART_LEN2]; + float scale, dtmp; + float nlpGainHband; + int i; + size_t j; + + // Coherence and non-linear filter + float cohde[PART_LEN1], cohxd[PART_LEN1]; + float hNlDeAvg, hNlXdAvg; + float hNl[PART_LEN1]; + float hNlPref[kPrefBandSize]; + float hNlFb = 0, hNlFbLow = 0; + const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; + const int prefBandSize = kPrefBandSize / aec->mult; + const int minPrefBand = 4 / aec->mult; + // Power estimate smoothing coefficients. + const float* min_overdrive = aec->extended_filter_enabled + ? kExtendedMinOverDrive + : kNormalMinOverDrive; + + // Filter energy + const int delayEstInterval = 10 * aec->mult; + + float* xfw_ptr = NULL; + + aec->delayEstCtr++; + if (aec->delayEstCtr == delayEstInterval) { + aec->delayEstCtr = 0; + } + + // initialize comfort noise for H band + memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband)); + nlpGainHband = (float)0.0; + dtmp = (float)0.0; + + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_buf_windowed) > 0); + // NLP + WebRtc_ReadBuffer(aec->far_buf_windowed, (void**)&xfw_ptr, &xfw[0][0], 1); + + // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of + // |xfwBuf|. + // Buffer far. + memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); + + WebRtcAec_SubbandCoherence(aec, efw, xfw, fft, cohde, cohxd); + + hNlXdAvg = 0; + for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { + hNlXdAvg += cohxd[i]; + } + hNlXdAvg /= prefBandSize; + hNlXdAvg = 1 - hNlXdAvg; + + hNlDeAvg = 0; + for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { + hNlDeAvg += cohde[i]; + } + hNlDeAvg /= prefBandSize; + + if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { + aec->hNlXdAvgMin = hNlXdAvg; + } + + if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { + aec->stNearState = 1; + } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { + aec->stNearState = 0; + } + + if (aec->hNlXdAvgMin == 1) { + aec->echoState = 0; + aec->overDrive = min_overdrive[aec->nlp_mode]; + + if (aec->stNearState == 1) { + memcpy(hNl, cohde, sizeof(hNl)); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } else { + for (i = 0; i < PART_LEN1; i++) { + hNl[i] = 1 - cohxd[i]; + } + hNlFb = hNlXdAvg; + hNlFbLow = hNlXdAvg; + } + } else { + + if (aec->stNearState == 1) { + aec->echoState = 0; + memcpy(hNl, cohde, sizeof(hNl)); + hNlFb = hNlDeAvg; + hNlFbLow = hNlDeAvg; + } else { + aec->echoState = 1; + for (i = 0; i < PART_LEN1; i++) { + hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); + } + + // Select an order statistic from the preferred bands. + // TODO: Using quicksort now, but a selection algorithm may be preferred. 
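On the TODO just above: hNlFb and hNlFbLow are the 0.75 and 0.5 order statistics of the suppression gains in the preferred band, currently obtained by fully sorting the band; a selection algorithm (quickselect, or std::nth_element in C++) would find them in linear time, though for a band this small the sort is cheap. The selection as it stands, in standalone form (made-up gains):

    #include <math.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* Ascending float comparator for qsort, like the patch's CmpFloat. */
    static int CmpFloat(const void* a, const void* b) {
      const float da = *(const float*)a;
      const float db = *(const float*)b;
      return (da > db) - (da < db);
    }

    int main(void) {
      float band[8] = {0.9f, 0.2f, 0.6f, 0.4f, 0.8f, 0.1f, 0.7f, 0.3f};
      const int n = 8;
      qsort(band, n, sizeof(float), CmpFloat);
      printf("hNlFb    = %f\n", band[(int)floor(0.75f * (n - 1))]); /* 0.7 */
      printf("hNlFbLow = %f\n", band[(int)floor(0.5f * (n - 1))]);  /* 0.4 */
      return 0;
    }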
+ memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); + qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); + hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; + hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; + } + } + + // Track the local filter minimum to determine suppression overdrive. + if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { + aec->hNlFbLocalMin = hNlFbLow; + aec->hNlFbMin = hNlFbLow; + aec->hNlNewMin = 1; + aec->hNlMinCtr = 0; + } + aec->hNlFbLocalMin = + WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); + aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); + + if (aec->hNlNewMin == 1) { + aec->hNlMinCtr++; + } + if (aec->hNlMinCtr == 2) { + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = + WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] / + ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), + min_overdrive[aec->nlp_mode]); + } + + // Smooth the overdrive. + if (aec->overDrive < aec->overDriveSm) { + aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; + } else { + aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; + } + + WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); + + // Add comfort noise. + WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); + + // TODO(bjornv): Investigate how to take the windowing below into account if + // needed. + if (aec->metricsMode == 1) { + // Note that we have a scaling by two in the time domain |eBuf|. + // In addition the time domain signal is windowed before transformation, + // losing half the energy on the average. We take care of the first + // scaling only in UpdateMetrics(). + UpdateLevel(&aec->nlpoutlevel, efw); + } + // Inverse error fft. + fft[0] = efw[0][0]; + fft[1] = efw[0][PART_LEN]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = efw[0][i]; + // Sign change required by Ooura fft. + fft[2 * i + 1] = -efw[1][i]; + } + aec_rdft_inverse_128(fft); + + // Overlap and add to obtain output. + scale = 2.0f / PART_LEN2; + for (i = 0; i < PART_LEN; i++) { + fft[i] *= scale; // fft scaling + fft[i] = fft[i] * WebRtcAec_sqrtHanning[i] + aec->outBuf[i]; + + fft[PART_LEN + i] *= scale; // fft scaling + aec->outBuf[i] = fft[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; + + // Saturate output to keep it in the allowed range. + output[i] = WEBRTC_SPL_SAT( + WEBRTC_SPL_WORD16_MAX, fft[i], WEBRTC_SPL_WORD16_MIN); + } + + // For H band + if (aec->num_bands > 1) { + + // H band gain + // average nlp over low band: average over second half of freq spectrum + // (4->8khz) + GetHighbandGain(hNl, &nlpGainHband); + + // Inverse comfort_noise + if (flagHbandCn == 1) { + fft[0] = comfortNoiseHband[0][0]; + fft[1] = comfortNoiseHband[PART_LEN][0]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = comfortNoiseHband[i][0]; + fft[2 * i + 1] = comfortNoiseHband[i][1]; + } + aec_rdft_inverse_128(fft); + scale = 2.0f / PART_LEN2; + } + + // compute gain factor + for (j = 0; j < aec->num_bands - 1; ++j) { + for (i = 0; i < PART_LEN; i++) { + dtmp = aec->dBufH[j][i]; + dtmp = dtmp * nlpGainHband; // for variable gain + + // add some comfort noise where Hband is attenuated + if (flagHbandCn == 1 && j == 0) { + fft[i] *= scale; // fft scaling + dtmp += cnScaleHband * fft[i]; + } + + // Saturate output to keep it in the allowed range. + outputH[j][i] = WEBRTC_SPL_SAT( + WEBRTC_SPL_WORD16_MAX, dtmp, WEBRTC_SPL_WORD16_MIN); + } + } + } + + // Copy the current block to the old position. 
+ memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); + memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); + + // Copy the current block to the old position for H band + for (j = 0; j < aec->num_bands - 1; ++j) { + memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN); + } + + memmove(aec->xfwBuf + PART_LEN1, + aec->xfwBuf, + sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1); +} + +static void ProcessBlock(AecCore* aec) { + size_t i; + float y[PART_LEN], e[PART_LEN]; + float scale; + + float fft[PART_LEN2]; + float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1]; + float df[2][PART_LEN1]; + float far_spectrum = 0.0f; + float near_spectrum = 0.0f; + float abs_far_spectrum[PART_LEN1]; + float abs_near_spectrum[PART_LEN1]; + + const float gPow[2] = {0.9f, 0.1f}; + + // Noise estimate constants. + const int noiseInitBlocks = 500 * aec->mult; + const float step = 0.1f; + const float ramp = 1.0002f; + const float gInitNoise[2] = {0.999f, 0.001f}; + + float nearend[PART_LEN]; + float* nearend_ptr = NULL; + float output[PART_LEN]; + float outputH[NUM_HIGH_BANDS_MAX][PART_LEN]; + float* outputH_ptr[NUM_HIGH_BANDS_MAX]; + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + outputH_ptr[i] = outputH[i]; + } + + float* xf_ptr = NULL; + + // Concatenate old and new nearend blocks. + for (i = 0; i < aec->num_bands - 1; ++i) { + WebRtc_ReadBuffer(aec->nearFrBufH[i], + (void**)&nearend_ptr, + nearend, + PART_LEN); + memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend)); + } + WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); + memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend)); + + // ---------- Ooura fft ---------- + +#ifdef WEBRTC_AEC_DEBUG_DUMP + { + float farend[PART_LEN]; + float* farend_ptr = NULL; + WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); + RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, farend_ptr, PART_LEN); + RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); + } +#endif + + // We should always have at least one element stored in |far_buf|. + assert(WebRtc_available_read(aec->far_buf) > 0); + WebRtc_ReadBuffer(aec->far_buf, (void**)&xf_ptr, &xf[0][0], 1); + + // Near fft + memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); + TimeToFrequency(fft, df, 0); + + // Power smoothing + for (i = 0; i < PART_LEN1; i++) { + far_spectrum = (xf_ptr[i] * xf_ptr[i]) + + (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); + aec->xPow[i] = + gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; + // Calculate absolute spectra + abs_far_spectrum[i] = sqrtf(far_spectrum); + + near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; + aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; + // Calculate absolute spectra + abs_near_spectrum[i] = sqrtf(near_spectrum); + } + + // Estimate noise power. Wait until dPow is more stable. + if (aec->noiseEstCtr > 50) { + for (i = 0; i < PART_LEN1; i++) { + if (aec->dPow[i] < aec->dMinPow[i]) { + aec->dMinPow[i] = + (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp; + } else { + aec->dMinPow[i] *= ramp; + } + } + } + + // Smooth increasing noise power from zero at the start, + // to avoid a sudden burst of comfort noise. 
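The noise-floor tracker above is a minimum-statistics style estimator: when the current near-end power drops below the floor, the floor moves most of the way down to it (step = 0.1), and in every block the floor is also multiplied by ramp = 1.0002 so it can climb back out of an unluckily deep minimum; the startup smoothing handled just below then fades this floor in from zero to avoid a comfort-noise burst. One bin of the update, runnable on its own (illustrative input):

    #include <stdio.h>

    /* Per-bin noise-floor update as in ProcessBlock: decay toward the
       current power when below the floor, otherwise drift slowly upward. */
    static float TrackMinPow(float min_pow, float pow) {
      const float step = 0.1f;
      const float ramp = 1.0002f;
      if (pow < min_pow) {
        return (pow + step * (min_pow - pow)) * ramp;
      }
      return min_pow * ramp;
    }

    int main(void) {
      float min_pow = 1.0e6f; /* initial dMinPow */
      int n;
      for (n = 0; n < 100; n++) {
        min_pow = TrackMinPow(min_pow, 100.0f); /* steady noise power */
      }
      printf("tracked noise floor: %f\n", min_pow);
      return 0;
    }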
+ if (aec->noiseEstCtr < noiseInitBlocks) { + aec->noiseEstCtr++; + for (i = 0; i < PART_LEN1; i++) { + if (aec->dMinPow[i] > aec->dInitMinPow[i]) { + aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + + gInitNoise[1] * aec->dMinPow[i]; + } else { + aec->dInitMinPow[i] = aec->dMinPow[i]; + } + } + aec->noisePow = aec->dInitMinPow; + } else { + aec->noisePow = aec->dMinPow; + } + + // Block wise delay estimation used for logging + if (aec->delay_logging_enabled) { + if (WebRtc_AddFarSpectrumFloat( + aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) { + int delay_estimate = WebRtc_DelayEstimatorProcessFloat( + aec->delay_estimator, abs_near_spectrum, PART_LEN1); + if (delay_estimate >= 0) { + // Update delay estimate buffer. + aec->delay_histogram[delay_estimate]++; + aec->num_delay_values++; + } + if (aec->delay_metrics_delivered == 1 && + aec->num_delay_values >= kDelayMetricsAggregationWindow) { + UpdateDelayMetrics(aec); + } + } + } + + // Update the xfBuf block position. + aec->xfBufBlockPos--; + if (aec->xfBufBlockPos == -1) { + aec->xfBufBlockPos = aec->num_partitions - 1; + } + + // Buffer xf + memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, + xf_ptr, + sizeof(float) * PART_LEN1); + memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, + &xf_ptr[PART_LEN1], + sizeof(float) * PART_LEN1); + + memset(yf, 0, sizeof(yf)); + + // Filter far + WebRtcAec_FilterFar(aec, yf); + + // Inverse fft to obtain echo estimate and error. + fft[0] = yf[0][0]; + fft[1] = yf[0][PART_LEN]; + for (i = 1; i < PART_LEN; i++) { + fft[2 * i] = yf[0][i]; + fft[2 * i + 1] = yf[1][i]; + } + aec_rdft_inverse_128(fft); + + scale = 2.0f / PART_LEN2; + for (i = 0; i < PART_LEN; i++) { + y[i] = fft[PART_LEN + i] * scale; // fft scaling + } + + for (i = 0; i < PART_LEN; i++) { + e[i] = nearend_ptr[i] - y[i]; + } + + // Error fft + memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN); + memset(fft, 0, sizeof(float) * PART_LEN); + memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN); + // TODO(bjornv): Change to use TimeToFrequency(). + aec_rdft_forward_128(fft); + + ef[1][0] = 0; + ef[1][PART_LEN] = 0; + ef[0][0] = fft[0]; + ef[0][PART_LEN] = fft[1]; + for (i = 1; i < PART_LEN; i++) { + ef[0][i] = fft[2 * i]; + ef[1][i] = fft[2 * i + 1]; + } + + RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, + &ef[0][0], + sizeof(ef[0][0]) * PART_LEN1 * 2); + + if (aec->metricsMode == 1) { + // Note that the first PART_LEN samples in fft (before transformation) are + // zero. Hence, the scaling by two in UpdateLevel() should not be + // performed. That scaling is taken care of in UpdateMetrics() instead. + UpdateLevel(&aec->linoutlevel, ef); + } + + // Scale error signal inversely with far power. + WebRtcAec_ScaleErrorSignal(aec, ef); + WebRtcAec_FilterAdaptation(aec, fft, ef); + NonLinearProcessing(aec, output, outputH_ptr); + + if (aec->metricsMode == 1) { + // Update power levels and echo metrics + UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr); + UpdateLevel(&aec->nearlevel, df); + UpdateMetrics(aec); + } + + // Store the output block. 
+ WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); + // For high bands + for (i = 0; i < aec->num_bands - 1; ++i) { + WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); + } + + RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, e, PART_LEN); + RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN); +} + +AecCore* WebRtcAec_CreateAec() { + int i; + AecCore* aec = malloc(sizeof(AecCore)); + if (!aec) { + return NULL; + } + + aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); + if (!aec->nearFrBuf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + + aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); + if (!aec->outFrBuf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + aec->nearFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(float)); + if (!aec->nearFrBufH[i]) { + WebRtcAec_FreeAec(aec); + return NULL; + } + aec->outFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(float)); + if (!aec->outFrBufH[i]) { + WebRtcAec_FreeAec(aec); + return NULL; + } + } + + // Create far-end buffers. + aec->far_buf = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); + if (!aec->far_buf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + aec->far_buf_windowed = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * 2 * PART_LEN1); + if (!aec->far_buf_windowed) { + WebRtcAec_FreeAec(aec); + return NULL; + } +#ifdef WEBRTC_AEC_DEBUG_DUMP + aec->instance_index = webrtc_aec_instance_count; + aec->far_time_buf = + WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN); + if (!aec->far_time_buf) { + WebRtcAec_FreeAec(aec); + return NULL; + } + aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL; + aec->debug_dump_count = 0; +#endif + aec->delay_estimator_farend = + WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); + if (aec->delay_estimator_farend == NULL) { + WebRtcAec_FreeAec(aec); + return NULL; + } + // We create the delay_estimator with the same amount of maximum lookahead as + // the delay history size (kHistorySizeBlocks) for symmetry reasons. + aec->delay_estimator = WebRtc_CreateDelayEstimator( + aec->delay_estimator_farend, kHistorySizeBlocks); + if (aec->delay_estimator == NULL) { + WebRtcAec_FreeAec(aec); + return NULL; + } +#ifdef WEBRTC_ANDROID + aec->delay_agnostic_enabled = 1; // DA-AEC enabled by default. + // DA-AEC assumes the system is causal from the beginning and will self adjust + // the lookahead when shifting is required. 
+ WebRtc_set_lookahead(aec->delay_estimator, 0); +#else + aec->delay_agnostic_enabled = 0; + WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks); +#endif + aec->extended_filter_enabled = 0; + + // Assembly optimization + WebRtcAec_FilterFar = FilterFar; + WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; + WebRtcAec_FilterAdaptation = FilterAdaptation; + WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; + WebRtcAec_ComfortNoise = ComfortNoise; + WebRtcAec_SubbandCoherence = SubbandCoherence; + +#if defined(WEBRTC_ARCH_X86_FAMILY) + if (WebRtc_GetCPUInfo(kSSE2)) { + WebRtcAec_InitAec_SSE2(); + } +#endif + +#if defined(MIPS_FPU_LE) + WebRtcAec_InitAec_mips(); +#endif + +#if defined(WEBRTC_HAS_NEON) + WebRtcAec_InitAec_neon(); +#elif defined(WEBRTC_DETECT_NEON) + if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { + WebRtcAec_InitAec_neon(); + } +#endif + + aec_rdft_init(); + + return aec; +} + +void WebRtcAec_FreeAec(AecCore* aec) { + int i; + if (aec == NULL) { + return; + } + + WebRtc_FreeBuffer(aec->nearFrBuf); + WebRtc_FreeBuffer(aec->outFrBuf); + + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + WebRtc_FreeBuffer(aec->nearFrBufH[i]); + WebRtc_FreeBuffer(aec->outFrBufH[i]); + } + + WebRtc_FreeBuffer(aec->far_buf); + WebRtc_FreeBuffer(aec->far_buf_windowed); +#ifdef WEBRTC_AEC_DEBUG_DUMP + WebRtc_FreeBuffer(aec->far_time_buf); +#endif + RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile); + RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile); + RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile); + RTC_AEC_DEBUG_WAV_CLOSE(aec->outLinearFile); + RTC_AEC_DEBUG_RAW_CLOSE(aec->e_fft_file); + + WebRtc_FreeDelayEstimator(aec->delay_estimator); + WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); + + free(aec); +} + +int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { + int i; + + aec->sampFreq = sampFreq; + + if (sampFreq == 8000) { + aec->normal_mu = 0.6f; + aec->normal_error_threshold = 2e-6f; + aec->num_bands = 1; + } else { + aec->normal_mu = 0.5f; + aec->normal_error_threshold = 1.5e-6f; + aec->num_bands = (size_t)(sampFreq / 16000); + } + + WebRtc_InitBuffer(aec->nearFrBuf); + WebRtc_InitBuffer(aec->outFrBuf); + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + WebRtc_InitBuffer(aec->nearFrBufH[i]); + WebRtc_InitBuffer(aec->outFrBufH[i]); + } + + // Initialize far-end buffers. + WebRtc_InitBuffer(aec->far_buf); + WebRtc_InitBuffer(aec->far_buf_windowed); +#ifdef WEBRTC_AEC_DEBUG_DUMP + WebRtc_InitBuffer(aec->far_time_buf); + { + int process_rate = sampFreq > 16000 ? 
16000 : sampFreq; + RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->farFile ); + RTC_AEC_DEBUG_WAV_REOPEN("aec_near", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->nearFile); + RTC_AEC_DEBUG_WAV_REOPEN("aec_out", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->outFile ); + RTC_AEC_DEBUG_WAV_REOPEN("aec_out_linear", aec->instance_index, + aec->debug_dump_count, process_rate, + &aec->outLinearFile); + } + + RTC_AEC_DEBUG_RAW_OPEN("aec_e_fft", + aec->debug_dump_count, + &aec->e_fft_file); + + ++aec->debug_dump_count; +#endif + aec->system_delay = 0; + + if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) { + return -1; + } + if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) { + return -1; + } + aec->delay_logging_enabled = 0; + aec->delay_metrics_delivered = 0; + memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); + aec->num_delay_values = 0; + aec->delay_median = -1; + aec->delay_std = -1; + aec->fraction_poor_delays = -1.0f; + + aec->signal_delay_correction = 0; + aec->previous_delay = -2; // (-2): Uninitialized. + aec->delay_correction_count = 0; + aec->shift_offset = kInitialShiftOffset; + aec->delay_quality_threshold = kDelayQualityThresholdMin; + + aec->num_partitions = kNormalNumPartitions; + + // Update the delay estimator with filter length. We use half the + // |num_partitions| to take the echo path into account. In practice we say + // that the echo has a duration of maximum half |num_partitions|, which is not + // true, but serves as a crude measure. + WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2); + // TODO(bjornv): I currently hard coded the enable. Once we've established + // that AECM has no performance regression, robust_validation will be enabled + // all the time and the APIs to turn it on/off will be removed. Hence, remove + // this line then. + WebRtc_enable_robust_validation(aec->delay_estimator, 1); + aec->frame_count = 0; + + // Default target suppression mode. + aec->nlp_mode = 1; + + // Sampling frequency multiplier w.r.t. 8 kHz. + // In case of multiple bands we process the lower band in 16 kHz, hence the + // multiplier is always 2. + if (aec->num_bands > 1) { + aec->mult = 2; + } else { + aec->mult = (short)aec->sampFreq / 8000; + } + + aec->farBufWritePos = 0; + aec->farBufReadPos = 0; + + aec->inSamples = 0; + aec->outSamples = 0; + aec->knownDelay = 0; + + // Initialize buffers + memset(aec->dBuf, 0, sizeof(aec->dBuf)); + memset(aec->eBuf, 0, sizeof(aec->eBuf)); + // For H bands + for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { + memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i])); + } + + memset(aec->xPow, 0, sizeof(aec->xPow)); + memset(aec->dPow, 0, sizeof(aec->dPow)); + memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); + aec->noisePow = aec->dInitMinPow; + aec->noiseEstCtr = 0; + + // Initial comfort noise power + for (i = 0; i < PART_LEN1; i++) { + aec->dMinPow[i] = 1.0e6f; + } + + // Holds the last block written to + aec->xfBufBlockPos = 0; + // TODO: Investigate need for these initializations. Deleting them doesn't + // change the output at all and yields 0.4% overall speedup. 
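+  // Note that the fft buffers below are dimensioned for the extended filter
+  // (kExtendedNumPartitions) even though only |num_partitions| partitions are
+  // used at runtime.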
+ memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); + memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); + memset( + aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); + memset(aec->se, 0, sizeof(float) * PART_LEN1); + + // To prevent numerical instability in the first block. + for (i = 0; i < PART_LEN1; i++) { + aec->sd[i] = 1; + } + for (i = 0; i < PART_LEN1; i++) { + aec->sx[i] = 1; + } + + memset(aec->hNs, 0, sizeof(aec->hNs)); + memset(aec->outBuf, 0, sizeof(float) * PART_LEN); + + aec->hNlFbMin = 1; + aec->hNlFbLocalMin = 1; + aec->hNlXdAvgMin = 1; + aec->hNlNewMin = 0; + aec->hNlMinCtr = 0; + aec->overDrive = 2; + aec->overDriveSm = 2; + aec->delayIdx = 0; + aec->stNearState = 0; + aec->echoState = 0; + aec->divergeState = 0; + + aec->seed = 777; + aec->delayEstCtr = 0; + + // Metrics disabled by default + aec->metricsMode = 0; + InitMetrics(aec); + + return 0; +} + +void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { + float fft[PART_LEN2]; + float xf[2][PART_LEN1]; + + // Check if the buffer is full, and in that case flush the oldest data. + if (WebRtc_available_write(aec->far_buf) < 1) { + WebRtcAec_MoveFarReadPtr(aec, 1); + } + // Convert far-end partition to the frequency domain without windowing. + memcpy(fft, farend, sizeof(float) * PART_LEN2); + TimeToFrequency(fft, xf, 0); + WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1); + + // Convert far-end partition to the frequency domain with windowing. + memcpy(fft, farend, sizeof(float) * PART_LEN2); + TimeToFrequency(fft, xf, 1); + WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1); +} + +int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) { + int elements_moved = MoveFarReadPtrWithoutSystemDelayUpdate(aec, elements); + aec->system_delay -= elements_moved * PART_LEN; + return elements_moved; +} + +void WebRtcAec_ProcessFrames(AecCore* aec, + const float* const* nearend, + size_t num_bands, + size_t num_samples, + int knownDelay, + float* const* out) { + size_t i, j; + int out_elements = 0; + + aec->frame_count++; + // For each frame the process is as follows: + // 1) If the system_delay indicates on being too small for processing a + // frame we stuff the buffer with enough data for 10 ms. + // 2 a) Adjust the buffer to the system delay, by moving the read pointer. + // b) Apply signal based delay correction, if we have detected poor AEC + // performance. + // 3) TODO(bjornv): Investigate if we need to add this: + // If we can't move read pointer due to buffer size limitations we + // flush/stuff the buffer. + // 4) Process as many partitions as possible. + // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN + // samples. Even though we will have data left to process (we work with + // partitions) we consider updating a whole frame, since that's the + // amount of data we input and output in audio_processing. + // 6) Update the outputs. + + // The AEC has two different delay estimation algorithms built in. The + // first relies on delay input values from the user and the amount of + // shifted buffer elements is controlled by |knownDelay|. This delay will + // give a guess on how much we need to shift far-end buffers to align with + // the near-end signal. The other delay estimation algorithm uses the + // far- and near-end signals to find the offset between them. 
This one
+  // (called "signal delay") is then used to fine-tune the alignment, or
+  // simply compensate for errors in the system-based one.
+  // Note that the two algorithms operate independently. Currently, we only
+  // allow one algorithm to be turned on.
+
+  assert(aec->num_bands == num_bands);
+
+  for (j = 0; j < num_samples; j += FRAME_LEN) {
+    // TODO(bjornv): Change the near-end buffer handling to be the same as for
+    // far-end, that is, with a near_pre_buf.
+    // Buffer the near-end frame.
+    WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN);
+    // For H band
+    for (i = 1; i < num_bands; ++i) {
+      WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN);
+    }
+
+    // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
+    // have enough far-end data for that by stuffing the buffer if the
+    // |system_delay| indicates otherwise.
+    if (aec->system_delay < FRAME_LEN) {
+      // We don't have enough data so we rewind 10 ms.
+      WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1));
+    }
+
+    if (!aec->delay_agnostic_enabled) {
+      // 2 a) Compensate for a possible change in the system delay.
+
+      // TODO(bjornv): Investigate how we should round the delay difference;
+      // right now we know that incoming |knownDelay| is underestimated when
+      // it's less than |aec->knownDelay|. We therefore round (-32) in that
+      // direction. In the other direction, we don't have this situation, but
+      // might flush one partition too little. This can cause non-causality,
+      // which should be investigated. Maybe allow for a non-symmetric
+      // rounding, like -16.
+      int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
+      int moved_elements =
+          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+      aec->knownDelay -= moved_elements * PART_LEN;
+    } else {
+      // 2 b) Apply signal based delay correction.
+      int move_elements = SignalBasedDelayCorrection(aec);
+      int moved_elements =
+          MoveFarReadPtrWithoutSystemDelayUpdate(aec, move_elements);
+      int far_near_buffer_diff = WebRtc_available_read(aec->far_buf) -
+          WebRtc_available_read(aec->nearFrBuf) / PART_LEN;
+      WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements);
+      WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend,
+                                           moved_elements);
+      aec->signal_delay_correction += moved_elements;
+      // If we rely on reported system delay values only, a buffer underrun here
+      // can never occur since we've taken care of that in 1) above. Here, we
+      // apply signal based delay correction and can therefore end up with
+      // buffer underruns since the delay estimation can be wrong. We therefore
+      // stuff the buffer with enough elements if needed.
+      if (far_near_buffer_diff < 0) {
+        WebRtcAec_MoveFarReadPtr(aec, far_near_buffer_diff);
+      }
+    }
+
+    // 4) Process as many blocks as possible.
+    while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
+      ProcessBlock(aec);
+    }
+
+    // 5) Update system delay with respect to the entire frame.
+    aec->system_delay -= FRAME_LEN;
+
+    // 6) Update output frame.
+    // Stuff the out buffer if we have less than a frame to output.
+    // This should only happen for the first frame.
+    out_elements = (int)WebRtc_available_read(aec->outFrBuf);
+    if (out_elements < FRAME_LEN) {
+      WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN);
+      for (i = 0; i < num_bands - 1; ++i) {
+        WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN);
+      }
+    }
+    // Obtain an output frame.
+    WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN);
+    // For H bands.
+ for (i = 1; i < num_bands; ++i) { + WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN); + } + } +} + +int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std, + float* fraction_poor_delays) { + assert(self != NULL); + assert(median != NULL); + assert(std != NULL); + + if (self->delay_logging_enabled == 0) { + // Logging disabled. + return -1; + } + + if (self->delay_metrics_delivered == 0) { + UpdateDelayMetrics(self); + self->delay_metrics_delivered = 1; + } + *median = self->delay_median; + *std = self->delay_std; + *fraction_poor_delays = self->fraction_poor_delays; + + return 0; +} + +int WebRtcAec_echo_state(AecCore* self) { return self->echoState; } + +void WebRtcAec_GetEchoStats(AecCore* self, + Stats* erl, + Stats* erle, + Stats* a_nlp) { + assert(erl != NULL); + assert(erle != NULL); + assert(a_nlp != NULL); + *erl = self->erl; + *erle = self->erle; + *a_nlp = self->aNlp; +} + +#ifdef WEBRTC_AEC_DEBUG_DUMP +void* WebRtcAec_far_time_buf(AecCore* self) { return self->far_time_buf; } +#endif + +void WebRtcAec_SetConfigCore(AecCore* self, + int nlp_mode, + int metrics_mode, + int delay_logging) { + assert(nlp_mode >= 0 && nlp_mode < 3); + self->nlp_mode = nlp_mode; + self->metricsMode = metrics_mode; + if (self->metricsMode) { + InitMetrics(self); + } + // Turn on delay logging if it is either set explicitly or if delay agnostic + // AEC is enabled (which requires delay estimates). + self->delay_logging_enabled = delay_logging || self->delay_agnostic_enabled; + if (self->delay_logging_enabled) { + memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); + } +} + +void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable) { + self->delay_agnostic_enabled = enable; +} + +int WebRtcAec_delay_agnostic_enabled(AecCore* self) { + return self->delay_agnostic_enabled; +} + +void WebRtcAec_enable_extended_filter(AecCore* self, int enable) { + self->extended_filter_enabled = enable; + self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions; + // Update the delay estimator with filter length. See InitAEC() for details. + WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2); +} + +int WebRtcAec_extended_filter_enabled(AecCore* self) { + return self->extended_filter_enabled; +} + +int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } + +void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { + assert(delay >= 0); + self->system_delay = delay; +} diff --git a/webrtc/modules/audio_processing/aec/aec_core.h b/webrtc/modules/audio_processing/aec/aec_core.h index e693425..241f077 100644 --- a/webrtc/modules/audio_processing/aec/aec_core.h +++ b/webrtc/modules/audio_processing/aec/aec_core.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -12,29 +12,18 @@ * Specifies the interface for the AEC core. 
 */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_

-#include <stdio.h>
+#include <stddef.h>

-#include "signal_processing_library.h"
-#include "typedefs.h"
-
-//#define AEC_DEBUG // for recording files
+#include "webrtc/typedefs.h"

 #define FRAME_LEN 80
-#define PART_LEN 64 // Length of partition
-#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
-#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
-#define NR_PART 12 // Number of partitions
-#define FILT_LEN (PART_LEN * NR_PART) // Filter length
-#define FILT_LEN2 (FILT_LEN * 2) // Double filter length
-#define FAR_BUF_LEN (FILT_LEN2 * 2)
-#define PREF_BAND_SIZE 24
-
-#define BLOCKL_MAX FRAME_LEN
-// Maximum delay in fixed point delay estimator, used for logging
-enum {kMaxDelay = 100};
+#define PART_LEN 64 // Length of partition
+#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
+#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
+#define NUM_HIGH_BANDS_MAX 2 // Max number of high bands

 typedef float complex_t[2];

 // For performance reasons, some arrays of complex numbers are replaced by twice
@@ -46,136 +35,95 @@ typedef float complex_t[2];
 // compile time.

 // Metrics
-enum {offsetLevel = -100};
+enum {
+  kOffsetLevel = -100
+};

-typedef struct {
-  float sfrsum;
-  int sfrcounter;
-  float framelevel;
-  float frsum;
-  int frcounter;
-  float minlevel;
-  float averagelevel;
-} power_level_t;
+typedef struct Stats {
+  float instant;
+  float average;
+  float min;
+  float max;
+  float sum;
+  float hisum;
+  float himean;
+  int counter;
+  int hicounter;
+} Stats;

-typedef struct {
-  float instant;
-  float average;
-  float min;
-  float max;
-  float sum;
-  float hisum;
-  float himean;
-  int counter;
-  int hicounter;
-} stats_t;
+typedef struct AecCore AecCore;

-typedef struct {
-  int farBufWritePos, farBufReadPos;
-
-  int knownDelay;
-  int inSamples, outSamples;
-  int delayEstCtr;
-
-  void *farFrBuf, *nearFrBuf, *outFrBuf;
-
-  void *nearFrBufH;
-  void *outFrBufH;
-
-  float xBuf[PART_LEN2]; // farend
-  float dBuf[PART_LEN2]; // nearend
-  float eBuf[PART_LEN2]; // error
-
-  float dBufH[PART_LEN2]; // nearend
-
-  float xPow[PART_LEN1];
-  float dPow[PART_LEN1];
-  float dMinPow[PART_LEN1];
-  float dInitMinPow[PART_LEN1];
-  float *noisePow;
-
-  float xfBuf[2][NR_PART * PART_LEN1]; // farend fft buffer
-  float wfBuf[2][NR_PART * PART_LEN1]; // filter fft
-  complex_t sde[PART_LEN1]; // cross-psd of nearend and error
-  complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
-  complex_t xfwBuf[NR_PART * PART_LEN1]; // farend windowed fft buffer
-
-  float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near and error psd
-  float hNs[PART_LEN1];
-  float hNlFbMin, hNlFbLocalMin;
-  float hNlXdAvgMin;
-  int hNlNewMin, hNlMinCtr;
-  float overDrive, overDriveSm;
-  float targetSupp, minOverDrive;
-  float outBuf[PART_LEN];
-  int delayIdx;
-
-  short stNearState, echoState;
-  short divergeState;
-
-  int xfBufBlockPos;
-
-  short farBuf[FILT_LEN2 * 2];
-
-  short mult; // sampling frequency multiple
-  int sampFreq;
-  WebRtc_UWord32 seed;
-
-  float mu; // stepsize
-  float errThresh; // error threshold
-
-  int noiseEstCtr;
-
-  power_level_t farlevel;
-  power_level_t nearlevel;
-  power_level_t linoutlevel;
-  power_level_t nlpoutlevel;
-
-  int metricsMode;
-  int stateCounter;
-  stats_t erl;
-  stats_t erle;
-  stats_t aNlp;
-  stats_t rerl;
-
-  // Quantities to control H band scaling for SWB input
-  int freq_avg_ic; //initial bin for averaging nlp gain
-  int flag_Hband_cn; //for comfort noise
-  float cn_scale_Hband; //scale for comfort noise in H band
-
-  int delay_histogram[kMaxDelay];
-  int delay_logging_enabled;
-  void* delay_estimator;
-
-#ifdef AEC_DEBUG
-  FILE *farFile;
-  FILE *nearFile;
-  FILE *outFile;
-  FILE *outLpFile;
-#endif
-} aec_t;
-
-typedef void (*WebRtcAec_FilterFar_t)(aec_t *aec, float yf[2][PART_LEN1]);
-extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
-typedef void (*WebRtcAec_ScaleErrorSignal_t)(aec_t *aec, float ef[2][PART_LEN1]);
-extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
-typedef void (*WebRtcAec_FilterAdaptation_t)
-    (aec_t *aec, float *fft, float ef[2][PART_LEN1]);
-extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
-typedef void (*WebRtcAec_OverdriveAndSuppress_t)
-    (aec_t *aec, float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]);
-extern WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
-
-int WebRtcAec_CreateAec(aec_t **aec);
-int WebRtcAec_FreeAec(aec_t *aec);
-int WebRtcAec_InitAec(aec_t *aec, int sampFreq);
+AecCore* WebRtcAec_CreateAec();  // Returns NULL on error.
+void WebRtcAec_FreeAec(AecCore* aec);
+int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
 void WebRtcAec_InitAec_SSE2(void);
+#if defined(MIPS_FPU_LE)
+void WebRtcAec_InitAec_mips(void);
+#endif
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAec_InitAec_neon(void);
+#endif

-void WebRtcAec_InitMetrics(aec_t *aec);
-void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend,
-                            const short *nearend, const short *nearendH,
-                            short *out, short *outH,
-                            int knownDelay);
+void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
+void WebRtcAec_ProcessFrames(AecCore* aec,
+                             const float* const* nearend,
+                             size_t num_bands,
+                             size_t num_samples,
+                             int knownDelay,
+                             float* const* out);

-#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
+// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
+// Returns the number of elements moved, and adjusts |system_delay| by the
+// corresponding amount in ms.
+int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
+
+// Calculates the median, standard deviation and fraction of poor values among
+// the delay estimates aggregated up to the first call to the function. After
+// that first call the metrics are aggregated and updated every second. With
+// poor values we mean values that most likely will cause the AEC to perform
+// poorly.
+// TODO(bjornv): Consider changing tests and tools to handle a constant
+// aggregation window throughout the session instead.
+int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
+                                  float* fraction_poor_delays);
+
+// Returns the echo state (1: echo, 0: no echo).
+int WebRtcAec_echo_state(AecCore* self);
+
+// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
+void WebRtcAec_GetEchoStats(AecCore* self,
+                            Stats* erl,
+                            Stats* erle,
+                            Stats* a_nlp);
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+void* WebRtcAec_far_time_buf(AecCore* self);
+#endif
+
+// Sets local configuration modes.
+void WebRtcAec_SetConfigCore(AecCore* self,
+                             int nlp_mode,
+                             int metrics_mode,
+                             int delay_logging);
+
+// Non-zero enables, zero disables.
+void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
+
+// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
+// enabled and zero if disabled.
+int WebRtcAec_delay_agnostic_enabled(AecCore* self);
+
+// Enables or disables extended filter mode. Non-zero enables, zero disables.
+void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
+
+// Returns non-zero if extended filter mode is enabled and zero if disabled.
+int WebRtcAec_extended_filter_enabled(AecCore* self);
+
+// Returns the current |system_delay|, i.e., the buffered difference between
+// far-end and near-end.
+int WebRtcAec_system_delay(AecCore* self);
+
+// Sets the |system_delay| to |delay|. Note that if the value is changed
+// improperly, there can be a performance regression. So it should be used with
+// care.
+void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
+
+#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
diff --git a/webrtc/modules/audio_processing/aec/aec_core_internal.h b/webrtc/modules/audio_processing/aec/aec_core_internal.h
new file mode 100644
index 0000000..2de0283
--- /dev/null
+++ b/webrtc/modules/audio_processing/aec/aec_core_internal.h
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/wav_file.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+#include "webrtc/typedefs.h"
+
+// Number of partitions for the extended filter mode. The first one is an enum
+// to be used in array declarations, as it represents the maximum filter length.
+enum {
+  kExtendedNumPartitions = 32
+};
+static const int kNormalNumPartitions = 12;
+
+// Delay estimator constants, used for logging and delay compensation if
+// reported delays are disabled.
+enum {
+  kLookaheadBlocks = 15
+};
+enum {
+  // 500 ms for 16 kHz, which is equivalent to the limit of reported delays.
+  kHistorySizeBlocks = 125
+};
+
+// Extended filter adaptation parameters.
+// TODO(ajm): No narrowband tuning yet.
+static const float kExtendedMu = 0.4f;
+static const float kExtendedErrorThreshold = 1.0e-6f;
+
+typedef struct PowerLevel {
+  float sfrsum;
+  int sfrcounter;
+  float framelevel;
+  float frsum;
+  int frcounter;
+  float minlevel;
+  float averagelevel;
+} PowerLevel;
+
+struct AecCore {
+  int farBufWritePos, farBufReadPos;
+
+  int knownDelay;
+  int inSamples, outSamples;
+  int delayEstCtr;
+
+  RingBuffer* nearFrBuf;
+  RingBuffer* outFrBuf;
+
+  RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
+  RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
+
+  float dBuf[PART_LEN2]; // nearend
+  float eBuf[PART_LEN2]; // error
+
+  float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend
+
+  float xPow[PART_LEN1];
+  float dPow[PART_LEN1];
+  float dMinPow[PART_LEN1];
+  float dInitMinPow[PART_LEN1];
+  float* noisePow;
+
+  float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
+  float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
+  complex_t sde[PART_LEN1]; // cross-psd of nearend and error
+  complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
+  // Farend windowed fft buffer.
+ complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1]; + + float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd + float hNs[PART_LEN1]; + float hNlFbMin, hNlFbLocalMin; + float hNlXdAvgMin; + int hNlNewMin, hNlMinCtr; + float overDrive, overDriveSm; + int nlp_mode; + float outBuf[PART_LEN]; + int delayIdx; + + short stNearState, echoState; + short divergeState; + + int xfBufBlockPos; + + RingBuffer* far_buf; + RingBuffer* far_buf_windowed; + int system_delay; // Current system delay buffered in AEC. + + int mult; // sampling frequency multiple + int sampFreq; + size_t num_bands; + uint32_t seed; + + float normal_mu; // stepsize + float normal_error_threshold; // error threshold + + int noiseEstCtr; + + PowerLevel farlevel; + PowerLevel nearlevel; + PowerLevel linoutlevel; + PowerLevel nlpoutlevel; + + int metricsMode; + int stateCounter; + Stats erl; + Stats erle; + Stats aNlp; + Stats rerl; + + // Quantities to control H band scaling for SWB input + int freq_avg_ic; // initial bin for averaging nlp gain + int flag_Hband_cn; // for comfort noise + float cn_scale_Hband; // scale for comfort noise in H band + + int delay_metrics_delivered; + int delay_histogram[kHistorySizeBlocks]; + int num_delay_values; + int delay_median; + int delay_std; + float fraction_poor_delays; + int delay_logging_enabled; + void* delay_estimator_farend; + void* delay_estimator; + // Variables associated with delay correction through signal based delay + // estimation feedback. + int signal_delay_correction; + int previous_delay; + int delay_correction_count; + int shift_offset; + float delay_quality_threshold; + int frame_count; + + // 0 = delay agnostic mode (signal based delay correction) disabled. + // Otherwise enabled. + int delay_agnostic_enabled; + // 1 = extended filter mode enabled, 0 = disabled. + int extended_filter_enabled; + // Runtime selection of number of filter partitions. + int num_partitions; + +#ifdef WEBRTC_AEC_DEBUG_DUMP + // Sequence number of this AEC instance, so that different instances can + // choose different dump file names. + int instance_index; + + // Number of times we've restarted dumping; used to pick new dump file names + // each time. 
+ int debug_dump_count; + + RingBuffer* far_time_buf; + rtc_WavWriter* farFile; + rtc_WavWriter* nearFile; + rtc_WavWriter* outFile; + rtc_WavWriter* outLinearFile; + FILE* e_fft_file; +#endif +}; + +typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]); +extern WebRtcAecFilterFar WebRtcAec_FilterFar; +typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]); +extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; +typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]); +extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; +typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]); +extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; + +typedef void (*WebRtcAecComfortNoise)(AecCore* aec, + float efw[2][PART_LEN1], + complex_t* comfortNoiseHband, + const float* noisePow, + const float* lambda); +extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise; + +typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec, + float efw[2][PART_LEN1], + float xfw[2][PART_LEN1], + float* fft, + float* cohde, + float* cohxd); +extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_core_mips.c b/webrtc/modules/audio_processing/aec/aec_core_mips.c new file mode 100644 index 0000000..bb33087 --- /dev/null +++ b/webrtc/modules/audio_processing/aec/aec_core_mips.c @@ -0,0 +1,774 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, which is presented with time-aligned signals. 
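+ *
+ * This file provides MIPS FPU assembly versions of the speed-critical
+ * kernels; WebRtcAec_InitAec_mips() at the bottom installs them through the
+ * function pointers declared in aec_core_internal.h.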
+ */ + +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +#include + +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aec/aec_core_internal.h" +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" + +static const int flagHbandCn = 1; // flag for adding comfort noise in H band +extern const float WebRtcAec_weightCurve[65]; +extern const float WebRtcAec_overDriveCurve[65]; + +void WebRtcAec_ComfortNoise_mips(AecCore* aec, + float efw[2][PART_LEN1], + complex_t* comfortNoiseHband, + const float* noisePow, + const float* lambda) { + int i, num; + float rand[PART_LEN]; + float noise, noiseAvg, tmp, tmpAvg; + int16_t randW16[PART_LEN]; + complex_t u[PART_LEN1]; + + const float pi2 = 6.28318530717959f; + const float pi2t = pi2 / 32768; + + // Generate a uniform random array on [0 1] + WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); + + int16_t* randWptr = randW16; + float randTemp, randTemp2, randTemp3, randTemp4; + int32_t tmp1s, tmp2s, tmp3s, tmp4s; + + for (i = 0; i < PART_LEN; i+=4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[tmp1s], 0(%[randWptr]) \n\t" + "lh %[tmp2s], 2(%[randWptr]) \n\t" + "lh %[tmp3s], 4(%[randWptr]) \n\t" + "lh %[tmp4s], 6(%[randWptr]) \n\t" + "mtc1 %[tmp1s], %[randTemp] \n\t" + "mtc1 %[tmp2s], %[randTemp2] \n\t" + "mtc1 %[tmp3s], %[randTemp3] \n\t" + "mtc1 %[tmp4s], %[randTemp4] \n\t" + "cvt.s.w %[randTemp], %[randTemp] \n\t" + "cvt.s.w %[randTemp2], %[randTemp2] \n\t" + "cvt.s.w %[randTemp3], %[randTemp3] \n\t" + "cvt.s.w %[randTemp4], %[randTemp4] \n\t" + "addiu %[randWptr], %[randWptr], 8 \n\t" + "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t" + "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t" + "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t" + "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t" + ".set pop \n\t" + : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp), + [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3), + [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s), + [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s), + [tmp4s] "=&r" (tmp4s) + : [pi2t] "f" (pi2t) + : "memory" + ); + + u[i+1][0] = cosf(randTemp); + u[i+1][1] = sinf(randTemp); + u[i+2][0] = cosf(randTemp2); + u[i+2][1] = sinf(randTemp2); + u[i+3][0] = cosf(randTemp3); + u[i+3][1] = sinf(randTemp3); + u[i+4][0] = cosf(randTemp4); + u[i+4][1] = sinf(randTemp4); + } + + // Reject LF noise + float* u_ptr = &u[1][0]; + float noise2, noise3, noise4; + float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f; + + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i+=4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[noise], 4(%[noisePow]) \n\t" + "lwc1 %[noise2], 8(%[noisePow]) \n\t" + "lwc1 %[noise3], 12(%[noisePow]) \n\t" + "lwc1 %[noise4], 16(%[noisePow]) \n\t" + "sqrt.s %[noise], %[noise] \n\t" + "sqrt.s %[noise2], %[noise2] \n\t" + "sqrt.s %[noise3], %[noise3] \n\t" + "sqrt.s %[noise4], %[noise4] \n\t" + "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t" + "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t" + "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t" + "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t" + "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t" + "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t" + "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t" + "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t" + "addiu %[noisePow], %[noisePow], 16 \n\t" + "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t" + "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t" + "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t" + "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t" + "mul.s 
%[tmp5f], %[tmp5f], %[noise3] \n\t" + "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t" + "swc1 %[tmp1f], 0(%[u_ptr]) \n\t" + "swc1 %[tmp3f], 8(%[u_ptr]) \n\t" + "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t" + "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t" + "neg.s %[tmp2f] \n\t" + "neg.s %[tmp4f] \n\t" + "neg.s %[tmp6f] \n\t" + "neg.s %[tmp8f] \n\t" + "swc1 %[tmp5f], 16(%[u_ptr]) \n\t" + "swc1 %[tmp7f], 24(%[u_ptr]) \n\t" + "swc1 %[tmp2f], 4(%[u_ptr]) \n\t" + "swc1 %[tmp4f], 12(%[u_ptr]) \n\t" + "swc1 %[tmp6f], 20(%[u_ptr]) \n\t" + "swc1 %[tmp8f], 28(%[u_ptr]) \n\t" + "addiu %[u_ptr], %[u_ptr], 32 \n\t" + ".set pop \n\t" + : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow), + [noise] "=&f" (noise), [noise2] "=&f" (noise2), + [noise3] "=&f" (noise3), [noise4] "=&f" (noise4), + [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), + [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f), + [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f), + [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f) + : + : "memory" + ); + } + u[PART_LEN][1] = 0; + noisePow -= PART_LEN; + + u_ptr = &u[0][0]; + float* u_ptr_end = &u[PART_LEN][0]; + float* efw_ptr_0 = &efw[0][0]; + float* efw_ptr_1 = &efw[1][0]; + float tmp9f, tmp10f; + const float tmp1c = 1.0; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[tmp1f], 0(%[lambda]) \n\t" + "lwc1 %[tmp6f], 4(%[lambda]) \n\t" + "addiu %[lambda], %[lambda], 8 \n\t" + "c.lt.s %[tmp1f], %[tmp1c] \n\t" + "bc1f 4f \n\t" + " nop \n\t" + "c.lt.s %[tmp6f], %[tmp1c] \n\t" + "bc1f 3f \n\t" + " nop \n\t" + "2: \n\t" + "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" + "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" + "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" + "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" + "sqrt.s %[tmp1f], %[tmp1f] \n\t" + "sqrt.s %[tmp6f], %[tmp6f] \n\t" + "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" + "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" + "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" + "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" + "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" + "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" + "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" + "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" + "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t" + "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" + "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "b 5f \n\t" + " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "3: \n\t" + "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t" + "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t" + "sqrt.s %[tmp1f], %[tmp1f] \n\t" + "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t" + "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t" + "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t" + "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t" + "madd.s %[tmp4f], 
%[tmp4f], %[tmp1f], %[tmp5f] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t" + "b 5f \n\t" + " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t" + "4: \n\t" + "c.lt.s %[tmp6f], %[tmp1c] \n\t" + "bc1f 5f \n\t" + " nop \n\t" + "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t" + "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t" + "sqrt.s %[tmp6f], %[tmp6f] \n\t" + "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t" + "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t" + "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t" + "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t" + "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t" + "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t" + "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t" + "5: \n\t" + "addiu %[u_ptr], %[u_ptr], 16 \n\t" + "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t" + "bne %[u_ptr], %[u_ptr_end], 1b \n\t" + " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t" + ".set pop \n\t" + : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr), + [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1), + [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f), + [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f), + [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f), + [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f) + : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end) + : "memory" + ); + + lambda -= PART_LEN; + tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0)); + //tmp = 1 - lambda[i]; + efw[0][PART_LEN] += tmp * u[PART_LEN][0]; + efw[1][PART_LEN] += tmp * u[PART_LEN][1]; + + // For H band comfort noise + // TODO: don't compute noise and "tmp" twice. Use the previous results. + noiseAvg = 0.0; + tmpAvg = 0.0; + num = 0; + if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) { + for (i = 0; i < PART_LEN; i++) { + rand[i] = ((float)randW16[i]) / 32768; + } + + // average noise scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + noiseAvg += sqrtf(noisePow[i]); + } + noiseAvg /= (float)num; + + // average nlp scale + // average over second half of freq spectrum (i.e., 4->8khz) + // TODO: we shouldn't need num. We know how many elements we're summing. + num = 0; + for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { + num++; + tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); + } + tmpAvg /= (float)num; + + // Use average noise for H band + // TODO: we should probably have a new random vector here. 
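+    // The H band reuses the noise magnitude and NLP weight averaged over the
+    // upper half of the low band spectrum, combined with a per-bin random
+    // phase derived from the same randW16 draw (see the TODO above).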
+ // Reject LF noise + u[0][0] = 0; + u[0][1] = 0; + for (i = 1; i < PART_LEN1; i++) { + tmp = pi2 * rand[i - 1]; + + // Use average noise for H band + u[i][0] = noiseAvg * (float)cos(tmp); + u[i][1] = -noiseAvg * (float)sin(tmp); + } + u[PART_LEN][1] = 0; + + for (i = 0; i < PART_LEN1; i++) { + // Use average NLP weight for H band + comfortNoiseHband[i][0] = tmpAvg * u[i][0]; + comfortNoiseHband[i][1] = tmpAvg * u[i][1]; + } + } +} + +void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) { + int i; + for (i = 0; i < aec->num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos) * PART_LEN1; + int pos = i * PART_LEN1; + // Check for wrap + if (i + aec->xfBufBlockPos >= aec->num_partitions) { + xPos -= aec->num_partitions * (PART_LEN1); + } + float* yf0 = yf[0]; + float* yf1 = yf[1]; + float* aRe = aec->xfBuf[0] + xPos; + float* aIm = aec->xfBuf[1] + xPos; + float* bRe = aec->wfBuf[0] + pos; + float* bIm = aec->wfBuf[1] + pos; + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13; + int len = PART_LEN1 >> 1; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 4(%[bRe]) \n\t" + "lwc1 %[f6], 4(%[bIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" + "mul.s %[f9], %[f4], %[f5] \n\t" + "mul.s %[f4], %[f4], %[f6] \n\t" + "lwc1 %[f7], 4(%[aIm]) \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f12], %[f2], %[f3] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "mul.s %[f11], %[f6], %[f7] \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "sub.s %[f8], %[f8], %[f12] \n\t" + "mul.s %[f12], %[f7], %[f5] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "add.s %[f1], %[f0], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" + "sub.s %[f9], %[f9], %[f11] \n\t" + "lwc1 %[f6], 4(%[yf0]) \n\t" + "add.s %[f4], %[f4], %[f12] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" + "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t" + "lwc1 %[f6], 4(%[yf0]) \n\t" + "madd.s %[f4], %[f4], %[f7], %[f5] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "lwc1 %[f5], 4(%[yf1]) \n\t" + "add.s %[f2], %[f2], %[f8] \n\t" + "addiu %[bRe], %[bRe], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "add.s %[f3], %[f3], %[f1] \n\t" + "add.s %[f6], %[f6], %[f9] \n\t" + "add.s %[f5], %[f5], %[f4] \n\t" + "swc1 %[f2], 0(%[yf0]) \n\t" + "swc1 %[f3], 0(%[yf1]) \n\t" + "swc1 %[f6], 4(%[yf0]) \n\t" + "swc1 %[f5], 4(%[yf1]) \n\t" + "addiu %[yf0], %[yf0], 8 \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[yf1], %[yf1], 8 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f12], %[f2], %[f3] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "sub.s %[f8], %[f8], %[f12] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "add.s %[f1], %[f0], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" +#else // #if !defined(MIPS32_R2_LE) + "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t" + "lwc1 %[f2], 0(%[yf0]) \n\t" + "madd.s %[f1], %[f0], %[f3], %[f1] \n\t" + "lwc1 %[f3], 0(%[yf1]) \n\t" +#endif // #if 
!defined(MIPS32_R2_LE) + "add.s %[f2], %[f2], %[f8] \n\t" + "add.s %[f3], %[f3], %[f1] \n\t" + "swc1 %[f2], 0(%[yf0]) \n\t" + "swc1 %[f3], 0(%[yf1]) \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe), + [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm), + [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len) + : + : "memory" + ); + } +} + +void WebRtcAec_FilterAdaptation_mips(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]) { + int i; + for (i = 0; i < aec->num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + int pos; + // Check for wrap + if (i + aec->xfBufBlockPos >= aec->num_partitions) { + xPos -= aec->num_partitions * PART_LEN1; + } + + pos = i * PART_LEN1; + float* aRe = aec->xfBuf[0] + xPos; + float* aIm = aec->xfBuf[1] + xPos; + float* bRe = ef[0]; + float* bIm = ef[1]; + float* fft_tmp; + + float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12; + int len = PART_LEN >> 1; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 4(%[bRe]) \n\t" + "lwc1 %[f6], 4(%[bIm]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "addiu %[bRe], %[bRe], 8 \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" + "mul.s %[f0], %[f0], %[f2] \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f9], %[f4], %[f5] \n\t" + "lwc1 %[f7], 4(%[aIm]) \n\t" + "mul.s %[f4], %[f4], %[f6] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f10], %[f3], %[f2] \n\t" + "mul.s %[f1], %[f3], %[f1] \n\t" + "mul.s %[f11], %[f7], %[f6] \n\t" + "mul.s %[f5], %[f7], %[f5] \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "add.s %[f8], %[f8], %[f10] \n\t" + "sub.s %[f1], %[f0], %[f1] \n\t" + "add.s %[f9], %[f9], %[f11] \n\t" + "sub.s %[f5], %[f4], %[f5] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "addiu %[aIm], %[aIm], 8 \n\t" + "addiu %[bIm], %[bIm], 8 \n\t" + "addiu %[len], %[len], -1 \n\t" + "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" + "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t" + "madd.s %[f9], %[f9], %[f7], %[f6] \n\t" + "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[f8], 0(%[fft_tmp]) \n\t" + "swc1 %[f1], 4(%[fft_tmp]) \n\t" + "swc1 %[f9], 8(%[fft_tmp]) \n\t" + "swc1 %[f5], 12(%[fft_tmp]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[bRe]) \n\t" + "lwc1 %[f2], 0(%[bIm]) \n\t" + "lwc1 %[f3], 0(%[aIm]) \n\t" + "mul.s %[f8], %[f0], %[f1] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f10], %[f3], %[f2] \n\t" + "add.s %[f8], %[f8], %[f10] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[f8], %[f8], %[f3], %[f2] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[f8], 4(%[fft]) \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm), + [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp), + [len] "+r" (len) + : [fft] "r" (fft) + : "memory" + ); + + 
aec_rdft_inverse_128(fft); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); + + // fft scaling + { + float scale = 2.0f / PART_LEN2; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "addiu %[len], $zero, 8 \n\t" + "1: \n\t" + "addiu %[len], %[len], -1 \n\t" + "lwc1 %[f0], 0(%[fft_tmp]) \n\t" + "lwc1 %[f1], 4(%[fft_tmp]) \n\t" + "lwc1 %[f2], 8(%[fft_tmp]) \n\t" + "lwc1 %[f3], 12(%[fft_tmp]) \n\t" + "mul.s %[f0], %[f0], %[scale] \n\t" + "mul.s %[f1], %[f1], %[scale] \n\t" + "mul.s %[f2], %[f2], %[scale] \n\t" + "mul.s %[f3], %[f3], %[scale] \n\t" + "lwc1 %[f4], 16(%[fft_tmp]) \n\t" + "lwc1 %[f5], 20(%[fft_tmp]) \n\t" + "lwc1 %[f6], 24(%[fft_tmp]) \n\t" + "lwc1 %[f7], 28(%[fft_tmp]) \n\t" + "mul.s %[f4], %[f4], %[scale] \n\t" + "mul.s %[f5], %[f5], %[scale] \n\t" + "mul.s %[f6], %[f6], %[scale] \n\t" + "mul.s %[f7], %[f7], %[scale] \n\t" + "swc1 %[f0], 0(%[fft_tmp]) \n\t" + "swc1 %[f1], 4(%[fft_tmp]) \n\t" + "swc1 %[f2], 8(%[fft_tmp]) \n\t" + "swc1 %[f3], 12(%[fft_tmp]) \n\t" + "swc1 %[f4], 16(%[fft_tmp]) \n\t" + "swc1 %[f5], 20(%[fft_tmp]) \n\t" + "swc1 %[f6], 24(%[fft_tmp]) \n\t" + "swc1 %[f7], 28(%[fft_tmp]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[fft_tmp], %[fft_tmp], 32 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), + [fft_tmp] "=&r" (fft_tmp) + : [scale] "f" (scale), [fft] "r" (fft) + : "memory" + ); + } + aec_rdft_forward_128(fft); + aRe = aec->wfBuf[0] + pos; + aIm = aec->wfBuf[1] + pos; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[fft_tmp], %[fft], 0 \n\t" + "addiu %[len], $zero, 31 \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[fft_tmp]) \n\t" + "lwc1 %[f2], 256(%[aRe]) \n\t" + "lwc1 %[f3], 4(%[fft_tmp]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 8(%[fft_tmp]) \n\t" + "lwc1 %[f6], 4(%[aIm]) \n\t" + "lwc1 %[f7], 12(%[fft_tmp]) \n\t" + "add.s %[f0], %[f0], %[f1] \n\t" + "add.s %[f2], %[f2], %[f3] \n\t" + "add.s %[f4], %[f4], %[f5] \n\t" + "add.s %[f6], %[f6], %[f7] \n\t" + "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "swc1 %[f0], 0(%[aRe]) \n\t" + "swc1 %[f2], 256(%[aRe]) \n\t" + "swc1 %[f4], 4(%[aRe]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "swc1 %[f6], 4(%[aIm]) \n\t" + "addiu %[aIm], %[aIm], 8 \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[aRe]) \n\t" + "lwc1 %[f1], 0(%[fft_tmp]) \n\t" + "lwc1 %[f2], 0(%[aIm]) \n\t" + "lwc1 %[f3], 4(%[fft_tmp]) \n\t" + "lwc1 %[f4], 4(%[aRe]) \n\t" + "lwc1 %[f5], 8(%[fft_tmp]) \n\t" + "lwc1 %[f6], 4(%[aIm]) \n\t" + "lwc1 %[f7], 12(%[fft_tmp]) \n\t" + "add.s %[f0], %[f0], %[f1] \n\t" + "add.s %[f2], %[f2], %[f3] \n\t" + "add.s %[f4], %[f4], %[f5] \n\t" + "add.s %[f6], %[f6], %[f7] \n\t" + "addiu %[len], %[len], -1 \n\t" + "addiu %[fft_tmp], %[fft_tmp], 16 \n\t" + "swc1 %[f0], 0(%[aRe]) \n\t" + "swc1 %[f2], 0(%[aIm]) \n\t" + "swc1 %[f4], 4(%[aRe]) \n\t" + "addiu %[aRe], %[aRe], 8 \n\t" + "swc1 %[f6], 4(%[aIm]) \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[aIm], %[aIm], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), + [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5), + [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len), + [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm) + : [fft] "r" (fft) + : "memory" + ); + } +} + +void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]) { + int i; + const float one = 
1.0; + float* p_hNl; + float* p_efw0; + float* p_efw1; + float* p_WebRtcAec_wC; + float temp1, temp2, temp3, temp4; + + p_hNl = &hNl[0]; + p_efw0 = &efw[0][0]; + p_efw1 = &efw[1][0]; + p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0]; + + for (i = 0; i < PART_LEN1; i++) { + // Weight subbands + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[temp1], 0(%[p_hNl]) \n\t" + "lwc1 %[temp2], 0(%[p_wC]) \n\t" + "c.lt.s %[hNlFb], %[temp1] \n\t" + "bc1f 1f \n\t" + " mul.s %[temp3], %[temp2], %[hNlFb] \n\t" + "sub.s %[temp4], %[one], %[temp2] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[temp1], %[temp1], %[temp4] \n\t" + "add.s %[temp1], %[temp3], %[temp1] \n\t" +#else // #if !defined(MIPS32_R2_LE) + "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t" +#endif // #if !defined(MIPS32_R2_LE) + "swc1 %[temp1], 0(%[p_hNl]) \n\t" + "1: \n\t" + "addiu %[p_wC], %[p_wC], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), + [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC) + : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl) + : "memory" + ); + + hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); + + __asm __volatile ( + "lwc1 %[temp1], 0(%[p_hNl]) \n\t" + "lwc1 %[temp3], 0(%[p_efw1]) \n\t" + "lwc1 %[temp2], 0(%[p_efw0]) \n\t" + "addiu %[p_hNl], %[p_hNl], 4 \n\t" + "mul.s %[temp3], %[temp3], %[temp1] \n\t" + "mul.s %[temp2], %[temp2], %[temp1] \n\t" + "addiu %[p_efw0], %[p_efw0], 4 \n\t" + "addiu %[p_efw1], %[p_efw1], 4 \n\t" + "neg.s %[temp4], %[temp3] \n\t" + "swc1 %[temp2], -4(%[p_efw0]) \n\t" + "swc1 %[temp4], -4(%[p_efw1]) \n\t" + : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3), + [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1), + [p_hNl] "+r" (p_hNl) + : + : "memory" + ); + } +} + +void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) { + const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; + const float error_threshold = aec->extended_filter_enabled + ? 
kExtendedErrorThreshold + : aec->normal_error_threshold; + int len = (PART_LEN1); + float* ef0 = ef[0]; + float* ef1 = ef[1]; + float* xPow = aec->xPow; + float fac1 = 1e-10f; + float err_th2 = error_threshold * error_threshold; + float f0, f1, f2; +#if !defined(MIPS32_R2_LE) + float f3; +#endif + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "lwc1 %[f0], 0(%[xPow]) \n\t" + "lwc1 %[f1], 0(%[ef0]) \n\t" + "lwc1 %[f2], 0(%[ef1]) \n\t" + "add.s %[f0], %[f0], %[fac1] \n\t" + "div.s %[f1], %[f1], %[f0] \n\t" + "div.s %[f2], %[f2], %[f0] \n\t" + "mul.s %[f0], %[f1], %[f1] \n\t" +#if defined(MIPS32_R2_LE) + "madd.s %[f0], %[f0], %[f2], %[f2] \n\t" +#else + "mul.s %[f3], %[f2], %[f2] \n\t" + "add.s %[f0], %[f0], %[f3] \n\t" +#endif + "c.le.s %[f0], %[err_th2] \n\t" + "nop \n\t" + "bc1t 2f \n\t" + " nop \n\t" + "sqrt.s %[f0], %[f0] \n\t" + "add.s %[f0], %[f0], %[fac1] \n\t" + "div.s %[f0], %[err_th], %[f0] \n\t" + "mul.s %[f1], %[f1], %[f0] \n\t" + "mul.s %[f2], %[f2], %[f0] \n\t" + "2: \n\t" + "mul.s %[f1], %[f1], %[mu] \n\t" + "mul.s %[f2], %[f2], %[mu] \n\t" + "swc1 %[f1], 0(%[ef0]) \n\t" + "swc1 %[f2], 0(%[ef1]) \n\t" + "addiu %[len], %[len], -1 \n\t" + "addiu %[xPow], %[xPow], 4 \n\t" + "addiu %[ef0], %[ef0], 4 \n\t" + "bgtz %[len], 1b \n\t" + " addiu %[ef1], %[ef1], 4 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), +#if !defined(MIPS32_R2_LE) + [f3] "=&f" (f3), +#endif + [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1), + [len] "+r" (len) + : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu), + [err_th] "f" (error_threshold) + : "memory" + ); +} + +void WebRtcAec_InitAec_mips(void) { + WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips; + WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips; + WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips; + WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips; + WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips; +} + diff --git a/webrtc/modules/audio_processing/aec/aec_core_neon.c b/webrtc/modules/audio_processing/aec/aec_core_neon.c new file mode 100644 index 0000000..9a677aa --- /dev/null +++ b/webrtc/modules/audio_processing/aec/aec_core_neon.c @@ -0,0 +1,736 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The core AEC algorithm, neon version of speed-critical functions. + * + * Based on aec_core_sse2.c. 
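+ *
+ * A difference from the SSE2 version worth noting: ARMv7 NEON provides no
+ * vector divide or square-root instruction, so vdivq_f32() and vsqrtq_f32()
+ * below are emulated from reciprocal(-square-root) estimates refined with
+ * Newton-Raphson iterations.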
+ */
+
+#include <arm_neon.h>
+#include <math.h>
+#include <string.h>  // memset
+
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+enum { kShiftExponentIntoTopMantissa = 8 };
+enum { kFloatExponentShift = 23 };
+
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bRe - aIm * bIm;
+}
+
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
+  return aRe * bIm + aIm * bRe;
+}
+
+static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
+  int i;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int j;
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
+    }
+
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
+      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
+      const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
+      const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+      const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
+      const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
+      const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
+      const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
+      const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
+      const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
+      const float32x4_t g = vaddq_f32(yf_re, e);
+      const float32x4_t h = vaddq_f32(yf_im, f);
+      vst1q_f32(&yf[0][j], g);
+      vst1q_f32(&yf[1][j], h);
+    }
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+    }
+  }
+}
+
+// ARM64's arm_neon.h already defines vdivq_f32 and vsqrtq_f32.
+#if !defined(WEBRTC_ARCH_ARM64)
+static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
+  int i;
+  float32x4_t x = vrecpeq_f32(b);
+  // From the ARM documentation:
+  // The Newton-Raphson iteration
+  //   x[n+1] = x[n] * (2 - d * x[n])
+  // converges to (1/d) if x0 is the result of VRECPE applied to d.
+  //
+  // Note: The precision did not improve after 2 iterations.
+  for (i = 0; i < 2; i++) {
+    x = vmulq_f32(vrecpsq_f32(b, x), x);
+  }
+  // a/b = a*(1/b)
+  return vmulq_f32(a, x);
+}
+
+static float32x4_t vsqrtq_f32(float32x4_t s) {
+  int i;
+  float32x4_t x = vrsqrteq_f32(s);
+
+  // Code to handle sqrt(0).
+  // If the input to sqrtf() is zero, a zero will be returned.
+  // If the input to vrsqrteq_f32() is zero, positive infinity is returned.
+  const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
+  // check for divide by zero
+  const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
+  // zero out the positive infinity results
+  x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
+                                      vreinterpretq_u32_f32(x)));
+  // From the ARM documentation:
+  // The Newton-Raphson iteration
+  //   x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2
+  // converges to (1/√d) if x0 is the result of VRSQRTE applied to d.
+  //
+  // Note: The precision did not improve after 2 iterations.
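+  // Illustrative scalar equivalent of one refinement step in the loop below,
+  // using the identity vrsqrtsq_f32(a, b) == (3 - a * b) / 2:
+  //   x = x * (3.0f - s * x * x) * 0.5f;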
+  for (i = 0; i < 2; i++) {
+    x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
+  }
+  // sqrt(s) = s * 1/sqrt(s)
+  return vmulq_f32(s, x);
+}
+#endif  // WEBRTC_ARCH_ARM64
+
+static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
+  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
+  const float error_threshold = aec->extended_filter_enabled ?
+      kExtendedErrorThreshold : aec->normal_error_threshold;
+  const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
+  const float32x4_t kMu = vmovq_n_f32(mu);
+  const float32x4_t kThresh = vmovq_n_f32(error_threshold);
+  int i;
+  // vectorized code (four at once)
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
+    const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
+    const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
+    const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
+    float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
+    float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
+    const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
+    const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
+    const float32x4_t absEf = vsqrtq_f32(ef_sum2);
+    const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
+    const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
+    const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
+    uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
+    uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
+    uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
+                                     vreinterpretq_u32_f32(ef_re));
+    uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
+                                     vreinterpretq_u32_f32(ef_im));
+    ef_re_if = vandq_u32(bigger, ef_re_if);
+    ef_im_if = vandq_u32(bigger, ef_im_if);
+    ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
+    ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
+    ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
+    ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
+    vst1q_f32(&ef[0][i], ef_re);
+    vst1q_f32(&ef[1][i], ef_im);
+  }
+  // scalar code for the remaining items.
+  for (; i < PART_LEN1; i++) {
+    float abs_ef;
+    ef[0][i] /= (aec->xPow[i] + 1e-10f);
+    ef[1][i] /= (aec->xPow[i] + 1e-10f);
+    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
+
+    if (abs_ef > error_threshold) {
+      abs_ef = error_threshold / (abs_ef + 1e-10f);
+      ef[0][i] *= abs_ef;
+      ef[1][i] *= abs_ef;
+    }
+
+    // Stepsize factor
+    ef[0][i] *= mu;
+    ef[1][i] *= mu;
+  }
+}
+
+static void FilterAdaptationNEON(AecCore* aec,
+                                 float* fft,
+                                 float ef[2][PART_LEN1]) {
+  int i;
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
+    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
+    int pos = i * PART_LEN1;
+    int j;
+    // Check for wrap
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * PART_LEN1;
+    }
+
+    // Process the whole array...
+    for (j = 0; j < PART_LEN; j += 4) {
+      // Load xfBuf and ef.
+      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
+      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
+      const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
+      const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
+      // Calculate the product of conjugate(xfBuf) by ef.
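+      // Conjugation negates the imaginary part of xfBuf, so the cross terms
+      // change sign relative to a plain complex multiply: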
+      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
+      //   im(conjugate(a) * b) = aRe * bIm - aIm * bRe
+      const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
+      const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
+      const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
+      const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
+      // Interleave real and imaginary parts.
+      const float32x4x2_t g_n_h = vzipq_f32(e, f);
+      // Store
+      vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
+      vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
+    }
+    // ... and fixup the first imaginary entry.
+    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
+                   -aec->xfBuf[1][xPos + PART_LEN],
+                   ef[0][PART_LEN],
+                   ef[1][PART_LEN]);
+
+    aec_rdft_inverse_128(fft);
+    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
+
+    // fft scaling
+    {
+      const float scale = 2.0f / PART_LEN2;
+      const float32x4_t scale_ps = vmovq_n_f32(scale);
+      for (j = 0; j < PART_LEN; j += 4) {
+        const float32x4_t fft_ps = vld1q_f32(&fft[j]);
+        const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
+        vst1q_f32(&fft[j], fft_scale);
+      }
+    }
+    aec_rdft_forward_128(fft);
+
+    {
+      const float wt1 = aec->wfBuf[1][pos];
+      aec->wfBuf[0][pos + PART_LEN] += fft[1];
+      for (j = 0; j < PART_LEN; j += 4) {
+        float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
+        float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
+        const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
+        const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
+        const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
+        wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
+        wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
+
+        vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
+        vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
+      }
+      aec->wfBuf[1][pos] = wt1;
+    }
+  }
+}
+
+static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
+  // a^b = exp2(b * log2(a))
+  // exp2(x) and log2(x) are calculated using polynomial approximations.
+  float32x4_t log2_a, b_log2_a, a_exp_b;
+
+  // Calculate log2(x), x = a.
+  {
+    // To calculate log2(x), we decompose x like this:
+    //   x = y * 2^n
+    //     n is an integer
+    //     y is in the [1.0, 2.0) range
+    //
+    //   log2(x) = log2(y) + n
+    //     n can be evaluated by playing with float representation.
+    //     log2(y) in a small range can be approximated; this code uses an
+    //     order five polynomial approximation. The coefficients have been
+    //     estimated with the Remez algorithm and the resulting polynomial
+    //     has a maximum relative error of 0.00086%.
+
+    // Compute n.
+    //    This is done by masking out the exponent, shifting it into the top
+    //    bits of the mantissa, putting eight into the biased exponent (to
+    //    shift/compensate for the exponent now sitting in the fractional
+    //    part), and finally subtracting out the implicit leading one.
+    const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
+    const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
+    const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
+    const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a),
+                                       vec_float_exponent_mask);
+    const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
+    const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
+    const float32x4_t n =
+        vsubq_f32(vreinterpretq_f32_u32(n_0),
+                  vreinterpretq_f32_u32(vec_implicit_leading_one));
+    // Compute y.
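+    //    y is built by keeping only the mantissa bits of a and forcing the
+    //    exponent field to that of 1.0f (0x3F800000), so y = 1.mantissa lies
+    //    in [1.0, 2.0).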
+ const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF); + const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000); + const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a), + vec_mantissa_mask); + const float32x4_t y = + vreinterpretq_f32_u32(vorrq_u32(mantissa, + vec_zero_biased_exponent_is_one)); + // Approximate log2(y) ~= (y - 1) * pol5(y). + // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 + const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f); + const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f); + const float32x4_t C3 = vdupq_n_f32(-1.2315303f); + const float32x4_t C2 = vdupq_n_f32(2.5988452f); + const float32x4_t C1 = vdupq_n_f32(-3.3241990f); + const float32x4_t C0 = vdupq_n_f32(3.1157899f); + float32x4_t pol5_y = C5; + pol5_y = vmlaq_f32(C4, y, pol5_y); + pol5_y = vmlaq_f32(C3, y, pol5_y); + pol5_y = vmlaq_f32(C2, y, pol5_y); + pol5_y = vmlaq_f32(C1, y, pol5_y); + pol5_y = vmlaq_f32(C0, y, pol5_y); + const float32x4_t y_minus_one = + vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one)); + const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y); + + // Combine parts. + log2_a = vaddq_f32(n, log2_y); + } + + // b * log2(a) + b_log2_a = vmulq_f32(b, log2_a); + + // Calculate exp2(x), x = b * log2(a). + { + // To calculate 2^x, we decompose x like this: + // x = n + y + // n is an integer, the value of x - 0.5 rounded down, therefore + // y is in the [0.5, 1.5) range + // + // 2^x = 2^n * 2^y + // 2^n can be evaluated by playing with float representation. + // 2^y in a small range can be approximated, this code uses an order two + // polynomial approximation. The coefficients have been estimated + // with the Remez algorithm and the resulting polynomial has a + // maximum relative error of 0.17%. + // To avoid over/underflow, we reduce the range of input to ]-127, 129]. + const float32x4_t max_input = vdupq_n_f32(129.f); + const float32x4_t min_input = vdupq_n_f32(-126.99999f); + const float32x4_t x_min = vminq_f32(b_log2_a, max_input); + const float32x4_t x_max = vmaxq_f32(x_min, min_input); + // Compute n. + const float32x4_t half = vdupq_n_f32(0.5f); + const float32x4_t x_minus_half = vsubq_f32(x_max, half); + const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half); + + // Compute 2^n. + const int32x4_t float_exponent_bias = vdupq_n_s32(127); + const int32x4_t two_n_exponent = + vaddq_s32(x_minus_half_floor, float_exponent_bias); + const float32x4_t two_n = + vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift)); + // Compute y. + const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor)); + + // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. + const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f); + const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f); + const float32x4_t C0 = vdupq_n_f32(1.0017247f); + float32x4_t exp2_y = C2; + exp2_y = vmlaq_f32(C1, y, exp2_y); + exp2_y = vmlaq_f32(C0, y, exp2_y); + + // Combine parts. 
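+    // Worked example: x = 3.5f gives n = 3 and y = 0.5, so the result below
+    // is 2^3 * 2^0.5 ~= 11.31.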
+ a_exp_b = vmulq_f32(exp2_y, two_n); + } + + return a_exp_b; +} + +static void OverdriveAndSuppressNEON(AecCore* aec, + float hNl[PART_LEN1], + const float hNlFb, + float efw[2][PART_LEN1]) { + int i; + const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb); + const float32x4_t vec_one = vdupq_n_f32(1.0f); + const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f); + const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm); + + // vectorized code (four at once) + for (i = 0; i + 3 < PART_LEN1; i += 4) { + // Weight subbands + float32x4_t vec_hNl = vld1q_f32(&hNl[i]); + const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]); + const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb); + const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve, + vec_hNlFb); + const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve); + const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve, + vec_hNl); + const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger), + vreinterpretq_u32_f32(vec_hNl)); + const float32x4_t vec_one_weightCurve_add = + vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl); + const uint32x4_t vec_if1 = + vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add)); + + vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1)); + + { + const float32x4_t vec_overDriveCurve = + vld1q_f32(&WebRtcAec_overDriveCurve[i]); + const float32x4_t vec_overDriveSm_overDriveCurve = + vmulq_f32(vec_overDriveSm, vec_overDriveCurve); + vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve); + vst1q_f32(&hNl[i], vec_hNl); + } + + // Suppress error signal + { + float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]); + float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]); + vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl); + vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl); + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one); + vst1q_f32(&efw[0][i], vec_efw_re); + vst1q_f32(&efw[1][i], vec_efw_im); + } + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + // Weight subbands + if (hNl[i] > hNlFb) { + hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + } + + hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); + + // Suppress error signal + efw[0][i] *= hNl[i]; + efw[1][i] *= hNl[i]; + + // Ooura fft returns incorrect sign on imaginary component. It matters + // here because we are making an additive change with comfort noise. + efw[1][i] *= -1; + } +} + +static int PartitionDelay(const AecCore* aec) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? 
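+  // The returned index is an estimate of the echo-path delay in whole
+  // partitions; SubbandCoherenceNEON() below uses it to select the matching
+  // delayed far-end spectrum from aec->xfwBuf.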
+  float wfEnMax = 0;
+  int i;
+  int delay = 0;
+
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int pos = i * PART_LEN1;
+    float wfEn = 0;
+    float32x4_t vec_wfEn = vdupq_n_f32(0.0f);
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
+      const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
+      vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
+      vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
+    }
+    {
+      float32x2_t vec_total;
+      // A B C D
+      vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn));
+      // A+B C+D
+      vec_total = vpadd_f32(vec_total, vec_total);
+      // A+B+C+D A+B+C+D
+      wfEn = vget_lane_f32(vec_total, 0);
+    }
+
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+    }
+
+    if (wfEn > wfEnMax) {
+      wfEnMax = wfEn;
+      delay = i;
+    }
+  }
+  return delay;
+}
+
+// Updates the following smoothed Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, the filter divergence state is
+// determined and corresponding actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+  const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
+  float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
+  float32x4_t vec_seSum = vdupq_n_f32(0.0f);
+
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
+    const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
+    const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
+    const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
+    const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
+    const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
+    float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
+    float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
+    float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
+    float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
+    float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
+    float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
+
+    vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
+    vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
+    vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
+    vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
+    vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
+    vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
+    vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
+
+    vst1q_f32(&aec->sd[i], vec_sd);
+    vst1q_f32(&aec->se[i], vec_se);
+    vst1q_f32(&aec->sx[i], vec_sx);
+
+    {
+      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
+      float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
+      float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
+      vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
+      vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
+      vec_dfwefw0011 =
vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1); + vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0); + vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]); + vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]); + vst2q_f32(&aec->sde[i][0], vec_sde); + } + + { + float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); + float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0); + float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1); + vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]); + vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]); + vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1); + vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0); + vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]); + vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]); + vst2q_f32(&aec->sxd[i][0], vec_sxd); + } + + vec_sdSum = vaddq_f32(vec_sdSum, vec_sd); + vec_seSum = vaddq_f32(vec_seSum, vec_se); + } + { + float32x2_t vec_sdSum_total; + float32x2_t vec_seSum_total; + // A B C D + vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum), + vget_high_f32(vec_sdSum)); + vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum), + vget_high_f32(vec_seSum)); + // A+B C+D + vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total); + vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total); + // A+B+C+D A+B+C+D + sdSum = vget_lane_f32(vec_sdSum_total, 0); + seSum = vget_lane_f32(vec_seSum_total, 0); + } + + // scalar code for the remaining items. + for (; i < PART_LEN1; i++) { + aec->sd[i] = ptrGCoh[0] * aec->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + aec->se[i] = ptrGCoh[0] * aec->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO(bjornv): investigate further why this is so sensitive. + aec->sx[i] = + ptrGCoh[0] * aec->sx[i] + + ptrGCoh[1] * WEBRTC_SPL_MAX( + xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + aec->sde[i][0] = + ptrGCoh[0] * aec->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + aec->sde[i][1] = + ptrGCoh[0] * aec->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + aec->sxd[i][0] = + ptrGCoh[0] * aec->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + aec->sxd[i][1] = + ptrGCoh[0] * aec->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += aec->sd[i]; + seSum += aec->se[i]; + } + + // Divergent filter safeguard. + aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; + + if (aec->divergeState) + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + + // Reset if error is significantly larger than nearend (13 dB). + if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); +} + +// Window time domain data to be used by the fft. 
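+// The window is the square root of a Hanning window, the usual sqrt-Hanning
+// analysis/synthesis split for 50% overlap-add processing. Scalar equivalent
+// of the vector loop below:
+//   for (i = 0; i < PART_LEN; i++) {
+//     x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i];
+//     x_windowed[PART_LEN + i] =
+//         x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i];
+//   }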
+__inline static void WindowData(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const float32x4_t vec_Buf1 = vld1q_f32(&x[i]); + const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]); + const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]); + // A B C D + float32x4_t vec_sqrtHanning_rev = + vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); + // B A D C + vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev); + // D C B A + vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev), + vget_low_f32(vec_sqrtHanning_rev)); + vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning)); + vst1q_f32(&x_windowed[PART_LEN + i], + vmulq_f32(vec_Buf2, vec_sqrtHanning_rev)); + } +} + +// Puts fft output data into a complex valued array. +__inline static void StoreAsComplex(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]); + vst1q_f32(&data_complex[0][i], vec_data.val[0]); + vst1q_f32(&data_complex[1][i], vec_data.val[1]); + } + // fix beginning/end values + data_complex[1][0] = 0; + data_complex[1][PART_LEN] = 0; + data_complex[0][0] = data[0]; + data_complex[0][PART_LEN] = data[1]; +} + +static void SubbandCoherenceNEON(AecCore* aec, + float efw[2][PART_LEN1], + float xfw[2][PART_LEN1], + float* fft, + float* cohde, + float* cohxd) { + float dfw[2][PART_LEN1]; + int i; + + if (aec->delayEstCtr == 0) + aec->delayIdx = PartitionDelay(aec); + + // Use delayed far. + memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + // Windowed near fft + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed error fft + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); + + SmoothedPSD(aec, efw, dfw, xfw); + + { + const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f); + + // Subband coherence + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]); + const float32x4_t vec_se = vld1q_f32(&aec->se[i]); + const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]); + const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se); + const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx); + float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]); + float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]); + float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]); + float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]); + vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]); + vec_cohde = vdivq_f32(vec_cohde, vec_sdse); + vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]); + vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx); + + vst1q_f32(&cohde[i], vec_cohde); + vst1q_f32(&cohxd[i], vec_cohxd); + } + } + // scalar code for the remaining items. 
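+  // (Both the vector path above and the scalar loop below evaluate the
+  // magnitude-squared coherence, e.g.
+  //   cohde[i] = |sde[i]|^2 / (sd[i] * se[i] + 1e-10f),
+  // with the 1e-10f term guarding against division by zero.)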
+  for (; i < PART_LEN1; i++) {
+    cohde[i] =
+        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+        (aec->sd[i] * aec->se[i] + 1e-10f);
+    cohxd[i] =
+        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+        (aec->sx[i] * aec->sd[i] + 1e-10f);
+  }
+}
+
+void WebRtcAec_InitAec_neon(void) {
+  WebRtcAec_FilterFar = FilterFarNEON;
+  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
+  WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
+  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
+  WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
+}
+
diff --git a/webrtc/modules/audio_processing/aec/aec_core_sse2.c b/webrtc/modules/audio_processing/aec/aec_core_sse2.c
index 8894f28..b1bffcb 100644
--- a/webrtc/modules/audio_processing/aec/aec_core_sse2.c
+++ b/webrtc/modules/audio_processing/aec/aec_core_sse2.c
@@ -12,35 +12,33 @@
 * The core AEC algorithm, SSE2 version of speed-critical functions.
 */

-#include "typedefs.h"
-
-#if defined(WEBRTC_USE_SSE2)
 #include <emmintrin.h>
 #include <math.h>
+#include <string.h>  // memset

-#include "aec_core.h"
-#include "aec_rdft.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aec/aec_common.h"
+#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"

-__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
-{
+__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
   return aRe * bRe - aIm * bIm;
 }

-__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
-{
+__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
   return aRe * bIm + aIm * bRe;
 }

-static void FilterFarSSE2(aec_t *aec, float yf[2][PART_LEN1])
-{
+static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
   int i;
-  for (i = 0; i < NR_PART; i++) {
+  const int num_partitions = aec->num_partitions;
+  for (i = 0; i < num_partitions; i++) {
     int j;
     int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
     int pos = i * PART_LEN1;
     // Check for wrap
-    if (i + aec->xfBufBlockPos >= NR_PART) {
-      xPos -= NR_PART*(PART_LEN1);
+    if (i + aec->xfBufBlockPos >= num_partitions) {
+      xPos -= num_partitions * (PART_LEN1);
     }

     // vectorized code (four at once)
@@ -64,19 +62,25 @@ static void FilterFarSSE2(aec_t *aec, float yf[2][PART_LEN1])
     }
     // scalar code for the remaining items.
     for (; j < PART_LEN1; j++) {
-      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
-      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
-                        aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
+      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
+      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
+                        aec->xfBuf[1][xPos + j],
+                        aec->wfBuf[0][pos + j],
+                        aec->wfBuf[1][pos + j]);
     }
   }
 }

-static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1])
-{
+static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
   const __m128 k1e_10f = _mm_set1_ps(1e-10f);
-  const __m128 kThresh = _mm_set1_ps(aec->errThresh);
-  const __m128 kMu = _mm_set1_ps(aec->mu);
+  const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
+                                                  : _mm_set1_ps(aec->normal_mu);
+  const __m128 kThresh = aec->extended_filter_enabled
+                             ?
_mm_set1_ps(kExtendedErrorThreshold) + : _mm_set1_ps(aec->normal_error_threshold); int i; // vectorized code (four at once) @@ -110,36 +114,46 @@ static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1]) _mm_storeu_ps(&ef[1][i], ef_im); } // scalar code for the remaining items. - for (; i < (PART_LEN1); i++) { - float absEf; - ef[0][i] /= (aec->xPow[i] + 1e-10f); - ef[1][i] /= (aec->xPow[i] + 1e-10f); - absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); + { + const float mu = + aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu; + const float error_threshold = aec->extended_filter_enabled + ? kExtendedErrorThreshold + : aec->normal_error_threshold; + for (; i < (PART_LEN1); i++) { + float abs_ef; + ef[0][i] /= (aec->xPow[i] + 1e-10f); + ef[1][i] /= (aec->xPow[i] + 1e-10f); + abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); - if (absEf > aec->errThresh) { - absEf = aec->errThresh / (absEf + 1e-10f); - ef[0][i] *= absEf; - ef[1][i] *= absEf; + if (abs_ef > error_threshold) { + abs_ef = error_threshold / (abs_ef + 1e-10f); + ef[0][i] *= abs_ef; + ef[1][i] *= abs_ef; + } + + // Stepsize factor + ef[0][i] *= mu; + ef[1][i] *= mu; } - - // Stepsize factor - ef[0][i] *= aec->mu; - ef[1][i] *= aec->mu; } } -static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { +static void FilterAdaptationSSE2(AecCore* aec, + float* fft, + float ef[2][PART_LEN1]) { int i, j; - for (i = 0; i < NR_PART; i++) { - int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1); + const int num_partitions = aec->num_partitions; + for (i = 0; i < num_partitions; i++) { + int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1); int pos = i * PART_LEN1; // Check for wrap - if (i + aec->xfBufBlockPos >= NR_PART) { - xPos -= NR_PART * PART_LEN1; + if (i + aec->xfBufBlockPos >= num_partitions) { + xPos -= num_partitions * PART_LEN1; } // Process the whole array... - for (j = 0; j < PART_LEN; j+= 4) { + for (j = 0; j < PART_LEN; j += 4) { // Load xfBuf and ef. const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]); const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]); @@ -158,22 +172,23 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) const __m128 g = _mm_unpacklo_ps(e, f); const __m128 h = _mm_unpackhi_ps(e, f); // Store - _mm_storeu_ps(&fft[2*j + 0], g); - _mm_storeu_ps(&fft[2*j + 4], h); + _mm_storeu_ps(&fft[2 * j + 0], g); + _mm_storeu_ps(&fft[2 * j + 4], h); } // ... and fixup the first imaginary entry. 
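    // (The 128-point real FFT packs the real-valued Nyquist bin into fft[1],
    // which is why that entry is handled outside the interleaved loop above.)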
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN], -aec->xfBuf[1][xPos + PART_LEN], - ef[0][PART_LEN], ef[1][PART_LEN]); + ef[0][PART_LEN], + ef[1][PART_LEN]); aec_rdft_inverse_128(fft); - memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN); + memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); // fft scaling { float scale = 2.0f / PART_LEN2; const __m128 scale_ps = _mm_load_ps1(&scale); - for (j = 0; j < PART_LEN; j+=4) { + for (j = 0; j < PART_LEN; j += 4) { const __m128 fft_ps = _mm_loadu_ps(&fft[j]); const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps); _mm_storeu_ps(&fft[j], fft_scale); @@ -184,13 +199,15 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) { float wt1 = aec->wfBuf[1][pos]; aec->wfBuf[0][pos + PART_LEN] += fft[1]; - for (j = 0; j < PART_LEN; j+= 4) { + for (j = 0; j < PART_LEN; j += 4) { __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]); __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]); const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]); const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]); - const __m128 fft_re = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2 ,0)); - const __m128 fft_im = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3 ,1)); + const __m128 fft_re = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 fft_im = + _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1)); wtBuf_re = _mm_add_ps(wtBuf_re, fft_re); wtBuf_im = _mm_add_ps(wtBuf_im, fft_im); _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re); @@ -201,8 +218,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) } } -static __m128 mm_pow_ps(__m128 a, __m128 b) -{ +static __m128 mm_pow_ps(__m128 a, __m128 b) { // a^b = exp2(b * log2(a)) // exp2(x) and log2(x) are calculated using polynomial approximations. __m128 log2_a, b_log2_a, a_exp_b; @@ -227,55 +243,55 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) // compensate the fact that the exponent has been shifted in the top/ // fractional part and finally getting rid of the implicit leading one // from the mantissa by substracting it out. - static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = - {0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}; - static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = - {0x43800000, 0x43800000, 0x43800000, 0x43800000}; - static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = - {0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000}; + static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = { + 0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000}; + static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = { + 0x43800000, 0x43800000, 0x43800000, 0x43800000}; + static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = { + 0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000}; static const int shift_exponent_into_top_mantissa = 8; - const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask)); - const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(two_n), - shift_exponent_into_top_mantissa)); - const __m128 n_0 = _mm_or_ps(n_1, *((__m128 *)eight_biased_exponent)); - const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one)); + const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask)); + const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32( + _mm_castps_si128(two_n), shift_exponent_into_top_mantissa)); + const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent)); + const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one)); // Compute y. 
- static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = - {0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}; - static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = - {0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000}; - const __m128 mantissa = _mm_and_ps(a, *((__m128 *)mantissa_mask)); - const __m128 y = _mm_or_ps( - mantissa, *((__m128 *)zero_biased_exponent_is_one)); + static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = { + 0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF}; + static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = { + 0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000}; + const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask)); + const __m128 y = + _mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one)); // Approximate log2(y) ~= (y - 1) * pol5(y). // pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0 - static const ALIGN16_BEG float ALIGN16_END C5[4] = - {-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f}; - static const ALIGN16_BEG float ALIGN16_END C4[4] = - {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f}; - static const ALIGN16_BEG float ALIGN16_END C3[4] = - {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f}; - static const ALIGN16_BEG float ALIGN16_END C2[4] = - {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f}; - static const ALIGN16_BEG float ALIGN16_END C1[4] = - {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f}; - static const ALIGN16_BEG float ALIGN16_END C0[4] = - {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f}; - const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128 *)C5)); - const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128 *)C4)); + static const ALIGN16_BEG float ALIGN16_END C5[4] = { + -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f}; + static const ALIGN16_BEG float ALIGN16_END + C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f}; + static const ALIGN16_BEG float ALIGN16_END + C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f}; + static const ALIGN16_BEG float ALIGN16_END + C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f}; + static const ALIGN16_BEG float ALIGN16_END + C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f}; + static const ALIGN16_BEG float ALIGN16_END + C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f}; + const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5)); + const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4)); const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y); - const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128 *)C3)); + const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3)); const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y); - const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128 *)C2)); + const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2)); const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y); - const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128 *)C1)); + const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1)); const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y); - const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128 *)C0)); - const __m128 y_minus_one = _mm_sub_ps( - y, *((__m128 *)zero_biased_exponent_is_one)); - const __m128 log2_y = _mm_mul_ps(y_minus_one , pol5_y); + const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0)); + const __m128 y_minus_one = + _mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one)); + const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y); // Combine parts. 
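    // Worked example: a = 12.0f = 1.5 * 2^3 gives n = 3 and
    // log2(y) = log2(1.5) ~= 0.585, so log2_a ~= 3.585.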
log2_a = _mm_add_ps(n, log2_y); @@ -299,38 +315,38 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) // maximum relative error of 0.17%. // To avoid over/underflow, we reduce the range of input to ]-127, 129]. - static const ALIGN16_BEG float max_input[4] ALIGN16_END = - {129.f, 129.f, 129.f, 129.f}; - static const ALIGN16_BEG float min_input[4] ALIGN16_END = - {-126.99999f, -126.99999f, -126.99999f, -126.99999f}; - const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128 *)max_input)); - const __m128 x_max = _mm_max_ps(x_min, *((__m128 *)min_input)); + static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f, + 129.f, 129.f}; + static const ALIGN16_BEG float min_input[4] ALIGN16_END = { + -126.99999f, -126.99999f, -126.99999f, -126.99999f}; + const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input)); + const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input)); // Compute n. - static const ALIGN16_BEG float half[4] ALIGN16_END = - {0.5f, 0.5f, 0.5f, 0.5f}; - const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128 *)half)); + static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f, + 0.5f, 0.5f}; + const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half)); const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half); // Compute 2^n. - static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = - {127, 127, 127, 127}; + static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = { + 127, 127, 127, 127}; static const int float_exponent_shift = 23; - const __m128i two_n_exponent = _mm_add_epi32( - x_minus_half_floor, *((__m128i *)float_exponent_bias)); - const __m128 two_n = _mm_castsi128_ps(_mm_slli_epi32( - two_n_exponent, float_exponent_shift)); + const __m128i two_n_exponent = + _mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias)); + const __m128 two_n = + _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift)); // Compute y. const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor)); // Approximate 2^y ~= C2 * y^2 + C1 * y + C0. - static const ALIGN16_BEG float C2[4] ALIGN16_END = - {3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f}; - static const ALIGN16_BEG float C1[4] ALIGN16_END = - {6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f}; - static const ALIGN16_BEG float C0[4] ALIGN16_END = - {1.0017247f, 1.0017247f, 1.0017247f, 1.0017247f}; - const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128 *)C2)); - const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128 *)C1)); + static const ALIGN16_BEG float C2[4] ALIGN16_END = { + 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f}; + static const ALIGN16_BEG float C1[4] ALIGN16_END = { + 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f}; + static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f, + 1.0017247f, 1.0017247f}; + const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2)); + const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1)); const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y); - const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128 *)C0)); + const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0)); // Combine parts. 
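    // (two_n lives entirely in the float exponent field, so the single
    // multiply below merges the integer and fractional parts of the power.)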
a_exp_b = _mm_mul_ps(exp2_y, two_n); @@ -338,10 +354,8 @@ static __m128 mm_pow_ps(__m128 a, __m128 b) return a_exp_b; } -extern const float WebRtcAec_weightCurve[65]; -extern const float WebRtcAec_overDriveCurve[65]; - -static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1], +static void OverdriveAndSuppressSSE2(AecCore* aec, + float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]) { int i; @@ -350,26 +364,25 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1], const __m128 vec_minus_one = _mm_set1_ps(-1.0f); const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm); // vectorized code (four at once) - for (i = 0; i + 3 < PART_LEN1; i+=4) { + for (i = 0; i + 3 < PART_LEN1; i += 4) { // Weight subbands __m128 vec_hNl = _mm_loadu_ps(&hNl[i]); const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]); const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb); - const __m128 vec_weightCurve_hNlFb = _mm_mul_ps( - vec_weightCurve, vec_hNlFb); + const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb); const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve); - const __m128 vec_one_weightCurve_hNl = _mm_mul_ps( - vec_one_weightCurve, vec_hNl); + const __m128 vec_one_weightCurve_hNl = + _mm_mul_ps(vec_one_weightCurve, vec_hNl); const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl); const __m128 vec_if1 = _mm_and_ps( bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl)); vec_hNl = _mm_or_ps(vec_if0, vec_if1); { - const __m128 vec_overDriveCurve = _mm_loadu_ps( - &WebRtcAec_overDriveCurve[i]); - const __m128 vec_overDriveSm_overDriveCurve = _mm_mul_ps( - vec_overDriveSm, vec_overDriveCurve); + const __m128 vec_overDriveCurve = + _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]); + const __m128 vec_overDriveSm_overDriveCurve = + _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve); vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve); _mm_storeu_ps(&hNl[i], vec_hNl); } @@ -393,7 +406,7 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1], // Weight subbands if (hNl[i] > hNlFb) { hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + - (1 - WebRtcAec_weightCurve[i]) * hNl[i]; + (1 - WebRtcAec_weightCurve[i]) * hNl[i]; } hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); @@ -407,11 +420,312 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1], } } +__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) { + // A+B C+D + sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2))); + // A+B+C+D A+B+C+D + sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1))); + _mm_store_ss(dst, sum); +} +static int PartitionDelay(const AecCore* aec) { + // Measures the energy in each filter partition and returns the partition with + // highest energy. + // TODO(bjornv): Spread computational cost by computing one partition per + // block? 
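+  // (Same partition-energy search as the NEON PartitionDelay() above; only
+  // the horizontal-sum idiom differs, via the _mm_add_ps_4x1() helper.)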
+  float wfEnMax = 0;
+  int i;
+  int delay = 0;
+
+  for (i = 0; i < aec->num_partitions; i++) {
+    int j;
+    int pos = i * PART_LEN1;
+    float wfEn = 0;
+    __m128 vec_wfEn = _mm_set1_ps(0.0f);
+    // vectorized code (four at once)
+    for (j = 0; j + 3 < PART_LEN1; j += 4) {
+      const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
+      const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
+      vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
+      vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
+    }
+    _mm_add_ps_4x1(vec_wfEn, &wfEn);
+
+    // scalar code for the remaining items.
+    for (; j < PART_LEN1; j++) {
+      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
+              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
+    }
+
+    if (wfEn > wfEnMax) {
+      wfEnMax = wfEn;
+      delay = i;
+    }
+  }
+  return delay;
+}
+
+// Updates the following smoothed Power Spectral Densities (PSD):
+//  - sd  : near-end
+//  - se  : residual echo
+//  - sx  : far-end
+//  - sde : cross-PSD of near-end and residual echo
+//  - sxd : cross-PSD of near-end and far-end
+//
+// In addition to updating the PSDs, the filter divergence state is
+// determined and corresponding actions are taken.
+static void SmoothedPSD(AecCore* aec,
+                        float efw[2][PART_LEN1],
+                        float dfw[2][PART_LEN1],
+                        float xfw[2][PART_LEN1]) {
+  // Power estimate smoothing coefficients.
+  const float* ptrGCoh = aec->extended_filter_enabled
+      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
+      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
+  int i;
+  float sdSum = 0, seSum = 0;
+  const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
+  const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
+  const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
+  __m128 vec_sdSum = _mm_set1_ps(0.0f);
+  __m128 vec_seSum = _mm_set1_ps(0.0f);
+
+  for (i = 0; i + 3 < PART_LEN1; i += 4) {
+    const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
+    const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
+    const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
+    const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
+    const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
+    const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
+    __m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
+    __m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
+    __m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
+    __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
+    __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
+    __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
+    vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
+    vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
+    vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
+    vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
+    vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
+    vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
+    vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
+    _mm_storeu_ps(&aec->sd[i], vec_sd);
+    _mm_storeu_ps(&aec->se[i], vec_se);
+    _mm_storeu_ps(&aec->sx[i], vec_sx);
+
+    {
+      const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
+      const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
+      __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(2, 0, 2, 0));
+      __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
+                                    _MM_SHUFFLE(3, 1, 3, 1));
+      __m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
+      __m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0,
vec_efw1); + vec_a = _mm_mul_ps(vec_a, vec_GCoh0); + vec_b = _mm_mul_ps(vec_b, vec_GCoh0); + vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011, + _mm_mul_ps(vec_dfw1, vec_efw1)); + vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110, + _mm_mul_ps(vec_dfw1, vec_efw0)); + vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1)); + vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1)); + _mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b)); + _mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b)); + } + + { + const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]); + const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]); + __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654, + _MM_SHUFFLE(2, 0, 2, 0)); + __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654, + _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0); + __m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1); + vec_a = _mm_mul_ps(vec_a, vec_GCoh0); + vec_b = _mm_mul_ps(vec_b, vec_GCoh0); + vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011, + _mm_mul_ps(vec_dfw1, vec_xfw1)); + vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110, + _mm_mul_ps(vec_dfw1, vec_xfw0)); + vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1)); + vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1)); + _mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b)); + _mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b)); + } + + vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd); + vec_seSum = _mm_add_ps(vec_seSum, vec_se); + } + + _mm_add_ps_4x1(vec_sdSum, &sdSum); + _mm_add_ps_4x1(vec_seSum, &seSum); + + for (; i < PART_LEN1; i++) { + aec->sd[i] = ptrGCoh[0] * aec->sd[i] + + ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); + aec->se[i] = ptrGCoh[0] * aec->se[i] + + ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); + // We threshold here to protect against the ill-effects of a zero farend. + // The threshold is not arbitrarily chosen, but balances protection and + // adverse interaction with the algorithm's tuning. + // TODO(bjornv): investigate further why this is so sensitive. + aec->sx[i] = + ptrGCoh[0] * aec->sx[i] + + ptrGCoh[1] * WEBRTC_SPL_MAX( + xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], + WebRtcAec_kMinFarendPSD); + + aec->sde[i][0] = + ptrGCoh[0] * aec->sde[i][0] + + ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); + aec->sde[i][1] = + ptrGCoh[0] * aec->sde[i][1] + + ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); + + aec->sxd[i][0] = + ptrGCoh[0] * aec->sxd[i][0] + + ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); + aec->sxd[i][1] = + ptrGCoh[0] * aec->sxd[i][1] + + ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); + + sdSum += aec->sd[i]; + seSum += aec->se[i]; + } + + // Divergent filter safeguard. + aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; + + if (aec->divergeState) + memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); + + // Reset if error is significantly larger than nearend (13 dB). + if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum)) + memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); +} + +// Window time domain data to be used by the fft. 
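+// The sqrtHanning table is 16-byte aligned, which is what makes the aligned
+// _mm_load_ps in the forward pass safe; the reversed pass reads from
+// PART_LEN - i - 3 and therefore uses the unaligned _mm_loadu_ps.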
+__inline static void WindowData(float* x_windowed, const float* x) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]); + const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]); + const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]); + // A B C D + __m128 vec_sqrtHanning_rev = + _mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]); + // D C B A + vec_sqrtHanning_rev = + _mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev, + _MM_SHUFFLE(0, 1, 2, 3)); + _mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning)); + _mm_storeu_ps(&x_windowed[PART_LEN + i], + _mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev)); + } +} + +// Puts fft output data into a complex valued array. +__inline static void StoreAsComplex(const float* data, + float data_complex[2][PART_LEN1]) { + int i; + for (i = 0; i < PART_LEN; i += 4) { + const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]); + const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]); + const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4, + _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4, + _MM_SHUFFLE(3, 1, 3, 1)); + _mm_storeu_ps(&data_complex[0][i], vec_a); + _mm_storeu_ps(&data_complex[1][i], vec_b); + } + // fix beginning/end values + data_complex[1][0] = 0; + data_complex[1][PART_LEN] = 0; + data_complex[0][0] = data[0]; + data_complex[0][PART_LEN] = data[1]; +} + +static void SubbandCoherenceSSE2(AecCore* aec, + float efw[2][PART_LEN1], + float xfw[2][PART_LEN1], + float* fft, + float* cohde, + float* cohxd) { + float dfw[2][PART_LEN1]; + int i; + + if (aec->delayEstCtr == 0) + aec->delayIdx = PartitionDelay(aec); + + // Use delayed far. + memcpy(xfw, + aec->xfwBuf + aec->delayIdx * PART_LEN1, + sizeof(xfw[0][0]) * 2 * PART_LEN1); + + // Windowed near fft + WindowData(fft, aec->dBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, dfw); + + // Windowed error fft + WindowData(fft, aec->eBuf); + aec_rdft_forward_128(fft); + StoreAsComplex(fft, efw); + + SmoothedPSD(aec, efw, dfw, xfw); + + { + const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f); + + // Subband coherence + for (i = 0; i + 3 < PART_LEN1; i += 4) { + const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]); + const __m128 vec_se = _mm_loadu_ps(&aec->se[i]); + const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]); + const __m128 vec_sdse = _mm_add_ps(vec_1eminus10, + _mm_mul_ps(vec_sd, vec_se)); + const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10, + _mm_mul_ps(vec_sd, vec_sx)); + const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]); + const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]); + const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]); + const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]); + const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654, + _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654, + _MM_SHUFFLE(3, 1, 3, 1)); + const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654, + _MM_SHUFFLE(2, 0, 2, 0)); + const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654, + _MM_SHUFFLE(3, 1, 3, 1)); + __m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0); + __m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0); + vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1)); + vec_cohde = _mm_div_ps(vec_cohde, vec_sdse); + vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1)); + vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx); + _mm_storeu_ps(&cohde[i], vec_cohde); + 
_mm_storeu_ps(&cohxd[i], vec_cohxd);
+    }
+
+    // Scalar code for the remaining items.
+    for (; i < PART_LEN1; i++) {
+      cohde[i] =
+          (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
+          (aec->sd[i] * aec->se[i] + 1e-10f);
+      cohxd[i] =
+          (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
+          (aec->sx[i] * aec->sd[i] + 1e-10f);
+    }
+  }
+}
+
 void WebRtcAec_InitAec_SSE2(void) {
   WebRtcAec_FilterFar = FilterFarSSE2;
   WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
   WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
   WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
+  WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
 }
-
-#endif  // WEBRTC_USE_SSE2
diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.c b/webrtc/modules/audio_processing/aec/aec_rdft.c
index 9222334..2c3cff2 100644
--- a/webrtc/modules/audio_processing/aec/aec_rdft.c
+++ b/webrtc/modules/audio_processing/aec/aec_rdft.c
@@ -19,200 +19,193 @@
  * be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "aec_rdft.h"
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
 
 #include <math.h>
 
-#include "system_wrappers/interface/cpu_features_wrapper.h"
-#include "typedefs.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
 
-// constants shared by all paths (C, SSE2).
-float rdft_w[64];
-// constants used by the C path.
-float rdft_wk3ri_first[32];
-float rdft_wk3ri_second[32];
-// constants used by SSE2 but initialized in C path.
-ALIGN16_BEG float ALIGN16_END rdft_wk1r[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk2r[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk3r[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk1i[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk2i[32];
-ALIGN16_BEG float ALIGN16_END rdft_wk3i[32];
-ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
+// These tables used to be computed at run-time. For example, refer to:
+// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
+// to see the initialization code.
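The constant tables that follow can be cross-checked against the removed run-time initialization. For the second half of rdft_w (the cos/sin table consumed by rftfsub_128 and rftbsub_128), a minimal sketch mirroring the removed makect_32(), whose deletion appears further down in this diff, reproduces the tabulated values; the first 32 entries of rdft_w additionally pass through the bit-reversal permutation applied by the removed makewt_32(), so they are not in monotonic angle order:

    /* Sketch: regenerate rdft_w[32..63] the way the removed makect_32() did. */
    #include <math.h>
    #include <stdio.h>

    int main(void) {
      float c[32];
      const float delta = atanf(1.0f) / 16;  /* pi/64 */
      int j;
      c[0] = cosf(delta * 16);  /* cos(pi/4) */
      c[16] = 0.5f * c[0];
      for (j = 1; j < 16; j++) {
        c[j] = 0.5f * cosf(delta * j);
        c[32 - j] = 0.5f * sinf(delta * j);
      }
      for (j = 0; j < 32; j++)
        printf("%.10ff,\n", c[j]);  /* matches rdft_w[32 + j] below */
      return 0;
    }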
+const float rdft_w[64] = { + 1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f, + 0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f, + 0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f, + 0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f, + 0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f, + 0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f, + 0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f, + 0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f, + 0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f, + 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f, + 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f, + 0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f, + 0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f, + 0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f, + 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f, + 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f, +}; +const float rdft_wk3ri_first[16] = { + 1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f, + 0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f, + 0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f, + 0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f, +}; +const float rdft_wk3ri_second[16] = { + -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f, + -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f, + -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f, + -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = { + 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, + 0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f, + 0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f, + 0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f, + 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f, + 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, + 0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f, + 0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = { + 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, + 0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f, + 0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f, + 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, + 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f, + 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, + 0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f, + 0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = { + 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, + 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, + 0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f, + -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f, + 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f, + 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, + 0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f, + -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f, +}; +ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = { + -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, + -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, + -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f, + -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f, + -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f, + 
-0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
+    -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
+    -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
+    -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
+    -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
+    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
+    -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
+    -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
+    -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
+    -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
+    -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
+};
+ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
+    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
+    -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
+    -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
+    -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
+    -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
+    -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
+    -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
+    -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
+};
+ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
+    0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
+};
 
-static int ip[16];
+static void bitrv2_128_C(float* a) {
+  /*
+    The following things have been attempted but are no faster:
+    (a) Storing the swap indexes in a LUT (index calculations are done
+        for 'free' while waiting on memory/L1).
+    (b) Consolidating the load/store of two consecutive floats by a 64 bit
+        integer (execution is memory/L1 bound).
+    (c) Doing a mix of floats and 64 bit integers to maximize register
+        utilization (execution is memory/L1 bound).
+    (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
+    (e) Hard-coding the offsets to completely eliminate index
+        calculations.
+ */ -static void bitrv2_32or128(int n, int *ip, float *a) { - // n is 32 or 128 - int j, j1, k, k1, m, m2; + unsigned int j, j1, k, k1; float xr, xi, yr, yi; - ip[0] = 0; - { - int l = n; - m = 1; - while ((m << 3) < l) { - l >>= 1; - for (j = 0; j < m; j++) { - ip[m + j] = ip[j] + l; - } - m <<= 1; - } - } - m2 = 2 * m; - for (k = 0; k < m; k++) { + static const int ip[4] = {0, 64, 32, 96}; + for (k = 0; k < 4; k++) { for (j = 0; j < k; j++) { j1 = 2 * j + ip[k]; k1 = 2 * k + ip[j]; - xr = a[j1]; + xr = a[j1 + 0]; xi = a[j1 + 1]; - yr = a[k1]; + yr = a[k1 + 0]; yi = a[k1 + 1]; - a[j1] = yr; + a[j1 + 0] = yr; a[j1 + 1] = yi; - a[k1] = xr; + a[k1 + 0] = xr; a[k1 + 1] = xi; - j1 += m2; - k1 += 2 * m2; - xr = a[j1]; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; xi = a[j1 + 1]; - yr = a[k1]; + yr = a[k1 + 0]; yi = a[k1 + 1]; - a[j1] = yr; + a[j1 + 0] = yr; a[j1 + 1] = yi; - a[k1] = xr; + a[k1 + 0] = xr; a[k1 + 1] = xi; - j1 += m2; - k1 -= m2; - xr = a[j1]; + j1 += 8; + k1 -= 8; + xr = a[j1 + 0]; xi = a[j1 + 1]; - yr = a[k1]; + yr = a[k1 + 0]; yi = a[k1 + 1]; - a[j1] = yr; + a[j1 + 0] = yr; a[j1 + 1] = yi; - a[k1] = xr; + a[k1 + 0] = xr; a[k1 + 1] = xi; - j1 += m2; - k1 += 2 * m2; - xr = a[j1]; + j1 += 8; + k1 += 16; + xr = a[j1 + 0]; xi = a[j1 + 1]; - yr = a[k1]; + yr = a[k1 + 0]; yi = a[k1 + 1]; - a[j1] = yr; + a[j1 + 0] = yr; a[j1 + 1] = yi; - a[k1] = xr; + a[k1 + 0] = xr; a[k1 + 1] = xi; } - j1 = 2 * k + m2 + ip[k]; - k1 = j1 + m2; - xr = a[j1]; + j1 = 2 * k + 8 + ip[k]; + k1 = j1 + 8; + xr = a[j1 + 0]; xi = a[j1 + 1]; - yr = a[k1]; + yr = a[k1 + 0]; yi = a[k1 + 1]; - a[j1] = yr; + a[j1 + 0] = yr; a[j1 + 1] = yi; - a[k1] = xr; + a[k1 + 0] = xr; a[k1 + 1] = xi; } } -static void makewt_32(void) { - const int nw = 32; - int j, nwh; - float delta, x, y; - - ip[0] = nw; - ip[1] = 1; - nwh = nw >> 1; - delta = atanf(1.0f) / nwh; - rdft_w[0] = 1; - rdft_w[1] = 0; - rdft_w[nwh] = cosf(delta * nwh); - rdft_w[nwh + 1] = rdft_w[nwh]; - for (j = 2; j < nwh; j += 2) { - x = cosf(delta * j); - y = sinf(delta * j); - rdft_w[j] = x; - rdft_w[j + 1] = y; - rdft_w[nw - j] = y; - rdft_w[nw - j + 1] = x; - } - bitrv2_32or128(nw, ip + 2, rdft_w); - - // pre-calculate constants used by cft1st_128 and cftmdl_128... - cftmdl_wk1r[0] = rdft_w[2]; - cftmdl_wk1r[1] = rdft_w[2]; - cftmdl_wk1r[2] = rdft_w[2]; - cftmdl_wk1r[3] = -rdft_w[2]; - { - int k1; - - for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) { - const int k2 = 2 * k1; - const float wk2r = rdft_w[k1 + 0]; - const float wk2i = rdft_w[k1 + 1]; - float wk1r, wk1i; - // ... scalar version. - wk1r = rdft_w[k2 + 0]; - wk1i = rdft_w[k2 + 1]; - rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i; - rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i; - wk1r = rdft_w[k2 + 2]; - wk1i = rdft_w[k2 + 3]; - rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i; - rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i; - // ... vector version. 
- rdft_wk1r[k2 + 0] = rdft_w[k2 + 0]; - rdft_wk1r[k2 + 1] = rdft_w[k2 + 0]; - rdft_wk1r[k2 + 2] = rdft_w[k2 + 2]; - rdft_wk1r[k2 + 3] = rdft_w[k2 + 2]; - rdft_wk2r[k2 + 0] = rdft_w[k1 + 0]; - rdft_wk2r[k2 + 1] = rdft_w[k1 + 0]; - rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1]; - rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1]; - rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0]; - rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0]; - rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0]; - rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0]; - rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1]; - rdft_wk1i[k2 + 1] = rdft_w[k2 + 1]; - rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3]; - rdft_wk1i[k2 + 3] = rdft_w[k2 + 3]; - rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1]; - rdft_wk2i[k2 + 1] = rdft_w[k1 + 1]; - rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0]; - rdft_wk2i[k2 + 3] = rdft_w[k1 + 0]; - rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1]; - rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1]; - rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1]; - rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1]; - } - } -} - -static void makect_32(void) { - float *c = rdft_w + 32; - const int nc = 32; - int j, nch; - float delta; - - ip[1] = nc; - nch = nc >> 1; - delta = atanf(1.0f) / nch; - c[0] = cosf(delta * nch); - c[nch] = 0.5f * c[0]; - for (j = 1; j < nch; j++) { - c[j] = 0.5f * cosf(delta * j); - c[nc - j] = 0.5f * sinf(delta * j); - } -} - -static void cft1st_128_C(float *a) { +static void cft1st_128_C(float* a) { const int n = 128; int j, k1, k2; float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; + // The processing of the first set of elements was simplified in C to avoid + // some operations (multiplication by zero or one, addition of two elements + // multiplied by the same weight, ...). x0r = a[0] + a[2]; x0i = a[1] + a[3]; x1r = a[0] - a[2]; @@ -311,7 +304,7 @@ static void cft1st_128_C(float *a) { } } -static void cftmdl_128_C(float *a) { +static void cftmdl_128_C(float* a) { const int l = 8; const int n = 128; const int m = 32; @@ -320,7 +313,7 @@ static void cftmdl_128_C(float *a) { float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; for (j0 = 0; j0 < l; j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -342,7 +335,7 @@ static void cftmdl_128_C(float *a) { } wk1r = rdft_w[2]; for (j0 = m; j0 < l + m; j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -378,7 +371,7 @@ static void cftmdl_128_C(float *a) { wk3r = rdft_wk3ri_first[k1 + 0]; wk3i = rdft_wk3ri_first[k1 + 1]; for (j0 = k; j0 < l + k; j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -409,7 +402,7 @@ static void cftmdl_128_C(float *a) { wk3r = rdft_wk3ri_second[k1 + 0]; wk3i = rdft_wk3ri_second[k1 + 1]; for (j0 = k + m; j0 < l + (k + m); j0 += 2) { - j1 = j0 + 8; + j1 = j0 + 8; j2 = j0 + 16; j3 = j0 + 24; x0r = a[j0 + 0] + a[j1 + 0]; @@ -438,7 +431,7 @@ static void cftmdl_128_C(float *a) { } } -static void cftfsub_128(float *a) { +static void cftfsub_128_C(float* a) { int j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; @@ -468,7 +461,7 @@ static void cftfsub_128(float *a) { } } -static void cftbsub_128(float *a) { +static void cftbsub_128_C(float* a) { int j, j1, j2, j3, l; float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; @@ -499,14 +492,14 @@ static void cftbsub_128(float *a) { } } -static void rftfsub_128_C(float *a) { - const float *c = rdft_w + 32; +static void rftfsub_128_C(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; 
float wkr, wki, xr, xi, yr, yi; for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; @@ -520,15 +513,15 @@ static void rftfsub_128_C(float *a) { } } -static void rftbsub_128_C(float *a) { - const float *c = rdft_w + 32; +static void rftbsub_128_C(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; a[1] = -a[1]; for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; @@ -543,11 +536,9 @@ static void rftbsub_128_C(float *a) { a[65] = -a[65]; } -void aec_rdft_forward_128(float *a) { - const int n = 128; +void aec_rdft_forward_128(float* a) { float xi; - - bitrv2_32or128(n, ip + 2, a); + bitrv2_128(a); cftfsub_128(a); rftfsub_128(a); xi = a[0] - a[1]; @@ -555,33 +546,44 @@ void aec_rdft_forward_128(float *a) { a[1] = xi; } -void aec_rdft_inverse_128(float *a) { - const int n = 128; - +void aec_rdft_inverse_128(float* a) { a[1] = 0.5f * (a[0] - a[1]); a[0] -= a[1]; rftbsub_128(a); - bitrv2_32or128(n, ip + 2, a); + bitrv2_128(a); cftbsub_128(a); } // code path selection -rft_sub_128_t cft1st_128; -rft_sub_128_t cftmdl_128; -rft_sub_128_t rftfsub_128; -rft_sub_128_t rftbsub_128; +RftSub128 cft1st_128; +RftSub128 cftmdl_128; +RftSub128 rftfsub_128; +RftSub128 rftbsub_128; +RftSub128 cftfsub_128; +RftSub128 cftbsub_128; +RftSub128 bitrv2_128; void aec_rdft_init(void) { cft1st_128 = cft1st_128_C; cftmdl_128 = cftmdl_128_C; rftfsub_128 = rftfsub_128_C; rftbsub_128 = rftbsub_128_C; + cftfsub_128 = cftfsub_128_C; + cftbsub_128 = cftbsub_128_C; + bitrv2_128 = bitrv2_128_C; +#if defined(WEBRTC_ARCH_X86_FAMILY) if (WebRtc_GetCPUInfo(kSSE2)) { -#if defined(WEBRTC_USE_SSE2) aec_rdft_init_sse2(); -#endif } - // init library constants. - makewt_32(); - makect_32(); +#endif +#if defined(MIPS_FPU_LE) + aec_rdft_init_mips(); +#endif +#if defined(WEBRTC_HAS_NEON) + aec_rdft_init_neon(); +#elif defined(WEBRTC_DETECT_NEON) + if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { + aec_rdft_init_neon(); + } +#endif } diff --git a/webrtc/modules/audio_processing/aec/aec_rdft.h b/webrtc/modules/audio_processing/aec/aec_rdft.h index 91bedc9..18eb7a5 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft.h +++ b/webrtc/modules/audio_processing/aec/aec_rdft.h @@ -11,6 +11,8 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_ +#include "webrtc/modules/audio_processing/aec/aec_common.h" + // These intrinsics were unavailable before VS 2008. // TODO(andrew): move to a common file. #if defined(_MSC_VER) && _MSC_VER < 1500 @@ -19,39 +21,41 @@ static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; } static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; } #endif -#ifdef _MSC_VER /* visual c++ */ -# define ALIGN16_BEG __declspec(align(16)) -# define ALIGN16_END -#else /* gcc or icc */ -# define ALIGN16_BEG -# define ALIGN16_END __attribute__((aligned(16))) -#endif - -// constants shared by all paths (C, SSE2). -extern float rdft_w[64]; -// constants used by the C path. -extern float rdft_wk3ri_first[32]; -extern float rdft_wk3ri_second[32]; -// constants used by SSE2 but initialized in C path. 
-extern float rdft_wk1r[32]; -extern float rdft_wk2r[32]; -extern float rdft_wk3r[32]; -extern float rdft_wk1i[32]; -extern float rdft_wk2i[32]; -extern float rdft_wk3i[32]; -extern float cftmdl_wk1r[4]; +// Constants shared by all paths (C, SSE2, NEON). +extern const float rdft_w[64]; +// Constants used by the C path. +extern const float rdft_wk3ri_first[16]; +extern const float rdft_wk3ri_second[16]; +// Constants used by SSE2 and NEON but initialized in the C path. +extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32]; +extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32]; +extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32]; +extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32]; +extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32]; +extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32]; +extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4]; // code path selection function pointers -typedef void (*rft_sub_128_t)(float *a); -extern rft_sub_128_t rftfsub_128; -extern rft_sub_128_t rftbsub_128; -extern rft_sub_128_t cft1st_128; -extern rft_sub_128_t cftmdl_128; +typedef void (*RftSub128)(float* a); +extern RftSub128 rftfsub_128; +extern RftSub128 rftbsub_128; +extern RftSub128 cft1st_128; +extern RftSub128 cftmdl_128; +extern RftSub128 cftfsub_128; +extern RftSub128 cftbsub_128; +extern RftSub128 bitrv2_128; // entry points void aec_rdft_init(void); void aec_rdft_init_sse2(void); -void aec_rdft_forward_128(float *a); -void aec_rdft_inverse_128(float *a); +void aec_rdft_forward_128(float* a); +void aec_rdft_inverse_128(float* a); + +#if defined(MIPS_FPU_LE) +void aec_rdft_init_mips(void); +#endif +#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON) +void aec_rdft_init_neon(void); +#endif #endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_ diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_mips.c b/webrtc/modules/audio_processing/aec/aec_rdft_mips.c new file mode 100644 index 0000000..7e64e65 --- /dev/null +++ b/webrtc/modules/audio_processing/aec/aec_rdft_mips.c @@ -0,0 +1,1187 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" +#include "webrtc/typedefs.h" + +static void bitrv2_128_mips(float* a) { + // n is 128 + float xr, xi, yr, yi; + + xr = a[8]; + xi = a[9]; + yr = a[16]; + yi = a[17]; + a[8] = yr; + a[9] = yi; + a[16] = xr; + a[17] = xi; + + xr = a[64]; + xi = a[65]; + yr = a[2]; + yi = a[3]; + a[64] = yr; + a[65] = yi; + a[2] = xr; + a[3] = xi; + + xr = a[72]; + xi = a[73]; + yr = a[18]; + yi = a[19]; + a[72] = yr; + a[73] = yi; + a[18] = xr; + a[19] = xi; + + xr = a[80]; + xi = a[81]; + yr = a[10]; + yi = a[11]; + a[80] = yr; + a[81] = yi; + a[10] = xr; + a[11] = xi; + + xr = a[88]; + xi = a[89]; + yr = a[26]; + yi = a[27]; + a[88] = yr; + a[89] = yi; + a[26] = xr; + a[27] = xi; + + xr = a[74]; + xi = a[75]; + yr = a[82]; + yi = a[83]; + a[74] = yr; + a[75] = yi; + a[82] = xr; + a[83] = xi; + + xr = a[32]; + xi = a[33]; + yr = a[4]; + yi = a[5]; + a[32] = yr; + a[33] = yi; + a[4] = xr; + a[5] = xi; + + xr = a[40]; + xi = a[41]; + yr = a[20]; + yi = a[21]; + a[40] = yr; + a[41] = yi; + a[20] = xr; + a[21] = xi; + + xr = a[48]; + xi = a[49]; + yr = a[12]; + yi = a[13]; + a[48] = yr; + a[49] = yi; + a[12] = xr; + a[13] = xi; + + xr = a[56]; + xi = a[57]; + yr = a[28]; + yi = a[29]; + a[56] = yr; + a[57] = yi; + a[28] = xr; + a[29] = xi; + + xr = a[34]; + xi = a[35]; + yr = a[68]; + yi = a[69]; + a[34] = yr; + a[35] = yi; + a[68] = xr; + a[69] = xi; + + xr = a[42]; + xi = a[43]; + yr = a[84]; + yi = a[85]; + a[42] = yr; + a[43] = yi; + a[84] = xr; + a[85] = xi; + + xr = a[50]; + xi = a[51]; + yr = a[76]; + yi = a[77]; + a[50] = yr; + a[51] = yi; + a[76] = xr; + a[77] = xi; + + xr = a[58]; + xi = a[59]; + yr = a[92]; + yi = a[93]; + a[58] = yr; + a[59] = yi; + a[92] = xr; + a[93] = xi; + + xr = a[44]; + xi = a[45]; + yr = a[52]; + yi = a[53]; + a[44] = yr; + a[45] = yi; + a[52] = xr; + a[53] = xi; + + xr = a[96]; + xi = a[97]; + yr = a[6]; + yi = a[7]; + a[96] = yr; + a[97] = yi; + a[6] = xr; + a[7] = xi; + + xr = a[104]; + xi = a[105]; + yr = a[22]; + yi = a[23]; + a[104] = yr; + a[105] = yi; + a[22] = xr; + a[23] = xi; + + xr = a[112]; + xi = a[113]; + yr = a[14]; + yi = a[15]; + a[112] = yr; + a[113] = yi; + a[14] = xr; + a[15] = xi; + + xr = a[120]; + xi = a[121]; + yr = a[30]; + yi = a[31]; + a[120] = yr; + a[121] = yi; + a[30] = xr; + a[31] = xi; + + xr = a[98]; + xi = a[99]; + yr = a[70]; + yi = a[71]; + a[98] = yr; + a[99] = yi; + a[70] = xr; + a[71] = xi; + + xr = a[106]; + xi = a[107]; + yr = a[86]; + yi = a[87]; + a[106] = yr; + a[107] = yi; + a[86] = xr; + a[87] = xi; + + xr = a[114]; + xi = a[115]; + yr = a[78]; + yi = a[79]; + a[114] = yr; + a[115] = yi; + a[78] = xr; + a[79] = xi; + + xr = a[122]; + xi = a[123]; + yr = a[94]; + yi = a[95]; + a[122] = yr; + a[123] = yi; + a[94] = xr; + a[95] = xi; + + xr = a[100]; + xi = a[101]; + yr = a[38]; + yi = a[39]; + a[100] = yr; + a[101] = yi; + a[38] = xr; + a[39] = xi; + + xr = a[108]; + xi = a[109]; + yr = a[54]; + yi = a[55]; + a[108] = yr; + a[109] = yi; + a[54] = xr; + a[55] = xi; + + xr = a[116]; + xi = a[117]; + yr = a[46]; + yi = a[47]; + a[116] = yr; + a[117] = yi; + a[46] = xr; + a[47] = xi; + + xr = a[124]; + xi = a[125]; + yr = a[62]; + yi = a[63]; + a[124] = yr; + a[125] = yi; + a[62] = xr; + a[63] = xi; + + xr = a[110]; + xi = a[111]; + yr = a[118]; + yi = a[119]; + a[110] = yr; + a[111] = yi; + a[118] = xr; + a[119] = xi; +} + +static void cft1st_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; + int a_ptr, p1_rdft, p2_rdft, 
count; + const float* first = rdft_wk3ri_first; + const float* second = rdft_wk3ri_second; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + // first 8 + "lwc1 %[f0], 0(%[a]) \n\t" + "lwc1 %[f1], 4(%[a]) \n\t" + "lwc1 %[f2], 8(%[a]) \n\t" + "lwc1 %[f3], 12(%[a]) \n\t" + "lwc1 %[f4], 16(%[a]) \n\t" + "lwc1 %[f5], 20(%[a]) \n\t" + "lwc1 %[f6], 24(%[a]) \n\t" + "lwc1 %[f7], 28(%[a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "sub.s %[f2], %[f1], %[f4] \n\t" + "add.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[a]) \n\t" + "swc1 %[f8], 16(%[a]) \n\t" + "swc1 %[f2], 28(%[a]) \n\t" + "swc1 %[f1], 12(%[a]) \n\t" + "swc1 %[f4], 4(%[a]) \n\t" + "swc1 %[f6], 20(%[a]) \n\t" + "swc1 %[f3], 8(%[a]) \n\t" + "swc1 %[f0], 24(%[a]) \n\t" + // second 8 + "lwc1 %[f0], 32(%[a]) \n\t" + "lwc1 %[f1], 36(%[a]) \n\t" + "lwc1 %[f2], 40(%[a]) \n\t" + "lwc1 %[f3], 44(%[a]) \n\t" + "lwc1 %[f4], 48(%[a]) \n\t" + "lwc1 %[f5], 52(%[a]) \n\t" + "lwc1 %[f6], 56(%[a]) \n\t" + "lwc1 %[f7], 60(%[a]) \n\t" + "add.s %[f8], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f4], %[f1] \n\t" + "sub.s %[f4], %[f4], %[f1] \n\t" + "add.s %[f1], %[f3], %[f8] \n\t" + "sub.s %[f3], %[f3], %[f8] \n\t" + "sub.s %[f8], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f6], %[f2] \n\t" + "sub.s %[f6], %[f2], %[f6] \n\t" + "lwc1 %[f9], 8(%[rdft_w]) \n\t" + "sub.s %[f2], %[f8], %[f7] \n\t" + "add.s %[f8], %[f8], %[f7] \n\t" + "sub.s %[f7], %[f4], %[f0] \n\t" + "add.s %[f4], %[f4], %[f0] \n\t" + // prepare for loop + "addiu %[a_ptr], %[a], 64 \n\t" + "addiu %[p1_rdft], %[rdft_w], 8 \n\t" + "addiu %[p2_rdft], %[rdft_w], 16 \n\t" + "addiu %[count], $zero, 7 \n\t" + // finish second 8 + "mul.s %[f2], %[f9], %[f2] \n\t" + "mul.s %[f8], %[f9], %[f8] \n\t" + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f4], %[f9], %[f4] \n\t" + "swc1 %[f1], 32(%[a]) \n\t" + "swc1 %[f3], 52(%[a]) \n\t" + "swc1 %[f5], 36(%[a]) \n\t" + "swc1 %[f6], 48(%[a]) \n\t" + "swc1 %[f2], 40(%[a]) \n\t" + "swc1 %[f8], 44(%[a]) \n\t" + "swc1 %[f7], 56(%[a]) \n\t" + "swc1 %[f4], 60(%[a]) \n\t" + // loop + "1: \n\t" + "lwc1 %[f0], 0(%[a_ptr]) \n\t" + "lwc1 %[f1], 4(%[a_ptr]) \n\t" + "lwc1 %[f2], 8(%[a_ptr]) \n\t" + "lwc1 %[f3], 12(%[a_ptr]) \n\t" + "lwc1 %[f4], 16(%[a_ptr]) \n\t" + "lwc1 %[f5], 20(%[a_ptr]) \n\t" + "lwc1 %[f6], 24(%[a_ptr]) \n\t" + "lwc1 %[f7], 28(%[a_ptr]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f10], 4(%[p1_rdft]) \n\t" + "lwc1 %[f11], 0(%[p2_rdft]) \n\t" + "lwc1 %[f12], 4(%[p2_rdft]) \n\t" + "lwc1 %[f13], 8(%[first]) \n\t" + "lwc1 %[f14], 12(%[first]) \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + 
"sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "add.s %[f3], %[f0], %[f5] \n\t" + "sub.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "swc1 %[f7], 0(%[a_ptr]) \n\t" + "swc1 %[f2], 4(%[a_ptr]) \n\t" + "mul.s %[f4], %[f9], %[f8] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f8], %[f10], %[f8] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f0], %[f12], %[f0] \n\t" + "mul.s %[f2], %[f13], %[f3] \n\t" + "mul.s %[f3], %[f14], %[f3] \n\t" + "nmsub.s %[f4], %[f4], %[f10], %[f6] \n\t" + "madd.s %[f8], %[f8], %[f9], %[f6] \n\t" + "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t" + "madd.s %[f0], %[f0], %[f11], %[f5] \n\t" + "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t" + "madd.s %[f3], %[f3], %[f13], %[f1] \n\t" +#else + "mul.s %[f7], %[f10], %[f6] \n\t" + "mul.s %[f6], %[f9], %[f6] \n\t" + "mul.s %[f8], %[f10], %[f8] \n\t" + "mul.s %[f2], %[f11], %[f0] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f5], %[f12], %[f5] \n\t" + "mul.s %[f0], %[f12], %[f0] \n\t" + "mul.s %[f12], %[f13], %[f3] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "mul.s %[f1], %[f14], %[f1] \n\t" + "mul.s %[f3], %[f14], %[f3] \n\t" + "sub.s %[f4], %[f4], %[f7] \n\t" + "add.s %[f8], %[f6], %[f8] \n\t" + "sub.s %[f7], %[f2], %[f5] \n\t" + "add.s %[f0], %[f11], %[f0] \n\t" + "sub.s %[f2], %[f12], %[f1] \n\t" + "add.s %[f3], %[f13], %[f3] \n\t" +#endif + "swc1 %[f4], 16(%[a_ptr]) \n\t" + "swc1 %[f8], 20(%[a_ptr]) \n\t" + "swc1 %[f7], 8(%[a_ptr]) \n\t" + "swc1 %[f0], 12(%[a_ptr]) \n\t" + "swc1 %[f2], 24(%[a_ptr]) \n\t" + "swc1 %[f3], 28(%[a_ptr]) \n\t" + "lwc1 %[f0], 32(%[a_ptr]) \n\t" + "lwc1 %[f1], 36(%[a_ptr]) \n\t" + "lwc1 %[f2], 40(%[a_ptr]) \n\t" + "lwc1 %[f3], 44(%[a_ptr]) \n\t" + "lwc1 %[f4], 48(%[a_ptr]) \n\t" + "lwc1 %[f5], 52(%[a_ptr]) \n\t" + "lwc1 %[f6], 56(%[a_ptr]) \n\t" + "lwc1 %[f7], 60(%[a_ptr]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f11], 8(%[p2_rdft]) \n\t" + "lwc1 %[f12], 12(%[p2_rdft]) \n\t" + "lwc1 %[f13], 8(%[second]) \n\t" + "lwc1 %[f14], 12(%[second]) \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f2], %[f8] \n\t" + "add.s %[f2], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f3], %[f6] \n\t" + "add.s %[f3], %[f0], %[f5] \n\t" + "sub.s %[f0], %[f0], %[f5] \n\t" + "add.s %[f5], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "swc1 %[f7], 32(%[a_ptr]) \n\t" + "swc1 %[f2], 36(%[a_ptr]) \n\t" + "mul.s %[f4], %[f10], %[f8] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f10], %[f10], %[f6] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f2], %[f13], %[f3] \n\t" + "mul.s %[f13], %[f13], %[f1] \n\t" + "madd.s %[f4], %[f4], %[f9], %[f6] \n\t" + "nmsub.s %[f10], %[f10], %[f9], %[f8] \n\t" + "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t" + "madd.s %[f11], %[f11], %[f12], %[f0] \n\t" + "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t" + "madd.s %[f13], %[f13], %[f14], %[f3] \n\t" +#else + "mul.s %[f2], %[f9], %[f6] \n\t" + "mul.s %[f10], %[f10], %[f6] \n\t" + "mul.s %[f9], %[f9], %[f8] \n\t" + "mul.s %[f7], %[f11], %[f0] \n\t" + "mul.s %[f8], %[f12], %[f5] \n\t" + "mul.s %[f11], %[f11], %[f5] \n\t" + "mul.s %[f12], %[f12], %[f0] \n\t" + "mul.s %[f5], %[f13], %[f3] \n\t" + "mul.s %[f0], %[f14], %[f1] \n\t" + "mul.s 
%[f13], %[f13], %[f1] \n\t" + "mul.s %[f14], %[f14], %[f3] \n\t" + "add.s %[f4], %[f4], %[f2] \n\t" + "sub.s %[f10], %[f10], %[f9] \n\t" + "sub.s %[f7], %[f7], %[f8] \n\t" + "add.s %[f11], %[f11], %[f12] \n\t" + "sub.s %[f2], %[f5], %[f0] \n\t" + "add.s %[f13], %[f13], %[f14] \n\t" +#endif + "swc1 %[f4], 48(%[a_ptr]) \n\t" + "swc1 %[f10], 52(%[a_ptr]) \n\t" + "swc1 %[f7], 40(%[a_ptr]) \n\t" + "swc1 %[f11], 44(%[a_ptr]) \n\t" + "swc1 %[f2], 56(%[a_ptr]) \n\t" + "swc1 %[f13], 60(%[a_ptr]) \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f9], 8(%[p1_rdft]) \n\t" + "addiu %[a_ptr], %[a_ptr], 64 \n\t" + "addiu %[p1_rdft], %[p1_rdft], 8 \n\t" + "addiu %[p2_rdft], %[p2_rdft], 16 \n\t" + "addiu %[first], %[first], 8 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[second], %[second], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), + [f12] "=&f" (f12), [f13] "=&f" (f13), [f14] "=&f" (f14), + [a_ptr] "=&r" (a_ptr), [p1_rdft] "=&r" (p1_rdft), [first] "+r" (first), + [p2_rdft] "=&r" (p2_rdft), [count] "=&r" (count), [second] "+r" (second) + : [a] "r" (a), [rdft_w] "r" (rdft_w) + : "memory" + ); +} + +static void cftmdl_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14; + int tmp_a, count; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 64(%[tmp_a]) \n\t" + "swc1 %[f2], 36(%[tmp_a]) \n\t" + "swc1 %[f1], 100(%[tmp_a]) \n\t" + "swc1 %[f4], 4(%[tmp_a]) \n\t" + "swc1 %[f6], 68(%[tmp_a]) \n\t" + "swc1 %[f3], 32(%[tmp_a]) \n\t" + "swc1 %[f0], 96(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a) + : "memory" + ); + f9 = rdft_w[2]; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 128 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "sub.s %[f8], %[f0], %[f2] \n\t" + 
"add.s %[f0], %[f0], %[f2] \n\t" + "sub.s %[f2], %[f5], %[f7] \n\t" + "add.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f1], %[f3] \n\t" + "add.s %[f1], %[f1], %[f3] \n\t" + "sub.s %[f3], %[f4], %[f6] \n\t" + "add.s %[f4], %[f4], %[f6] \n\t" + "sub.s %[f6], %[f8], %[f2] \n\t" + "add.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f5], %[f1] \n\t" + "sub.s %[f5], %[f5], %[f1] \n\t" + "add.s %[f1], %[f3], %[f7] \n\t" + "sub.s %[f3], %[f3], %[f7] \n\t" + "add.s %[f7], %[f0], %[f4] \n\t" + "sub.s %[f0], %[f0], %[f4] \n\t" + "sub.s %[f4], %[f6], %[f1] \n\t" + "add.s %[f6], %[f6], %[f1] \n\t" + "sub.s %[f1], %[f3], %[f8] \n\t" + "add.s %[f3], %[f3], %[f8] \n\t" + "mul.s %[f4], %[f4], %[f9] \n\t" + "mul.s %[f6], %[f6], %[f9] \n\t" + "mul.s %[f1], %[f1], %[f9] \n\t" + "mul.s %[f3], %[f3], %[f9] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f2], 4(%[tmp_a]) \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f0], 68(%[tmp_a]) \n\t" + "swc1 %[f4], 32(%[tmp_a]) \n\t" + "swc1 %[f6], 36(%[tmp_a]) \n\t" + "swc1 %[f1], 96(%[tmp_a]) \n\t" + "swc1 %[f3], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9) + : "memory" + ); + f10 = rdft_w[3]; + f11 = rdft_w[4]; + f12 = rdft_w[5]; + f13 = rdft_wk3ri_first[2]; + f14 = rdft_wk3ri_first[3]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 256 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f8], %[f2] \n\t" + "add.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "sub.s %[f4], %[f6], %[f3] \n\t" + "add.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f8], 0(%[tmp_a]) \n\t" + "swc1 %[f6], 4(%[tmp_a]) \n\t" + "mul.s %[f5], %[f9], %[f7] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f7], %[f10], %[f7] \n\t" + "mul.s %[f8], %[f11], %[f3] \n\t" + "mul.s %[f3], %[f12], %[f3] \n\t" + "mul.s %[f6], %[f13], %[f0] \n\t" + "mul.s %[f0], %[f14], %[f0] \n\t" + "nmsub.s %[f5], %[f5], %[f10], %[f4] \n\t" + "madd.s %[f7], %[f7], %[f9], %[f4] \n\t" + "nmsub.s %[f8], %[f8], %[f12], %[f2] \n\t" + "madd.s %[f3], %[f3], %[f11], %[f2] \n\t" + "nmsub.s %[f6], %[f6], %[f14], %[f1] \n\t" + "madd.s %[f0], %[f0], %[f13], %[f1] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" +#else + "mul.s %[f8], %[f10], %[f4] \n\t" + "mul.s %[f4], %[f9], %[f4] \n\t" + "mul.s %[f7], %[f10], %[f7] \n\t" + "mul.s %[f6], %[f11], %[f3] \n\t" + "mul.s %[f3], %[f12], %[f3] \n\t" + "sub.s %[f5], %[f5], %[f8] \n\t" + "mul.s %[f8], %[f12], %[f2] \n\t" + "mul.s %[f2], %[f11], %[f2] \n\t" + "add.s %[f7], %[f4], %[f7] \n\t" + "mul.s %[f4], %[f13], %[f0] \n\t" + 
"mul.s %[f0], %[f14], %[f0] \n\t" + "sub.s %[f8], %[f6], %[f8] \n\t" + "mul.s %[f6], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "add.s %[f3], %[f2], %[f3] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" + "sub.s %[f6], %[f4], %[f6] \n\t" + "add.s %[f0], %[f1], %[f0] \n\t" +#endif + "swc1 %[f8], 32(%[tmp_a]) \n\t" + "swc1 %[f3], 36(%[tmp_a]) \n\t" + "swc1 %[f6], 96(%[tmp_a]) \n\t" + "swc1 %[f0], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11), + [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14) + : "memory" + ); + f11 = rdft_w[6]; + f12 = rdft_w[7]; + f13 = rdft_wk3ri_second[2]; + f14 = rdft_wk3ri_second[3]; + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 384 \n\t" + "addiu %[count], $zero, 4 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f2], 32(%[tmp_a]) \n\t" + "lwc1 %[f3], 36(%[tmp_a]) \n\t" + "lwc1 %[f4], 64(%[tmp_a]) \n\t" + "lwc1 %[f5], 68(%[tmp_a]) \n\t" + "lwc1 %[f6], 96(%[tmp_a]) \n\t" + "lwc1 %[f7], 100(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f7], %[f2], %[f8] \n\t" + "add.s %[f2], %[f2], %[f8] \n\t" + "add.s %[f8], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "sub.s %[f4], %[f3], %[f6] \n\t" + "add.s %[f3], %[f3], %[f6] \n\t" + "sub.s %[f6], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f2], 0(%[tmp_a]) \n\t" + "swc1 %[f3], 4(%[tmp_a]) \n\t" + "mul.s %[f5], %[f10], %[f7] \n\t" +#if defined(MIPS32_R2_LE) + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f2], %[f12], %[f8] \n\t" + "mul.s %[f8], %[f11], %[f8] \n\t" + "mul.s %[f3], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "madd.s %[f5], %[f5], %[f9], %[f4] \n\t" + "msub.s %[f7], %[f7], %[f10], %[f4] \n\t" + "msub.s %[f2], %[f2], %[f11], %[f6] \n\t" + "madd.s %[f8], %[f8], %[f12], %[f6] \n\t" + "msub.s %[f3], %[f3], %[f13], %[f0] \n\t" + "madd.s %[f1], %[f1], %[f14], %[f0] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" +#else + "mul.s %[f2], %[f9], %[f4] \n\t" + "mul.s %[f4], %[f10], %[f4] \n\t" + "mul.s %[f7], %[f9], %[f7] \n\t" + "mul.s %[f3], %[f11], %[f6] \n\t" + "mul.s %[f6], %[f12], %[f6] \n\t" + "add.s %[f5], %[f5], %[f2] \n\t" + "sub.s %[f7], %[f4], %[f7] \n\t" + "mul.s %[f2], %[f12], %[f8] \n\t" + "mul.s %[f8], %[f11], %[f8] \n\t" + "mul.s %[f4], %[f14], %[f1] \n\t" + "mul.s %[f1], %[f13], %[f1] \n\t" + "sub.s %[f2], %[f3], %[f2] \n\t" + "mul.s %[f3], %[f13], %[f0] \n\t" + "mul.s %[f0], %[f14], %[f0] \n\t" + "add.s %[f8], %[f8], %[f6] \n\t" + "swc1 %[f5], 64(%[tmp_a]) \n\t" + "swc1 %[f7], 68(%[tmp_a]) \n\t" + "sub.s %[f3], %[f3], %[f4] \n\t" + "add.s %[f1], %[f1], %[f0] \n\t" +#endif + "swc1 %[f2], 32(%[tmp_a]) \n\t" + "swc1 %[f8], 36(%[tmp_a]) \n\t" + "swc1 %[f3], 96(%[tmp_a]) \n\t" + "swc1 %[f1], 100(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] 
"=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a), [f9] "f" (f9), [f10] "f" (f10), [f11] "f" (f11), + [f12] "f" (f12), [f13] "f" (f13), [f14] "f" (f14) + : "memory" + ); +} + +static void cftfsub_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8; + int tmp_a, count; + + cft1st_128(a); + cftmdl_128(a); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 16 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 128(%[tmp_a]) \n\t" + "lwc1 %[f4], 256(%[tmp_a]) \n\t" + "lwc1 %[f6], 384(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 132(%[tmp_a]) \n\t" + "lwc1 %[f5], 260(%[tmp_a]) \n\t" + "lwc1 %[f7], 388(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f1], %[f3] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "add.s %[f2], %[f1], %[f4] \n\t" + "sub.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f6], %[f3] \n\t" + "sub.s %[f6], %[f6], %[f3] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 256(%[tmp_a]) \n\t" + "swc1 %[f2], 132(%[tmp_a]) \n\t" + "swc1 %[f1], 388(%[tmp_a]) \n\t" + "swc1 %[f4], 4(%[tmp_a]) \n\t" + "swc1 %[f6], 260(%[tmp_a]) \n\t" + "swc1 %[f3], 128(%[tmp_a]) \n\t" + "swc1 %[f0], 384(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), + [count] "=&r" (count) + : [a] "r" (a) + : "memory" + ); +} + +static void cftbsub_128_mips(float* a) { + float f0, f1, f2, f3, f4, f5, f6, f7, f8; + int tmp_a, count; + + cft1st_128(a); + cftmdl_128(a); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[tmp_a], %[a], 0 \n\t" + "addiu %[count], $zero, 16 \n\t" + "1: \n\t" + "addiu %[count], %[count], -1 \n\t" + "lwc1 %[f0], 0(%[tmp_a]) \n\t" + "lwc1 %[f2], 128(%[tmp_a]) \n\t" + "lwc1 %[f4], 256(%[tmp_a]) \n\t" + "lwc1 %[f6], 384(%[tmp_a]) \n\t" + "lwc1 %[f1], 4(%[tmp_a]) \n\t" + "lwc1 %[f3], 132(%[tmp_a]) \n\t" + "lwc1 %[f5], 260(%[tmp_a]) \n\t" + "lwc1 %[f7], 388(%[tmp_a]) \n\t" + "add.s %[f8], %[f0], %[f2] \n\t" + "sub.s %[f0], %[f0], %[f2] \n\t" + "add.s %[f2], %[f4], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "add.s %[f6], %[f1], %[f3] \n\t" + "sub.s %[f1], %[f3], %[f1] \n\t" + "add.s %[f3], %[f5], %[f7] \n\t" + "sub.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f7], %[f8], %[f2] \n\t" + "sub.s %[f8], %[f8], %[f2] \n\t" + "sub.s %[f2], %[f1], %[f4] \n\t" + "add.s %[f1], %[f1], %[f4] \n\t" + "add.s %[f4], %[f3], %[f6] \n\t" + "sub.s %[f6], %[f3], %[f6] \n\t" + "sub.s %[f3], %[f0], %[f5] \n\t" + "add.s %[f0], %[f0], %[f5] \n\t" + "neg.s %[f4], %[f4] \n\t" + "swc1 %[f7], 0(%[tmp_a]) \n\t" + "swc1 %[f8], 256(%[tmp_a]) \n\t" + "swc1 %[f2], 132(%[tmp_a]) \n\t" + "swc1 %[f1], 388(%[tmp_a]) \n\t" + "swc1 %[f6], 260(%[tmp_a]) \n\t" + "swc1 %[f3], 128(%[tmp_a]) \n\t" + "swc1 %[f0], 384(%[tmp_a]) \n\t" + "swc1 %[f4], 
4(%[tmp_a]) \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[tmp_a], %[tmp_a], 8 \n\t" + ".set pop \n\t" + : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), + [f4] "=&f" (f4), [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), + [f8] "=&f" (f8), [tmp_a] "=&r" (tmp_a), [count] "=&r" (count) + : [a] "r" (a) + : "memory" + ); +} + +static void rftfsub_128_mips(float* a) { + const float* c = rdft_w + 32; + const float f0 = 0.5f; + float* a1 = &a[2]; + float* a2 = &a[126]; + const float* c1 = &c[1]; + const float* c2 = &c[31]; + float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; + int count; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "addiu %[count], $zero, 15 \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "sub.s %[f9], %[f9], %[f8] \n\t" + "add.s %[f6], %[f6], %[f5] \n\t" +#else + "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t" + "madd.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f3], %[f6] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "addiu %[a1], %[a1], 8 \n\t" + "addiu %[a2], %[a2], -8 \n\t" + "addiu %[c1], %[c1], 4 \n\t" + "addiu %[c2], %[c2], -4 \n\t" + "1: \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "lwc1 %[f10], -4(%[c2]) \n\t" + "lwc1 %[f11], 8(%[a1]) \n\t" + "lwc1 %[f12], -8(%[a2]) \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "sub.s %[f9], %[f9], %[f8] \n\t" + "add.s %[f6], %[f6], %[f5] \n\t" +#else + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t" + "madd.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f10], %[f0], %[f10] \n\t" + "sub.s %[f5], %[f11], %[f12] \n\t" + "add.s %[f7], %[f13], %[f14] \n\t" + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f3], %[f6] \n\t" + "mul.s %[f8], %[f10], %[f5] \n\t" + "mul.s %[f10], %[f10], %[f7] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f9], %[f15], %[f7] \n\t" + "mul.s %[f15], %[f15], %[f5] \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f8], %[f8], %[f9] \n\t" + "add.s %[f10], %[f10], %[f15] \n\t" +#else + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f4], %[f4], %[f6] \n\t" + "nmsub.s %[f8], %[f8], %[f15], %[f7] \n\t" + "madd.s %[f10], %[f10], %[f15], %[f5] \n\t" +#endif + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "sub.s %[f11], %[f11], %[f8] \n\t" + "add.s %[f12], %[f12], %[f8] 
\n\t" + "sub.s %[f13], %[f13], %[f10] \n\t" + "sub.s %[f14], %[f14], %[f10] \n\t" + "addiu %[c2], %[c2], -8 \n\t" + "addiu %[c1], %[c1], 8 \n\t" + "swc1 %[f11], 8(%[a1]) \n\t" + "swc1 %[f12], -8(%[a2]) \n\t" + "swc1 %[f13], 12(%[a1]) \n\t" + "swc1 %[f14], -4(%[a2]) \n\t" + "addiu %[a1], %[a1], 16 \n\t" + "addiu %[count], %[count], -1 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[a2], %[a2], -16 \n\t" + ".set pop \n\t" + : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), + [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4), + [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12), + [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15), + [count] "=&r" (count) + : [f0] "f" (f0) + : "memory" + ); +} + +static void rftbsub_128_mips(float* a) { + const float *c = rdft_w + 32; + const float f0 = 0.5f; + float* a1 = &a[2]; + float* a2 = &a[126]; + const float* c1 = &c[1]; + const float* c2 = &c[31]; + float f1, f2, f3 ,f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15; + int count; + + a[1] = -a[1]; + a[65] = -a[65]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "addiu %[count], $zero, 15 \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "add.s %[f9], %[f9], %[f8] \n\t" + "sub.s %[f6], %[f6], %[f5] \n\t" +#else + "madd.s %[f9], %[f9], %[f5], %[f8] \n\t" + "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f6], %[f3] \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "addiu %[a1], %[a1], 8 \n\t" + "addiu %[a2], %[a2], -8 \n\t" + "addiu %[c1], %[c1], 4 \n\t" + "addiu %[c2], %[c2], -4 \n\t" + "1: \n\t" + "lwc1 %[f6], 0(%[c2]) \n\t" + "lwc1 %[f1], 0(%[a1]) \n\t" + "lwc1 %[f2], 0(%[a2]) \n\t" + "lwc1 %[f3], 4(%[a1]) \n\t" + "lwc1 %[f4], 4(%[a2]) \n\t" + "lwc1 %[f5], 0(%[c1]) \n\t" + "sub.s %[f6], %[f0], %[f6] \n\t" + "sub.s %[f7], %[f1], %[f2] \n\t" + "add.s %[f8], %[f3], %[f4] \n\t" + "lwc1 %[f10], -4(%[c2]) \n\t" + "lwc1 %[f11], 8(%[a1]) \n\t" + "lwc1 %[f12], -8(%[a2]) \n\t" + "mul.s %[f9], %[f6], %[f7] \n\t" + "mul.s %[f6], %[f6], %[f8] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f8], %[f5], %[f8] \n\t" + "mul.s %[f5], %[f5], %[f7] \n\t" + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "add.s %[f9], %[f9], %[f8] \n\t" + "sub.s %[f6], %[f6], %[f5] \n\t" +#else + "lwc1 %[f13], 12(%[a1]) \n\t" + "lwc1 %[f14], -4(%[a2]) \n\t" + "lwc1 %[f15], 4(%[c1]) \n\t" + "madd.s %[f9], %[f9], %[f5], %[f8] \n\t" + "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t" +#endif + "sub.s %[f10], %[f0], %[f10] \n\t" + "sub.s %[f5], %[f11], %[f12] \n\t" + "add.s %[f7], %[f13], %[f14] \n\t" + "sub.s %[f1], %[f1], %[f9] \n\t" + "add.s %[f2], %[f2], %[f9] \n\t" + "sub.s %[f3], %[f6], %[f3] \n\t" + "mul.s %[f8], %[f10], %[f5] \n\t" + "mul.s %[f10], %[f10], %[f7] \n\t" +#if !defined(MIPS32_R2_LE) + "mul.s %[f9], %[f15], %[f7] \n\t" + "mul.s %[f15], 
%[f15], %[f5] \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "add.s %[f8], %[f8], %[f9] \n\t" + "sub.s %[f10], %[f10], %[f15] \n\t" +#else + "swc1 %[f1], 0(%[a1]) \n\t" + "swc1 %[f2], 0(%[a2]) \n\t" + "sub.s %[f4], %[f6], %[f4] \n\t" + "madd.s %[f8], %[f8], %[f15], %[f7] \n\t" + "nmsub.s %[f10], %[f10], %[f15], %[f5] \n\t" +#endif + "swc1 %[f3], 4(%[a1]) \n\t" + "swc1 %[f4], 4(%[a2]) \n\t" + "sub.s %[f11], %[f11], %[f8] \n\t" + "add.s %[f12], %[f12], %[f8] \n\t" + "sub.s %[f13], %[f10], %[f13] \n\t" + "sub.s %[f14], %[f10], %[f14] \n\t" + "addiu %[c2], %[c2], -8 \n\t" + "addiu %[c1], %[c1], 8 \n\t" + "swc1 %[f11], 8(%[a1]) \n\t" + "swc1 %[f12], -8(%[a2]) \n\t" + "swc1 %[f13], 12(%[a1]) \n\t" + "swc1 %[f14], -4(%[a2]) \n\t" + "addiu %[a1], %[a1], 16 \n\t" + "addiu %[count], %[count], -1 \n\t" + "bgtz %[count], 1b \n\t" + " addiu %[a2], %[a2], -16 \n\t" + ".set pop \n\t" + : [a1] "+r" (a1), [a2] "+r" (a2), [c1] "+r" (c1), [c2] "+r" (c2), + [f1] "=&f" (f1), [f2] "=&f" (f2), [f3] "=&f" (f3), [f4] "=&f" (f4), + [f5] "=&f" (f5), [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8), + [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11), [f12] "=&f" (f12), + [f13] "=&f" (f13), [f14] "=&f" (f14), [f15] "=&f" (f15), + [count] "=&r" (count) + : [f0] "f" (f0) + : "memory" + ); +} + +void aec_rdft_init_mips(void) { + cft1st_128 = cft1st_128_mips; + cftmdl_128 = cftmdl_128_mips; + rftfsub_128 = rftfsub_128_mips; + rftbsub_128 = rftbsub_128_mips; + cftfsub_128 = cftfsub_128_mips; + cftbsub_128 = cftbsub_128_mips; + bitrv2_128 = bitrv2_128_mips; +} diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_neon.c b/webrtc/modules/audio_processing/aec/aec_rdft_neon.c new file mode 100644 index 0000000..43b6a68 --- /dev/null +++ b/webrtc/modules/audio_processing/aec/aec_rdft_neon.c @@ -0,0 +1,355 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* + * The rdft AEC algorithm, neon version of speed-critical functions. + * + * Based on the sse2 version. 
+ */
+
+
+#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
+
+#include <arm_neon.h>
+
+static const ALIGN16_BEG float ALIGN16_END
+    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
+
+static void cft1st_128_neon(float* a) {
+  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+  int j, k2;
+
+  for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
+    float32x4_t a00v = vld1q_f32(&a[j + 0]);
+    float32x4_t a04v = vld1q_f32(&a[j + 4]);
+    float32x4_t a08v = vld1q_f32(&a[j + 8]);
+    float32x4_t a12v = vld1q_f32(&a[j + 12]);
+    float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
+    float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
+    float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
+    float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
+    const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
+    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
+    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
+    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
+    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
+    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
+    float32x4_t x0v = vaddq_f32(a01v, a23v);
+    const float32x4_t x1v = vsubq_f32(a01v, a23v);
+    const float32x4_t x2v = vaddq_f32(a45v, a67v);
+    const float32x4_t x3v = vsubq_f32(a45v, a67v);
+    const float32x4_t x3w = vrev64q_f32(x3v);
+    float32x4_t x0w;
+    a01v = vaddq_f32(x0v, x2v);
+    x0v = vsubq_f32(x0v, x2v);
+    x0w = vrev64q_f32(x0v);
+    a45v = vmulq_f32(wk2rv, x0v);
+    a45v = vmlaq_f32(a45v, wk2iv, x0w);
+    x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
+    x0w = vrev64q_f32(x0v);
+    a23v = vmulq_f32(wk1rv, x0v);
+    a23v = vmlaq_f32(a23v, wk1iv, x0w);
+    x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
+    x0w = vrev64q_f32(x0v);
+    a67v = vmulq_f32(wk3rv, x0v);
+    a67v = vmlaq_f32(a67v, wk3iv, x0w);
+    a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
+    a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
+    a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
+    a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
+    vst1q_f32(&a[j + 0], a00v);
+    vst1q_f32(&a[j + 4], a04v);
+    vst1q_f32(&a[j + 8], a08v);
+    vst1q_f32(&a[j + 12], a12v);
+  }
+}
+
+static void cftmdl_128_neon(float* a) {
+  int j;
+  const int l = 8;
+  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
+  float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
+
+  for (j = 0; j < l; j += 2) {
+    const float32x2_t a_00 = vld1_f32(&a[j + 0]);
+    const float32x2_t a_08 = vld1_f32(&a[j + 8]);
+    const float32x2_t a_32 = vld1_f32(&a[j + 32]);
+    const float32x2_t a_40 = vld1_f32(&a[j + 40]);
+    const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
+    const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
+    const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
+    const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
+    const float32x2_t a_16 = vld1_f32(&a[j + 16]);
+    const float32x2_t a_24 = vld1_f32(&a[j + 24]);
+    const float32x2_t a_48 = vld1_f32(&a[j + 48]);
+    const float32x2_t a_56 = vld1_f32(&a[j + 56]);
+    const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
+    const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
+    const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
+    const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
+    const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
+    const float32x4_t x3i0_3r0_3i1_x3r1 =
vrev64q_f32(x3r0_3i0_3r1_x3i1); + const float32x4_t x1_x3_add = + vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x4_t x1_x3_sub = + vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0); + const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0); + const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s); + const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1); + const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1); + const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s); + const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as); + const float32x4_t yy4 = vmulq_f32(wk1rv, yy0); + const float32x4_t xx1_rev = vrev64q_f32(xx1); + const float32x4_t yy4_rev = vrev64q_f32(yy4); + + vst1_f32(&a[j + 0], vget_low_f32(xx0)); + vst1_f32(&a[j + 32], vget_high_f32(xx0)); + vst1_f32(&a[j + 16], vget_low_f32(xx1)); + vst1_f32(&a[j + 48], vget_high_f32(xx1_rev)); + + a[j + 48] = -a[j + 48]; + + vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add)); + vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub)); + vst1_f32(&a[j + 40], vget_low_f32(yy4)); + vst1_f32(&a[j + 56], vget_high_f32(yy4_rev)); + } + + { + const int k = 64; + const int k1 = 2; + const int k2 = 2 * k1; + const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]); + const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]); + const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]); + const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]); + const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]); + wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]); + for (j = k; j < l + k; j += 2) { + const float32x2_t a_00 = vld1_f32(&a[j + 0]); + const float32x2_t a_08 = vld1_f32(&a[j + 8]); + const float32x2_t a_32 = vld1_f32(&a[j + 32]); + const float32x2_t a_40 = vld1_f32(&a[j + 40]); + const float32x4_t a_00_32 = vcombine_f32(a_00, a_32); + const float32x4_t a_08_40 = vcombine_f32(a_08, a_40); + const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40); + const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40); + const float32x2_t a_16 = vld1_f32(&a[j + 16]); + const float32x2_t a_24 = vld1_f32(&a[j + 24]); + const float32x2_t a_48 = vld1_f32(&a[j + 48]); + const float32x2_t a_56 = vld1_f32(&a[j + 56]); + const float32x4_t a_16_48 = vcombine_f32(a_16, a_48); + const float32x4_t a_24_56 = vcombine_f32(a_24, a_56); + const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56); + const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56); + const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1); + const float32x4_t x1_x3_add = + vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + const float32x4_t x1_x3_sub = + vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1); + float32x4_t xx4 = vmulq_f32(wk2rv, xx1); + float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add); + float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub); + xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1)); + xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add)); + xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub)); + + vst1_f32(&a[j + 0], vget_low_f32(xx)); + vst1_f32(&a[j + 32], vget_high_f32(xx)); + vst1_f32(&a[j + 16], vget_low_f32(xx4)); + vst1_f32(&a[j + 48], vget_high_f32(xx4)); + vst1_f32(&a[j + 8], vget_low_f32(xx12)); + vst1_f32(&a[j + 40], 
vget_high_f32(xx12)); + vst1_f32(&a[j + 24], vget_low_f32(xx22)); + vst1_f32(&a[j + 56], vget_high_f32(xx22)); + } + } +} + +__inline static float32x4_t reverse_order_f32x4(float32x4_t in) { + // A B C D -> C D A B + const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in)); + // C D A B -> D C B A + return vrev64q_f32(rev); +} + +static void rftfsub_128_neon(float* a) { + const float* c = rdft_w + 32; + int j1, j2; + const float32x4_t mm_half = vdupq_n_f32(0.5f); + + // Vectorized code (four at once). + // Note: commented number are indexes for the first iteration of the loop. + for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, + const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, + const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, + const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, + const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. + // 2, 4, 6, 8, 3, 5, 7, 9 + float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]); + // 120, 122, 124, 126, 121, 123, 125, 127, + const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]); + // 126, 124, 122, 120 + const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]); + // 127, 125, 123, 121 + const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]); + // Calculate 'x'. + const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr - wki * xi; + // yi = wkr * xi + wki * xr; + const float32x4_t a_ = vmulq_f32(wkr_, xr_); + const float32x4_t b_ = vmulq_f32(wki_, xi_); + const float32x4_t c_ = vmulq_f32(wkr_, xi_); + const float32x4_t d_ = vmulq_f32(wki_, xr_); + const float32x4_t yr_ = vsubq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const float32x4_t yi_ = vaddq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; + // a[k2 + 1] -= yi; + // 126, 124, 122, 120, + const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_); + // 127, 125, 123, 121, + const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_); + // Shuffle in right order and store. + const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n); + const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n); + // 124, 125, 126, 127, 120, 121, 122, 123 + const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr); + // 2, 4, 6, 8, + a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_); + // 3, 5, 7, 9, + a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_); + // 2, 3, 4, 5, 6, 7, 8, 9, + vst2q_f32(&a[0 + j2], a_j2_p); + + vst1q_f32(&a[122 - j2], a_k2_n.val[1]); + vst1q_f32(&a[126 - j2], a_k2_n.val[0]); + } + + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + const int k2 = 128 - j2; + const int k1 = 32 - j1; + const float wkr = 0.5f - c[k1]; + const float wki = c[j1]; + const float xr = a[j2 + 0] - a[k2 + 0]; + const float xi = a[j2 + 1] + a[k2 + 1]; + const float yr = wkr * xr - wki * xi; + const float yi = wkr * xi + wki * xr; + a[j2 + 0] -= yr; + a[j2 + 1] -= yi; + a[k2 + 0] += yr; + a[k2 + 1] -= yi; + } +} + +static void rftbsub_128_neon(float* a) { + const float* c = rdft_w + 32; + int j1, j2; + const float32x4_t mm_half = vdupq_n_f32(0.5f); + + a[1] = -a[1]; + // Vectorized code (four at once). + // Note: commented number are indexes for the first iteration of the loop. 
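
(Illustration only, not part of the patch: the lane reversal done by reverse_order_f32x4() above can be checked in isolation. A minimal standalone sketch, assuming an ARM toolchain with NEON available, e.g. gcc -O2 -mfpu=neon on 32-bit ARM; NEON is always available on AArch64.)

/* reverse_check.c - verifies that reverse_order_f32x4() maps
 * {A, B, C, D} to {D, C, B, A}: vcombine/vget swap the 64-bit halves
 * (C D A B), then vrev64q swaps within each half (D C B A). */
#include <arm_neon.h>
#include <stdio.h>

static float32x4_t reverse_order_f32x4(float32x4_t in) {
  const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in));
  return vrev64q_f32(rev);
}

int main(void) {
  const float in[4] = {1.f, 2.f, 3.f, 4.f};
  float out[4];
  vst1q_f32(out, reverse_order_f32x4(vld1q_f32(in)));
  printf("%g %g %g %g\n", out[0], out[1], out[2], out[3]);  /* 4 3 2 1 */
  return 0;
}
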
+ for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { + // Load 'wk'. + const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4, + const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31, + const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31, + const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28, + const float32x4_t wki_ = c_j1; // 1, 2, 3, 4, + // Load and shuffle 'a'. + // 2, 4, 6, 8, 3, 5, 7, 9 + float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]); + // 120, 122, 124, 126, 121, 123, 125, 127, + const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]); + // 126, 124, 122, 120 + const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]); + // 127, 125, 123, 121 + const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]); + // Calculate 'x'. + const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0); + // 2-126, 4-124, 6-122, 8-120, + const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1); + // 3-127, 5-125, 7-123, 9-121, + // Calculate product into 'y'. + // yr = wkr * xr - wki * xi; + // yi = wkr * xi + wki * xr; + const float32x4_t a_ = vmulq_f32(wkr_, xr_); + const float32x4_t b_ = vmulq_f32(wki_, xi_); + const float32x4_t c_ = vmulq_f32(wkr_, xi_); + const float32x4_t d_ = vmulq_f32(wki_, xr_); + const float32x4_t yr_ = vaddq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const float32x4_t yi_ = vsubq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; + // a[k2 + 1] -= yi; + // 126, 124, 122, 120, + const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_); + // 127, 125, 123, 121, + const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1); + // Shuffle in right order and store. + // 2, 3, 4, 5, 6, 7, 8, 9, + const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n); + const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n); + // 124, 125, 126, 127, 120, 121, 122, 123 + const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr); + // 2, 4, 6, 8, + a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_); + // 3, 5, 7, 9, + a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]); + // 2, 3, 4, 5, 6, 7, 8, 9, + vst2q_f32(&a[0 + j2], a_j2_p); + + vst1q_f32(&a[122 - j2], a_k2_n.val[1]); + vst1q_f32(&a[126 - j2], a_k2_n.val[0]); + } + + // Scalar code for the remaining items. + for (; j2 < 64; j1 += 1, j2 += 2) { + const int k2 = 128 - j2; + const int k1 = 32 - j1; + const float wkr = 0.5f - c[k1]; + const float wki = c[j1]; + const float xr = a[j2 + 0] - a[k2 + 0]; + const float xi = a[j2 + 1] + a[k2 + 1]; + const float yr = wkr * xr + wki * xi; + const float yi = wkr * xi - wki * xr; + a[j2 + 0] = a[j2 + 0] - yr; + a[j2 + 1] = yi - a[j2 + 1]; + a[k2 + 0] = yr + a[k2 + 0]; + a[k2 + 1] = yi - a[k2 + 1]; + } + a[65] = -a[65]; +} + +void aec_rdft_init_neon(void) { + cft1st_128 = cft1st_128_neon; + cftmdl_128 = cftmdl_128_neon; + rftfsub_128 = rftfsub_128_neon; + rftbsub_128 = rftbsub_128_neon; +} + diff --git a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c b/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c index f936e2a..b4e453f 100644 --- a/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c +++ b/webrtc/modules/audio_processing/aec/aec_rdft_sse2.c @@ -8,172 +8,168 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "typedefs.h" +#include "webrtc/modules/audio_processing/aec/aec_rdft.h" -#if defined(WEBRTC_USE_SSE2) #include <emmintrin.h> -#include "aec_rdft.h" +static const ALIGN16_BEG float ALIGN16_END + k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f}; -static const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] = - {-1.f, 1.f, -1.f, 1.f}; - -static void cft1st_128_SSE2(float *a) { +static void cft1st_128_SSE2(float* a) { const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); int j, k2; for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { - __m128 a00v = _mm_loadu_ps(&a[j + 0]); - __m128 a04v = _mm_loadu_ps(&a[j + 4]); - __m128 a08v = _mm_loadu_ps(&a[j + 8]); - __m128 a12v = _mm_loadu_ps(&a[j + 12]); - __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3 ,2)); - __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3 ,2)); + __m128 a00v = _mm_loadu_ps(&a[j + 0]); + __m128 a04v = _mm_loadu_ps(&a[j + 4]); + __m128 a08v = _mm_loadu_ps(&a[j + 8]); + __m128 a12v = _mm_loadu_ps(&a[j + 12]); + __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2)); + __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0)); + __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2)); - const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]); - const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]); - const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]); - const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]); - const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]); - const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]); - __m128 x0v = _mm_add_ps(a01v, a23v); - const __m128 x1v = _mm_sub_ps(a01v, a23v); - const __m128 x2v = _mm_add_ps(a45v, a67v); - const __m128 x3v = _mm_sub_ps(a45v, a67v); - __m128 x0w; - a01v = _mm_add_ps(x0v, x2v); - x0v = _mm_sub_ps(x0v, x2v); - x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1)); + const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]); + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]); + __m128 x0v = _mm_add_ps(a01v, a23v); + const __m128 x1v = _mm_sub_ps(a01v, a23v); + const __m128 x2v = _mm_add_ps(a45v, a67v); + const __m128 x3v = _mm_sub_ps(a45v, a67v); + __m128 x0w; + a01v = _mm_add_ps(x0v, x2v); + x0v = _mm_sub_ps(x0v, x2v); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); { const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v); const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w); - a45v = _mm_add_ps(a45_0v, a45_1v); + a45v = _mm_add_ps(a45_0v, a45_1v); } { - __m128 a23_0v, a23_1v; - const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0 ,1)); - const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); - x0v = _mm_add_ps(x1v, x3s); - x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1)); - a23_0v = _mm_mul_ps(wk1rv, x0v); - a23_1v = _mm_mul_ps(wk1iv, x0w); - a23v = _mm_add_ps(a23_0v, a23_1v); + __m128 a23_0v, a23_1v; + const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1)); + const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); + x0v = _mm_add_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); + a23_0v = _mm_mul_ps(wk1rv, x0v); + a23_1v = _mm_mul_ps(wk1iv, x0w); + a23v = _mm_add_ps(a23_0v, a23_1v); - x0v = _mm_sub_ps(x1v, x3s); - x0w = 
_mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1)); + x0v = _mm_sub_ps(x1v, x3s); + x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); } { const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v); const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w); - a67v = _mm_add_ps(a67_0v, a67_1v); + a67v = _mm_add_ps(a67_0v, a67_1v); } - a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1 ,0)); - a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1 ,0)); - a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3 ,2)); - a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3 ,2)); - _mm_storeu_ps(&a[j + 0], a00v); - _mm_storeu_ps(&a[j + 4], a04v); - _mm_storeu_ps(&a[j + 8], a08v); + a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0)); + a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0)); + a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2)); + a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2)); + _mm_storeu_ps(&a[j + 0], a00v); + _mm_storeu_ps(&a[j + 4], a04v); + _mm_storeu_ps(&a[j + 8], a08v); _mm_storeu_ps(&a[j + 12], a12v); } } -static void cftmdl_128_SSE2(float *a) { +static void cftmdl_128_SSE2(float* a) { const int l = 8; const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); int j0; __m128 wk1rv = _mm_load_ps(cftmdl_wk1r); for (j0 = 0; j0 < l; j0 += 2) { - const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); - const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); - const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), - _mm_castsi128_ps(a_32), - _MM_SHUFFLE(1, 0, 1 ,0)); - const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), - _mm_castsi128_ps(a_40), - _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); - const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); + const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), + _mm_castsi128_ps(a_32), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), + _mm_castsi128_ps(a_40), + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); - const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), - _mm_castsi128_ps(a_48), - _MM_SHUFFLE(1, 0, 1 ,0)); - const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), - _mm_castsi128_ps(a_56), - _MM_SHUFFLE(1, 0, 1 ,0)); - const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); - const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); + const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), + _mm_castsi128_ps(a_48), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), + _mm_castsi128_ps(a_56), + _MM_SHUFFLE(1, 0, 1, 0)); + const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); + const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); - const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); - const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); + const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, 
x2r0_2i0_2r1_x2i1); + const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); - const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps( - _mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1), - _MM_SHUFFLE(2, 3, 0, 1))); - const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); - const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); - const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); - const __m128 yy0 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub, - _MM_SHUFFLE(2, 2, 2 ,2)); - const __m128 yy1 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub, - _MM_SHUFFLE(3, 3, 3 ,3)); + const __m128 yy0 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2)); + const __m128 yy1 = + _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3)); const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1); const __m128 yy3 = _mm_add_ps(yy0, yy2); const __m128 yy4 = _mm_mul_ps(wk1rv, yy3); - _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); - _mm_storel_epi64((__m128i*)&a[j0 + 32], - _mm_shuffle_epi32(_mm_castps_si128(xx0), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1)); - _mm_storel_epi64((__m128i*)&a[j0 + 48], - _mm_shuffle_epi32(_mm_castps_si128(xx1), - _MM_SHUFFLE(2, 3, 2, 3))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); a[j0 + 48] = -a[j0 + 48]; - _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub)); _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4)); - _mm_storel_epi64((__m128i*)&a[j0 + 56], - _mm_shuffle_epi32(_mm_castps_si128(yy4), - _MM_SHUFFLE(2, 3, 2, 3))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); } { int k = 64; int k1 = 2; int k2 = 2 * k1; - const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2+0]); - const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2+0]); - const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2+0]); - const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2+0]); - const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2+0]); - wk1rv = _mm_load_ps(&rdft_wk1r[k2+0]); + const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]); + const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]); + const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]); + const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]); + const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]); + wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]); for (j0 = k; j0 < l + k; j0 += 2) { - const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); - const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); + const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); + const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); const __m128 
a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), - _MM_SHUFFLE(1, 0, 1 ,0)); + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), - _MM_SHUFFLE(1, 0, 1 ,0)); - __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); + _MM_SHUFFLE(1, 0, 1, 0)); + __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); @@ -182,100 +178,102 @@ static void cftmdl_128_SSE2(float *a) { const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), - _MM_SHUFFLE(1, 0, 1 ,0)); + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), - _MM_SHUFFLE(1, 0, 1 ,0)); + _MM_SHUFFLE(1, 0, 1, 0)); const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); - const __m128 xx2 = _mm_mul_ps(xx1 , wk2rv); - const __m128 xx3 = _mm_mul_ps(wk2iv, - _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1), - _MM_SHUFFLE(2, 3, 0, 1)))); + const __m128 xx2 = _mm_mul_ps(xx1, wk2rv); + const __m128 xx3 = + _mm_mul_ps(wk2iv, + _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx4 = _mm_add_ps(xx2, xx3); - const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps( - _mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1), - _MM_SHUFFLE(2, 3, 0, 1))); - const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); - const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); - const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( + _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); + const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); + const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); + const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv); - const __m128 xx11 = _mm_mul_ps(wk1iv, + const __m128 xx11 = _mm_mul_ps( + wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add), _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx12 = _mm_add_ps(xx10, xx11); const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv); - const __m128 xx21 = _mm_mul_ps(wk3iv, + const __m128 xx21 = _mm_mul_ps( + wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub), - _MM_SHUFFLE(2, 3, 0, 1)))); + _MM_SHUFFLE(2, 3, 0, 1)))); const __m128 xx22 = _mm_add_ps(xx20, xx21); - _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); - _mm_storel_epi64((__m128i*)&a[j0 + 32], - _mm_shuffle_epi32(_mm_castps_si128(xx), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 32], + _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2))); _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4)); - _mm_storel_epi64((__m128i*)&a[j0 + 48], - _mm_shuffle_epi32(_mm_castps_si128(xx4), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 48], + _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2))); - _mm_storel_epi64((__m128i*)&a[j0 + 8], 
_mm_castps_si128(xx12)); - _mm_storel_epi64((__m128i*)&a[j0 + 40], - _mm_shuffle_epi32(_mm_castps_si128(xx12), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12)); + _mm_storel_epi64( + (__m128i*)&a[j0 + 40], + _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2))); _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22)); - _mm_storel_epi64((__m128i*)&a[j0 + 56], - _mm_shuffle_epi32(_mm_castps_si128(xx22), - _MM_SHUFFLE(3, 2, 3, 2))); + _mm_storel_epi64( + (__m128i*)&a[j0 + 56], + _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2))); } } } -static void rftfsub_128_SSE2(float *a) { - const float *c = rdft_w + 32; +static void rftfsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; - static const ALIGN16_BEG float ALIGN16_END k_half[4] = - {0.5f, 0.5f, 0.5f, 0.5f}; + static const ALIGN16_BEG float ALIGN16_END + k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; const __m128 mm_half = _mm_load_ps(k_half); // Vectorized code (four at once). // Note: commented number are indexes for the first iteration of the loop. for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // Load 'wk'. - const __m128 c_j1 = _mm_loadu_ps(&c[ j1]); // 1, 2, 3, 4, - const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, - const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, const __m128 wkr_ = - _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, - const __m128 wki_ = c_j1; // 1, 2, 3, 4, + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, // Load and shuffle 'a'. - const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, - const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, - const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(2, 0, 2 ,0)); // 2, 4, 6, 8, - const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(3, 1, 3 ,1)); // 3, 5, 7, 9, - const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(0, 2, 0 ,2)); // 126, 124, 122, 120, - const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(1, 3, 1 ,3)); // 127, 125, 123, 121, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, // Calculate 'x'. const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); - // 2-126, 4-124, 6-122, 8-120, + // 2-126, 4-124, 6-122, 8-120, const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); - // 3-127, 5-125, 7-123, 9-121, + // 3-127, 5-125, 7-123, 9-121, // Calculate product into 'y'. 
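
(Illustration only, not part of the patch: the 'y' product spelled out in the comments below is one complex multiplication y = w * x, with twiddle w = wkr + i*wki and x = xr + i*xi; the SSE2 code evaluates four such products per iteration, and the backward transform rftbsub_128_SSE2 uses the conjugate of w instead. A scalar reference sketch:)

/* Scalar reference for the vectorized 'y' product: y = w * x. */
typedef struct { float re, im; } complexf;

static complexf cmul(float wkr, float wki, float xr, float xi) {
  complexf y;
  y.re = wkr * xr - wki * xi;  /* yr in the comments below */
  y.im = wkr * xi + wki * xr;  /* yi in the comments below */
  return y;
}
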
// yr = wkr * xr - wki * xi; // yi = wkr * xi + wki * xr; @@ -283,12 +281,12 @@ static void rftfsub_128_SSE2(float *a) { const __m128 b_ = _mm_mul_ps(wki_, xi_); const __m128 c_ = _mm_mul_ps(wkr_, xi_); const __m128 d_ = _mm_mul_ps(wki_, xr_); - const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, - const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, - // Update 'a'. - // a[j2 + 0] -= yr; - // a[j2 + 1] -= yi; - // a[k2 + 0] += yr; + const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] -= yr; + // a[j2 + 1] -= yi; + // a[k2 + 0] += yr; // a[k2 + 1] -= yi; const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9, @@ -296,26 +294,26 @@ static void rftfsub_128_SSE2(float *a) { const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121, // Shuffle in right order and store. const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); - // 2, 3, 4, 5, + // 2, 3, 4, 5, const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); - // 6, 7, 8, 9, + // 6, 7, 8, 9, const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); - // 122, 123, 120, 121, + // 122, 123, 120, 121, const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); - // 126, 127, 124, 125, - const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 120, 121, 122, 123, - const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 124, 125, 126, 127, - _mm_storeu_ps(&a[0 + j2], a_j2_0n); - _mm_storeu_ps(&a[4 + j2], a_j2_4n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); _mm_storeu_ps(&a[122 - j2], a_k2_0n); _mm_storeu_ps(&a[126 - j2], a_k2_4n); } // Scalar code for the remaining items. for (; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; @@ -329,13 +327,13 @@ static void rftfsub_128_SSE2(float *a) { } } -static void rftbsub_128_SSE2(float *a) { - const float *c = rdft_w + 32; +static void rftbsub_128_SSE2(float* a) { + const float* c = rdft_w + 32; int j1, j2, k1, k2; float wkr, wki, xr, xi, yr, yi; - static const ALIGN16_BEG float ALIGN16_END k_half[4] = - {0.5f, 0.5f, 0.5f, 0.5f}; + static const ALIGN16_BEG float ALIGN16_END + k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f}; const __m128 mm_half = _mm_load_ps(k_half); a[1] = -a[1]; @@ -343,30 +341,30 @@ static void rftbsub_128_SSE2(float *a) { // Note: commented number are indexes for the first iteration of the loop. for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { // Load 'wk'. 
- const __m128 c_j1 = _mm_loadu_ps(&c[ j1]); // 1, 2, 3, 4, - const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, - const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, + const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, + const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, + const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, const __m128 wkr_ = - _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, - const __m128 wki_ = c_j1; // 1, 2, 3, 4, + _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, + const __m128 wki_ = c_j1; // 1, 2, 3, 4, // Load and shuffle 'a'. - const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, - const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, + const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, + const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, - const __m128 a_j2_p0 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(2, 0, 2 ,0)); // 2, 4, 6, 8, - const __m128 a_j2_p1 = _mm_shuffle_ps(a_j2_0, a_j2_4, - _MM_SHUFFLE(3, 1, 3 ,1)); // 3, 5, 7, 9, - const __m128 a_k2_p0 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(0, 2, 0 ,2)); // 126, 124, 122, 120, - const __m128 a_k2_p1 = _mm_shuffle_ps(a_k2_4, a_k2_0, - _MM_SHUFFLE(1, 3, 1 ,3)); // 127, 125, 123, 121, + const __m128 a_j2_p0 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, + const __m128 a_j2_p1 = _mm_shuffle_ps( + a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, + const __m128 a_k2_p0 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, + const __m128 a_k2_p1 = _mm_shuffle_ps( + a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, // Calculate 'x'. const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); - // 2-126, 4-124, 6-122, 8-120, + // 2-126, 4-124, 6-122, 8-120, const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); - // 3-127, 5-125, 7-123, 9-121, + // 3-127, 5-125, 7-123, 9-121, // Calculate product into 'y'. // yr = wkr * xr + wki * xi; // yi = wkr * xi - wki * xr; @@ -374,12 +372,12 @@ static void rftbsub_128_SSE2(float *a) { const __m128 b_ = _mm_mul_ps(wki_, xi_); const __m128 c_ = _mm_mul_ps(wkr_, xi_); const __m128 d_ = _mm_mul_ps(wki_, xr_); - const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, - const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, - // Update 'a'. - // a[j2 + 0] = a[j2 + 0] - yr; - // a[j2 + 1] = yi - a[j2 + 1]; - // a[k2 + 0] = yr + a[k2 + 0]; + const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, + const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, + // Update 'a'. + // a[j2 + 0] = a[j2 + 0] - yr; + // a[j2 + 1] = yi - a[j2 + 1]; + // a[k2 + 0] = yr + a[k2 + 0]; // a[k2 + 1] = yi - a[k2 + 1]; const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9, @@ -387,26 +385,26 @@ static void rftbsub_128_SSE2(float *a) { const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121, // Shuffle in right order and store. 
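
(Illustration only, not part of the patch: the stores below first re-interleave the separately processed real and imaginary lanes with _mm_unpacklo_ps/_mm_unpackhi_ps. A minimal standalone sketch, using the first-iteration indexes from the comments:)

/* unpack_demo.c - _mm_unpacklo_ps(a, b) yields {a0, b0, a1, b1} and
 * _mm_unpackhi_ps(a, b) yields {a2, b2, a3, b3}, restoring the natural
 * {re, im, re, im, ...} layout before the store. */
#include <emmintrin.h>
#include <stdio.h>

int main(void) {
  const __m128 re = _mm_setr_ps(2.f, 4.f, 6.f, 8.f);  /* a[j2 + 0], ... */
  const __m128 im = _mm_setr_ps(3.f, 5.f, 7.f, 9.f);  /* a[j2 + 1], ... */
  float lo[4], hi[4];
  _mm_storeu_ps(lo, _mm_unpacklo_ps(re, im));  /* 2 3 4 5 */
  _mm_storeu_ps(hi, _mm_unpackhi_ps(re, im));  /* 6 7 8 9 */
  printf("%g %g %g %g  %g %g %g %g\n",
         lo[0], lo[1], lo[2], lo[3], hi[0], hi[1], hi[2], hi[3]);
  return 0;
}
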
const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); - // 2, 3, 4, 5, + // 2, 3, 4, 5, const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); - // 6, 7, 8, 9, + // 6, 7, 8, 9, const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); - // 122, 123, 120, 121, + // 122, 123, 120, 121, const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); - // 126, 127, 124, 125, - const __m128 a_k2_0n = _mm_shuffle_ps(a_k2_0nt, a_k2_0nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 120, 121, 122, 123, - const __m128 a_k2_4n = _mm_shuffle_ps(a_k2_4nt, a_k2_4nt, - _MM_SHUFFLE(1, 0, 3 ,2)); // 124, 125, 126, 127, - _mm_storeu_ps(&a[0 + j2], a_j2_0n); - _mm_storeu_ps(&a[4 + j2], a_j2_4n); + // 126, 127, 124, 125, + const __m128 a_k2_0n = _mm_shuffle_ps( + a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, + const __m128 a_k2_4n = _mm_shuffle_ps( + a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, + _mm_storeu_ps(&a[0 + j2], a_j2_0n); + _mm_storeu_ps(&a[4 + j2], a_j2_4n); _mm_storeu_ps(&a[122 - j2], a_k2_0n); _mm_storeu_ps(&a[126 - j2], a_k2_4n); } // Scalar code for the remaining items. for (; j2 < 64; j1 += 1, j2 += 2) { k2 = 128 - j2; - k1 = 32 - j1; + k1 = 32 - j1; wkr = 0.5f - c[k1]; wki = c[j1]; xr = a[j2 + 0] - a[k2 + 0]; @@ -427,5 +425,3 @@ void aec_rdft_init_sse2(void) { rftfsub_128 = rftfsub_128_SSE2; rftbsub_128 = rftbsub_128_SSE2; } - -#endif // WEBRTC_USE_SS2 diff --git a/webrtc/modules/audio_processing/aec/aec_resampler.c b/webrtc/modules/audio_processing/aec/aec_resampler.c new file mode 100644 index 0000000..99c39ef --- /dev/null +++ b/webrtc/modules/audio_processing/aec/aec_resampler.c @@ -0,0 +1,209 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for + * clock skew by resampling the farend signal. 
+ */ + +#include "webrtc/modules/audio_processing/aec/aec_resampler.h" + +#include <assert.h> +#include <math.h> +#include <stdlib.h> +#include <string.h> + +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +enum { + kEstimateLengthFrames = 400 +}; + +typedef struct { + float buffer[kResamplerBufferSize]; + float position; + + int deviceSampleRateHz; + int skewData[kEstimateLengthFrames]; + int skewDataIndex; + float skewEstimate; +} AecResampler; + +static int EstimateSkew(const int* rawSkew, + int size, + int absLimit, + float* skewEst); + +void* WebRtcAec_CreateResampler() { + return malloc(sizeof(AecResampler)); +} + +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) { + AecResampler* obj = (AecResampler*)resampInst; + memset(obj->buffer, 0, sizeof(obj->buffer)); + obj->position = 0.0; + + obj->deviceSampleRateHz = deviceSampleRateHz; + memset(obj->skewData, 0, sizeof(obj->skewData)); + obj->skewDataIndex = 0; + obj->skewEstimate = 0.0; + + return 0; +} + +void WebRtcAec_FreeResampler(void* resampInst) { + AecResampler* obj = (AecResampler*)resampInst; + free(obj); +} + +void WebRtcAec_ResampleLinear(void* resampInst, + const float* inspeech, + size_t size, + float skew, + float* outspeech, + size_t* size_out) { + AecResampler* obj = (AecResampler*)resampInst; + + float* y; + float be, tnew; + size_t tn, mm; + + assert(size <= 2 * FRAME_LEN); + assert(resampInst != NULL); + assert(inspeech != NULL); + assert(outspeech != NULL); + assert(size_out != NULL); + + // Add new frame data in lookahead + memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay], + inspeech, + size * sizeof(inspeech[0])); + + // Sample rate ratio + be = 1 + skew; + + // Loop over input frame + mm = 0; + y = &obj->buffer[FRAME_LEN]; // Point at current frame + + tnew = be * mm + obj->position; + tn = (size_t)tnew; + + while (tn < size) { + + // Interpolation + outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]); + mm++; + + tnew = be * mm + obj->position; + tn = (int)tnew; + } + + *size_out = mm; + obj->position += (*size_out) * be - size; + + // Shift buffer + memmove(obj->buffer, + &obj->buffer[size], + (kResamplerBufferSize - size) * sizeof(obj->buffer[0])); +} + +int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) { + AecResampler* obj = (AecResampler*)resampInst; + int err = 0; + + if (obj->skewDataIndex < kEstimateLengthFrames) { + obj->skewData[obj->skewDataIndex] = rawSkew; + obj->skewDataIndex++; + } else if (obj->skewDataIndex == kEstimateLengthFrames) { + err = EstimateSkew( + obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst); + obj->skewEstimate = *skewEst; + obj->skewDataIndex++; + } else { + *skewEst = obj->skewEstimate; + } + + return err; +} + +int EstimateSkew(const int* rawSkew, + int size, + int deviceSampleRateHz, + float* skewEst) { + const int absLimitOuter = (int)(0.04f * deviceSampleRateHz); + const int absLimitInner = (int)(0.0025f * deviceSampleRateHz); + int i = 0; + int n = 0; + float rawAvg = 0; + float err = 0; + float rawAbsDev = 0; + int upperLimit = 0; + int lowerLimit = 0; + float cumSum = 0; + float x = 0; + float x2 = 0; + float y = 0; + float xy = 0; + float xAvg = 0; + float denom = 0; + float skew = 0; + + *skewEst = 0; // Set in case of error below. 
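
(Illustration only, not part of the patch: EstimateSkew() below fits a straight line to the cumulative sum of the outlier-filtered raw skew samples and returns its slope as the skew estimate; the closed form at the end of the function is ordinary least squares. Sketched standalone:)

/* Least-squares slope through points (x_i, y_i), n > 0; the same
 * closed form EstimateSkew() applies to the cumulative skew sum:
 *   slope = (sum(xy) - mean(x) * sum(y)) / (sum(x^2) - mean(x) * sum(x)) */
static float LeastSquaresSlope(const float* xs, const float* ys, int n) {
  float sx = 0.f, sxx = 0.f, sy = 0.f, sxy = 0.f;
  int i;
  for (i = 0; i < n; i++) {
    sx += xs[i];
    sxx += xs[i] * xs[i];
    sy += ys[i];
    sxy += xs[i] * ys[i];
  }
  {
    const float xAvg = sx / n;
    const float denom = sxx - xAvg * sx;
    return denom != 0.f ? (sxy - xAvg * sy) / denom : 0.f;
  }
}
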
+ for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + n++; + rawAvg += rawSkew[i]; + } + } + + if (n == 0) { + return -1; + } + assert(n > 0); + rawAvg /= n; + + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { + err = rawSkew[i] - rawAvg; + rawAbsDev += err >= 0 ? err : -err; + } + } + assert(n > 0); + rawAbsDev /= n; + upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling. + lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor. + + n = 0; + for (i = 0; i < size; i++) { + if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) || + (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) { + n++; + cumSum += rawSkew[i]; + x += n; + x2 += n * n; + y += cumSum; + xy += n * cumSum; + } + } + + if (n == 0) { + return -1; + } + assert(n > 0); + xAvg = x / n; + denom = x2 - xAvg * x; + + if (denom != 0) { + skew = (xy - xAvg * y) / denom; + } + + *skewEst = skew; + return 0; +} diff --git a/webrtc/modules/audio_processing/aec/aec_resampler.h b/webrtc/modules/audio_processing/aec/aec_resampler.h new file mode 100644 index 0000000..a5002c1 --- /dev/null +++ b/webrtc/modules/audio_processing/aec/aec_resampler.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ + +#include "webrtc/modules/audio_processing/aec/aec_core.h" + +enum { + kResamplingDelay = 1 +}; +enum { + kResamplerBufferSize = FRAME_LEN * 4 +}; + +// Unless otherwise specified, functions return 0 on success and -1 on error. +void* WebRtcAec_CreateResampler(); // Returns NULL on error. +int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz); +void WebRtcAec_FreeResampler(void* resampInst); + +// Estimates skew from raw measurement. +int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst); + +// Resamples input using linear interpolation. +void WebRtcAec_ResampleLinear(void* resampInst, + const float* inspeech, + size_t size, + float skew, + float* outspeech, + size_t* size_out); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_ diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation.c b/webrtc/modules/audio_processing/aec/echo_cancellation.c index f35105f..0f5cd31 100644 --- a/webrtc/modules/audio_processing/aec/echo_cancellation.c +++ b/webrtc/modules/audio_processing/aec/echo_cancellation.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -11,737 +11,550 @@ /* * Contains the API functions for the AEC. 
*/ -#include "echo_cancellation.h" +#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" #include -#ifdef AEC_DEBUG +#ifdef WEBRTC_AEC_DEBUG_DUMP #include #endif #include #include -#include "aec_core.h" -#include "resampler.h" -#include "ring_buffer.h" +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aec/aec_core.h" +#include "webrtc/modules/audio_processing/aec/aec_resampler.h" +#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h" +#include "webrtc/typedefs.h" + +// Measured delays [ms] +// Device Chrome GTP +// MacBook Air 10 +// MacBook Retina 10 100 +// MacPro 30? +// +// Win7 Desktop 70 80? +// Win7 T430s 110 +// Win8 T420s 70 +// +// Daisy 50 +// Pixel (w/ preproc?) 240 +// Pixel (w/o preproc?) 110 110 + +// The extended filter mode gives us the flexibility to ignore the system's +// reported delays. We do this for platforms which we believe provide results +// which are incompatible with the AEC's expectations. Based on measurements +// (some provided above) we set a conservative (i.e. lower than measured) +// fixed delay. +// +// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode| +// is enabled. See the note along with |DelayCorrection| in +// echo_cancellation_impl.h for more details on the mode. +// +// Justification: +// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays +// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms +// and then compensate by rewinding by 10 ms (in wideband) through +// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind +// values, but fortunately this is sufficient. +// +// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond +// well to reality. The variance doesn't match the AEC's buffer changes, and the +// bulk values tend to be too low. However, the range across different hardware +// appears to be too large to choose a single value. +// +// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values. +#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC) +#define WEBRTC_UNTRUSTED_DELAY +#endif + +#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC) +static const int kDelayDiffOffsetSamples = -160; +#else +// Not enabled for now. +static const int kDelayDiffOffsetSamples = 0; +#endif + +#if defined(WEBRTC_MAC) +static const int kFixedDelayMs = 20; +#else +static const int kFixedDelayMs = 50; +#endif +#if !defined(WEBRTC_UNTRUSTED_DELAY) +static const int kMinTrustedDelayMs = 20; +#endif +static const int kMaxTrustedDelayMs = 500; -#define BUF_SIZE_FRAMES 50 // buffer size (frames) // Maximum length of resampled signal. 
Must be an integer multiple of frames // (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN // The factor of 2 handles wb, and the + 1 is as a safety margin +// TODO(bjornv): Replace with kResamplerBufferSize #define MAX_RESAMP_LEN (5 * FRAME_LEN) -static const int bufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples) -static const int sampMsNb = 8; // samples per ms in nb -// Target suppression levels for nlp modes -// log{0.001, 0.00001, 0.00000001} -static const float targetSupp[3] = {-6.9f, -11.5f, -18.4f}; -static const float minOverDrive[3] = {1.0f, 2.0f, 5.0f}; +static const int kMaxBufSizeStart = 62; // In partitions +static const int sampMsNb = 8; // samples per ms in nb static const int initCheck = 42; -typedef struct { - int delayCtr; - int sampFreq; - int splitSampFreq; - int scSampFreq; - float sampFactor; // scSampRate / sampFreq - short nlpMode; - short autoOnOff; - short activity; - short skewMode; - short bufSizeStart; - //short bufResetCtr; // counts number of noncausal frames - int knownDelay; +#ifdef WEBRTC_AEC_DEBUG_DUMP +int webrtc_aec_instance_count = 0; +#endif - // Stores the last frame added to the farend buffer - short farendOld[2][FRAME_LEN]; - short initFlag; // indicates if AEC has been initialized - - // Variables used for averaging far end buffer size - short counter; - short sum; - short firstVal; - short checkBufSizeCtr; - - // Variables used for delay shifts - short msInSndCardBuf; - short filtDelay; - int timeForDelayChange; - int ECstartup; - int checkBuffSize; - int delayChange; - short lastDelayDiff; - -#ifdef AEC_DEBUG - FILE *bufFile; - FILE *delayFile; - FILE *skewFile; - FILE *preCompFile; - FILE *postCompFile; -#endif // AEC_DEBUG - - // Structures - void *farendBuf; - void *resampler; - - int skewFrCtr; - int resample; // if the skew is small enough we don't resample - int highSkewCtr; - float skew; - - int lastError; - - aec_t *aec; -} aecpc_t; - -// Estimates delay to set the position of the farend buffer read pointer +// Estimates delay to set the position of the far-end buffer read pointer // (controlled by knownDelay) -static int EstBufDelay(aecpc_t *aecInst, short msInSndCardBuf); - -// Stuffs the farend buffer if the estimated delay is too large -static int DelayComp(aecpc_t *aecInst); - -WebRtc_Word32 WebRtcAec_Create(void **aecInst) -{ - aecpc_t *aecpc; - if (aecInst == NULL) { - return -1; - } - - aecpc = malloc(sizeof(aecpc_t)); - *aecInst = aecpc; - if (aecpc == NULL) { - return -1; - } - - if (WebRtcAec_CreateAec(&aecpc->aec) == -1) { - WebRtcAec_Free(aecpc); - aecpc = NULL; - return -1; - } - - if (WebRtcApm_CreateBuffer(&aecpc->farendBuf, bufSizeSamp) == -1) { - WebRtcAec_Free(aecpc); - aecpc = NULL; - return -1; - } - - if (WebRtcAec_CreateResampler(&aecpc->resampler) == -1) { - WebRtcAec_Free(aecpc); - aecpc = NULL; - return -1; - } - - aecpc->initFlag = 0; - aecpc->lastError = 0; - -#ifdef AEC_DEBUG - aecpc->aec->farFile = fopen("aecFar.pcm","wb"); - aecpc->aec->nearFile = fopen("aecNear.pcm","wb"); - aecpc->aec->outFile = fopen("aecOut.pcm","wb"); - aecpc->aec->outLpFile = fopen("aecOutLp.pcm","wb"); - - aecpc->bufFile = fopen("aecBuf.dat", "wb"); - aecpc->skewFile = fopen("aecSkew.dat", "wb"); - aecpc->delayFile = fopen("aecDelay.dat", "wb"); - aecpc->preCompFile = fopen("preComp.pcm", "wb"); - aecpc->postCompFile = fopen("postComp.pcm", "wb"); -#endif // AEC_DEBUG - - return 0; -} - -WebRtc_Word32 WebRtcAec_Free(void *aecInst) -{ - aecpc_t *aecpc = aecInst; - - if (aecpc == NULL) { - return -1; - } - -#ifdef AEC_DEBUG - 
fclose(aecpc->aec->farFile); - fclose(aecpc->aec->nearFile); - fclose(aecpc->aec->outFile); - fclose(aecpc->aec->outLpFile); - - fclose(aecpc->bufFile); - fclose(aecpc->skewFile); - fclose(aecpc->delayFile); - fclose(aecpc->preCompFile); - fclose(aecpc->postCompFile); -#endif // AEC_DEBUG - - WebRtcAec_FreeAec(aecpc->aec); - WebRtcApm_FreeBuffer(aecpc->farendBuf); - WebRtcAec_FreeResampler(aecpc->resampler); - free(aecpc); - - return 0; -} - -WebRtc_Word32 WebRtcAec_Init(void *aecInst, WebRtc_Word32 sampFreq, WebRtc_Word32 scSampFreq) -{ - aecpc_t *aecpc = aecInst; - AecConfig aecConfig; - - if (aecpc == NULL) { - return -1; - } - - if (sampFreq != 8000 && sampFreq != 16000 && sampFreq != 32000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - aecpc->sampFreq = sampFreq; - - if (scSampFreq < 1 || scSampFreq > 96000) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - aecpc->scSampFreq = scSampFreq; - - // Initialize echo canceller core - if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } - - // Initialize farend buffer - if (WebRtcApm_InitBuffer(aecpc->farendBuf) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } - - if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } - - aecpc->initFlag = initCheck; // indicates that initialization has been done - - if (aecpc->sampFreq == 32000) { - aecpc->splitSampFreq = 16000; - } - else { - aecpc->splitSampFreq = sampFreq; - } - - aecpc->skewFrCtr = 0; - aecpc->activity = 0; - - aecpc->delayChange = 1; - aecpc->delayCtr = 0; - - aecpc->sum = 0; - aecpc->counter = 0; - aecpc->checkBuffSize = 1; - aecpc->firstVal = 0; - - aecpc->ECstartup = 1; - aecpc->bufSizeStart = 0; - aecpc->checkBufSizeCtr = 0; - aecpc->filtDelay = 0; - aecpc->timeForDelayChange =0; - aecpc->knownDelay = 0; - aecpc->lastDelayDiff = 0; - - aecpc->skew = 0; - aecpc->resample = kAecFalse; - aecpc->highSkewCtr = 0; - aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq; - - memset(&aecpc->farendOld[0][0], 0, 160); - - // Default settings. - aecConfig.nlpMode = kAecNlpModerate; - aecConfig.skewMode = kAecFalse; - aecConfig.metricsMode = kAecFalse; - aecConfig.delay_logging = kAecFalse; - - if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { - aecpc->lastError = AEC_UNSPECIFIED_ERROR; - return -1; - } - - return 0; -} - -// only buffer L band for farend -WebRtc_Word32 WebRtcAec_BufferFarend(void *aecInst, const WebRtc_Word16 *farend, - WebRtc_Word16 nrOfSamples) -{ - aecpc_t *aecpc = aecInst; - WebRtc_Word32 retVal = 0; - short newNrOfSamples; - short newFarend[MAX_RESAMP_LEN]; - float skew; - - if (aecpc == NULL) { - return -1; - } - - if (farend == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } - - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } - - // number of samples == 160 for SWB input - if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - - skew = aecpc->skew; - - // TODO: Is this really a good idea? 
- if (!aecpc->ECstartup) { - DelayComp(aecpc); - } - - if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { - // Resample and get a new number of samples - newNrOfSamples = WebRtcAec_ResampleLinear(aecpc->resampler, - farend, - nrOfSamples, - skew, - newFarend); - WebRtcApm_WriteBuffer(aecpc->farendBuf, newFarend, newNrOfSamples); - -#ifdef AEC_DEBUG - fwrite(farend, 2, nrOfSamples, aecpc->preCompFile); - fwrite(newFarend, 2, newNrOfSamples, aecpc->postCompFile); -#endif - } - else { - WebRtcApm_WriteBuffer(aecpc->farendBuf, farend, nrOfSamples); - } - - return retVal; -} - -WebRtc_Word32 WebRtcAec_Process(void *aecInst, const WebRtc_Word16 *nearend, - const WebRtc_Word16 *nearendH, WebRtc_Word16 *out, WebRtc_Word16 *outH, - WebRtc_Word16 nrOfSamples, WebRtc_Word16 msInSndCardBuf, WebRtc_Word32 skew) -{ - aecpc_t *aecpc = aecInst; - WebRtc_Word32 retVal = 0; - short i; - short farend[FRAME_LEN]; - short nmbrOfFilledBuffers; - short nBlocks10ms; - short nFrames; -#ifdef AEC_DEBUG - short msInAECBuf; -#endif - // Limit resampling to doubling/halving of signal - const float minSkewEst = -0.5f; - const float maxSkewEst = 1.0f; - - if (aecpc == NULL) { - return -1; - } - - if (nearend == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } - - if (out == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } - - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } - - // number of samples == 160 for SWB input - if (nrOfSamples != 80 && nrOfSamples != 160) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - - // Check for valid pointers based on sampling rate - if (aecpc->sampFreq == 32000 && nearendH == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } - - if (msInSndCardBuf < 0) { - msInSndCardBuf = 0; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; - } - else if (msInSndCardBuf > 500) { - msInSndCardBuf = 500; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - retVal = -1; - } - msInSndCardBuf += 10; - aecpc->msInSndCardBuf = msInSndCardBuf; - - if (aecpc->skewMode == kAecTrue) { - if (aecpc->skewFrCtr < 25) { - aecpc->skewFrCtr++; - } - else { - retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew); - if (retVal == -1) { - aecpc->skew = 0; - aecpc->lastError = AEC_BAD_PARAMETER_WARNING; - } - - aecpc->skew /= aecpc->sampFactor*nrOfSamples; - - if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) { - aecpc->resample = kAecFalse; - } - else { - aecpc->resample = kAecTrue; - } - - if (aecpc->skew < minSkewEst) { - aecpc->skew = minSkewEst; - } - else if (aecpc->skew > maxSkewEst) { - aecpc->skew = maxSkewEst; - } - -#ifdef AEC_DEBUG - fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile); -#endif - } - } - - nFrames = nrOfSamples / FRAME_LEN; - nBlocks10ms = nFrames / aecpc->aec->mult; - - if (aecpc->ECstartup) { - if (nearend != out) { - // Only needed if they don't already point to the same place. - memcpy(out, nearend, sizeof(short) * nrOfSamples); - } - nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecpc->farendBuf) / FRAME_LEN; - - // The AEC is in the start up mode - // AEC is disabled until the soundcard buffer and farend buffers are OK - - // Mechanism to ensure that the soundcard buffer is reasonably stable. - if (aecpc->checkBuffSize) { - - aecpc->checkBufSizeCtr++; - // Before we fill up the far end buffer we require the amount of data on the - // sound card to be stable (+/-8 ms) compared to the first value. 
This - // comparison is made during the following 4 consecutive frames. If it seems - // to be stable then we start to fill up the far end buffer. - - if (aecpc->counter == 0) { - aecpc->firstVal = aecpc->msInSndCardBuf; - aecpc->sum = 0; - } - - if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) < - WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) { - aecpc->sum += aecpc->msInSndCardBuf; - aecpc->counter++; - } - else { - aecpc->counter = 0; - } - - if (aecpc->counter*nBlocks10ms >= 6) { - // The farend buffer size is determined in blocks of 80 samples - // Use 75% of the average value of the soundcard buffer - aecpc->bufSizeStart = WEBRTC_SPL_MIN((int) (0.75 * (aecpc->sum * - aecpc->aec->mult) / (aecpc->counter * 10)), BUF_SIZE_FRAMES); - // buffersize has now been determined - aecpc->checkBuffSize = 0; - } - - if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) { - // for really bad sound cards, don't disable echocanceller for more than 0.5 sec - aecpc->bufSizeStart = WEBRTC_SPL_MIN((int) (0.75 * (aecpc->msInSndCardBuf * - aecpc->aec->mult) / 10), BUF_SIZE_FRAMES); - aecpc->checkBuffSize = 0; - } - } - - // if checkBuffSize changed in the if-statement above - if (!aecpc->checkBuffSize) { - // soundcard buffer is now reasonably stable - // When the far end buffer is filled with approximately the same amount of - // data as the amount on the sound card we end the start up phase and start - // to cancel echoes. - - if (nmbrOfFilledBuffers == aecpc->bufSizeStart) { - aecpc->ECstartup = 0; // Enable the AEC - } - else if (nmbrOfFilledBuffers > aecpc->bufSizeStart) { - WebRtcApm_FlushBuffer(aecpc->farendBuf, WebRtcApm_get_buffer_size(aecpc->farendBuf) - - aecpc->bufSizeStart * FRAME_LEN); - aecpc->ECstartup = 0; - } - } - - } - else { - // AEC is enabled - - // Note only 1 block supported for nb and 2 blocks for wb - for (i = 0; i < nFrames; i++) { - nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecpc->farendBuf) / FRAME_LEN; - - // Check that there is data in the far end buffer - if (nmbrOfFilledBuffers > 0) { - // Get the next 80 samples from the farend buffer - WebRtcApm_ReadBuffer(aecpc->farendBuf, farend, FRAME_LEN); - - // Always store the last frame for use when we run out of data - memcpy(&(aecpc->farendOld[i][0]), farend, FRAME_LEN * sizeof(short)); - } - else { - // We have no data so we use the last played frame - memcpy(farend, &(aecpc->farendOld[i][0]), FRAME_LEN * sizeof(short)); - } - - // Call buffer delay estimator when all data is extracted, - // i.e. 
i = 0 for NB and i = 1 for WB or SWB - if ((i == 0 && aecpc->splitSampFreq == 8000) || - (i == 1 && (aecpc->splitSampFreq == 16000))) { - EstBufDelay(aecpc, aecpc->msInSndCardBuf); - } - - // Call the AEC - WebRtcAec_ProcessFrame(aecpc->aec, farend, &nearend[FRAME_LEN * i], &nearendH[FRAME_LEN * i], - &out[FRAME_LEN * i], &outH[FRAME_LEN * i], aecpc->knownDelay); - } - } - -#ifdef AEC_DEBUG - msInAECBuf = WebRtcApm_get_buffer_size(aecpc->farendBuf) / (sampMsNb*aecpc->aec->mult); - fwrite(&msInAECBuf, 2, 1, aecpc->bufFile); - fwrite(&(aecpc->knownDelay), sizeof(aecpc->knownDelay), 1, aecpc->delayFile); +static void EstBufDelayNormal(Aec* aecInst); +static void EstBufDelayExtended(Aec* aecInst); +static int ProcessNormal(Aec* self, + const float* const* near, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew); +static void ProcessExtended(Aec* self, + const float* const* near, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew); + +void* WebRtcAec_Create() { + Aec* aecpc = malloc(sizeof(Aec)); + + if (!aecpc) { + return NULL; + } + + aecpc->aec = WebRtcAec_CreateAec(); + if (!aecpc->aec) { + WebRtcAec_Free(aecpc); + return NULL; + } + aecpc->resampler = WebRtcAec_CreateResampler(); + if (!aecpc->resampler) { + WebRtcAec_Free(aecpc); + return NULL; + } + // Create far-end pre-buffer. The buffer size has to be large enough for + // largest possible drift compensation (kResamplerBufferSize) + "almost" an + // FFT buffer (PART_LEN2 - 1). + aecpc->far_pre_buf = + WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float)); + if (!aecpc->far_pre_buf) { + WebRtcAec_Free(aecpc); + return NULL; + } + + aecpc->initFlag = 0; + aecpc->lastError = 0; + +#ifdef WEBRTC_AEC_DEBUG_DUMP + { + char filename[64]; + sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count); + aecpc->bufFile = fopen(filename, "wb"); + sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count); + aecpc->skewFile = fopen(filename, "wb"); + sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count); + aecpc->delayFile = fopen(filename, "wb"); + webrtc_aec_instance_count++; + } #endif - return retVal; + return aecpc; } -WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config) -{ - aecpc_t *aecpc = aecInst; +void WebRtcAec_Free(void* aecInst) { + Aec* aecpc = aecInst; - if (aecpc == NULL) { - return -1; - } + if (aecpc == NULL) { + return; + } - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } + WebRtc_FreeBuffer(aecpc->far_pre_buf); - if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - aecpc->skewMode = config.skewMode; +#ifdef WEBRTC_AEC_DEBUG_DUMP + fclose(aecpc->bufFile); + fclose(aecpc->skewFile); + fclose(aecpc->delayFile); +#endif - if (config.nlpMode != kAecNlpConservative && config.nlpMode != - kAecNlpModerate && config.nlpMode != kAecNlpAggressive) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - aecpc->nlpMode = config.nlpMode; - aecpc->aec->targetSupp = targetSupp[aecpc->nlpMode]; - aecpc->aec->minOverDrive = minOverDrive[aecpc->nlpMode]; + WebRtcAec_FreeAec(aecpc->aec); + WebRtcAec_FreeResampler(aecpc->resampler); + free(aecpc); +} - if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) { - aecpc->lastError = AEC_BAD_PARAMETER_ERROR; - return -1; - } - aecpc->aec->metricsMode = config.metricsMode; - if 
(aecpc->aec->metricsMode == kAecTrue) { - WebRtcAec_InitMetrics(aecpc->aec); - } +int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) { + Aec* aecpc = aecInst; + AecConfig aecConfig; - if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) { + if (sampFreq != 8000 && + sampFreq != 16000 && + sampFreq != 32000 && + sampFreq != 48000) { aecpc->lastError = AEC_BAD_PARAMETER_ERROR; return -1; } - aecpc->aec->delay_logging_enabled = config.delay_logging; - if (aecpc->aec->delay_logging_enabled == kAecTrue) { - memset(aecpc->aec->delay_histogram, 0, sizeof(aecpc->aec->delay_histogram)); - } + aecpc->sampFreq = sampFreq; - return 0; -} - -WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config) -{ - aecpc_t *aecpc = aecInst; - - if (aecpc == NULL) { - return -1; - } - - if (config == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } - - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } - - config->nlpMode = aecpc->nlpMode; - config->skewMode = aecpc->skewMode; - config->metricsMode = aecpc->aec->metricsMode; - config->delay_logging = aecpc->aec->delay_logging_enabled; - - return 0; -} - -WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status) -{ - aecpc_t *aecpc = aecInst; - - if (aecpc == NULL) { - return -1; - } - - if (status == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } - - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } - - *status = aecpc->aec->echoState; - - return 0; -} - -WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics) -{ - const float upweight = 0.7f; - float dtmp; - short stmp; - aecpc_t *aecpc = aecInst; - - if (aecpc == NULL) { - return -1; - } - - if (metrics == NULL) { - aecpc->lastError = AEC_NULL_POINTER_ERROR; - return -1; - } - - if (aecpc->initFlag != initCheck) { - aecpc->lastError = AEC_UNINITIALIZED_ERROR; - return -1; - } - - // ERL - metrics->erl.instant = (short) aecpc->aec->erl.instant; - - if ((aecpc->aec->erl.himean > offsetLevel) && (aecpc->aec->erl.average > offsetLevel)) { - // Use a mix between regular average and upper part average - dtmp = upweight * aecpc->aec->erl.himean + (1 - upweight) * aecpc->aec->erl.average; - metrics->erl.average = (short) dtmp; - } - else { - metrics->erl.average = offsetLevel; - } - - metrics->erl.max = (short) aecpc->aec->erl.max; - - if (aecpc->aec->erl.min < (offsetLevel * (-1))) { - metrics->erl.min = (short) aecpc->aec->erl.min; - } - else { - metrics->erl.min = offsetLevel; - } - - // ERLE - metrics->erle.instant = (short) aecpc->aec->erle.instant; - - if ((aecpc->aec->erle.himean > offsetLevel) && (aecpc->aec->erle.average > offsetLevel)) { - // Use a mix between regular average and upper part average - dtmp = upweight * aecpc->aec->erle.himean + (1 - upweight) * aecpc->aec->erle.average; - metrics->erle.average = (short) dtmp; - } - else { - metrics->erle.average = offsetLevel; - } - - metrics->erle.max = (short) aecpc->aec->erle.max; - - if (aecpc->aec->erle.min < (offsetLevel * (-1))) { - metrics->erle.min = (short) aecpc->aec->erle.min; - } else { - metrics->erle.min = offsetLevel; - } - - // RERL - if ((metrics->erl.average > offsetLevel) && (metrics->erle.average > offsetLevel)) { - stmp = metrics->erl.average + metrics->erle.average; - } - else { - stmp = offsetLevel; - } - metrics->rerl.average = stmp; - - // No other statistics needed, but returned for completeness - 
metrics->rerl.instant = stmp; - metrics->rerl.max = stmp; - metrics->rerl.min = stmp; - - // A_NLP - metrics->aNlp.instant = (short) aecpc->aec->aNlp.instant; - - if ((aecpc->aec->aNlp.himean > offsetLevel) && (aecpc->aec->aNlp.average > offsetLevel)) { - // Use a mix between regular average and upper part average - dtmp = upweight * aecpc->aec->aNlp.himean + (1 - upweight) * aecpc->aec->aNlp.average; - metrics->aNlp.average = (short) dtmp; - } - else { - metrics->aNlp.average = offsetLevel; - } - - metrics->aNlp.max = (short) aecpc->aec->aNlp.max; - - if (aecpc->aec->aNlp.min < (offsetLevel * (-1))) { - metrics->aNlp.min = (short) aecpc->aec->aNlp.min; - } - else { - metrics->aNlp.min = offsetLevel; - } - - return 0; -} - -int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) { - aecpc_t* self = handle; - int i = 0; - int delay_values = 0; - int num_delay_values = 0; - int my_median = 0; - const int kMsPerBlock = (PART_LEN * 1000) / self->splitSampFreq; - float l1_norm = 0; - - if (self == NULL) { + if (scSampFreq < 1 || scSampFreq > 96000) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; return -1; } + aecpc->scSampFreq = scSampFreq; + + // Initialize echo canceller core + if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + + if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + + WebRtc_InitBuffer(aecpc->far_pre_buf); + WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap. + + aecpc->initFlag = initCheck; // indicates that initialization has been done + + if (aecpc->sampFreq == 32000 || aecpc->sampFreq == 48000) { + aecpc->splitSampFreq = 16000; + } else { + aecpc->splitSampFreq = sampFreq; + } + + aecpc->delayCtr = 0; + aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq; + // Sampling frequency multiplier (SWB is processed as 160 frame size). + aecpc->rate_factor = aecpc->splitSampFreq / 8000; + + aecpc->sum = 0; + aecpc->counter = 0; + aecpc->checkBuffSize = 1; + aecpc->firstVal = 0; + + // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled, + // but not extended_filter mode. + aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) || + !WebRtcAec_delay_agnostic_enabled(aecpc->aec); + aecpc->bufSizeStart = 0; + aecpc->checkBufSizeCtr = 0; + aecpc->msInSndCardBuf = 0; + aecpc->filtDelay = -1; // -1 indicates an initialized state. + aecpc->timeForDelayChange = 0; + aecpc->knownDelay = 0; + aecpc->lastDelayDiff = 0; + + aecpc->skewFrCtr = 0; + aecpc->resample = kAecFalse; + aecpc->highSkewCtr = 0; + aecpc->skew = 0; + + aecpc->farend_started = 0; + + // Default settings. 
+ aecConfig.nlpMode = kAecNlpModerate; + aecConfig.skewMode = kAecFalse; + aecConfig.metricsMode = kAecFalse; + aecConfig.delay_logging = kAecFalse; + + if (WebRtcAec_set_config(aecpc, aecConfig) == -1) { + aecpc->lastError = AEC_UNSPECIFIED_ERROR; + return -1; + } + + return 0; +} + +// only buffer L band for farend +int32_t WebRtcAec_BufferFarend(void* aecInst, + const float* farend, + size_t nrOfSamples) { + Aec* aecpc = aecInst; + size_t newNrOfSamples = nrOfSamples; + float new_farend[MAX_RESAMP_LEN]; + const float* farend_ptr = farend; + + if (farend == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { + // Resample and get a new number of samples + WebRtcAec_ResampleLinear(aecpc->resampler, + farend, + nrOfSamples, + aecpc->skew, + new_farend, + &newNrOfSamples); + farend_ptr = new_farend; + } + + aecpc->farend_started = 1; + WebRtcAec_SetSystemDelay( + aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + (int)newNrOfSamples); + + // Write the time-domain data to |far_pre_buf|. + WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples); + + // Transform to frequency domain if we have enough data. + while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) { + // We have enough data to pass to the FFT, hence read PART_LEN2 samples. + { + float* ptmp = NULL; + float tmp[PART_LEN2]; + WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2); + WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp); +#ifdef WEBRTC_AEC_DEBUG_DUMP + WebRtc_WriteBuffer( + WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1); +#endif + } + + // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing. + WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); + } + + return 0; +} + +int32_t WebRtcAec_Process(void* aecInst, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew) { + Aec* aecpc = aecInst; + int32_t retVal = 0; + + if (out == NULL) { + aecpc->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + + if (aecpc->initFlag != initCheck) { + aecpc->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + // number of samples == 160 for SWB input + if (nrOfSamples != 80 && nrOfSamples != 160) { + aecpc->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (msInSndCardBuf < 0) { + msInSndCardBuf = 0; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } else if (msInSndCardBuf > kMaxTrustedDelayMs) { + // The clamping is now done in ProcessExtended/Normal(). + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + retVal = -1; + } + + // This returns the value of aec->extended_filter_enabled. 
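Internally, the far-end path above feeds the AEC core in 50%-overlapped blocks: WebRtcAec_BufferFarend() reads PART_LEN2 (128) samples per FFT partition but rewinds the read pointer by PART_LEN (64), so each 64 fresh samples are paired with the previous 64. A minimal standalone sketch of that ring-buffer pattern (hypothetical names; PART_LEN of 64 assumed, as in aec_core.h):

#include <stddef.h>
#include <string.h>

#define SKETCH_PART_LEN 64
#define SKETCH_PART_LEN2 (2 * SKETCH_PART_LEN)

/* Emits one 128-sample block per 64 fresh samples; the first half of each
 * block is the previous 64 samples, mirroring the PART_LEN rewind above. */
static void OverlapBlocks(const float* in, size_t len,
                          float history[SKETCH_PART_LEN],
                          void (*emit)(const float* block)) {
  float block[SKETCH_PART_LEN2];
  size_t i;
  for (i = 0; i + SKETCH_PART_LEN <= len; i += SKETCH_PART_LEN) {
    memcpy(block, history, SKETCH_PART_LEN * sizeof(float));
    memcpy(block + SKETCH_PART_LEN, in + i, SKETCH_PART_LEN * sizeof(float));
    memcpy(history, in + i, SKETCH_PART_LEN * sizeof(float));
    emit(block);
  }
}

The rewind gives the frequency-domain partitions their half-block overlap without copying the whole far-end history.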
+ if (WebRtcAec_extended_filter_enabled(aecpc->aec)) { + ProcessExtended(aecpc, + nearend, + num_bands, + out, + nrOfSamples, + msInSndCardBuf, + skew); + } else { + if (ProcessNormal(aecpc, + nearend, + num_bands, + out, + nrOfSamples, + msInSndCardBuf, + skew) != 0) { + retVal = -1; + } + } + +#ifdef WEBRTC_AEC_DEBUG_DUMP + { + int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) / + (sampMsNb * aecpc->rate_factor)); + (void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile); + (void)fwrite( + &aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile); + } +#endif + + return retVal; +} + +int WebRtcAec_set_config(void* handle, AecConfig config) { + Aec* self = (Aec*)handle; + if (self->initFlag != initCheck) { + self->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + self->skewMode = config.skewMode; + + if (config.nlpMode != kAecNlpConservative && + config.nlpMode != kAecNlpModerate && + config.nlpMode != kAecNlpAggressive) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) { + self->lastError = AEC_BAD_PARAMETER_ERROR; + return -1; + } + + WebRtcAec_SetConfigCore( + self->aec, config.nlpMode, config.metricsMode, config.delay_logging); + return 0; +} + +int WebRtcAec_get_echo_status(void* handle, int* status) { + Aec* self = (Aec*)handle; + if (status == NULL) { + self->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + if (self->initFlag != initCheck) { + self->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + *status = WebRtcAec_echo_state(self->aec); + + return 0; +} + +int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) { + const float kUpWeight = 0.7f; + float dtmp; + int stmp; + Aec* self = (Aec*)handle; + Stats erl; + Stats erle; + Stats a_nlp; + + if (handle == NULL) { + return -1; + } + if (metrics == NULL) { + self->lastError = AEC_NULL_POINTER_ERROR; + return -1; + } + if (self->initFlag != initCheck) { + self->lastError = AEC_UNINITIALIZED_ERROR; + return -1; + } + + WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp); + + // ERL + metrics->erl.instant = (int)erl.instant; + + if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. + dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average; + metrics->erl.average = (int)dtmp; + } else { + metrics->erl.average = kOffsetLevel; + } + + metrics->erl.max = (int)erl.max; + + if (erl.min < (kOffsetLevel * (-1))) { + metrics->erl.min = (int)erl.min; + } else { + metrics->erl.min = kOffsetLevel; + } + + // ERLE + metrics->erle.instant = (int)erle.instant; + + if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. 
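For a concrete feel of the blend used here: with kUpWeight = 0.7, an upper-part mean of 20 and a plain average of 10 are reported as 0.7 * 20 + 0.3 * 10 = 17.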
+ dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average; + metrics->erle.average = (int)dtmp; + } else { + metrics->erle.average = kOffsetLevel; + } + + metrics->erle.max = (int)erle.max; + + if (erle.min < (kOffsetLevel * (-1))) { + metrics->erle.min = (int)erle.min; + } else { + metrics->erle.min = kOffsetLevel; + } + + // RERL + if ((metrics->erl.average > kOffsetLevel) && + (metrics->erle.average > kOffsetLevel)) { + stmp = metrics->erl.average + metrics->erle.average; + } else { + stmp = kOffsetLevel; + } + metrics->rerl.average = stmp; + + // No other statistics needed, but returned for completeness. + metrics->rerl.instant = stmp; + metrics->rerl.max = stmp; + metrics->rerl.min = stmp; + + // A_NLP + metrics->aNlp.instant = (int)a_nlp.instant; + + if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) { + // Use a mix between regular average and upper part average. + dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average; + metrics->aNlp.average = (int)dtmp; + } else { + metrics->aNlp.average = kOffsetLevel; + } + + metrics->aNlp.max = (int)a_nlp.max; + + if (a_nlp.min < (kOffsetLevel * (-1))) { + metrics->aNlp.min = (int)a_nlp.min; + } else { + metrics->aNlp.min = kOffsetLevel; + } + + return 0; +} + +int WebRtcAec_GetDelayMetrics(void* handle, + int* median, + int* std, + float* fraction_poor_delays) { + Aec* self = handle; if (median == NULL) { self->lastError = AEC_NULL_POINTER_ERROR; return -1; @@ -754,148 +567,357 @@ int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) { self->lastError = AEC_UNINITIALIZED_ERROR; return -1; } - if (self->aec->delay_logging_enabled == 0) { - // Logging disabled + if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std, + fraction_poor_delays) == + -1) { + // Logging disabled. 
self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR; return -1; } - // Get number of delay values since last update - for (i = 0; i < kMaxDelay; i++) { - num_delay_values += self->aec->delay_histogram[i]; - } - if (num_delay_values == 0) { - // We have no new delay value data - *median = -1; - *std = -1; - return 0; - } - - delay_values = num_delay_values >> 1; // Start value for median count down - // Get median of delay values since last update - for (i = 0; i < kMaxDelay; i++) { - delay_values -= self->aec->delay_histogram[i]; - if (delay_values < 0) { - my_median = i; - break; - } - } - *median = my_median * kMsPerBlock; - - // Calculate the L1 norm, with median value as central moment - for (i = 0; i < kMaxDelay; i++) { - l1_norm += (float) (fabs(i - my_median) * self->aec->delay_histogram[i]); - } - *std = (int) (l1_norm / (float) num_delay_values + 0.5f) * kMsPerBlock; - - // Reset histogram - memset(self->aec->delay_histogram, 0, sizeof(self->aec->delay_histogram)); - return 0; } -WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len) -{ - const char version[] = "AEC 2.5.0"; - const short versionLen = (short)strlen(version) + 1; // +1 for null-termination - - if (versionStr == NULL) { - return -1; - } - - if (versionLen > len) { - return -1; - } - - strncpy(versionStr, version, versionLen); - return 0; +int32_t WebRtcAec_get_error_code(void* aecInst) { + Aec* aecpc = aecInst; + return aecpc->lastError; } -WebRtc_Word32 WebRtcAec_get_error_code(void *aecInst) -{ - aecpc_t *aecpc = aecInst; - - if (aecpc == NULL) { - return -1; - } - - return aecpc->lastError; +AecCore* WebRtcAec_aec_core(void* handle) { + if (!handle) { + return NULL; + } + return ((Aec*)handle)->aec; } -static int EstBufDelay(aecpc_t *aecpc, short msInSndCardBuf) -{ - short delayNew, nSampFar, nSampSndCard; - short diff; +static int ProcessNormal(Aec* aecpc, + const float* const* nearend, + size_t num_bands, + float* const* out, + size_t nrOfSamples, + int16_t msInSndCardBuf, + int32_t skew) { + int retVal = 0; + size_t i; + size_t nBlocks10ms; + // Limit resampling to doubling/halving of signal + const float minSkewEst = -0.5f; + const float maxSkewEst = 1.0f; - nSampFar = WebRtcApm_get_buffer_size(aecpc->farendBuf); - nSampSndCard = msInSndCardBuf * sampMsNb * aecpc->aec->mult; + msInSndCardBuf = + msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf; + // TODO(andrew): we need to investigate if this +10 is really wanted. 
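(The +10 ms added below is non-causality headroom: as the extended-filter comments later in this file note, over-reporting the delay models a slightly longer echo path, which is safer than letting the adaptive filter land in front of the true echo.)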
+ msInSndCardBuf += 10; + aecpc->msInSndCardBuf = msInSndCardBuf; - delayNew = nSampSndCard - nSampFar; + if (aecpc->skewMode == kAecTrue) { + if (aecpc->skewFrCtr < 25) { + aecpc->skewFrCtr++; + } else { + retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew); + if (retVal == -1) { + aecpc->skew = 0; + aecpc->lastError = AEC_BAD_PARAMETER_WARNING; + } - // Account for resampling frame delay - if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { - delayNew -= kResamplingDelay; + aecpc->skew /= aecpc->sampFactor * nrOfSamples; + + if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) { + aecpc->resample = kAecFalse; + } else { + aecpc->resample = kAecTrue; + } + + if (aecpc->skew < minSkewEst) { + aecpc->skew = minSkewEst; + } else if (aecpc->skew > maxSkewEst) { + aecpc->skew = maxSkewEst; + } + +#ifdef WEBRTC_AEC_DEBUG_DUMP + (void)fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile); +#endif + } + } + + nBlocks10ms = nrOfSamples / (FRAME_LEN * aecpc->rate_factor); + + if (aecpc->startup_phase) { + for (i = 0; i < num_bands; ++i) { + // Only needed if they don't already point to the same place. + if (nearend[i] != out[i]) { + memcpy(out[i], nearend[i], sizeof(nearend[i][0]) * nrOfSamples); + } } - if (delayNew < FRAME_LEN) { - WebRtcApm_FlushBuffer(aecpc->farendBuf, FRAME_LEN); - delayNew += FRAME_LEN; + // The AEC is in the start up mode + // AEC is disabled until the system delay is OK + + // Mechanism to ensure that the system delay is reasonably stable. + if (aecpc->checkBuffSize) { + aecpc->checkBufSizeCtr++; + // Before we fill up the far-end buffer we require the system delay + // to be stable (+/-8 ms) compared to the first value. This + // comparison is made during the following 6 consecutive 10 ms + // blocks. If it seems to be stable then we start to fill up the + // far-end buffer. + if (aecpc->counter == 0) { + aecpc->firstVal = aecpc->msInSndCardBuf; + aecpc->sum = 0; + } + + if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) < + WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) { + aecpc->sum += aecpc->msInSndCardBuf; + aecpc->counter++; + } else { + aecpc->counter = 0; + } + + if (aecpc->counter * nBlocks10ms >= 6) { + // The far-end buffer size is determined in partitions of + // PART_LEN samples. Use 75% of the average value of the system + // delay as buffer size to start with. + aecpc->bufSizeStart = + WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) / + (4 * aecpc->counter * PART_LEN), + kMaxBufSizeStart); + // Buffer size has now been determined. + aecpc->checkBuffSize = 0; + } + + if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) { + // For really bad systems, don't disable the echo canceller for + // more than 0.5 sec. + aecpc->bufSizeStart = WEBRTC_SPL_MIN( + (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40, + kMaxBufSizeStart); + aecpc->checkBuffSize = 0; + } } - aecpc->filtDelay = WEBRTC_SPL_MAX(0, (short)(0.8*aecpc->filtDelay + 0.2*delayNew)); + // If |checkBuffSize| changed in the if-statement above. + if (!aecpc->checkBuffSize) { + // The system delay is now reasonably stable (or has been unstable + // for too long). When the far-end buffer is filled with + // approximately the same amount of data as reported by the system + // we end the startup phase. 
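Two conversions drive the sizing above: rate_factor equals splitSampFreq / 8000, so rate_factor * 8 is exactly samples per millisecond; and (3 * sum * rate_factor * 8) / (4 * counter * PART_LEN) is 75% of the average reported delay expressed in PART_LEN partitions. A hypothetical helper showing the arithmetic:

/* 75% of the average reported delay, in 64-sample partitions. E.g. a
 * stable 40 ms report at a 16 kHz split rate (rate_factor == 2):
 * (3 * 40 * 2 * 8) / (4 * 1 * 64) == 7 partitions, roughly 28 ms. */
static int StartupSizePartitions(int sum_ms, int counter, int rate_factor) {
  return (3 * sum_ms * rate_factor * 8) / (4 * counter * 64 /* PART_LEN */);
}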
+ int overhead_elements = + WebRtcAec_system_delay(aecpc->aec) / PART_LEN - aecpc->bufSizeStart; + if (overhead_elements == 0) { + // Enable the AEC + aecpc->startup_phase = 0; + } else if (overhead_elements > 0) { + // TODO(bjornv): Do we need a check on how much we actually + // moved the read pointer? It should always be possible to move + // the pointer |overhead_elements| since we have only added data + // to the buffer and no delay compensation nor AEC processing + // has been done. + WebRtcAec_MoveFarReadPtr(aecpc->aec, overhead_elements); - diff = aecpc->filtDelay - aecpc->knownDelay; - if (diff > 224) { - if (aecpc->lastDelayDiff < 96) { - aecpc->timeForDelayChange = 0; - } - else { - aecpc->timeForDelayChange++; - } + // Enable the AEC + aecpc->startup_phase = 0; + } } - else if (diff < 96 && aecpc->knownDelay > 0) { - if (aecpc->lastDelayDiff > 224) { - aecpc->timeForDelayChange = 0; - } - else { - aecpc->timeForDelayChange++; - } - } - else { - aecpc->timeForDelayChange = 0; - } - aecpc->lastDelayDiff = diff; + } else { + // AEC is enabled. + EstBufDelayNormal(aecpc); - if (aecpc->timeForDelayChange > 25) { - aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0); - } - return 0; + // Call the AEC. + // TODO(bjornv): Re-structure such that we don't have to pass + // |aecpc->knownDelay| as input. Change name to something like + // |system_buffer_diff|. + WebRtcAec_ProcessFrames(aecpc->aec, + nearend, + num_bands, + nrOfSamples, + aecpc->knownDelay, + out); + } + + return retVal; } -static int DelayComp(aecpc_t *aecpc) -{ - int nSampFar, nSampSndCard, delayNew, nSampAdd; - const int maxStuffSamp = 10 * FRAME_LEN; +static void ProcessExtended(Aec* self, + const float* const* near, + size_t num_bands, + float* const* out, + size_t num_samples, + int16_t reported_delay_ms, + int32_t skew) { + size_t i; + const int delay_diff_offset = kDelayDiffOffsetSamples; +#if defined(WEBRTC_UNTRUSTED_DELAY) + reported_delay_ms = kFixedDelayMs; +#else + // This is the usual mode where we trust the reported system delay values. + // Due to the longer filter, we no longer add 10 ms to the reported delay + // to reduce chance of non-causality. Instead we apply a minimum here to avoid + // issues with the read pointer jumping around needlessly. + reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs + ? kMinTrustedDelayMs + : reported_delay_ms; + // If the reported delay appears to be bogus, we attempt to recover by using + // the measured fixed delay values. We use >= here because higher layers + // may already clamp to this maximum value, and we would otherwise not + // detect it here. + reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs + ? kFixedDelayMs + : reported_delay_ms; +#endif + self->msInSndCardBuf = reported_delay_ms; - nSampFar = WebRtcApm_get_buffer_size(aecpc->farendBuf); - nSampSndCard = aecpc->msInSndCardBuf * sampMsNb * aecpc->aec->mult; - delayNew = nSampSndCard - nSampFar; - - // Account for resampling frame delay - if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) { - delayNew -= kResamplingDelay; + if (!self->farend_started) { + for (i = 0; i < num_bands; ++i) { + // Only needed if they don't already point to the same place. + if (near[i] != out[i]) { + memcpy(out[i], near[i], sizeof(near[i][0]) * num_samples); + } } + return; + } + if (self->startup_phase) { + // In the extended mode, there isn't a startup "phase", just a special + // action on the first frame. 
+    // In the trusted delay case, we'll take the
+    // current reported delay, unless it's less than our conservative
+    // measurement.
+    int startup_size_ms =
+        reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms;
+#if defined(WEBRTC_ANDROID)
+    int target_delay = startup_size_ms * self->rate_factor * 8;
+#else
+    // To avoid putting the AEC in a non-causal state we're being slightly
+    // conservative and scale by 2. On Android we use a fixed delay and
+    // therefore there is no need to scale the target_delay.
+    int target_delay = startup_size_ms * self->rate_factor * 8 / 2;
+#endif
+    int overhead_elements =
+        (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN;
+    WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements);
+    self->startup_phase = 0;
+  }

-    if (delayNew > FAR_BUF_LEN - FRAME_LEN*aecpc->aec->mult) {
-        // The difference of the buffersizes is larger than the maximum
-        // allowed known delay. Compensate by stuffing the buffer.
-        nSampAdd = (int)(WEBRTC_SPL_MAX((int)(0.5 * nSampSndCard - nSampFar),
-                        FRAME_LEN));
-        nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp);
+  EstBufDelayExtended(self);

-        WebRtcApm_StuffBuffer(aecpc->farendBuf, nSampAdd);
-        aecpc->delayChange = 1; // the delay needs to be updated
-    }
+  {
+    // |delay_diff_offset| gives us the option to manually rewind the delay on
+    // very low delay platforms which can't be expressed purely through
+    // |reported_delay_ms|.
+    const int adjusted_known_delay =
+        WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset);

-    return 0;
+    WebRtcAec_ProcessFrames(self->aec,
+                            near,
+                            num_bands,
+                            num_samples,
+                            adjusted_known_delay,
+                            out);
+  }
+}
+
+static void EstBufDelayNormal(Aec* aecpc) {
+  int nSampSndCard = aecpc->msInSndCardBuf * sampMsNb * aecpc->rate_factor;
+  int current_delay = nSampSndCard - WebRtcAec_system_delay(aecpc->aec);
+  int delay_difference = 0;
+
+  // Before we proceed with the delay estimate filtering we:
+  // 1) Compensate for the frame that will be read.
+  // 2) Compensate for drift resampling.
+  // 3) Compensate for non-causality if needed, since the estimated delay
+  //    can't be negative.
+
+  // 1) Compensating for the frame(s) that will be read/processed.
+  current_delay += FRAME_LEN * aecpc->rate_factor;
+
+  // 2) Account for resampling frame delay.
+  if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
+    current_delay -= kResamplingDelay;
+  }
+
+  // 3) Compensate for non-causality, if needed, by flushing one block.
+  if (current_delay < PART_LEN) {
+    current_delay += WebRtcAec_MoveFarReadPtr(aecpc->aec, 1) * PART_LEN;
+  }
+
+  // We use -1 to signal an initialized state in the "extended" implementation;
+  // compensate for that.
+  aecpc->filtDelay = aecpc->filtDelay < 0 ?
0 : aecpc->filtDelay; + aecpc->filtDelay = + WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay)); + + delay_difference = aecpc->filtDelay - aecpc->knownDelay; + if (delay_difference > 224) { + if (aecpc->lastDelayDiff < 96) { + aecpc->timeForDelayChange = 0; + } else { + aecpc->timeForDelayChange++; + } + } else if (delay_difference < 96 && aecpc->knownDelay > 0) { + if (aecpc->lastDelayDiff > 224) { + aecpc->timeForDelayChange = 0; + } else { + aecpc->timeForDelayChange++; + } + } else { + aecpc->timeForDelayChange = 0; + } + aecpc->lastDelayDiff = delay_difference; + + if (aecpc->timeForDelayChange > 25) { + aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0); + } +} + +static void EstBufDelayExtended(Aec* self) { + int reported_delay = self->msInSndCardBuf * sampMsNb * self->rate_factor; + int current_delay = reported_delay - WebRtcAec_system_delay(self->aec); + int delay_difference = 0; + + // Before we proceed with the delay estimate filtering we: + // 1) Compensate for the frame that will be read. + // 2) Compensate for drift resampling. + // 3) Compensate for non-causality if needed, since the estimated delay can't + // be negative. + + // 1) Compensating for the frame(s) that will be read/processed. + current_delay += FRAME_LEN * self->rate_factor; + + // 2) Account for resampling frame delay. + if (self->skewMode == kAecTrue && self->resample == kAecTrue) { + current_delay -= kResamplingDelay; + } + + // 3) Compensate for non-causality, if needed, by flushing two blocks. + if (current_delay < PART_LEN) { + current_delay += WebRtcAec_MoveFarReadPtr(self->aec, 2) * PART_LEN; + } + + if (self->filtDelay == -1) { + self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay); + } else { + self->filtDelay = WEBRTC_SPL_MAX( + 0, (short)(0.95 * self->filtDelay + 0.05 * current_delay)); + } + + delay_difference = self->filtDelay - self->knownDelay; + if (delay_difference > 384) { + if (self->lastDelayDiff < 128) { + self->timeForDelayChange = 0; + } else { + self->timeForDelayChange++; + } + } else if (delay_difference < 128 && self->knownDelay > 0) { + if (self->lastDelayDiff > 384) { + self->timeForDelayChange = 0; + } else { + self->timeForDelayChange++; + } + } else { + self->timeForDelayChange = 0; + } + self->lastDelayDiff = delay_difference; + + if (self->timeForDelayChange > 25) { + self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0); + } } diff --git a/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h new file mode 100644 index 0000000..95a6cf3 --- /dev/null +++ b/webrtc/modules/audio_processing/aec/echo_cancellation_internal.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */

+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/modules/audio_processing/aec/aec_core.h"
+
+typedef struct {
+  int delayCtr;
+  int sampFreq;
+  int splitSampFreq;
+  int scSampFreq;
+  float sampFactor;  // scSampFreq / splitSampFreq
+  short skewMode;
+  int bufSizeStart;
+  int knownDelay;
+  int rate_factor;
+
+  short initFlag;  // indicates if AEC has been initialized
+
+  // Variables used for averaging far end buffer size
+  short counter;
+  int sum;
+  short firstVal;
+  short checkBufSizeCtr;
+
+  // Variables used for delay shifts
+  short msInSndCardBuf;
+  short filtDelay;  // Filtered delay estimate.
+  int timeForDelayChange;
+  int startup_phase;
+  int checkBuffSize;
+  short lastDelayDiff;
+
+#ifdef WEBRTC_AEC_DEBUG_DUMP
+  FILE* bufFile;
+  FILE* delayFile;
+  FILE* skewFile;
+#endif
+
+  // Structures
+  void* resampler;
+
+  int skewFrCtr;
+  int resample;  // if the skew is small enough we don't resample
+  int highSkewCtr;
+  float skew;
+
+  RingBuffer* far_pre_buf;  // Time domain far-end pre-buffer.
+
+  int lastError;
+
+  int farend_started;
+
+  AecCore* aec;
+} Aec;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/aec/include/echo_cancellation.h b/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
new file mode 100644
index 0000000..a340cf8
--- /dev/null
+++ b/webrtc/modules/audio_processing/aec/include/echo_cancellation.h
@@ -0,0 +1,245 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+// Errors
+#define AEC_UNSPECIFIED_ERROR 12000
+#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
+#define AEC_UNINITIALIZED_ERROR 12002
+#define AEC_NULL_POINTER_ERROR 12003
+#define AEC_BAD_PARAMETER_ERROR 12004
+
+// Warnings
+#define AEC_BAD_PARAMETER_WARNING 12050
+
+enum {
+  kAecNlpConservative = 0,
+  kAecNlpModerate,
+  kAecNlpAggressive
+};
+
+enum {
+  kAecFalse = 0,
+  kAecTrue
+};
+
+typedef struct {
+  int16_t nlpMode;      // default kAecNlpModerate
+  int16_t skewMode;     // default kAecFalse
+  int16_t metricsMode;  // default kAecFalse
+  int delay_logging;    // default kAecFalse
+  // float realSkew;
+} AecConfig;
+
+typedef struct {
+  int instant;
+  int average;
+  int max;
+  int min;
+} AecLevel;
+
+typedef struct {
+  AecLevel rerl;
+  AecLevel erl;
+  AecLevel erle;
+  AecLevel aNlp;
+} AecMetrics;
+
+struct AecCore;
+
+#ifdef __cplusplus
extern "C" {
+#endif
+
+/*
+ * Allocates the memory needed by the AEC. The memory needs to be initialized
+ * separately using the WebRtcAec_Init() function. Returns a pointer to the
+ * object or NULL on error.
+ */
+void* WebRtcAec_Create();
+
+/*
+ * This function releases the memory allocated by WebRtcAec_Create().
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecInst         Pointer to the AEC instance
+ */
+void WebRtcAec_Free(void* aecInst);
+
+/*
+ * Initializes an AEC instance.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecInst         Pointer to the AEC instance
+ * int32_t      sampFreq        Sampling frequency of data
+ * int32_t      scSampFreq      Soundcard sampling frequency
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t      return          0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
+
+/*
+ * Inserts an 80 or 160 sample block of data into the farend buffer.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        aecInst         Pointer to the AEC instance
+ * const float* farend          In buffer containing one frame of
+ *                              farend signal for L band
+ * size_t       nrOfSamples     Number of samples in farend buffer
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int32_t      return          0: OK
+ *                             -1: error
+ */
+int32_t WebRtcAec_BufferFarend(void* aecInst,
+                               const float* farend,
+                               size_t nrOfSamples);
+
+/*
+ * Runs the echo canceller on 80 or 160 sample blocks of data.
+ *
+ * Inputs                        Description
+ * -------------------------------------------------------------------
+ * void*         aecInst         Pointer to the AEC instance
+ * float* const* nearend         In buffer containing one frame of
+ *                               nearend+echo signal for each band
+ * size_t        num_bands       Number of bands in nearend buffer
+ * size_t        nrOfSamples     Number of samples in nearend buffer
+ * int16_t       msInSndCardBuf  Delay estimate for sound card and
+ *                               system buffers
+ * int32_t       skew            Difference between number of samples played
+ *                               and recorded at the soundcard (for clock skew
+ *                               compensation)
+ *
+ * Outputs                       Description
+ * -------------------------------------------------------------------
+ * float* const* out             Out buffer, one frame of processed nearend
+ *                               for each band
+ * int32_t       return          0: OK
+ *                              -1: error
+ */
+int32_t WebRtcAec_Process(void* aecInst,
+                          const float* const* nearend,
+                          size_t num_bands,
+                          float* const* out,
+                          size_t nrOfSamples,
+                          int16_t msInSndCardBuf,
+                          int32_t skew);
+
+/*
+ * This function enables the user to set certain parameters on-the-fly.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        handle          Pointer to the AEC instance
+ * AecConfig    config          Config instance that contains all
+ *                              properties to be set
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int          return          0: OK
+ *                             -1: error
+ */
+int WebRtcAec_set_config(void* handle, AecConfig config);
+
+/*
+ * Gets the current echo status of the nearend signal.
+ *
+ * Inputs                       Description
+ * -------------------------------------------------------------------
+ * void*        handle          Pointer to the AEC instance
+ *
+ * Outputs                      Description
+ * -------------------------------------------------------------------
+ * int*         status          0: Almost certainly nearend single-talk
+ *                              1: Might not be nearend single-talk
+ * int          return          0: OK
+ *                             -1: error
+ */
+int WebRtcAec_get_echo_status(void* handle, int* status);
+
+/*
+ * Gets the current echo metrics for the session.
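Taken together, these calls follow a create/init/buffer/process/free lifecycle. A minimal mono sketch, assuming 16 kHz capture and render (160 samples per 10 ms frame) and a placeholder 50 ms device delay; error handling is abbreviated and the buffer names are hypothetical:

#include <stddef.h>

static int CancelEchoSketch(const float* far, const float* near,
                            float* out, size_t num_frames) {
  size_t i;
  void* aec = WebRtcAec_Create();
  if (aec == NULL || WebRtcAec_Init(aec, 16000, 16000) != 0) {
    WebRtcAec_Free(aec);
    return -1;
  }
  for (i = 0; i < num_frames; ++i) {
    /* One pointer per band; a single band below 16 kHz. */
    const float* near_bands[1] = { near + 160 * i };
    float* out_bands[1] = { out + 160 * i };
    WebRtcAec_BufferFarend(aec, far + 160 * i, 160);
    WebRtcAec_Process(aec, near_bands, 1, out_bands, 160,
                      50 /* assumed device delay, ms */, 0);
  }
  WebRtcAec_Free(aec);
  return 0;
}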
+ * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * AecMetrics* metrics Struct which will be filled out with the + * current echo metrics. + * int return 0: OK + * -1: error + */ +int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics); + +/* + * Gets the current delay metrics for the session. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* handle Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int* median Delay median value. + * int* std Delay standard deviation. + * float* fraction_poor_delays Fraction of the delay estimates that may + * cause the AEC to perform poorly. + * + * int return 0: OK + * -1: error + */ +int WebRtcAec_GetDelayMetrics(void* handle, + int* median, + int* std, + float* fraction_poor_delays); + +/* + * Gets the last error code. + * + * Inputs Description + * ------------------------------------------------------------------- + * void* aecInst Pointer to the AEC instance + * + * Outputs Description + * ------------------------------------------------------------------- + * int32_t return 11000-11100: error code + */ +int32_t WebRtcAec_get_error_code(void* aecInst); + +// Returns a pointer to the low level AEC handle. +// +// Input: +// - handle : Pointer to the AEC instance. +// +// Return value: +// - AecCore pointer : NULL for error. +// +struct AecCore* WebRtcAec_aec_core(void* handle); + +#ifdef __cplusplus +} +#endif +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_ diff --git a/webrtc/modules/audio_processing/aec/interface/echo_cancellation.h b/webrtc/modules/audio_processing/aec/interface/echo_cancellation.h deleted file mode 100644 index 4da6e73..0000000 --- a/webrtc/modules/audio_processing/aec/interface/echo_cancellation.h +++ /dev/null @@ -1,278 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. 
- */ - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ - -#include "typedefs.h" - -// Errors -#define AEC_UNSPECIFIED_ERROR 12000 -#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001 -#define AEC_UNINITIALIZED_ERROR 12002 -#define AEC_NULL_POINTER_ERROR 12003 -#define AEC_BAD_PARAMETER_ERROR 12004 - -// Warnings -#define AEC_BAD_PARAMETER_WARNING 12050 - -enum { - kAecNlpConservative = 0, - kAecNlpModerate, - kAecNlpAggressive -}; - -enum { - kAecFalse = 0, - kAecTrue -}; - -typedef struct { - WebRtc_Word16 nlpMode; // default kAecNlpModerate - WebRtc_Word16 skewMode; // default kAecFalse - WebRtc_Word16 metricsMode; // default kAecFalse - int delay_logging; // default kAecFalse - //float realSkew; -} AecConfig; - -typedef struct { - WebRtc_Word16 instant; - WebRtc_Word16 average; - WebRtc_Word16 max; - WebRtc_Word16 min; -} AecLevel; - -typedef struct { - AecLevel rerl; - AecLevel erl; - AecLevel erle; - AecLevel aNlp; -} AecMetrics; - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Allocates the memory needed by the AEC. The memory needs to be initialized - * separately using the WebRtcAec_Init() function. - * - * Inputs Description - * ------------------------------------------------------------------- - * void **aecInst Pointer to the AEC instance to be created - * and initialized - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_Create(void **aecInst); - -/* - * This function releases the memory allocated by WebRtcAec_Create(). - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_Free(void *aecInst); - -/* - * Initializes an AEC instance. - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * WebRtc_Word32 sampFreq Sampling frequency of data - * WebRtc_Word32 scSampFreq Soundcard sampling frequency - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_Init(void *aecInst, - WebRtc_Word32 sampFreq, - WebRtc_Word32 scSampFreq); - -/* - * Inserts an 80 or 160 sample block of data into the farend buffer. - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * WebRtc_Word16 *farend In buffer containing one frame of - * farend signal for L band - * WebRtc_Word16 nrOfSamples Number of samples in farend buffer - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_BufferFarend(void *aecInst, - const WebRtc_Word16 *farend, - WebRtc_Word16 nrOfSamples); - -/* - * Runs the echo canceller on an 80 or 160 sample blocks of data. 
- * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * WebRtc_Word16 *nearend In buffer containing one frame of - * nearend+echo signal for L band - * WebRtc_Word16 *nearendH In buffer containing one frame of - * nearend+echo signal for H band - * WebRtc_Word16 nrOfSamples Number of samples in nearend buffer - * WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and - * system buffers - * WebRtc_Word16 skew Difference between number of samples played - * and recorded at the soundcard (for clock skew - * compensation) - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word16 *out Out buffer, one frame of processed nearend - * for L band - * WebRtc_Word16 *outH Out buffer, one frame of processed nearend - * for H band - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_Process(void *aecInst, - const WebRtc_Word16 *nearend, - const WebRtc_Word16 *nearendH, - WebRtc_Word16 *out, - WebRtc_Word16 *outH, - WebRtc_Word16 nrOfSamples, - WebRtc_Word16 msInSndCardBuf, - WebRtc_Word32 skew); - -/* - * This function enables the user to set certain parameters on-the-fly. - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * AecConfig config Config instance that contains all - * properties to be set - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config); - -/* - * Gets the on-the-fly paramters. - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * AecConfig *config Pointer to the config instance that - * all properties will be written to - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config); - -/* - * Gets the current echo status of the nearend signal. - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word16 *status 0: Almost certainly nearend single-talk - * 1: Might not be neared single-talk - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status); - -/* - * Gets the current echo metrics for the session. - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * AecMetrics *metrics Struct which will be filled out with the - * current echo metrics. - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics); - -/* - * Gets the current delay metrics for the session. 
- * - * Inputs Description - * ------------------------------------------------------------------- - * void* handle Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * int* median Delay median value. - * int* std Delay standard deviation. - * - * int return 0: OK - * -1: error - */ -int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std); - -/* - * Gets the last error code. - * - * Inputs Description - * ------------------------------------------------------------------- - * void *aecInst Pointer to the AEC instance - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word32 return 11000-11100: error code - */ -WebRtc_Word32 WebRtcAec_get_error_code(void *aecInst); - -/* - * Gets a version string. - * - * Inputs Description - * ------------------------------------------------------------------- - * char *versionStr Pointer to a string array - * WebRtc_Word16 len The maximum length of the string - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word8 *versionStr Pointer to a string array - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len); - -#ifdef __cplusplus -} -#endif -#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ */ diff --git a/webrtc/modules/audio_processing/aec/resampler.c b/webrtc/modules/audio_processing/aec/resampler.c deleted file mode 100644 index 468fa8c..0000000 --- a/webrtc/modules/audio_processing/aec/resampler.c +++ /dev/null @@ -1,233 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for clock - * skew by resampling the farend signal. 
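The implementation removed below stretches the far end by a factor of (1 + skew) using two-point linear interpolation. A simplified standalone sketch of that kernel, without the history buffer and output saturation of the real code:

#include <stddef.h>

static size_t ResampleLinearSketch(const short* in, size_t in_len,
                                   float skew, short* out) {
  const float step = 1.0f + skew;  /* input samples consumed per output */
  size_t k = 0;
  float t = 0.0f;
  while ((size_t)t + 1 < in_len) {  /* need both in[n] and in[n + 1] */
    const size_t n = (size_t)t;
    const float frac = t - (float)n;
    out[k] = (short)(in[n] + frac * (in[n + 1] - in[n]));
    ++k;
    t = step * (float)k;
  }
  return k;  /* number of output samples produced */
}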
- */ - -#include -#include -#include -#include - -#include "resampler.h" -#include "aec_core.h" - -enum { kFrameBufferSize = FRAME_LEN * 4 }; -enum { kEstimateLengthFrames = 400 }; - -typedef struct { - short buffer[kFrameBufferSize]; - float position; - - int deviceSampleRateHz; - int skewData[kEstimateLengthFrames]; - int skewDataIndex; - float skewEstimate; -} resampler_t; - -static int EstimateSkew(const int* rawSkew, - int size, - int absLimit, - float *skewEst); - -int WebRtcAec_CreateResampler(void **resampInst) -{ - resampler_t *obj = malloc(sizeof(resampler_t)); - *resampInst = obj; - if (obj == NULL) { - return -1; - } - - return 0; -} - -int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz) -{ - resampler_t *obj = (resampler_t*) resampInst; - memset(obj->buffer, 0, sizeof(obj->buffer)); - obj->position = 0.0; - - obj->deviceSampleRateHz = deviceSampleRateHz; - memset(obj->skewData, 0, sizeof(obj->skewData)); - obj->skewDataIndex = 0; - obj->skewEstimate = 0.0; - - return 0; -} - -int WebRtcAec_FreeResampler(void *resampInst) -{ - resampler_t *obj = (resampler_t*) resampInst; - free(obj); - - return 0; -} - -int WebRtcAec_ResampleLinear(void *resampInst, - const short *inspeech, - int size, - float skew, - short *outspeech) -{ - resampler_t *obj = (resampler_t*) resampInst; - - short *y; - float be, tnew, interp; - int tn, outsize, mm; - - if (size < 0 || size > 2 * FRAME_LEN) { - return -1; - } - - // Add new frame data in lookahead - memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay], - inspeech, - size * sizeof(short)); - - // Sample rate ratio - be = 1 + skew; - - // Loop over input frame - mm = 0; - y = &obj->buffer[FRAME_LEN]; // Point at current frame - - tnew = be * mm + obj->position; - tn = (int) tnew; - - while (tn < size) { - - // Interpolation - interp = y[tn] + (tnew - tn) * (y[tn+1] - y[tn]); - - if (interp > 32767) { - interp = 32767; - } - else if (interp < -32768) { - interp = -32768; - } - - outspeech[mm] = (short) interp; - mm++; - - tnew = be * mm + obj->position; - tn = (int) tnew; - } - - outsize = mm; - obj->position += outsize * be - size; - - // Shift buffer - memmove(obj->buffer, - &obj->buffer[size], - (kFrameBufferSize - size) * sizeof(short)); - - return outsize; -} - -int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst) -{ - resampler_t *obj = (resampler_t*)resampInst; - int err = 0; - - if (obj->skewDataIndex < kEstimateLengthFrames) { - obj->skewData[obj->skewDataIndex] = rawSkew; - obj->skewDataIndex++; - } - else if (obj->skewDataIndex == kEstimateLengthFrames) { - err = EstimateSkew(obj->skewData, - kEstimateLengthFrames, - obj->deviceSampleRateHz, - skewEst); - obj->skewEstimate = *skewEst; - obj->skewDataIndex++; - } - else { - *skewEst = obj->skewEstimate; - } - - return err; -} - -int EstimateSkew(const int* rawSkew, - int size, - int deviceSampleRateHz, - float *skewEst) -{ - const int absLimitOuter = (int)(0.04f * deviceSampleRateHz); - const int absLimitInner = (int)(0.0025f * deviceSampleRateHz); - int i = 0; - int n = 0; - float rawAvg = 0; - float err = 0; - float rawAbsDev = 0; - int upperLimit = 0; - int lowerLimit = 0; - float cumSum = 0; - float x = 0; - float x2 = 0; - float y = 0; - float xy = 0; - float xAvg = 0; - float denom = 0; - float skew = 0; - - *skewEst = 0; // Set in case of error below. 
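The accumulator loop continuing below fits a least-squares line to the cumulative skew sum: with x = 1..n and y the running sum, the fitted slope is (xy_sum - x_avg * y_sum) / (x2_sum - x_avg * x_sum), which is exactly what the xy, y, x2, xAvg and denom variables evaluate; that slope is the estimated per-frame drift.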
- for (i = 0; i < size; i++) { - if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { - n++; - rawAvg += rawSkew[i]; - } - } - - if (n == 0) { - return -1; - } - assert(n > 0); - rawAvg /= n; - - for (i = 0; i < size; i++) { - if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) { - err = rawSkew[i] - rawAvg; - rawAbsDev += err >= 0 ? err : -err; - } - } - assert(n > 0); - rawAbsDev /= n; - upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling. - lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor. - - n = 0; - for (i = 0; i < size; i++) { - if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) || - (rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) { - n++; - cumSum += rawSkew[i]; - x += n; - x2 += n*n; - y += cumSum; - xy += n * cumSum; - } - } - - if (n == 0) { - return -1; - } - assert(n > 0); - xAvg = x / n; - denom = x2 - xAvg*x; - - if (denom != 0) { - skew = (xy - xAvg*y) / denom; - } - - *skewEst = skew; - return 0; -} diff --git a/webrtc/modules/audio_processing/aec/resampler.h b/webrtc/modules/audio_processing/aec/resampler.h deleted file mode 100644 index 9cb2837..0000000 --- a/webrtc/modules/audio_processing/aec/resampler.h +++ /dev/null @@ -1,32 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_ - -enum { kResamplingDelay = 1 }; - -// Unless otherwise specified, functions return 0 on success and -1 on error -int WebRtcAec_CreateResampler(void **resampInst); -int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz); -int WebRtcAec_FreeResampler(void *resampInst); - -// Estimates skew from raw measurement. -int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst); - -// Resamples input using linear interpolation. -// Returns size of resampled array. -int WebRtcAec_ResampleLinear(void *resampInst, - const short *inspeech, - int size, - float skew, - short *outspeech); - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_ diff --git a/webrtc/modules/audio_processing/aecm/Makefile.am b/webrtc/modules/audio_processing/aecm/Makefile.am deleted file mode 100644 index 5d0270b..0000000 --- a/webrtc/modules/audio_processing/aecm/Makefile.am +++ /dev/null @@ -1,9 +0,0 @@ -noinst_LTLIBRARIES = libaecm.la - -libaecm_la_SOURCES = interface/echo_control_mobile.h \ - echo_control_mobile.c \ - aecm_core.c \ - aecm_core.h -libaecm_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \ - -I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \ - -I$(top_srcdir)/src/modules/audio_processing/utility diff --git a/webrtc/modules/audio_processing/aecm/aecm.gypi b/webrtc/modules/audio_processing/aecm/aecm.gypi deleted file mode 100644 index a4997fb..0000000 --- a/webrtc/modules/audio_processing/aecm/aecm.gypi +++ /dev/null @@ -1,34 +0,0 @@ -# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. 
An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-{
-  'targets': [
-    {
-      'target_name': 'aecm',
-      'type': '<(library)',
-      'dependencies': [
-        '<(webrtc_root)/common_audio/common_audio.gyp:spl',
-        'apm_util'
-      ],
-      'include_dirs': [
-        'interface',
-      ],
-      'direct_dependent_settings': {
-        'include_dirs': [
-          'interface',
-        ],
-      },
-      'sources': [
-        'interface/echo_control_mobile.h',
-        'echo_control_mobile.c',
-        'aecm_core.c',
-        'aecm_core.h',
-      ],
-    },
-  ],
-}
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.c b/webrtc/modules/audio_processing/aecm/aecm_core.c
index 13bffae..b801f07 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core.c
+++ b/webrtc/modules/audio_processing/aecm/aecm_core.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -8,76 +8,118 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#include "aecm_core.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"

 #include <assert.h>
+#include <stddef.h>
 #include <stdlib.h>

-#include "echo_control_mobile.h"
-#include "delay_estimator.h"
-#include "ring_buffer.h"
-#include "typedefs.h"
-
-#ifdef ARM_WINM_LOG
-#include <stdio.h>
-#include <windows.h>
-#endif
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"

 #ifdef AEC_DEBUG
 FILE *dfile;
 FILE *testfile;
 #endif

-#ifdef _MSC_VER // visual c++
-#define ALIGN8_BEG __declspec(align(8))
-#define ALIGN8_END
-#else // gcc or icc
-#define ALIGN8_BEG
-#define ALIGN8_END __attribute__((aligned(8)))
-#endif
-
-#ifdef AECM_SHORT
-
-// Square root of Hanning window in Q14
-const WebRtc_Word16 WebRtcAecm_kSqrtHanning[] =
-{
-    0, 804, 1606, 2404, 3196, 3981, 4756, 5520,
-    6270, 7005, 7723, 8423, 9102, 9760, 10394, 11003,
-    11585, 12140, 12665, 13160, 13623, 14053, 14449, 14811,
-    15137, 15426, 15679, 15893, 16069, 16207, 16305, 16364,
-    16384
+const int16_t WebRtcAecm_kCosTable[] = {
+    8192, 8190, 8187, 8180, 8172, 8160, 8147, 8130, 8112,
+    8091, 8067, 8041, 8012, 7982, 7948, 7912, 7874, 7834,
+    7791, 7745, 7697, 7647, 7595, 7540, 7483, 7424, 7362,
+    7299, 7233, 7164, 7094, 7021, 6947, 6870, 6791, 6710,
+    6627, 6542, 6455, 6366, 6275, 6182, 6087, 5991, 5892,
+    5792, 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930,
+    4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, 3845,
+    3719, 3591, 3462, 3331, 3200, 3068, 2935, 2801, 2667,
+    2531, 2395, 2258, 2120, 1981, 1842, 1703, 1563, 1422,
+    1281, 1140, 998, 856, 713, 571, 428, 285, 142,
+    0, -142, -285, -428, -571, -713, -856, -998, -1140,
+    -1281, -1422, -1563, -1703, -1842, -1981, -2120, -2258, -2395,
+    -2531, -2667, -2801, -2935, -3068, -3200, -3331, -3462, -3591,
+    -3719, -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698,
+    -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, -5690,
+    -5792, -5892, -5991, -6087, -6182, -6275, -6366, -6455, -6542,
+    -6627, -6710, -6791, -6870, -6947, -7021, -7094, -7164,
-7233, + -7299, -7362, -7424, -7483, -7540, -7595, -7647, -7697, -7745, + -7791, -7834, -7874, -7912, -7948, -7982, -8012, -8041, -8067, + -8091, -8112, -8130, -8147, -8160, -8172, -8180, -8187, -8190, + -8191, -8190, -8187, -8180, -8172, -8160, -8147, -8130, -8112, + -8091, -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, -7362, + -7299, -7233, -7164, -7094, -7021, -6947, -6870, -6791, -6710, + -6627, -6542, -6455, -6366, -6275, -6182, -6087, -5991, -5892, + -5792, -5690, -5586, -5481, -5374, -5265, -5155, -5043, -4930, + -4815, -4698, -4580, -4461, -4341, -4219, -4096, -3971, -3845, + -3719, -3591, -3462, -3331, -3200, -3068, -2935, -2801, -2667, + -2531, -2395, -2258, -2120, -1981, -1842, -1703, -1563, -1422, + -1281, -1140, -998, -856, -713, -571, -428, -285, -142, + 0, 142, 285, 428, 571, 713, 856, 998, 1140, + 1281, 1422, 1563, 1703, 1842, 1981, 2120, 2258, 2395, + 2531, 2667, 2801, 2935, 3068, 3200, 3331, 3462, 3591, + 3719, 3845, 3971, 4095, 4219, 4341, 4461, 4580, 4698, + 4815, 4930, 5043, 5155, 5265, 5374, 5481, 5586, 5690, + 5792, 5892, 5991, 6087, 6182, 6275, 6366, 6455, 6542, + 6627, 6710, 6791, 6870, 6947, 7021, 7094, 7164, 7233, + 7299, 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745, + 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, 8067, + 8091, 8112, 8130, 8147, 8160, 8172, 8180, 8187, 8190 }; -#else - -// Square root of Hanning window in Q14 -const ALIGN8_BEG WebRtc_Word16 WebRtcAecm_kSqrtHanning[] ALIGN8_END = -{ - 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, - 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, 6591, 6954, 7313, 7668, 8019, 8364, - 8705, 9040, 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, 11795, 12068, 12335, - 12594, 12845, 13089, 13325, 13553, 13773, 13985, 14189, 14384, 14571, 14749, 14918, - 15079, 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, 16111, 16179, 16237, - 16286, 16325, 16354, 16373, 16384 +const int16_t WebRtcAecm_kSinTable[] = { + 0, 142, 285, 428, 571, 713, 856, 998, + 1140, 1281, 1422, 1563, 1703, 1842, 1981, 2120, + 2258, 2395, 2531, 2667, 2801, 2935, 3068, 3200, + 3331, 3462, 3591, 3719, 3845, 3971, 4095, 4219, + 4341, 4461, 4580, 4698, 4815, 4930, 5043, 5155, + 5265, 5374, 5481, 5586, 5690, 5792, 5892, 5991, + 6087, 6182, 6275, 6366, 6455, 6542, 6627, 6710, + 6791, 6870, 6947, 7021, 7094, 7164, 7233, 7299, + 7362, 7424, 7483, 7540, 7595, 7647, 7697, 7745, + 7791, 7834, 7874, 7912, 7948, 7982, 8012, 8041, + 8067, 8091, 8112, 8130, 8147, 8160, 8172, 8180, + 8187, 8190, 8191, 8190, 8187, 8180, 8172, 8160, + 8147, 8130, 8112, 8091, 8067, 8041, 8012, 7982, + 7948, 7912, 7874, 7834, 7791, 7745, 7697, 7647, + 7595, 7540, 7483, 7424, 7362, 7299, 7233, 7164, + 7094, 7021, 6947, 6870, 6791, 6710, 6627, 6542, + 6455, 6366, 6275, 6182, 6087, 5991, 5892, 5792, + 5690, 5586, 5481, 5374, 5265, 5155, 5043, 4930, + 4815, 4698, 4580, 4461, 4341, 4219, 4096, 3971, + 3845, 3719, 3591, 3462, 3331, 3200, 3068, 2935, + 2801, 2667, 2531, 2395, 2258, 2120, 1981, 1842, + 1703, 1563, 1422, 1281, 1140, 998, 856, 713, + 571, 428, 285, 142, 0, -142, -285, -428, + -571, -713, -856, -998, -1140, -1281, -1422, -1563, + -1703, -1842, -1981, -2120, -2258, -2395, -2531, -2667, + -2801, -2935, -3068, -3200, -3331, -3462, -3591, -3719, + -3845, -3971, -4095, -4219, -4341, -4461, -4580, -4698, + -4815, -4930, -5043, -5155, -5265, -5374, -5481, -5586, + -5690, -5792, -5892, -5991, -6087, -6182, -6275, -6366, + -6455, -6542, -6627, -6710, -6791, -6870, -6947, -7021, + -7094, -7164, -7233, 
-7299, -7362, -7424, -7483, -7540, + -7595, -7647, -7697, -7745, -7791, -7834, -7874, -7912, + -7948, -7982, -8012, -8041, -8067, -8091, -8112, -8130, + -8147, -8160, -8172, -8180, -8187, -8190, -8191, -8190, + -8187, -8180, -8172, -8160, -8147, -8130, -8112, -8091, + -8067, -8041, -8012, -7982, -7948, -7912, -7874, -7834, + -7791, -7745, -7697, -7647, -7595, -7540, -7483, -7424, + -7362, -7299, -7233, -7164, -7094, -7021, -6947, -6870, + -6791, -6710, -6627, -6542, -6455, -6366, -6275, -6182, + -6087, -5991, -5892, -5792, -5690, -5586, -5481, -5374, + -5265, -5155, -5043, -4930, -4815, -4698, -4580, -4461, + -4341, -4219, -4096, -3971, -3845, -3719, -3591, -3462, + -3331, -3200, -3068, -2935, -2801, -2667, -2531, -2395, + -2258, -2120, -1981, -1842, -1703, -1563, -1422, -1281, + -1140, -998, -856, -713, -571, -428, -285, -142 }; -#endif - -//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation -static const WebRtc_UWord16 kAlpha1 = 32584; -//Q15 beta = 0.12967166976970 const Factor for magnitude approximation -static const WebRtc_UWord16 kBeta1 = 4249; -//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation -static const WebRtc_UWord16 kAlpha2 = 30879; -//Q15 beta = 0.33787806009150 const Factor for magnitude approximation -static const WebRtc_UWord16 kBeta2 = 11072; -//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation -static const WebRtc_UWord16 kAlpha3 = 26951; -//Q15 beta = 0.57762063060713 const Factor for magnitude approximation -static const WebRtc_UWord16 kBeta3 = 18927; - // Initialization table for echo channel in 8 kHz -static const WebRtc_Word16 kChannelStored8kHz[PART_LEN1] = { +static const int16_t kChannelStored8kHz[PART_LEN1] = { 2040, 1815, 1590, 1498, 1405, 1395, 1385, 1418, 1451, 1506, 1562, 1644, 1726, 1804, 1882, 1918, 1953, 1982, 2010, 2025, 2040, 2034, 2027, 2021, @@ -90,7 +132,7 @@ static const WebRtc_Word16 kChannelStored8kHz[PART_LEN1] = { }; // Initialization table for echo channel in 16 kHz -static const WebRtc_Word16 kChannelStored16kHz[PART_LEN1] = { +static const int16_t kChannelStored16kHz[PART_LEN1] = { 2040, 1590, 1405, 1385, 1451, 1562, 1726, 1882, 1953, 2010, 2040, 2027, 2014, 1980, 1869, 1732, 1635, 1572, 1517, 1444, 1367, 1294, 1245, 1233, @@ -102,94 +144,152 @@ static const WebRtc_Word16 kChannelStored16kHz[PART_LEN1] = { 3153 }; -static const WebRtc_Word16 kNoiseEstQDomain = 15; -static const WebRtc_Word16 kNoiseEstIncCount = 5; +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q) { + // Get new buffer position + self->far_history_pos++; + if (self->far_history_pos >= MAX_DELAY) { + self->far_history_pos = 0; + } + // Update Q-domain buffer + self->far_q_domains[self->far_history_pos] = far_q; + // Update far end spectrum buffer + memcpy(&(self->far_history[self->far_history_pos * PART_LEN1]), + far_spectrum, + sizeof(uint16_t) * PART_LEN1); +} -static void ComfortNoise(AecmCore_t* aecm, - const WebRtc_UWord16* dfa, - complex16_t* out, - const WebRtc_Word16* lambda); +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been +// called before AlignedFarend(...). 
Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, + int* far_q, + int delay) { + int buffer_position = 0; + assert(self != NULL); + buffer_position = self->far_history_pos - delay; -static WebRtc_Word16 CalcSuppressionGain(AecmCore_t * const aecm); + // Check buffer position + if (buffer_position < 0) { + buffer_position += MAX_DELAY; + } + // Get Q-domain + *far_q = self->far_q_domains[buffer_position]; + // Return far end spectrum + return &(self->far_history[buffer_position * PART_LEN1]); +} -#ifdef ARM_WINM_LOG -HANDLE logFile = NULL; -#endif +// Declare function pointers. +CalcLinearEnergies WebRtcAecm_CalcLinearEnergies; +StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel; +ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel; -int WebRtcAecm_CreateCore(AecmCore_t **aecmInst) -{ - AecmCore_t *aecm = malloc(sizeof(AecmCore_t)); - *aecmInst = aecm; - if (aecm == NULL) - { - return -1; - } +AecmCore* WebRtcAecm_CreateCore() { + AecmCore* aecm = malloc(sizeof(AecmCore)); - if (WebRtcApm_CreateBuffer(&aecm->farFrameBuf, FRAME_LEN + PART_LEN) == -1) + aecm->farFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->farFrameBuf) { WebRtcAecm_FreeCore(aecm); - aecm = NULL; - return -1; + return NULL; } - if (WebRtcApm_CreateBuffer(&aecm->nearNoisyFrameBuf, FRAME_LEN + PART_LEN) == -1) + aecm->nearNoisyFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->nearNoisyFrameBuf) { WebRtcAecm_FreeCore(aecm); - aecm = NULL; - return -1; + return NULL; } - if (WebRtcApm_CreateBuffer(&aecm->nearCleanFrameBuf, FRAME_LEN + PART_LEN) == -1) + aecm->nearCleanFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->nearCleanFrameBuf) { WebRtcAecm_FreeCore(aecm); - aecm = NULL; - return -1; + return NULL; } - if (WebRtcApm_CreateBuffer(&aecm->outFrameBuf, FRAME_LEN + PART_LEN) == -1) + aecm->outFrameBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, + sizeof(int16_t)); + if (!aecm->outFrameBuf) { WebRtcAecm_FreeCore(aecm); - aecm = NULL; - return -1; + return NULL; } - if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator, - PART_LEN1, - MAX_DELAY, - 1) == -1) { + aecm->delay_estimator_farend = WebRtc_CreateDelayEstimatorFarend(PART_LEN1, + MAX_DELAY); + if (aecm->delay_estimator_farend == NULL) { WebRtcAecm_FreeCore(aecm); - aecm = NULL; - return -1; + return NULL; + } + aecm->delay_estimator = + WebRtc_CreateDelayEstimator(aecm->delay_estimator_farend, 0); + if (aecm->delay_estimator == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; + } + // TODO(bjornv): Explicitly disable robust delay validation until no + // performance regression has been established. Then remove the line. + WebRtc_enable_robust_validation(aecm->delay_estimator, 0); + + aecm->real_fft = WebRtcSpl_CreateRealFFT(PART_LEN_SHIFT); + if (aecm->real_fft == NULL) { + WebRtcAecm_FreeCore(aecm); + return NULL; } // Init some aecm pointers. 16 and 32 byte alignment is only necessary // for Neon code currently. 
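  // Rounding up with (addr + 31) & ~31 yields the first 32-byte boundary
  // at or above addr (the +15 / ~15 variants give 16-byte alignment); the
  // backing *_buf arrays are over-allocated by a few elements so the
  // aligned pointer always stays within bounds.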
- aecm->xBuf = (WebRtc_Word16*) (((uintptr_t)aecm->xBuf_buf + 31) & ~ 31); - aecm->dBufClean = (WebRtc_Word16*) (((uintptr_t)aecm->dBufClean_buf + 31) & ~ 31); - aecm->dBufNoisy = (WebRtc_Word16*) (((uintptr_t)aecm->dBufNoisy_buf + 31) & ~ 31); - aecm->outBuf = (WebRtc_Word16*) (((uintptr_t)aecm->outBuf_buf + 15) & ~ 15); - aecm->channelStored = (WebRtc_Word16*) (((uintptr_t) + aecm->xBuf = (int16_t*) (((uintptr_t)aecm->xBuf_buf + 31) & ~ 31); + aecm->dBufClean = (int16_t*) (((uintptr_t)aecm->dBufClean_buf + 31) & ~ 31); + aecm->dBufNoisy = (int16_t*) (((uintptr_t)aecm->dBufNoisy_buf + 31) & ~ 31); + aecm->outBuf = (int16_t*) (((uintptr_t)aecm->outBuf_buf + 15) & ~ 15); + aecm->channelStored = (int16_t*) (((uintptr_t) aecm->channelStored_buf + 15) & ~ 15); - aecm->channelAdapt16 = (WebRtc_Word16*) (((uintptr_t) + aecm->channelAdapt16 = (int16_t*) (((uintptr_t) aecm->channelAdapt16_buf + 15) & ~ 15); - aecm->channelAdapt32 = (WebRtc_Word32*) (((uintptr_t) + aecm->channelAdapt32 = (int32_t*) (((uintptr_t) aecm->channelAdapt32_buf + 31) & ~ 31); - return 0; + return aecm; } -void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const WebRtc_Word16* echo_path) -{ +void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path) { int i = 0; // Reset the stored channel - memcpy(aecm->channelStored, echo_path, sizeof(WebRtc_Word16) * PART_LEN1); + memcpy(aecm->channelStored, echo_path, sizeof(int16_t) * PART_LEN1); // Reset the adapted channels - memcpy(aecm->channelAdapt16, echo_path, sizeof(WebRtc_Word16) * PART_LEN1); + memcpy(aecm->channelAdapt16, echo_path, sizeof(int16_t) * PART_LEN1); for (i = 0; i < PART_LEN1; i++) { - aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( - (WebRtc_Word32)(aecm->channelAdapt16[i]), 16); + aecm->channelAdapt32[i] = (int32_t)aecm->channelAdapt16[i] << 16; } // Reset channel storing variables @@ -199,6 +299,89 @@ void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const WebRtc_Word16* echo_pat aecm->mseChannelCount = 0; } +static void CalcLinearEnergiesC(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + for (i = 0; i < PART_LEN1; i++) + { + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); + (*far_energy) += (uint32_t)(far_spectrum[i]); + *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i]; + (*echo_energy_stored) += (uint32_t)echo_est[i]; + } +} + +static void StoreAdaptiveChannelC(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + int i; + + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) + { + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); + echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + far_spectrum[i + 1]); + echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + far_spectrum[i + 2]); + echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + far_spectrum[i + 3]); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); +} + +static void ResetAdaptiveChannelC(AecmCore* aecm) { + int i; + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. 
Reset the adaptive channel. + memcpy(aecm->channelAdapt16, aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) + { + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; + aecm->channelAdapt32[i + 1] = (int32_t)aecm->channelStored[i + 1] << 16; + aecm->channelAdapt32[i + 2] = (int32_t)aecm->channelStored[i + 2] << 16; + aecm->channelAdapt32[i + 3] = (int32_t)aecm->channelStored[i + 3] << 16; + } + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} + +// Initialize function pointers for ARM Neon platform. +#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON) +static void WebRtcAecm_InitNeon(void) +{ + WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannelNeon; + WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannelNeon; + WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon; +} +#endif + +// Initialize function pointers for MIPS platform. +#if defined(MIPS32_LE) +static void WebRtcAecm_InitMips(void) +{ +#if defined(MIPS_DSP_R1_LE) + WebRtcAecm_StoreAdaptiveChannel = WebRtcAecm_StoreAdaptiveChannel_mips; + WebRtcAecm_ResetAdaptiveChannel = WebRtcAecm_ResetAdaptiveChannel_mips; +#endif + WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergies_mips; +} +#endif + // WebRtcAecm_InitCore(...) // // This function initializes the AECM instant created with WebRtcAecm_CreateCore(...) @@ -212,11 +395,10 @@ void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const WebRtc_Word16* echo_pat // Return value : 0 - Ok // -1 - Error // -int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) -{ +int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq) { int i = 0; - WebRtc_Word32 tmp32 = PART_LEN1 * PART_LEN1; - WebRtc_Word16 tmp16 = PART_LEN1; + int32_t tmp32 = PART_LEN1 * PART_LEN1; + int16_t tmp16 = PART_LEN1; if (samplingFreq != 8000 && samplingFreq != 16000) { @@ -224,17 +406,17 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) return -1; } // sanity check of sampling frequency - aecm->mult = (WebRtc_Word16)samplingFreq / 8000; + aecm->mult = (int16_t)samplingFreq / 8000; aecm->farBufWritePos = 0; aecm->farBufReadPos = 0; aecm->knownDelay = 0; aecm->lastKnownDelay = 0; - WebRtcApm_InitBuffer(aecm->farFrameBuf); - WebRtcApm_InitBuffer(aecm->nearNoisyFrameBuf); - WebRtcApm_InitBuffer(aecm->nearCleanFrameBuf); - WebRtcApm_InitBuffer(aecm->outFrameBuf); + WebRtc_InitBuffer(aecm->farFrameBuf); + WebRtc_InitBuffer(aecm->nearNoisyFrameBuf); + WebRtc_InitBuffer(aecm->nearCleanFrameBuf); + WebRtc_InitBuffer(aecm->outFrameBuf); memset(aecm->xBuf_buf, 0, sizeof(aecm->xBuf_buf)); memset(aecm->dBufClean_buf, 0, sizeof(aecm->dBufClean_buf)); @@ -244,12 +426,16 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) aecm->seed = 666; aecm->totCount = 0; + if (WebRtc_InitDelayEstimatorFarend(aecm->delay_estimator_farend) != 0) { + return -1; + } if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) { return -1; } - - // Initialize to reasonable values - aecm->currentDelay = 8; + // Set far end histories to zero + memset(aecm->far_history, 0, sizeof(uint16_t) * PART_LEN1 * MAX_DELAY); + memset(aecm->far_q_domains, 0, sizeof(int) * MAX_DELAY); + aecm->far_history_pos = MAX_DELAY; aecm->nlpFlag = 1; aecm->fixedDelay = -1; @@ -287,7 +473,7 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) { aecm->noiseEst[i] = (tmp32 << 8); tmp16--; - tmp32 -= (WebRtc_Word32)((tmp16 << 1) + 1); + tmp32 -= (int32_t)((tmp16 
<< 1) + 1); } for (; i < PART_LEN1; i++) { @@ -313,52 +499,67 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq) aecm->supGainErrParamDiffAB = SUPGAIN_ERROR_PARAM_A - SUPGAIN_ERROR_PARAM_B; aecm->supGainErrParamDiffBD = SUPGAIN_ERROR_PARAM_B - SUPGAIN_ERROR_PARAM_D; - assert(PART_LEN % 16 == 0); + // Assert a preprocessor definition at compile-time. It's an assumption + // used in assembly code, so check the assembly files before any change. + COMPILE_ASSERT(PART_LEN % 16 == 0); + // Initialize function pointers. + WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC; + WebRtcAecm_StoreAdaptiveChannel = StoreAdaptiveChannelC; + WebRtcAecm_ResetAdaptiveChannel = ResetAdaptiveChannelC; + +#ifdef WEBRTC_DETECT_NEON + uint64_t features = WebRtc_GetCPUFeaturesARM(); + if ((features & kCPUFeatureNEON) != 0) + { + WebRtcAecm_InitNeon(); + } +#elif defined(WEBRTC_HAS_NEON) + WebRtcAecm_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcAecm_InitMips(); +#endif return 0; } // TODO(bjornv): This function is currently not used. Add support for these // parameters from a higher level -int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag) -{ +int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag) { aecm->nlpFlag = nlpFlag; aecm->fixedDelay = delay; return 0; } -int WebRtcAecm_FreeCore(AecmCore_t *aecm) -{ - if (aecm == NULL) - { - return -1; +void WebRtcAecm_FreeCore(AecmCore* aecm) { + if (aecm == NULL) { + return; } - WebRtcApm_FreeBuffer(aecm->farFrameBuf); - WebRtcApm_FreeBuffer(aecm->nearNoisyFrameBuf); - WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf); - WebRtcApm_FreeBuffer(aecm->outFrameBuf); + WebRtc_FreeBuffer(aecm->farFrameBuf); + WebRtc_FreeBuffer(aecm->nearNoisyFrameBuf); + WebRtc_FreeBuffer(aecm->nearCleanFrameBuf); + WebRtc_FreeBuffer(aecm->outFrameBuf); WebRtc_FreeDelayEstimator(aecm->delay_estimator); - free(aecm); + WebRtc_FreeDelayEstimatorFarend(aecm->delay_estimator_farend); + WebRtcSpl_FreeRealFFT(aecm->real_fft); - return 0; + free(aecm); } -int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, - const WebRtc_Word16 * farend, - const WebRtc_Word16 * nearendNoisy, - const WebRtc_Word16 * nearendClean, - WebRtc_Word16 * out) -{ - WebRtc_Word16 farBlock[PART_LEN]; - WebRtc_Word16 nearNoisyBlock[PART_LEN]; - WebRtc_Word16 nearCleanBlock[PART_LEN]; - WebRtc_Word16 outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary. - WebRtc_Word16* outBlock = (WebRtc_Word16*) (((uintptr_t) outBlock_buf + 15) & ~ 15); +int WebRtcAecm_ProcessFrame(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* out) { + int16_t outBlock_buf[PART_LEN + 8]; // Align buffer to 8-byte boundary. + int16_t* outBlock = (int16_t*) (((uintptr_t) outBlock_buf + 15) & ~ 15); - WebRtc_Word16 farFrame[FRAME_LEN]; + int16_t farFrame[FRAME_LEN]; + const int16_t* out_ptr = NULL; int size = 0; // Buffer the current frame. @@ -368,25 +569,40 @@ int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, // Buffer the synchronized far and near frames, // to pass the smaller blocks individually. 
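    // A frame is FRAME_LEN = 80 samples while a processing block is
    // PART_LEN = 64, so whole frames are queued in the ring buffers and
    // consumed 64 samples at a time; the remainder waits for the next frame.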
- WebRtcApm_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN); - WebRtcApm_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN); + WebRtc_WriteBuffer(aecm->farFrameBuf, farFrame, FRAME_LEN); + WebRtc_WriteBuffer(aecm->nearNoisyFrameBuf, nearendNoisy, FRAME_LEN); if (nearendClean != NULL) { - WebRtcApm_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN); + WebRtc_WriteBuffer(aecm->nearCleanFrameBuf, nearendClean, FRAME_LEN); } // Process as many blocks as possible. - while (WebRtcApm_get_buffer_size(aecm->farFrameBuf) >= PART_LEN) + while (WebRtc_available_read(aecm->farFrameBuf) >= PART_LEN) { - WebRtcApm_ReadBuffer(aecm->farFrameBuf, farBlock, PART_LEN); - WebRtcApm_ReadBuffer(aecm->nearNoisyFrameBuf, nearNoisyBlock, PART_LEN); + int16_t far_block[PART_LEN]; + const int16_t* far_block_ptr = NULL; + int16_t near_noisy_block[PART_LEN]; + const int16_t* near_noisy_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->farFrameBuf, (void**) &far_block_ptr, far_block, + PART_LEN); + WebRtc_ReadBuffer(aecm->nearNoisyFrameBuf, + (void**) &near_noisy_block_ptr, + near_noisy_block, + PART_LEN); if (nearendClean != NULL) { - WebRtcApm_ReadBuffer(aecm->nearCleanFrameBuf, nearCleanBlock, PART_LEN); + int16_t near_clean_block[PART_LEN]; + const int16_t* near_clean_block_ptr = NULL; + + WebRtc_ReadBuffer(aecm->nearCleanFrameBuf, + (void**) &near_clean_block_ptr, + near_clean_block, + PART_LEN); if (WebRtcAecm_ProcessBlock(aecm, - farBlock, - nearNoisyBlock, - nearCleanBlock, + far_block_ptr, + near_noisy_block_ptr, + near_clean_block_ptr, outBlock) == -1) { return -1; @@ -394,8 +610,8 @@ int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, } else { if (WebRtcAecm_ProcessBlock(aecm, - farBlock, - nearNoisyBlock, + far_block_ptr, + near_noisy_block_ptr, NULL, outBlock) == -1) { @@ -403,19 +619,23 @@ int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, } } - WebRtcApm_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN); + WebRtc_WriteBuffer(aecm->outFrameBuf, outBlock, PART_LEN); } // Stuff the out buffer if we have less than a frame to output. // This should only happen for the first frame. - size = WebRtcApm_get_buffer_size(aecm->outFrameBuf); + size = (int) WebRtc_available_read(aecm->outFrameBuf); if (size < FRAME_LEN) { - WebRtcApm_StuffBuffer(aecm->outFrameBuf, FRAME_LEN - size); + WebRtc_MoveReadPtr(aecm->outFrameBuf, size - FRAME_LEN); } // Obtain an output frame. - WebRtcApm_ReadBuffer(aecm->outFrameBuf, out, FRAME_LEN); + WebRtc_ReadBuffer(aecm->outFrameBuf, (void**) &out_ptr, out, FRAME_LEN); + if (out_ptr != out) { + // ReadBuffer() hasn't copied to |out| in this case. + memcpy(out, out_ptr, FRAME_LEN * sizeof(int16_t)); + } return 0; } @@ -434,11 +654,11 @@ int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, // // Return: - Filtered value. 
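// With the values used for the far-end minimum tracker
// (stepSizePos = 11, stepSizeNeg = 3) the estimate creeps upwards by
// 1/2048 of the gap per update but drops by 1/8 of it, so it hugs the
// lower envelope of the energy.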
// -WebRtc_Word16 WebRtcAecm_AsymFilt(const WebRtc_Word16 filtOld, const WebRtc_Word16 inVal, - const WebRtc_Word16 stepSizePos, - const WebRtc_Word16 stepSizeNeg) +int16_t WebRtcAecm_AsymFilt(const int16_t filtOld, const int16_t inVal, + const int16_t stepSizePos, + const int16_t stepSizeNeg) { - WebRtc_Word16 retVal; + int16_t retVal; if ((filtOld == WEBRTC_SPL_WORD16_MAX) | (filtOld == WEBRTC_SPL_WORD16_MIN)) { @@ -447,15 +667,38 @@ WebRtc_Word16 WebRtcAecm_AsymFilt(const WebRtc_Word16 filtOld, const WebRtc_Word retVal = filtOld; if (filtOld > inVal) { - retVal -= WEBRTC_SPL_RSHIFT_W16(filtOld - inVal, stepSizeNeg); + retVal -= (filtOld - inVal) >> stepSizeNeg; } else { - retVal += WEBRTC_SPL_RSHIFT_W16(inVal - filtOld, stepSizePos); + retVal += (inVal - filtOld) >> stepSizePos; } return retVal; } +// ExtractFractionPart(a, zeros) +// +// returns the fraction part of |a|, with |zeros| number of leading zeros, as an +// int16_t scaled to Q8. There is no sanity check of |a| in the sense that the +// number of zeros match. +static int16_t ExtractFractionPart(uint32_t a, int zeros) { + return (int16_t)(((a << zeros) & 0x7FFFFFFF) >> 23); +} + +// Calculates and returns the log of |energy| in Q8. The input |energy| is +// supposed to be in Q(|q_domain|). +static int16_t LogOfEnergyInQ8(uint32_t energy, int q_domain) { + static const int16_t kLogLowValue = PART_LEN_SHIFT << 7; + int16_t log_energy_q8 = kLogLowValue; + if (energy > 0) { + int zeros = WebRtcSpl_NormU32(energy); + int16_t frac = ExtractFractionPart(energy, zeros); + // log2 of |energy| in Q8. + log_energy_q8 += ((31 - zeros) << 8) + frac - (q_domain << 8); + } + return log_energy_q8; +} + // WebRtcAecm_CalcEnergies(...) // // This function calculates the log of energies for nearend, farend and estimated @@ -469,94 +712,51 @@ WebRtc_Word16 WebRtcAecm_AsymFilt(const WebRtc_Word16 filtOld, const WebRtc_Word // Q(aecm->dfaQDomain). // @param echoEst [out] Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). 
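// All of these energies are log2 values in Q8 with the input Q-domain
// folded in by LogOfEnergyInQ8(), so energy ratios become plain
// differences; e.g. an energy of 1024 in Q0 maps to
// (10 << 8) + (PART_LEN_SHIFT << 7) = 2560 + 896 = 3456.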
// -void WebRtcAecm_CalcEnergies(AecmCore_t * aecm, - const WebRtc_UWord16* far_spectrum, - const WebRtc_Word16 far_q, - const WebRtc_UWord32 nearEner, - WebRtc_Word32 * echoEst) -{ +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst) { // Local variables - WebRtc_UWord32 tmpAdapt = 0; - WebRtc_UWord32 tmpStored = 0; - WebRtc_UWord32 tmpFar = 0; + uint32_t tmpAdapt = 0; + uint32_t tmpStored = 0; + uint32_t tmpFar = 0; int i; - WebRtc_Word16 zeros, frac; - WebRtc_Word16 tmp16; - WebRtc_Word16 increase_max_shifts = 4; - WebRtc_Word16 decrease_max_shifts = 11; - WebRtc_Word16 increase_min_shifts = 11; - WebRtc_Word16 decrease_min_shifts = 3; - WebRtc_Word16 kLogLowValue = WEBRTC_SPL_LSHIFT_W16(PART_LEN_SHIFT, 7); + int16_t tmp16; + int16_t increase_max_shifts = 4; + int16_t decrease_max_shifts = 11; + int16_t increase_min_shifts = 11; + int16_t decrease_min_shifts = 3; // Get log of near end energy and store in buffer // Shift buffer memmove(aecm->nearLogEnergy + 1, aecm->nearLogEnergy, - sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); + sizeof(int16_t) * (MAX_BUF_LEN - 1)); // Logarithm of integrated magnitude spectrum (nearEner) - tmp16 = kLogLowValue; - if (nearEner) - { - zeros = WebRtcSpl_NormU32(nearEner); - frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32( - (WEBRTC_SPL_LSHIFT_U32(nearEner, zeros) & 0x7FFFFFFF), - 23); - // log2 in Q8 - tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - tmp16 -= WEBRTC_SPL_LSHIFT_W16(aecm->dfaNoisyQDomain, 8); - } - aecm->nearLogEnergy[0] = tmp16; - // END: Get log of near end energy + aecm->nearLogEnergy[0] = LogOfEnergyInQ8(nearEner, aecm->dfaNoisyQDomain); WebRtcAecm_CalcLinearEnergies(aecm, far_spectrum, echoEst, &tmpFar, &tmpAdapt, &tmpStored); // Shift buffers memmove(aecm->echoAdaptLogEnergy + 1, aecm->echoAdaptLogEnergy, - sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); + sizeof(int16_t) * (MAX_BUF_LEN - 1)); memmove(aecm->echoStoredLogEnergy + 1, aecm->echoStoredLogEnergy, - sizeof(WebRtc_Word16) * (MAX_BUF_LEN - 1)); + sizeof(int16_t) * (MAX_BUF_LEN - 1)); // Logarithm of delayed far end energy - tmp16 = kLogLowValue; - if (tmpFar) - { - zeros = WebRtcSpl_NormU32(tmpFar); - frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpFar, zeros) - & 0x7FFFFFFF), 23); - // log2 in Q8 - tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - tmp16 -= WEBRTC_SPL_LSHIFT_W16(far_q, 8); - } - aecm->farLogEnergy = tmp16; + aecm->farLogEnergy = LogOfEnergyInQ8(tmpFar, far_q); // Logarithm of estimated echo energy through adapted channel - tmp16 = kLogLowValue; - if (tmpAdapt) - { - zeros = WebRtcSpl_NormU32(tmpAdapt); - frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpAdapt, zeros) - & 0x7FFFFFFF), 23); - //log2 in Q8 - tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - tmp16 -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + far_q, 8); - } - aecm->echoAdaptLogEnergy[0] = tmp16; + aecm->echoAdaptLogEnergy[0] = LogOfEnergyInQ8(tmpAdapt, + RESOLUTION_CHANNEL16 + far_q); // Logarithm of estimated echo energy through stored channel - tmp16 = kLogLowValue; - if (tmpStored) - { - zeros = WebRtcSpl_NormU32(tmpStored); - frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32((WEBRTC_SPL_LSHIFT_U32(tmpStored, zeros) - & 0x7FFFFFFF), 23); - //log2 in Q8 - tmp16 += WEBRTC_SPL_LSHIFT_W16((31 - zeros), 8) + frac; - tmp16 -= WEBRTC_SPL_LSHIFT_W16(RESOLUTION_CHANNEL16 + far_q, 8); - } - aecm->echoStoredLogEnergy[0] = tmp16; + 
aecm->echoStoredLogEnergy[0] = + LogOfEnergyInQ8(tmpStored, RESOLUTION_CHANNEL16 + far_q); // Update farend energy levels (min, max, vad, mse) if (aecm->farLogEnergy > FAR_ENERGY_MIN) @@ -578,7 +778,7 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * aecm, tmp16 = 2560 - aecm->farEnergyMin; if (tmp16 > 0) { - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, FAR_ENERGY_VAD_REGION, 9); + tmp16 = (int16_t)((tmp16 * FAR_ENERGY_VAD_REGION) >> 9); } else { tmp16 = 0; @@ -593,10 +793,8 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * aecm, { if (aecm->farEnergyVAD > aecm->farLogEnergy) { - aecm->farEnergyVAD += WEBRTC_SPL_RSHIFT_W16(aecm->farLogEnergy + - tmp16 - - aecm->farEnergyVAD, - 6); + aecm->farEnergyVAD += + (aecm->farLogEnergy + tmp16 - aecm->farEnergyVAD) >> 6; aecm->vadUpdateCount = 0; } else { @@ -647,12 +845,10 @@ void WebRtcAecm_CalcEnergies(AecmCore_t * aecm, // @param mu [out] (Return value) Stepsize in log2(), i.e. number of shifts. // // -WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) -{ - - WebRtc_Word32 tmp32; - WebRtc_Word16 tmp16; - WebRtc_Word16 mu = MU_MAX; +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm) { + int32_t tmp32; + int16_t tmp16; + int16_t mu = MU_MAX; // Here we calculate the step size mu used in the // following NLMS based Channel estimation algorithm @@ -668,9 +864,9 @@ WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) } else { tmp16 = (aecm->farLogEnergy - aecm->farEnergyMin); - tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, MU_DIFF); + tmp32 = tmp16 * MU_DIFF; tmp32 = WebRtcSpl_DivW32W16(tmp32, aecm->farEnergyMaxMin); - mu = MU_MIN - 1 - (WebRtc_Word16)(tmp32); + mu = MU_MIN - 1 - (int16_t)(tmp32); // The -1 is an alternative to rounding. This way we get a larger // stepsize, so we in some sense compensate for truncation in NLMS } @@ -695,25 +891,23 @@ WebRtc_Word16 WebRtcAecm_CalcStepSize(AecmCore_t * const aecm) // @param mu [in] NLMS step size. // @param echoEst [i/o] Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). // -void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, - const WebRtc_UWord16* far_spectrum, - const WebRtc_Word16 far_q, - const WebRtc_UWord16 * const dfa, - const WebRtc_Word16 mu, - WebRtc_Word32 * echoEst) -{ - - WebRtc_UWord32 tmpU32no1, tmpU32no2; - WebRtc_Word32 tmp32no1, tmp32no2; - WebRtc_Word32 mseStored; - WebRtc_Word32 mseAdapt; +void WebRtcAecm_UpdateChannel(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint16_t* const dfa, + const int16_t mu, + int32_t* echoEst) { + uint32_t tmpU32no1, tmpU32no2; + int32_t tmp32no1, tmp32no2; + int32_t mseStored; + int32_t mseAdapt; int i; - WebRtc_Word16 zerosFar, zerosNum, zerosCh, zerosDfa; - WebRtc_Word16 shiftChFar, shiftNum, shift2ResChan; - WebRtc_Word16 tmp16no1; - WebRtc_Word16 xfaQ, dfaQ; + int16_t zerosFar, zerosNum, zerosCh, zerosDfa; + int16_t shiftChFar, shiftNum, shift2ResChan; + int16_t tmp16no1; + int16_t xfaQ, dfaQ; // This is the channel estimation algorithm. It is base on NLMS but has a variable step // length, which was calculated above. 
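The fixed-point bookkeeping in the hunks around this point hides a fairly
standard update. As a rough floating-point sketch of what one NLMS step per
frequency bin computes (the function and variable names here are
hypothetical, and the Q-domain alignment and overflow guards of the real
code are omitted):

/* Hypothetical floating-point sketch of the per-bin NLMS channel update.
 * h: channel estimate, far_mag/near_mag: spectral magnitudes,
 * mu_shifts: step size expressed as a number of right-shifts, as
 * returned by WebRtcAecm_CalcStepSize(). */
static void NlmsUpdateSketch(float* h, const float* far_mag,
                             const float* near_mag, int mu_shifts,
                             int num_bins) {
  const float mu = 1.0f / (float)(1 << mu_shifts);
  int i;
  for (i = 0; i < num_bins; i++) {
    if (far_mag[i] > 0.0f) {
      /* A-priori error between the near end and the echo estimate. */
      float err = near_mag[i] - h[i] * far_mag[i];
      /* Step normalized by the far-end magnitude. */
      h[i] += mu * err / far_mag[i];
      if (h[i] < 0.0f) {
        h[i] = 0.0f; /* A channel gain can never be negative. */
      }
    }
  }
}

The clamp at zero mirrors the invariant the fixed-point code enforces on
channelAdapt32[] after every saturating add.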
@@ -724,7 +918,7 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, // Determine norm of channel and farend to make sure we don't get overflow in // multiplication zerosCh = WebRtcSpl_NormU32(aecm->channelAdapt32[i]); - zerosFar = WebRtcSpl_NormU32((WebRtc_UWord32)far_spectrum[i]); + zerosFar = WebRtcSpl_NormU32((uint32_t)far_spectrum[i]); if (zerosCh + zerosFar > 31) { // Multiplication is safe @@ -735,15 +929,14 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, { // We need to shift down before multiplication shiftChFar = 32 - zerosCh - zerosFar; - tmpU32no1 = WEBRTC_SPL_UMUL_32_16( - WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], shiftChFar), - far_spectrum[i]); + tmpU32no1 = (aecm->channelAdapt32[i] >> shiftChFar) * + far_spectrum[i]; } // Determine Q-domain of numerator zerosNum = WebRtcSpl_NormU32(tmpU32no1); if (dfa[i]) { - zerosDfa = WebRtcSpl_NormU32((WebRtc_UWord32)dfa[i]); + zerosDfa = WebRtcSpl_NormU32((uint32_t)dfa[i]); } else { zerosDfa = 32; @@ -762,8 +955,8 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, } // Add in the same Q-domain tmpU32no1 = WEBRTC_SPL_SHIFT_W32(tmpU32no1, xfaQ); - tmpU32no2 = WEBRTC_SPL_SHIFT_W32((WebRtc_UWord32)dfa[i], dfaQ); - tmp32no1 = (WebRtc_Word32)tmpU32no2 - (WebRtc_Word32)tmpU32no1; + tmpU32no2 = WEBRTC_SPL_SHIFT_W32((uint32_t)dfa[i], dfaQ); + tmp32no1 = (int32_t)tmpU32no2 - (int32_t)tmpU32no1; zerosNum = WebRtcSpl_NormW32(tmp32no1); if ((tmp32no1) && (far_spectrum[i] > (CHANNEL_VAD << far_q))) { @@ -783,11 +976,11 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, { if (tmp32no1 > 0) { - tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(tmp32no1, + tmp32no2 = (int32_t)WEBRTC_SPL_UMUL_32_16(tmp32no1, far_spectrum[i]); } else { - tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(-tmp32no1, + tmp32no2 = -(int32_t)WEBRTC_SPL_UMUL_32_16(-tmp32no1, far_spectrum[i]); } shiftNum = 0; @@ -796,14 +989,10 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, shiftNum = 32 - (zerosNum + zerosFar); if (tmp32no1 > 0) { - tmp32no2 = (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16( - WEBRTC_SPL_RSHIFT_W32(tmp32no1, shiftNum), - far_spectrum[i]); + tmp32no2 = (tmp32no1 >> shiftNum) * far_spectrum[i]; } else { - tmp32no2 = -(WebRtc_Word32)WEBRTC_SPL_UMUL_32_16( - WEBRTC_SPL_RSHIFT_W32(-tmp32no1, shiftNum), - far_spectrum[i]); + tmp32no2 = -((-tmp32no1 >> shiftNum) * far_spectrum[i]); } } // Normalize with respect to frequency bin @@ -817,15 +1006,15 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, { tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, shift2ResChan); } - aecm->channelAdapt32[i] = WEBRTC_SPL_ADD_SAT_W32(aecm->channelAdapt32[i], - tmp32no2); + aecm->channelAdapt32[i] = + WebRtcSpl_AddSatW32(aecm->channelAdapt32[i], tmp32no2); if (aecm->channelAdapt32[i] < 0) { // We can never have negative channel gain aecm->channelAdapt32[i] = 0; } - aecm->channelAdapt16[i] - = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(aecm->channelAdapt32[i], 16); + aecm->channelAdapt16[i] = + (int16_t)(aecm->channelAdapt32[i] >> 16); } } } @@ -856,13 +1045,13 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, mseAdapt = 0; for (i = 0; i < MIN_MSE_COUNT; i++) { - tmp32no1 = ((WebRtc_Word32)aecm->echoStoredLogEnergy[i] - - (WebRtc_Word32)aecm->nearLogEnergy[i]); + tmp32no1 = ((int32_t)aecm->echoStoredLogEnergy[i] + - (int32_t)aecm->nearLogEnergy[i]); tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); mseStored += tmp32no2; - tmp32no1 = ((WebRtc_Word32)aecm->echoAdaptLogEnergy[i] - - (WebRtc_Word32)aecm->nearLogEnergy[i]); + tmp32no1 = ((int32_t)aecm->echoAdaptLogEnergy[i] + - 
(int32_t)aecm->nearLogEnergy[i]); tmp32no2 = WEBRTC_SPL_ABS_W32(tmp32no1); mseAdapt += tmp32no2; } @@ -887,8 +1076,9 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, aecm->mseThreshold = (mseAdapt + aecm->mseAdaptOld); } else { - aecm->mseThreshold += WEBRTC_SPL_MUL_16_16_RSFT(mseAdapt - - WEBRTC_SPL_MUL_16_16_RSFT(aecm->mseThreshold, 5, 3), 205, 8); + int scaled_threshold = aecm->mseThreshold * 5 / 8; + aecm->mseThreshold += + ((mseAdapt - scaled_threshold) * 205) >> 8; } } @@ -914,13 +1104,12 @@ void WebRtcAecm_UpdateChannel(AecmCore_t * aecm, // level (Q14). // // -static WebRtc_Word16 CalcSuppressionGain(AecmCore_t * const aecm) -{ - WebRtc_Word32 tmp32no1; +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm) { + int32_t tmp32no1; - WebRtc_Word16 supGain = SUPGAIN_DEFAULT; - WebRtc_Word16 tmp16no1; - WebRtc_Word16 dE = 0; + int16_t supGain = SUPGAIN_DEFAULT; + int16_t tmp16no1; + int16_t dE = 0; // Determine suppression gain used in the Wiener filter. The gain is based on a mix of far // end energy and echo estimation error. @@ -942,16 +1131,15 @@ static WebRtc_Word16 CalcSuppressionGain(AecmCore_t * const aecm) // Update counters if (dE < SUPGAIN_EPC_DT) { - tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffAB, dE); + tmp32no1 = aecm->supGainErrParamDiffAB * dE; tmp32no1 += (SUPGAIN_EPC_DT >> 1); - tmp16no1 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT); + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, SUPGAIN_EPC_DT); supGain = aecm->supGainErrParamA - tmp16no1; } else { - tmp32no1 = WEBRTC_SPL_MUL_16_16(aecm->supGainErrParamDiffBD, - (ENERGY_DEV_TOL - dE)); + tmp32no1 = aecm->supGainErrParamDiffBD * (ENERGY_DEV_TOL - dE); tmp32no1 += ((ENERGY_DEV_TOL - SUPGAIN_EPC_DT) >> 1); - tmp16no1 = (WebRtc_Word16)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL + tmp16no1 = (int16_t)WebRtcSpl_DivW32W16(tmp32no1, (ENERGY_DEV_TOL - SUPGAIN_EPC_DT)); supGain = aecm->supGainErrParamD + tmp16no1; } @@ -972,10 +1160,10 @@ static WebRtc_Word16 CalcSuppressionGain(AecmCore_t * const aecm) aecm->supGainOld = supGain; if (tmp16no1 < aecm->supGain) { - aecm->supGain += (WebRtc_Word16)((tmp16no1 - aecm->supGain) >> 4); + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); } else { - aecm->supGain += (WebRtc_Word16)((tmp16no1 - aecm->supGain) >> 4); + aecm->supGain += (int16_t)((tmp16no1 - aecm->supGain) >> 4); } // END: Update suppression gain @@ -983,705 +1171,9 @@ static WebRtc_Word16 CalcSuppressionGain(AecmCore_t * const aecm) return aecm->supGain; } -// Transforms a time domain signal into the frequency domain, outputting the -// complex valued signal, absolute value and sum of absolute values. -// -// time_signal [in] Pointer to time domain signal -// freq_signal_real [out] Pointer to real part of frequency domain array -// freq_signal_imag [out] Pointer to imaginary part of frequency domain -// array -// freq_signal_abs [out] Pointer to absolute value of frequency domain -// array -// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in -// the frequency domain array -// return value The Q-domain of current frequency values -// -static int TimeToFrequencyDomain(const WebRtc_Word16* time_signal, - complex16_t* freq_signal, - WebRtc_UWord16* freq_signal_abs, - WebRtc_UWord32* freq_signal_sum_abs) -{ - int i = 0; - int time_signal_scaling = 0; - - WebRtc_Word32 tmp32no1; - WebRtc_Word32 tmp32no2; - - // In fft_buf, +16 for 32-byte alignment. 
- WebRtc_Word16 fft_buf[PART_LEN4 + 16]; - WebRtc_Word16 *fft = (WebRtc_Word16 *) (((uintptr_t) fft_buf + 31) & ~31); - - WebRtc_Word16 tmp16no1; - WebRtc_Word16 tmp16no2; -#ifdef AECM_WITH_ABS_APPROX - WebRtc_Word16 max_value = 0; - WebRtc_Word16 min_value = 0; - WebRtc_UWord16 alpha = 0; - WebRtc_UWord16 beta = 0; -#endif - -#ifdef AECM_DYNAMIC_Q - tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); - time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); -#endif - - WebRtcAecm_WindowAndFFT(fft, time_signal, freq_signal, time_signal_scaling); - - // Extract imaginary and real part, calculate the magnitude for all frequency bins - freq_signal[0].imag = 0; - freq_signal[PART_LEN].imag = 0; - freq_signal[PART_LEN].real = fft[PART_LEN2]; - freq_signal_abs[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( - freq_signal[0].real); - freq_signal_abs[PART_LEN] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( - freq_signal[PART_LEN].real); - (*freq_signal_sum_abs) = (WebRtc_UWord32)(freq_signal_abs[0]) + - (WebRtc_UWord32)(freq_signal_abs[PART_LEN]); - - for (i = 1; i < PART_LEN; i++) - { - if (freq_signal[i].real == 0) - { - freq_signal_abs[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( - freq_signal[i].imag); - } - else if (freq_signal[i].imag == 0) - { - freq_signal_abs[i] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16( - freq_signal[i].real); - } - else - { - // Approximation for magnitude of complex fft output - // magn = sqrt(real^2 + imag^2) - // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) - // - // The parameters alpha and beta are stored in Q15 - -#ifdef AECM_WITH_ABS_APPROX - tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); - tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); - - if(tmp16no1 > tmp16no2) - { - max_value = tmp16no1; - min_value = tmp16no2; - } else - { - max_value = tmp16no2; - min_value = tmp16no1; - } - - // Magnitude in Q(-6) - if ((max_value >> 2) > min_value) - { - alpha = kAlpha1; - beta = kBeta1; - } else if ((max_value >> 1) > min_value) - { - alpha = kAlpha2; - beta = kBeta2; - } else - { - alpha = kAlpha3; - beta = kBeta3; - } - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(max_value, - alpha, - 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(min_value, - beta, - 15); - freq_signal_abs[i] = (WebRtc_UWord16)tmp16no1 + - (WebRtc_UWord16)tmp16no2; -#else -#ifdef WEBRTC_ARCH_ARM_V7A - __asm__("smulbb %0, %1, %2" : "=r"(tmp32no1) : "r"(freq_signal[i].real), - "r"(freq_signal[i].real)); - __asm__("smlabb %0, %1, %2, %3" :: "r"(tmp32no2), "r"(freq_signal[i].imag), - "r"(freq_signal[i].imag), "r"(tmp32no1)); -#else - tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); - tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); - tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); - tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2); - tmp32no2 = WEBRTC_SPL_ADD_SAT_W32(tmp32no1, tmp32no2); -#endif // WEBRTC_ARCH_ARM_V7A - tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); - - freq_signal_abs[i] = (WebRtc_UWord16)tmp32no1; -#endif // AECM_WITH_ABS_APPROX - } - (*freq_signal_sum_abs) += (WebRtc_UWord32)freq_signal_abs[i]; - } - - return time_signal_scaling; -} - -int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, - const WebRtc_Word16 * farend, - const WebRtc_Word16 * nearendNoisy, - const WebRtc_Word16 * nearendClean, - WebRtc_Word16 * output) -{ - int i; - - WebRtc_UWord32 xfaSum; - WebRtc_UWord32 dfaNoisySum; - WebRtc_UWord32 dfaCleanSum; - WebRtc_UWord32 echoEst32Gained; - WebRtc_UWord32 tmpU32; - - WebRtc_Word32 tmp32no1; - - WebRtc_UWord16 xfa[PART_LEN1]; - WebRtc_UWord16 
dfaNoisy[PART_LEN1]; - WebRtc_UWord16 dfaClean[PART_LEN1]; - WebRtc_UWord16* ptrDfaClean = dfaClean; - const WebRtc_UWord16* far_spectrum_ptr = NULL; - - // 32 byte aligned buffers (with +8 or +16). - // TODO (kma): define fft with complex16_t. - WebRtc_Word16 fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. - WebRtc_Word32 echoEst32_buf[PART_LEN1 + 8]; - WebRtc_Word32 dfw_buf[PART_LEN1 + 8]; - WebRtc_Word32 efw_buf[PART_LEN1 + 8]; - - WebRtc_Word16* fft = (WebRtc_Word16*) (((uintptr_t) fft_buf + 31) & ~ 31); - WebRtc_Word32* echoEst32 = (WebRtc_Word32*) (((uintptr_t) echoEst32_buf + 31) & ~ 31); - complex16_t* dfw = (complex16_t*) (((uintptr_t) dfw_buf + 31) & ~ 31); - complex16_t* efw = (complex16_t*) (((uintptr_t) efw_buf + 31) & ~ 31); - - WebRtc_Word16 hnl[PART_LEN1]; - WebRtc_Word16 numPosCoef = 0; - WebRtc_Word16 nlpGain = ONE_Q14; - WebRtc_Word16 delay; - WebRtc_Word16 tmp16no1; - WebRtc_Word16 tmp16no2; - WebRtc_Word16 mu; - WebRtc_Word16 supGain; - WebRtc_Word16 zeros32, zeros16; - WebRtc_Word16 zerosDBufNoisy, zerosDBufClean, zerosXBuf; - int far_q; - WebRtc_Word16 resolutionDiff, qDomainDiff; - - const int kMinPrefBand = 4; - const int kMaxPrefBand = 24; - WebRtc_Word32 avgHnl32 = 0; - -#ifdef ARM_WINM_LOG_ - DWORD temp; - static int flag0 = 0; - __int64 freq, start, end, diff__; - unsigned int milliseconds; -#endif - - // Determine startup state. There are three states: - // (0) the first CONV_LEN blocks - // (1) another CONV_LEN blocks - // (2) the rest - - if (aecm->startupState < 2) - { - aecm->startupState = (aecm->totCount >= CONV_LEN) + (aecm->totCount >= CONV_LEN2); - } - // END: Determine startup state - - // Buffer near and far end signals - memcpy(aecm->xBuf + PART_LEN, farend, sizeof(WebRtc_Word16) * PART_LEN); - memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(WebRtc_Word16) * PART_LEN); - if (nearendClean != NULL) - { - memcpy(aecm->dBufClean + PART_LEN, nearendClean, sizeof(WebRtc_Word16) * PART_LEN); - } - -#ifdef ARM_WINM_LOG_ - // measure tick start - QueryPerformanceFrequency((LARGE_INTEGER*)&freq); - QueryPerformanceCounter((LARGE_INTEGER*)&start); -#endif - - // Transform far end signal from time domain to frequency domain. - far_q = TimeToFrequencyDomain(aecm->xBuf, - dfw, - xfa, - &xfaSum); - - // Transform noisy near end signal from time domain to frequency domain. - zerosDBufNoisy = TimeToFrequencyDomain(aecm->dBufNoisy, - dfw, - dfaNoisy, - &dfaNoisySum); - aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; - aecm->dfaNoisyQDomain = (WebRtc_Word16)zerosDBufNoisy; - - - if (nearendClean == NULL) - { - ptrDfaClean = dfaNoisy; - aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; - aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; - dfaCleanSum = dfaNoisySum; - } else - { - // Transform clean near end signal from time domain to frequency domain. 
- zerosDBufClean = TimeToFrequencyDomain(aecm->dBufClean, - dfw, - dfaClean, - &dfaCleanSum); - aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; - aecm->dfaCleanQDomain = (WebRtc_Word16)zerosDBufClean; - } - -#ifdef ARM_WINM_LOG_ - // measure tick end - QueryPerformanceCounter((LARGE_INTEGER*)&end); - diff__ = ((end - start) * 1000) / (freq/1000); - milliseconds = (unsigned int)(diff__ & 0xffffffff); - WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); - // measure tick start - QueryPerformanceCounter((LARGE_INTEGER*)&start); -#endif - - // Get the delay - // Save far-end history and estimate delay - delay = WebRtc_DelayEstimatorProcess(aecm->delay_estimator, - xfa, - dfaNoisy, - PART_LEN1, - far_q, - aecm->currentVADValue); - if (delay < 0) - { - return -1; - } - - if (aecm->fixedDelay >= 0) - { - // Use fixed delay - delay = aecm->fixedDelay; - } - - aecm->currentDelay = delay; - -#ifdef ARM_WINM_LOG_ - // measure tick end - QueryPerformanceCounter((LARGE_INTEGER*)&end); - diff__ = ((end - start) * 1000) / (freq/1000); - milliseconds = (unsigned int)(diff__ & 0xffffffff); - WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); - // measure tick start - QueryPerformanceCounter((LARGE_INTEGER*)&start); -#endif - // Get aligned far end spectrum - far_spectrum_ptr = WebRtc_AlignedFarend(aecm->delay_estimator, - PART_LEN1, - &far_q); - zerosXBuf = (WebRtc_Word16) far_q; - if (far_spectrum_ptr == NULL) - { - return -1; - } - - // Calculate log(energy) and update energy threshold levels - WebRtcAecm_CalcEnergies(aecm, - far_spectrum_ptr, - zerosXBuf, - dfaNoisySum, - echoEst32); - - // Calculate stepsize - mu = WebRtcAecm_CalcStepSize(aecm); - - // Update counters - aecm->totCount++; - - // This is the channel estimation algorithm. - // It is base on NLMS but has a variable step length, which was calculated above. 
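    // Block pipeline: log energies -> NLMS step size -> channel update ->
    // suppression gain; the per-bin Wiener gains hnl[] formed below are
    // 1 - echoEst/near in Q14, clamped to [0, ONE_Q14].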
- WebRtcAecm_UpdateChannel(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisy, mu, echoEst32); - supGain = CalcSuppressionGain(aecm); - -#ifdef ARM_WINM_LOG_ - // measure tick end - QueryPerformanceCounter((LARGE_INTEGER*)&end); - diff__ = ((end - start) * 1000) / (freq/1000); - milliseconds = (unsigned int)(diff__ & 0xffffffff); - WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); - // measure tick start - QueryPerformanceCounter((LARGE_INTEGER*)&start); -#endif - - // Calculate Wiener filter hnl[] - for (i = 0; i < PART_LEN1; i++) - { - // Far end signal through channel estimate in Q8 - // How much can we shift right to preserve resolution - tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; - aecm->echoFilt[i] += WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32no1, 50), 8); - - zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; - zeros16 = WebRtcSpl_NormW16(supGain) + 1; - if (zeros32 + zeros16 > 16) - { - // Multiplication is safe - // Result in Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) - echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i], - (WebRtc_UWord16)supGain); - resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; - resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); - } else - { - tmp16no1 = 17 - zeros32 - zeros16; - resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; - resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); - if (zeros32 > tmp16no1) - { - echoEst32Gained = WEBRTC_SPL_UMUL_32_16((WebRtc_UWord32)aecm->echoFilt[i], - (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_W16(supGain, - tmp16no1)); // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) - } else - { - // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) - echoEst32Gained = WEBRTC_SPL_UMUL_32_16( - (WebRtc_UWord32)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], tmp16no1), - (WebRtc_UWord16)supGain); - } - } - - zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); - if ((zeros16 < (aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld)) - & (aecm->nearFilt[i])) - { - tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], zeros16); - qDomainDiff = zeros16 - aecm->dfaCleanQDomain + aecm->dfaCleanQDomainOld; - } else - { - tmp16no1 = WEBRTC_SPL_SHIFT_W16(aecm->nearFilt[i], - aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld); - qDomainDiff = 0; - } - tmp16no2 = WEBRTC_SPL_SHIFT_W16(ptrDfaClean[i], qDomainDiff); - tmp32no1 = (WebRtc_Word32)(tmp16no2 - tmp16no1); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4); - tmp16no2 += tmp16no1; - zeros16 = WebRtcSpl_NormW16(tmp16no2); - if ((tmp16no2) & (-qDomainDiff > zeros16)) - { - aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; - } else - { - aecm->nearFilt[i] = WEBRTC_SPL_SHIFT_W16(tmp16no2, -qDomainDiff); - } - - // Wiener filter coefficients, resulting hnl in Q14 - if (echoEst32Gained == 0) - { - hnl[i] = ONE_Q14; - } else if (aecm->nearFilt[i] == 0) - { - hnl[i] = 0; - } else - { - // Multiply the suppression gain - // Rounding - echoEst32Gained += (WebRtc_UWord32)(aecm->nearFilt[i] >> 1); - tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, (WebRtc_UWord16)aecm->nearFilt[i]); - - // Current resolution is - // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN - max(0, 17 - zeros16 - zeros32)) - // Make sure we are in Q14 - tmp32no1 = (WebRtc_Word32)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); - if (tmp32no1 > ONE_Q14) - { - hnl[i] = 0; - } else if (tmp32no1 < 0) - { - hnl[i] = ONE_Q14; - } else - { - // 1-echoEst/dfa - hnl[i] = ONE_Q14 - (WebRtc_Word16)tmp32no1; - if (hnl[i] < 0) - { - hnl[i] = 0; - } - } - } 
- if (hnl[i]) - { - numPosCoef++; - } - } - // Only in wideband. Prevent the gain in upper band from being larger than - // in lower band. - if (aecm->mult == 2) - { - // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause - // speech distortion in double-talk. - for (i = 0; i < PART_LEN1; i++) - { - hnl[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], hnl[i], 14); - } - - for (i = kMinPrefBand; i <= kMaxPrefBand; i++) - { - avgHnl32 += (WebRtc_Word32)hnl[i]; - } - assert(kMaxPrefBand - kMinPrefBand + 1 > 0); - avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); - - for (i = kMaxPrefBand; i < PART_LEN1; i++) - { - if (hnl[i] > (WebRtc_Word16)avgHnl32) - { - hnl[i] = (WebRtc_Word16)avgHnl32; - } - } - } - -#ifdef ARM_WINM_LOG_ - // measure tick end - QueryPerformanceCounter((LARGE_INTEGER*)&end); - diff__ = ((end - start) * 1000) / (freq/1000); - milliseconds = (unsigned int)(diff__ & 0xffffffff); - WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); - // measure tick start - QueryPerformanceCounter((LARGE_INTEGER*)&start); -#endif - - // Calculate NLP gain, result is in Q14 - if (aecm->nlpFlag) - { - for (i = 0; i < PART_LEN1; i++) - { - // Truncate values close to zero and one. - if (hnl[i] > NLP_COMP_HIGH) - { - hnl[i] = ONE_Q14; - } else if (hnl[i] < NLP_COMP_LOW) - { - hnl[i] = 0; - } - - // Remove outliers - if (numPosCoef < 3) - { - nlpGain = 0; - } else - { - nlpGain = ONE_Q14; - } - - // NLP - if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) - { - hnl[i] = ONE_Q14; - } else - { - hnl[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(hnl[i], nlpGain, 14); - } - - // multiply with Wiener coefficients - efw[i].real = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, - hnl[i], 14)); - efw[i].imag = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, - hnl[i], 14)); - } - } - else - { - // multiply with Wiener coefficients - for (i = 0; i < PART_LEN1; i++) - { - efw[i].real = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, - hnl[i], 14)); - efw[i].imag = (WebRtc_Word16)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, - hnl[i], 14)); - } - } - - if (aecm->cngMode == AecmTrue) - { - ComfortNoise(aecm, ptrDfaClean, efw, hnl); - } - -#ifdef ARM_WINM_LOG_ - // measure tick end - QueryPerformanceCounter((LARGE_INTEGER*)&end); - diff__ = ((end - start) * 1000) / (freq/1000); - milliseconds = (unsigned int)(diff__ & 0xffffffff); - WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); - // measure tick start - QueryPerformanceCounter((LARGE_INTEGER*)&start); -#endif - - WebRtcAecm_InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); - - return 0; -} - - -// Generate comfort noise and add to output signal. -// -// \param[in] aecm Handle of the AECM instance. -// \param[in] dfa Absolute value of the nearend signal (Q[aecm->dfaQDomain]). -// \param[in,out] outReal Real part of the output signal (Q[aecm->dfaQDomain]). -// \param[in,out] outImag Imaginary part of the output signal (Q[aecm->dfaQDomain]). -// \param[in] lambda Suppression gain with which to scale the noise level (Q14). 
-// -static void ComfortNoise(AecmCore_t* aecm, - const WebRtc_UWord16* dfa, - complex16_t* out, - const WebRtc_Word16* lambda) -{ - WebRtc_Word16 i; - WebRtc_Word16 tmp16; - WebRtc_Word32 tmp32; - - WebRtc_Word16 randW16[PART_LEN]; - WebRtc_Word16 uReal[PART_LEN1]; - WebRtc_Word16 uImag[PART_LEN1]; - WebRtc_Word32 outLShift32; - WebRtc_Word16 noiseRShift16[PART_LEN1]; - - WebRtc_Word16 shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; - WebRtc_Word16 minTrackShift; - - assert(shiftFromNearToNoise >= 0); - assert(shiftFromNearToNoise < 16); - - if (aecm->noiseEstCtr < 100) - { - // Track the minimum more quickly initially. - aecm->noiseEstCtr++; - minTrackShift = 6; - } else - { - minTrackShift = 9; - } - - // Estimate noise power. - for (i = 0; i < PART_LEN1; i++) - { - - // Shift to the noise domain. - tmp32 = (WebRtc_Word32)dfa[i]; - outLShift32 = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); - - if (outLShift32 < aecm->noiseEst[i]) - { - // Reset "too low" counter - aecm->noiseEstTooLowCtr[i] = 0; - // Track the minimum. - if (aecm->noiseEst[i] < (1 << minTrackShift)) - { - // For small values, decrease noiseEst[i] every - // |kNoiseEstIncCount| block. The regular approach below can not - // go further down due to truncation. - aecm->noiseEstTooHighCtr[i]++; - if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) - { - aecm->noiseEst[i]--; - aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter - } - } - else - { - aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) >> minTrackShift); - } - } else - { - // Reset "too high" counter - aecm->noiseEstTooHighCtr[i] = 0; - // Ramp slowly upwards until we hit the minimum again. - if ((aecm->noiseEst[i] >> 19) > 0) - { - // Avoid overflow. - // Multiplication with 2049 will cause wrap around. Scale - // down first and then multiply - aecm->noiseEst[i] >>= 11; - aecm->noiseEst[i] *= 2049; - } - else if ((aecm->noiseEst[i] >> 11) > 0) - { - // Large enough for relative increase - aecm->noiseEst[i] *= 2049; - aecm->noiseEst[i] >>= 11; - } - else - { - // Make incremental increases based on size every - // |kNoiseEstIncCount| block - aecm->noiseEstTooLowCtr[i]++; - if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) - { - aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; - aecm->noiseEstTooLowCtr[i] = 0; // Reset counter - } - } - } - } - - for (i = 0; i < PART_LEN1; i++) - { - tmp32 = WEBRTC_SPL_RSHIFT_W32(aecm->noiseEst[i], shiftFromNearToNoise); - if (tmp32 > 32767) - { - tmp32 = 32767; - aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); - } - noiseRShift16[i] = (WebRtc_Word16)tmp32; - - tmp16 = ONE_Q14 - lambda[i]; - noiseRShift16[i] - = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16, noiseRShift16[i], 14); - } - - // Generate a uniform random array on [0 2^15-1]. - WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); - - // Generate noise according to estimated energy. - uReal[0] = 0; // Reject LF noise. - uImag[0] = 0; - for (i = 1; i < PART_LEN1; i++) - { - // Get a random index for the cos and sin tables over [0 359]. - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(359, randW16[i - 1], 15); - - // Tables are in Q13. 
- uReal[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(noiseRShift16[i], - WebRtcSpl_kCosTable[tmp16], 13); - uImag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(-noiseRShift16[i], - WebRtcSpl_kSinTable[tmp16], 13); - } - uImag[PART_LEN] = 0; - -#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT) - for (i = 0; i < PART_LEN1; i++) - { - out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]); - out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]); - } -#else - for (i = 0; i < PART_LEN1 -1; ) - { - out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]); - out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]); - i++; - - out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]); - out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]); - i++; - } - out[i].real = WEBRTC_SPL_ADD_SAT_W16(out[i].real, uReal[i]); - out[i].imag = WEBRTC_SPL_ADD_SAT_W16(out[i].imag, uImag[i]); -#endif -} - -void WebRtcAecm_BufferFarFrame(AecmCore_t* const aecm, - const WebRtc_Word16* const farend, - const int farLen) -{ +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, + const int farLen) { int writeLen = farLen, writePos = 0; // Check if the write position must be wrapped @@ -1690,20 +1182,21 @@ void WebRtcAecm_BufferFarFrame(AecmCore_t* const aecm, // Write to remaining buffer space before wrapping writeLen = FAR_BUF_LEN - aecm->farBufWritePos; memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, - sizeof(WebRtc_Word16) * writeLen); + sizeof(int16_t) * writeLen); aecm->farBufWritePos = 0; writePos = writeLen; writeLen = farLen - writeLen; } memcpy(aecm->farBuf + aecm->farBufWritePos, farend + writePos, - sizeof(WebRtc_Word16) * writeLen); + sizeof(int16_t) * writeLen); aecm->farBufWritePos += writeLen; } -void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const farend, - const int farLen, const int knownDelay) -{ +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay) { int readLen = farLen; int readPos = 0; int delayChange = knownDelay - aecm->lastKnownDelay; @@ -1729,204 +1222,12 @@ void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const far // Read from remaining buffer space before wrapping readLen = FAR_BUF_LEN - aecm->farBufReadPos; memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, - sizeof(WebRtc_Word16) * readLen); + sizeof(int16_t) * readLen); aecm->farBufReadPos = 0; readPos = readLen; readLen = farLen - readLen; } memcpy(farend + readPos, aecm->farBuf + aecm->farBufReadPos, - sizeof(WebRtc_Word16) * readLen); + sizeof(int16_t) * readLen); aecm->farBufReadPos += readLen; } - -#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) - -void WebRtcAecm_WindowAndFFT(WebRtc_Word16* fft, - const WebRtc_Word16* time_signal, - complex16_t* freq_signal, - int time_signal_scaling) -{ - int i, j; - - memset(fft, 0, sizeof(WebRtc_Word16) * PART_LEN4); - // FFT of signal - for (i = 0, j = 0; i < PART_LEN; i++, j += 2) - { - // Window time domain signal and insert into real part of - // transformation array |fft| - fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - (time_signal[i] << time_signal_scaling), - WebRtcAecm_kSqrtHanning[i], - 14); - fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - (time_signal[i + PART_LEN] << time_signal_scaling), - WebRtcAecm_kSqrtHanning[PART_LEN - i], - 14); - // Inserting zeros in imaginary parts not necessary since we - // initialized the array 
with all zeros - } - - WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); - WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); - - // Take only the first PART_LEN2 samples - for (i = 0, j = 0; j < PART_LEN2; i += 1, j += 2) - { - freq_signal[i].real = fft[j]; - - // The imaginary part has to switch sign - freq_signal[i].imag = - fft[j+1]; - } -} - -void WebRtcAecm_InverseFFTAndWindow(AecmCore_t* aecm, - WebRtc_Word16* fft, - complex16_t* efw, - WebRtc_Word16* output, - const WebRtc_Word16* nearendClean) -{ - int i, j, outCFFT; - WebRtc_Word32 tmp32no1; - - // Synthesis - for (i = 1; i < PART_LEN; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i, 1); - fft[j] = efw[i].real; - - // mirrored data, even - fft[PART_LEN4 - j] = efw[i].real; - fft[j + 1] = -efw[i].imag; - - //mirrored data, odd - fft[PART_LEN4 - (j - 1)] = efw[i].imag; - } - fft[0] = efw[0].real; - fft[1] = -efw[0].imag; - - fft[PART_LEN2] = efw[PART_LEN].real; - fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; - - // inverse FFT, result should be scaled with outCFFT - WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); - outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); - - //take only the real values and scale with outCFFT - for (i = 0; i < PART_LEN2; i++) - { - j = WEBRTC_SPL_LSHIFT_W32(i, 1); - fft[i] = fft[j]; - } - - for (i = 0; i < PART_LEN; i++) - { - fft[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - fft[i], - WebRtcAecm_kSqrtHanning[i], - 14); - tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)fft[i], - outCFFT - aecm->dfaCleanQDomain); - fft[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, - tmp32no1 + aecm->outBuf[i], - WEBRTC_SPL_WORD16_MIN); - output[i] = fft[i]; - - tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT( - fft[PART_LEN + i], - WebRtcAecm_kSqrtHanning[PART_LEN - i], - 14); - tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, - outCFFT - aecm->dfaCleanQDomain); - aecm->outBuf[i] = (WebRtc_Word16)WEBRTC_SPL_SAT( - WEBRTC_SPL_WORD16_MAX, - tmp32no1, - WEBRTC_SPL_WORD16_MIN); - } - -#ifdef ARM_WINM_LOG_ - // measure tick end - QueryPerformanceCounter((LARGE_INTEGER*)&end); - diff__ = ((end - start) * 1000) / (freq/1000); - milliseconds = (unsigned int)(diff__ & 0xffffffff); - WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); -#endif - - // Copy the current block to the old position (aecm->outBuf is shifted elsewhere) - memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN); - memcpy(aecm->dBufNoisy, aecm->dBufNoisy + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN); - if (nearendClean != NULL) - { - memcpy(aecm->dBufClean, aecm->dBufClean + PART_LEN, sizeof(WebRtc_Word16) * PART_LEN); - } -} - -void WebRtcAecm_CalcLinearEnergies(AecmCore_t* aecm, - const WebRtc_UWord16* far_spectrum, - WebRtc_Word32* echo_est, - WebRtc_UWord32* far_energy, - WebRtc_UWord32* echo_energy_adapt, - WebRtc_UWord32* echo_energy_stored) -{ - int i; - - // Get energy for the delayed far end signal and estimated - // echo using both stored and adapted channels. - for (i = 0; i < PART_LEN1; i++) - { - echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - far_spectrum[i]); - (*far_energy) += (WebRtc_UWord32)(far_spectrum[i]); - (*echo_energy_adapt) += WEBRTC_SPL_UMUL_16_16(aecm->channelAdapt16[i], - far_spectrum[i]); - (*echo_energy_stored) += (WebRtc_UWord32)echo_est[i]; - } -} - -void WebRtcAecm_StoreAdaptiveChannel(AecmCore_t* aecm, - const WebRtc_UWord16* far_spectrum, - WebRtc_Word32* echo_est) -{ - int i; - - // During startup we store the channel every block. 
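
The windowing pair above is the usual sqrt-Hanning analysis/synthesis scheme with 50% overlap: each block is windowed before the FFT, windowed again after the inverse FFT, and the second half is saved in outBuf to be added to the next block. A float sketch of the synthesis half (N stands in for PART_LEN; win mirrors the 65-entry WebRtcAecm_kSqrtHanning table, here in floating point):

    #define N 64  // PART_LEN.

    // Windowed overlap-add as done by InverseFFTAndWindow: emit the first
    // half of the 2*N IFFT output plus the tail saved from the previous
    // block, and keep the newly windowed tail for the next call. win has
    // N + 1 entries; the tail uses it mirrored, win[N - i].
    static void OverlapAdd(const float ifft_out[2 * N], const float win[N + 1],
                           float out[N], float tail[N]) {
      for (int i = 0; i < N; ++i) {
        out[i] = ifft_out[i] * win[i] + tail[i];
        tail[i] = ifft_out[N + i] * win[N - i];
      }
    }
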
- memcpy(aecm->channelStored, aecm->channelAdapt16, sizeof(WebRtc_Word16) * PART_LEN1); - // Recalculate echo estimate - for (i = 0; i < PART_LEN; i += 4) - { - echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - far_spectrum[i]); - echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], - far_spectrum[i + 1]); - echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], - far_spectrum[i + 2]); - echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], - far_spectrum[i + 3]); - } - echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], - far_spectrum[i]); -} - -void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm) -{ - int i; - - // The stored channel has a significantly lower MSE than the adaptive one for - // two consecutive calculations. Reset the adaptive channel. - memcpy(aecm->channelAdapt16, aecm->channelStored, - sizeof(WebRtc_Word16) * PART_LEN1); - // Restore the W32 channel - for (i = 0; i < PART_LEN; i += 4) - { - aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( - (WebRtc_Word32)aecm->channelStored[i], 16); - aecm->channelAdapt32[i + 1] = WEBRTC_SPL_LSHIFT_W32( - (WebRtc_Word32)aecm->channelStored[i + 1], 16); - aecm->channelAdapt32[i + 2] = WEBRTC_SPL_LSHIFT_W32( - (WebRtc_Word32)aecm->channelStored[i + 2], 16); - aecm->channelAdapt32[i + 3] = WEBRTC_SPL_LSHIFT_W32( - (WebRtc_Word32)aecm->channelStored[i + 3], 16); - } - aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)aecm->channelStored[i], 16); -} - -#endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)) diff --git a/webrtc/modules/audio_processing/aecm/aecm_core.h b/webrtc/modules/audio_processing/aecm/aecm_core.h index 0dfdb04..b52bb62 100644 --- a/webrtc/modules/audio_processing/aecm/aecm_core.h +++ b/webrtc/modules/audio_processing/aecm/aecm_core.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,217 +8,144 @@ * be found in the AUTHORS file in the root of the source tree. */ -// Performs echo control (suppression) with fft routines in fixed-point +// Performs echo control (suppression) with fft routines in fixed-point. 
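
WebRtcAecm_ResetAdaptiveChannel above restores the 32-bit adaptive channel from the 16-bit stored one with a plain left shift: the W32 channel lives in a Q-domain exactly 16 bits finer (RESOLUTION_CHANNEL32 = 28 vs RESOLUTION_CHANNEL16 = 12), so the `<< 16` changes the representation, not the value. In miniature:

    #include <stdint.h>

    // Rebuild the Q28 adaptive channel from the Q12 stored channel; the
    // shift only moves the binary point (illustrative helper).
    static void RestoreWideChannel(const int16_t stored[], int32_t adapt32[],
                                   int len) {
      for (int i = 0; i < len; ++i) {
        adapt32[i] = (int32_t)stored[i] << 16;
      }
    }

The `#if !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))` guard that used to select these C versions at compile time is retired by this patch in favor of the runtime function pointers declared in the reworked header below.
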
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_ -#define AECM_DYNAMIC_Q // turn on/off dynamic Q-domain -//#define AECM_WITH_ABS_APPROX -//#define AECM_SHORT // for 32 sample partition length (otherwise 64) - -#include "typedefs.h" -#include "signal_processing_library.h" - -// Algorithm parameters - -#define FRAME_LEN 80 // Total frame length, 10 ms -#ifdef AECM_SHORT - -#define PART_LEN 32 // Length of partition -#define PART_LEN_SHIFT 6 // Length of (PART_LEN * 2) in base 2 - -#else - -#define PART_LEN 64 // Length of partition -#define PART_LEN_SHIFT 7 // Length of (PART_LEN * 2) in base 2 +#include "webrtc/common_audio/ring_buffer.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/aecm/aecm_defines.h" +#include "webrtc/typedefs.h" +#ifdef _MSC_VER // visual c++ +#define ALIGN8_BEG __declspec(align(8)) +#define ALIGN8_END +#else // gcc or icc +#define ALIGN8_BEG +#define ALIGN8_END __attribute__((aligned(8))) #endif -#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients -#define PART_LEN2 (PART_LEN << 1) // Length of partition * 2 -#define PART_LEN4 (PART_LEN << 2) // Length of partition * 4 -#define FAR_BUF_LEN PART_LEN4 // Length of buffers -#define MAX_DELAY 100 - -// Counter parameters -#ifdef AECM_SHORT - -#define CONV_LEN 1024 // Convergence length used at startup -#else - -#define CONV_LEN 512 // Convergence length used at startup -#endif - -#define CONV_LEN2 (CONV_LEN << 1) // Convergence length * 2 used at startup -// Energy parameters -#define MAX_BUF_LEN 64 // History length of energy signals - -#define FAR_ENERGY_MIN 1025 // Lowest Far energy level: At least 2 in energy -#define FAR_ENERGY_DIFF 929 // Allowed difference between max and min - -#define ENERGY_DEV_OFFSET 0 // The energy error offset in Q8 -#define ENERGY_DEV_TOL 400 // The energy estimation tolerance in Q8 -#define FAR_ENERGY_VAD_REGION 230 // Far VAD tolerance region -// Stepsize parameters -#define MU_MIN 10 // Min stepsize 2^-MU_MIN (far end energy dependent) -#define MU_MAX 1 // Max stepsize 2^-MU_MAX (far end energy dependent) -#define MU_DIFF 9 // MU_MIN - MU_MAX -// Channel parameters -#define MIN_MSE_COUNT 20 // Min number of consecutive blocks with enough far end - // energy to compare channel estimates -#define MIN_MSE_DIFF 29 // The ratio between adapted and stored channel to - // accept a new storage (0.8 in Q-MSE_RESOLUTION) -#define MSE_RESOLUTION 5 // MSE parameter resolution -#define RESOLUTION_CHANNEL16 12 // W16 Channel in Q-RESOLUTION_CHANNEL16 -#define RESOLUTION_CHANNEL32 28 // W32 Channel in Q-RESOLUTION_CHANNEL -#define CHANNEL_VAD 16 // Minimum energy in frequency band to update channel -// Suppression gain parameters: SUPGAIN_ parameters in Q-(RESOLUTION_SUPGAIN) -#define RESOLUTION_SUPGAIN 8 // Channel in Q-(RESOLUTION_SUPGAIN) -#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) // Default suppression gain -#define SUPGAIN_ERROR_PARAM_A 3072 // Estimation error parameter (Maximum gain) (8 in Q8) -#define SUPGAIN_ERROR_PARAM_B 1536 // Estimation error parameter (Gain before going down) -#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT // Estimation error parameter - // (Should be the same as Default) (1 in Q8) -#define SUPGAIN_EPC_DT 200 // = SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL -// Defines for 
"check delay estimation" -#define CORR_WIDTH 31 // Number of samples to correlate over. -#define CORR_MAX 16 // Maximum correlation offset -#define CORR_MAX_BUF 63 -#define CORR_DEV 4 -#define CORR_MAX_LEVEL 20 -#define CORR_MAX_LOW 4 -#define CORR_BUF_LEN (CORR_MAX << 1) + 1 -// Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN - -#define ONE_Q14 (1 << 14) - -// NLP defines -#define NLP_COMP_LOW 3277 // 0.2 in Q14 -#define NLP_COMP_HIGH ONE_Q14 // 1 in Q14 - -extern const WebRtc_Word16 WebRtcAecm_kSqrtHanning[]; - typedef struct { - WebRtc_Word16 real; - WebRtc_Word16 imag; -} complex16_t; + int16_t real; + int16_t imag; +} ComplexInt16; -typedef struct -{ +typedef struct { int farBufWritePos; int farBufReadPos; int knownDelay; int lastKnownDelay; - int firstVAD; // Parameter to control poorly initialized channels + int firstVAD; // Parameter to control poorly initialized channels - void *farFrameBuf; - void *nearNoisyFrameBuf; - void *nearCleanFrameBuf; - void *outFrameBuf; + RingBuffer* farFrameBuf; + RingBuffer* nearNoisyFrameBuf; + RingBuffer* nearCleanFrameBuf; + RingBuffer* outFrameBuf; - WebRtc_Word16 farBuf[FAR_BUF_LEN]; + int16_t farBuf[FAR_BUF_LEN]; - WebRtc_Word16 mult; - WebRtc_UWord32 seed; + int16_t mult; + uint32_t seed; // Delay estimation variables + void* delay_estimator_farend; void* delay_estimator; - WebRtc_UWord16 currentDelay; + uint16_t currentDelay; + // Far end history variables + // TODO(bjornv): Replace |far_history| with ring_buffer. + uint16_t far_history[PART_LEN1 * MAX_DELAY]; + int far_history_pos; + int far_q_domains[MAX_DELAY]; - WebRtc_Word16 nlpFlag; - WebRtc_Word16 fixedDelay; + int16_t nlpFlag; + int16_t fixedDelay; - WebRtc_UWord32 totCount; + uint32_t totCount; - WebRtc_Word16 dfaCleanQDomain; - WebRtc_Word16 dfaCleanQDomainOld; - WebRtc_Word16 dfaNoisyQDomain; - WebRtc_Word16 dfaNoisyQDomainOld; + int16_t dfaCleanQDomain; + int16_t dfaCleanQDomainOld; + int16_t dfaNoisyQDomain; + int16_t dfaNoisyQDomainOld; - WebRtc_Word16 nearLogEnergy[MAX_BUF_LEN]; - WebRtc_Word16 farLogEnergy; - WebRtc_Word16 echoAdaptLogEnergy[MAX_BUF_LEN]; - WebRtc_Word16 echoStoredLogEnergy[MAX_BUF_LEN]; + int16_t nearLogEnergy[MAX_BUF_LEN]; + int16_t farLogEnergy; + int16_t echoAdaptLogEnergy[MAX_BUF_LEN]; + int16_t echoStoredLogEnergy[MAX_BUF_LEN]; - // The extra 16 or 32 bytes in the following buffers are for alignment based Neon code. - // It's designed this way since the current GCC compiler can't align a buffer in 16 or 32 - // byte boundaries properly. - WebRtc_Word16 channelStored_buf[PART_LEN1 + 8]; - WebRtc_Word16 channelAdapt16_buf[PART_LEN1 + 8]; - WebRtc_Word32 channelAdapt32_buf[PART_LEN1 + 8]; - WebRtc_Word16 xBuf_buf[PART_LEN2 + 16]; // farend - WebRtc_Word16 dBufClean_buf[PART_LEN2 + 16]; // nearend - WebRtc_Word16 dBufNoisy_buf[PART_LEN2 + 16]; // nearend - WebRtc_Word16 outBuf_buf[PART_LEN + 8]; + // The extra 16 or 32 bytes in the following buffers are for alignment based + // Neon code. + // It's designed this way since the current GCC compiler can't align a + // buffer in 16 or 32 byte boundaries properly. 
+ int16_t channelStored_buf[PART_LEN1 + 8]; + int16_t channelAdapt16_buf[PART_LEN1 + 8]; + int32_t channelAdapt32_buf[PART_LEN1 + 8]; + int16_t xBuf_buf[PART_LEN2 + 16]; // farend + int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend + int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend + int16_t outBuf_buf[PART_LEN + 8]; // Pointers to the above buffers - WebRtc_Word16 *channelStored; - WebRtc_Word16 *channelAdapt16; - WebRtc_Word32 *channelAdapt32; - WebRtc_Word16 *xBuf; - WebRtc_Word16 *dBufClean; - WebRtc_Word16 *dBufNoisy; - WebRtc_Word16 *outBuf; + int16_t *channelStored; + int16_t *channelAdapt16; + int32_t *channelAdapt32; + int16_t *xBuf; + int16_t *dBufClean; + int16_t *dBufNoisy; + int16_t *outBuf; - WebRtc_Word32 echoFilt[PART_LEN1]; - WebRtc_Word16 nearFilt[PART_LEN1]; - WebRtc_Word32 noiseEst[PART_LEN1]; + int32_t echoFilt[PART_LEN1]; + int16_t nearFilt[PART_LEN1]; + int32_t noiseEst[PART_LEN1]; int noiseEstTooLowCtr[PART_LEN1]; int noiseEstTooHighCtr[PART_LEN1]; - WebRtc_Word16 noiseEstCtr; - WebRtc_Word16 cngMode; + int16_t noiseEstCtr; + int16_t cngMode; - WebRtc_Word32 mseAdaptOld; - WebRtc_Word32 mseStoredOld; - WebRtc_Word32 mseThreshold; + int32_t mseAdaptOld; + int32_t mseStoredOld; + int32_t mseThreshold; - WebRtc_Word16 farEnergyMin; - WebRtc_Word16 farEnergyMax; - WebRtc_Word16 farEnergyMaxMin; - WebRtc_Word16 farEnergyVAD; - WebRtc_Word16 farEnergyMSE; + int16_t farEnergyMin; + int16_t farEnergyMax; + int16_t farEnergyMaxMin; + int16_t farEnergyVAD; + int16_t farEnergyMSE; int currentVADValue; - WebRtc_Word16 vadUpdateCount; + int16_t vadUpdateCount; - WebRtc_Word16 startupState; - WebRtc_Word16 mseChannelCount; - WebRtc_Word16 supGain; - WebRtc_Word16 supGainOld; + int16_t startupState; + int16_t mseChannelCount; + int16_t supGain; + int16_t supGainOld; - WebRtc_Word16 supGainErrParamA; - WebRtc_Word16 supGainErrParamD; - WebRtc_Word16 supGainErrParamDiffAB; - WebRtc_Word16 supGainErrParamDiffBD; + int16_t supGainErrParamA; + int16_t supGainErrParamD; + int16_t supGainErrParamDiffAB; + int16_t supGainErrParamDiffBD; + + struct RealFFT* real_fft; #ifdef AEC_DEBUG FILE *farFile; FILE *nearFile; FILE *outFile; #endif -} AecmCore_t; +} AecmCore; -/////////////////////////////////////////////////////////////////////////////////////////////// -// WebRtcAecm_CreateCore(...) +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CreateCore() // // Allocates the memory needed by the AECM. The memory needs to be // initialized separately using the WebRtcAecm_InitCore() function. -// -// Input: -// - aecm : Instance that should be created -// -// Output: -// - aecm : Created instance -// -// Return value : 0 - Ok -// -1 - Error -// -int WebRtcAecm_CreateCore(AecmCore_t **aecm); +// Returns a pointer to the instance and a nullptr at failure. +AecmCore* WebRtcAecm_CreateCore(); -/////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// // WebRtcAecm_InitCore(...) // -// This function initializes the AECM instant created with WebRtcAecm_CreateCore(...) 
+// This function initializes the AECM instance created with
+// WebRtcAecm_CreateCore()
 // Input:
 //      - aecm          : Pointer to the AECM instance
 //      - samplingFreq  : Sampling Frequency
@@ -229,57 +156,58 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecm);
 // Return value         :  0 - Ok
 //                        -1 - Error
 //
-int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq);
+int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
 
-///////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
 // WebRtcAecm_FreeCore(...)
 //
 // This function releases the memory allocated by WebRtcAecm_CreateCore()
 // Input:
 //      - aecm          : Pointer to the AECM instance
 //
-// Return value         :  0 - Ok
-//                        -1 - Error
-//                        11001-11016: Error
-//
-int WebRtcAecm_FreeCore(AecmCore_t *aecm);
+void WebRtcAecm_FreeCore(AecmCore* aecm);
 
-int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag);
+int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
 
-///////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
 // WebRtcAecm_InitEchoPathCore(...)
 //
 // This function resets the echo channel adaptation with the specified channel.
 // Input:
 //      - aecm          : Pointer to the AECM instance
-//      - echo_path     : Pointer to the data that should initialize the echo path
+//      - echo_path     : Pointer to the data that should initialize the echo
+//                        path
 //
 // Output:
 //      - aecm          : Initialized instance
 //
-void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const WebRtc_Word16* echo_path);
+void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
 
-///////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
 // WebRtcAecm_ProcessFrame(...)
 //
-// This function processes frames and sends blocks to WebRtcAecm_ProcessBlock(...)
+// This function processes frames and sends blocks to
+// WebRtcAecm_ProcessBlock(...)
 //
 // Inputs:
 //      - aecm          : Pointer to the AECM instance
 //      - farend        : In buffer containing one frame of echo signal
-//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal without NS
-//      - nearendClean  : In buffer containing one frame of nearend+echo signal with NS
+//      - nearendNoisy  : In buffer containing one frame of nearend+echo signal
+//                        without NS
+//      - nearendClean  : In buffer containing one frame of nearend+echo signal
+//                        with NS
 //
 // Output:
 //      - out           : Out buffer, one frame of nearend signal          :
 //
 //
-int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, const WebRtc_Word16 * farend,
-                            const WebRtc_Word16 * nearendNoisy,
-                            const WebRtc_Word16 * nearendClean,
-                            WebRtc_Word16 * out);
+int WebRtcAecm_ProcessFrame(AecmCore* aecm,
+                            const int16_t* farend,
+                            const int16_t* nearendNoisy,
+                            const int16_t* nearendClean,
+                            int16_t* out);
 
-///////////////////////////////////////////////////////////////////////////////////////////////
+////////////////////////////////////////////////////////////////////////////////
 // WebRtcAecm_ProcessBlock(...)
// // This function is called for every block within one frame @@ -288,19 +216,22 @@ int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, const WebRtc_Word16 * farend, // Inputs: // - aecm : Pointer to the AECM instance // - farend : In buffer containing one block of echo signal -// - nearendNoisy : In buffer containing one frame of nearend+echo signal without NS -// - nearendClean : In buffer containing one frame of nearend+echo signal with NS +// - nearendNoisy : In buffer containing one frame of nearend+echo signal +// without NS +// - nearendClean : In buffer containing one frame of nearend+echo signal +// with NS // // Output: // - out : Out buffer, one block of nearend signal : // // -int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, const WebRtc_Word16 * farend, - const WebRtc_Word16 * nearendNoisy, - const WebRtc_Word16 * noisyClean, - WebRtc_Word16 * out); +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* noisyClean, + int16_t* out); -/////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// // WebRtcAecm_BufferFarFrame() // // Inserts a frame of data into farend buffer. @@ -310,10 +241,11 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, const WebRtc_Word16 * farend, // - farend : In buffer containing one frame of farend signal // - farLen : Length of frame // -void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend, +void WebRtcAecm_BufferFarFrame(AecmCore* const aecm, + const int16_t* const farend, const int farLen); -/////////////////////////////////////////////////////////////////////////////////////////////// +//////////////////////////////////////////////////////////////////////////////// // WebRtcAecm_FetchFarFrame() // // Read the farend buffer to account for known delay @@ -324,35 +256,179 @@ void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * co // - farLen : Length of frame // - knownDelay : known delay // -void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const farend, - const int farLen, const int knownDelay); +void WebRtcAecm_FetchFarFrame(AecmCore* const aecm, + int16_t* const farend, + const int farLen, + const int knownDelay); -/////////////////////////////////////////////////////////////////////////////////////////////// -// Some internal functions shared by ARM NEON and generic C code: +// All the functions below are intended to be private + +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateFarHistory() // +// Moves the pointer to the next entry and inserts |far_spectrum| and +// corresponding Q-domain in its buffer. +// +// Inputs: +// - self : Pointer to the delay estimation instance +// - far_spectrum : Pointer to the far end spectrum +// - far_q : Q-domain of far end spectrum +// +void WebRtcAecm_UpdateFarHistory(AecmCore* self, + uint16_t* far_spectrum, + int far_q); -void WebRtcAecm_CalcLinearEnergies(AecmCore_t* aecm, - const WebRtc_UWord16* far_spectrum, - WebRtc_Word32* echoEst, - WebRtc_UWord32* far_energy, - WebRtc_UWord32* echo_energy_adapt, - WebRtc_UWord32* echo_energy_stored); +//////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_AlignedFarend() +// +// Returns a pointer to the far end spectrum aligned to current near end +// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) 
should have been +// called before AlignedFarend(...). Otherwise, you get the pointer to the +// previous frame. The memory is only valid until the next call of +// WebRtc_DelayEstimatorProcessFix(...). +// +// Inputs: +// - self : Pointer to the AECM instance. +// - delay : Current delay estimate. +// +// Output: +// - far_q : The Q-domain of the aligned far end spectrum +// +// Return value: +// - far_spectrum : Pointer to the aligned far end spectrum +// NULL - Error +// +const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay); -void WebRtcAecm_StoreAdaptiveChannel(AecmCore_t* aecm, - const WebRtc_UWord16* far_spectrum, - WebRtc_Word32* echo_est); +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcSuppressionGain() +// +// This function calculates the suppression gain that is used in the +// Wiener filter. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - supGain : Suppression gain with which to scale the noise +// level (Q14). +// +int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm); -void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t *aecm); +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcEnergies() +// +// This function calculates the log of energies for nearend, farend and +// estimated echoes. There is also an update of energy decision levels, +// i.e. internal VAD. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Pointer to farend spectrum. +// - far_q : Q-domain of farend spectrum. +// - nearEner : Near end energy for current block in +// Q(aecm->dfaQDomain). +// +// Output: +// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16). +// +void WebRtcAecm_CalcEnergies(AecmCore* aecm, + const uint16_t* far_spectrum, + const int16_t far_q, + const uint32_t nearEner, + int32_t* echoEst); -void WebRtcAecm_WindowAndFFT(WebRtc_Word16* fft, - const WebRtc_Word16* time_signal, - complex16_t* freq_signal, - int time_signal_scaling); +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_CalcStepSize() +// +// This function calculates the step size used in channel estimation +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// +// Return value: +// - mu : Stepsize in log2(), i.e. number of shifts. +// +int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm); -void WebRtcAecm_InverseFFTAndWindow(AecmCore_t* aecm, - WebRtc_Word16* fft, - complex16_t* efw, - WebRtc_Word16* output, - const WebRtc_Word16* nearendClean); +/////////////////////////////////////////////////////////////////////////////// +// WebRtcAecm_UpdateChannel(...) +// +// This function performs channel estimation. +// NLMS and decision on channel storage. +// +// Inputs: +// - aecm : Pointer to the AECM instance. +// - far_spectrum : Absolute value of the farend signal in Q(far_q) +// - far_q : Q-domain of the farend signal +// - dfa : Absolute value of the nearend signal +// (Q[aecm->dfaQDomain]) +// - mu : NLMS step size. +// Input/Output: +// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16). 
+//
+void WebRtcAecm_UpdateChannel(AecmCore* aecm,
+                              const uint16_t* far_spectrum,
+                              const int16_t far_q,
+                              const uint16_t* const dfa,
+                              const int16_t mu,
+                              int32_t* echoEst);
+
+extern const int16_t WebRtcAecm_kCosTable[];
+extern const int16_t WebRtcAecm_kSinTable[];
+
+///////////////////////////////////////////////////////////////////////////////
+// Some function pointers, for internal functions shared by ARM NEON and
+// generic C code.
+//
+typedef void (*CalcLinearEnergies)(AecmCore* aecm,
+                                   const uint16_t* far_spectrum,
+                                   int32_t* echoEst,
+                                   uint32_t* far_energy,
+                                   uint32_t* echo_energy_adapt,
+                                   uint32_t* echo_energy_stored);
+extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
+
+typedef void (*StoreAdaptiveChannel)(AecmCore* aecm,
+                                     const uint16_t* far_spectrum,
+                                     int32_t* echo_est);
+extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
+
+typedef void (*ResetAdaptiveChannel)(AecmCore* aecm);
+extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
+
+// For the above function pointers, functions for generic platforms are declared
+// and defined as static in file aecm_core.c, while those for ARM Neon platforms
+// are declared below and defined in file aecm_core_neon.c.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
+                                       const uint16_t* far_spectrum,
+                                       int32_t* echo_est,
+                                       uint32_t* far_energy,
+                                       uint32_t* echo_energy_adapt,
+                                       uint32_t* echo_energy_stored);
+
+void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
+                                         const uint16_t* far_spectrum,
+                                         int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm);
+#endif
+
+#if defined(MIPS32_LE)
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
+                                        const uint16_t* far_spectrum,
+                                        int32_t* echo_est,
+                                        uint32_t* far_energy,
+                                        uint32_t* echo_energy_adapt,
+                                        uint32_t* echo_energy_stored);
+#if defined(MIPS_DSP_R1_LE)
+void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
+                                          const uint16_t* far_spectrum,
+                                          int32_t* echo_est);
+
+void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm);
+#endif
+#endif
 
 #endif
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_c.c b/webrtc/modules/audio_processing/aecm/aecm_core_c.c
new file mode 100644
index 0000000..eb2bd91
--- /dev/null
+++ b/webrtc/modules/audio_processing/aecm/aecm_core_c.c
@@ -0,0 +1,771 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+#include <stddef.h>
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
+#include "webrtc/typedefs.h"
+
+// Square root of Hanning window in Q14.
+#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
+// Table is defined in an ARM assembly file.
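
Stepping back to the function-pointer declarations in the header portion above: they let the core pick a platform kernel once at init time instead of guarding every call site. The actual wiring lives in aecm_core.c (not shown in this hunk); the sketch below shows the general pattern, under the assumption that WEBRTC_DETECT_NEON builds probe the CPU through cpu_features_wrapper.h. CalcLinearEnergiesC is a hypothetical stand-in for the static C implementation; the other two pointers follow the same pattern.

    // Illustrative init-time dispatch: default to the portable kernel,
    // then swap in the NEON variant when the hardware supports it.
    static void InitKernels(void) {
      WebRtcAecm_CalcLinearEnergies = CalcLinearEnergiesC;  // Hypothetical name.
    #if defined(WEBRTC_DETECT_NEON)
      if (WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) {
        WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
      }
    #elif defined(WEBRTC_HAS_NEON)
      WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
    #endif
    }
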
+extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END; +#else +static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 +}; +#endif + +#ifdef AECM_WITH_ABS_APPROX +//Q15 alpha = 0.99439986968132 const Factor for magnitude approximation +static const uint16_t kAlpha1 = 32584; +//Q15 beta = 0.12967166976970 const Factor for magnitude approximation +static const uint16_t kBeta1 = 4249; +//Q15 alpha = 0.94234827210087 const Factor for magnitude approximation +static const uint16_t kAlpha2 = 30879; +//Q15 beta = 0.33787806009150 const Factor for magnitude approximation +static const uint16_t kBeta2 = 11072; +//Q15 alpha = 0.82247698684306 const Factor for magnitude approximation +static const uint16_t kAlpha3 = 26951; +//Q15 beta = 0.57762063060713 const Factor for magnitude approximation +static const uint16_t kBeta3 = 18927; +#endif + +static const int16_t kNoiseEstQDomain = 15; +static const int16_t kNoiseEstIncCount = 5; + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda); + +static void WindowAndFFT(AecmCore* aecm, + int16_t* fft, + const int16_t* time_signal, + ComplexInt16* freq_signal, + int time_signal_scaling) { + int i = 0; + + // FFT of signal + for (i = 0; i < PART_LEN; i++) { + // Window time domain signal and insert into real part of + // transformation array |fft| + int16_t scaled_time_signal = time_signal[i] << time_signal_scaling; + fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14); + scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling; + fft[PART_LEN + i] = (int16_t)(( + scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14); + } + + // Do forward FFT, then take only the first PART_LEN complex samples, + // and change signs of the imaginary parts. + WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal); + for (i = 0; i < PART_LEN; i++) { + freq_signal[i].imag = -freq_signal[i].imag; + } +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, j, outCFFT; + int32_t tmp32no1; + // Reuse |efw| for the inverse FFT output after transferring + // the contents to |fft|. + int16_t* ifft_out = (int16_t*)efw; + + // Synthesis + for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) { + fft[j] = efw[i].real; + fft[j + 1] = -efw[i].imag; + } + fft[0] = efw[0].real; + fft[1] = -efw[0].imag; + + fft[PART_LEN2] = efw[PART_LEN].real; + fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; + + // Inverse FFT. Keep outCFFT to scale the samples in the next block. 
+ outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out); + for (i = 0; i < PART_LEN; i++) { + ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14); + tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i], + outCFFT - aecm->dfaCleanQDomain); + output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1 + aecm->outBuf[i], + WEBRTC_SPL_WORD16_MIN); + + tmp32no1 = (ifft_out[PART_LEN + i] * + WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14; + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, + outCFFT - aecm->dfaCleanQDomain); + aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + tmp32no1, + WEBRTC_SPL_WORD16_MIN); + } + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. +// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + // In fft_buf, +16 for 32-byte alignment. 
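
About the "+16 for 32-byte alignment" note above: the buffer is over-allocated by 16 int16_t (32 bytes) so that a manually rounded pointer always stays inside it. The idiom, isolated as a general-purpose snippet:

    #include <assert.h>
    #include <stdint.h>

    // Round a pointer up to the next 32-byte boundary: bump by 31, then
    // mask off the low five address bits. Requires at least 31 bytes of
    // slack after p, which the +16 int16_t over-allocation provides.
    static int16_t* Align32(int16_t* p) {
      int16_t* aligned = (int16_t*)(((uintptr_t)p + 31) & ~(uintptr_t)31);
      assert(((uintptr_t)aligned & 31) == 0);
      return aligned;
    }
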
+ int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#ifndef WEBRTC_ARCH_ARM_V7 + int16_t tmp16no2; +#endif +#ifdef AECM_WITH_ABS_APPROX + int16_t max_value = 0; + int16_t min_value = 0; + uint16_t alpha = 0; + uint16_t beta = 0; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, calculate the magnitude for + // all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + + for (i = 1; i < PART_LEN; i++) + { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + +#ifdef AECM_WITH_ABS_APPROX + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + + if(tmp16no1 > tmp16no2) + { + max_value = tmp16no1; + min_value = tmp16no2; + } else + { + max_value = tmp16no2; + min_value = tmp16no1; + } + + // Magnitude in Q(-6) + if ((max_value >> 2) > min_value) + { + alpha = kAlpha1; + beta = kBeta1; + } else if ((max_value >> 1) > min_value) + { + alpha = kAlpha2; + beta = kBeta2; + } else + { + alpha = kAlpha3; + beta = kBeta3; + } + tmp16no1 = (int16_t)((max_value * alpha) >> 15); + tmp16no2 = (int16_t)((min_value * beta) >> 15); + freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2; +#else +#ifdef WEBRTC_ARCH_ARM_V7 + __asm __volatile( + "smulbb %[tmp32no1], %[real], %[real]\n\t" + "smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t" + :[tmp32no1]"+&r"(tmp32no1), + [tmp32no2]"=r"(tmp32no2) + :[real]"r"(freq_signal[i].real), + [imag]"r"(freq_signal[i].imag) + ); +#else + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); +#endif // WEBRTC_ARCH_ARM_V7 + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; +#endif // AECM_WITH_ABS_APPROX + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + // TODO(kma): define fft with ComplexInt16. + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. 
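
TimeToFrequencyDomain's AECM_WITH_ABS_APPROX branch estimates the bin magnitude as alpha * max(|re|, |im|) + beta * min(|re|, |im|), picking one of three (alpha, beta) pairs by how elongated the complex value is. The same selection logic in floating point, with the Q15 constants above divided by 32768:

    #include <math.h>

    // Alpha-max-plus-beta-min magnitude estimate; float rendering of the
    // fixed-point branch above (thresholds mirror the >> 2 and >> 1 tests).
    static double ApproxMagnitude(double re, double im) {
      double ma = fabs(re), mi = fabs(im);
      if (ma < mi) { double t = ma; ma = mi; mi = t; }        // ma >= mi.
      if (ma / 4.0 > mi) return 0.99440 * ma + 0.12967 * mi;  // kAlpha1/kBeta1.
      if (ma / 2.0 > mi) return 0.94235 * ma + 0.33788 * mi;  // kAlpha2/kBeta2.
      return 0.82248 * ma + 0.57762 * mi;                     // kAlpha3/kBeta3.
    }
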
+ int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31); + ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int16_t nlpGain = ONE_Q14; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) + { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) + { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + + if (nearendClean == NULL) + { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else + { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, + xfa, + PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) + { + return -1; + } + else if (delay == -2) + { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. + delay = 0; + } + + if (aecm->fixedDelay >= 0) + { + // Use fixed delay + delay = aecm->fixedDelay; + } + + // Get aligned far end spectrum + far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); + zerosXBuf = (int16_t) far_q; + if (far_spectrum_ptr == NULL) + { + return -1; + } + + // Calculate log(energy) and update energy threshold levels + WebRtcAecm_CalcEnergies(aecm, + far_spectrum_ptr, + zerosXBuf, + dfaNoisySum, + echoEst32); + + // Calculate stepsize + mu = WebRtcAecm_CalcStepSize(aecm); + + // Update counters + aecm->totCount++; + + // This is the channel estimation algorithm. 
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm,
+                           far_spectrum_ptr,
+                           zerosXBuf,
+                           dfaNoisy,
+                           mu,
+                           echoEst32);
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++)
+  {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16)
+    {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
+      //   aecm->xfaQDomainBuf[diff])
+      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                              (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else
+    {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+                       RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1)
+      {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                                supGain >> tmp16no1);
+      } else
+      {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] << zeros16;
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
+                                          : tmp16no2 >> qDomainDiff;
+    }
+
+    // Wiener filter coefficients, resulting hnl in Q14
+    if (echoEst32Gained == 0)
+    {
+      hnl[i] = ONE_Q14;
+    } else if (aecm->nearFilt[i] == 0)
+    {
+      hnl[i] = 0;
+    } else
+    {
+      // Multiply the suppression gain
+      // Rounding
+      echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
+      tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
+                                   (uint16_t)aecm->nearFilt[i]);
+
+      // Current resolution is
+      // Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
+      // Make sure we are in Q14
+      tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
+      if (tmp32no1 > ONE_Q14)
+      {
+        hnl[i] = 0;
+      } else if (tmp32no1 < 0)
+      {
+        hnl[i] = ONE_Q14;
+      } else
+      {
+        // 1-echoEst/dfa
+        hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
+        if (hnl[i] < 0)
+        {
+          hnl[i] = 0;
+        }
+      }
+    }
+    if (hnl[i])
+    {
+      numPosCoef++;
+    }
+  }
+  // Only in wideband. Prevent the gain in upper band from being larger than
+  // in lower band.
+  if (aecm->mult == 2)
+  {
+    // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
+    // speech distortion in double-talk.
+ for (i = 0; i < PART_LEN1; i++) + { + hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) + { + avgHnl32 += (int32_t)hnl[i]; + } + assert(kMaxPrefBand - kMinPrefBand + 1 > 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) + { + if (hnl[i] > (int16_t)avgHnl32) + { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) + { + for (i = 0; i < PART_LEN1; i++) + { + // Truncate values close to zero and one. + if (hnl[i] > NLP_COMP_HIGH) + { + hnl[i] = ONE_Q14; + } else if (hnl[i] < NLP_COMP_LOW) + { + hnl[i] = 0; + } + + // Remove outliers + if (numPosCoef < 3) + { + nlpGain = 0; + } else + { + nlpGain = ONE_Q14; + } + + // NLP + if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14)) + { + hnl[i] = ONE_Q14; + } else + { + hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14); + } + + // multiply with Wiener coefficients + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + else + { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) + { + efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], 14)); + efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], 14)); + } + } + + if (aecm->cngMode == AecmTrue) + { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16; + int32_t tmp32; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + int16_t noiseRShift16[PART_LEN1]; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift; + + assert(shiftFromNearToNoise >= 0); + assert(shiftFromNearToNoise < 16); + + if (aecm->noiseEstCtr < 100) + { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } else + { + minTrackShift = 9; + } + + // Estimate noise power. + for (i = 0; i < PART_LEN1; i++) + { + // Shift to the noise domain. + tmp32 = (int32_t)dfa[i]; + outLShift32 = tmp32 << shiftFromNearToNoise; + + if (outLShift32 < aecm->noiseEst[i]) + { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (aecm->noiseEst[i] < (1 << minTrackShift)) + { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i]--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } + else + { + aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32) + >> minTrackShift); + } + } else + { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((aecm->noiseEst[i] >> 19) > 0) + { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. 
Scale + // down first and then multiply + aecm->noiseEst[i] >>= 11; + aecm->noiseEst[i] *= 2049; + } + else if ((aecm->noiseEst[i] >> 11) > 0) + { + // Large enough for relative increase + aecm->noiseEst[i] *= 2049; + aecm->noiseEst[i] >>= 11; + } + else + { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) + { + aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1; + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } + } + + for (i = 0; i < PART_LEN1; i++) + { + tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise; + if (tmp32 > 32767) + { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + noiseRShift16[i] = (int16_t)tmp32; + + tmp16 = ONE_Q14 - lambda[i]; + noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14); + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + + // Generate noise according to estimated energy. + uReal[0] = 0; // Reject LF noise. + uImag[0] = 0; + for (i = 1; i < PART_LEN1; i++) + { + // Get a random index for the cos and sin tables over [0 359]. + tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15); + + // Tables are in Q13. + uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >> + 13); + uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >> + 13); + } + uImag[PART_LEN] = 0; + + for (i = 0; i < PART_LEN1; i++) + { + out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]); + out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]); + } +} + diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_mips.c b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c new file mode 100644 index 0000000..3c2343a --- /dev/null +++ b/webrtc/modules/audio_processing/aecm/aecm_core_mips.c @@ -0,0 +1,1566 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
+  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
+  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
+  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
+  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
+  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
+  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
+  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
+  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
+};
+
+static const int16_t kNoiseEstQDomain = 15;
+static const int16_t kNoiseEstIncCount = 5;
+
+static int16_t coefTable[] = {
+  0, 4, 256, 260, 128, 132, 384, 388,
+  64, 68, 320, 324, 192, 196, 448, 452,
+  32, 36, 288, 292, 160, 164, 416, 420,
+  96, 100, 352, 356, 224, 228, 480, 484,
+  16, 20, 272, 276, 144, 148, 400, 404,
+  80, 84, 336, 340, 208, 212, 464, 468,
+  48, 52, 304, 308, 176, 180, 432, 436,
+  112, 116, 368, 372, 240, 244, 496, 500,
+  8, 12, 264, 268, 136, 140, 392, 396,
+  72, 76, 328, 332, 200, 204, 456, 460,
+  40, 44, 296, 300, 168, 172, 424, 428,
+  104, 108, 360, 364, 232, 236, 488, 492,
+  24, 28, 280, 284, 152, 156, 408, 412,
+  88, 92, 344, 348, 216, 220, 472, 476,
+  56, 60, 312, 316, 184, 188, 440, 444,
+  120, 124, 376, 380, 248, 252, 504, 508
+};
+
+static int16_t coefTable_ifft[] = {
+  0, 512, 256, 508, 128, 252, 384, 380,
+  64, 124, 320, 444, 192, 188, 448, 316,
+  32, 60, 288, 476, 160, 220, 416, 348,
+  96, 92, 352, 412, 224, 156, 480, 284,
+  16, 28, 272, 492, 144, 236, 400, 364,
+  80, 108, 336, 428, 208, 172, 464, 300,
+  48, 44, 304, 460, 176, 204, 432, 332,
+  112, 76, 368, 396, 240, 140, 496, 268,
+  8, 12, 264, 500, 136, 244, 392, 372,
+  72, 116, 328, 436, 200, 180, 456, 308,
+  40, 52, 296, 468, 168, 212, 424, 340,
+  104, 84, 360, 404, 232, 148, 488, 276,
+  24, 20, 280, 484, 152, 228, 408, 356,
+  88, 100, 344, 420, 216, 164, 472, 292,
+  56, 36, 312, 452, 184, 196, 440, 324,
+  120, 68, 376, 388, 248, 132, 504, 260
+};
+
+static void ComfortNoise(AecmCore* aecm,
+                         const uint16_t* dfa,
+                         ComplexInt16* out,
+                         const int16_t* lambda);
+
+static void WindowAndFFT(AecmCore* aecm,
+                         int16_t* fft,
+                         const int16_t* time_signal,
+                         ComplexInt16* freq_signal,
+                         int time_signal_scaling) {
+  int i, j;
+  int32_t tmp1, tmp2, tmp3, tmp4;
+  int16_t* pfrfi;
+  ComplexInt16* pfreq_signal;
+  int16_t f_coef, s_coef;
+  int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;
+  int32_t hann, hann1, coefs;
+
+  memset(fft, 0, sizeof(int16_t) * PART_LEN4);
+
+  // FFT of signal
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "addiu %[shift], %[time_signal_scaling], -14 \n\t"
+    "addiu %[i], $zero, 64 \n\t"
+    "addiu %[load_ptr], %[time_signal], 0 \n\t"
+    "addiu %[hann], %[hanning], 0 \n\t"
+    "addiu %[hann1], %[hanning], 128 \n\t"
+    "addiu %[coefs], %[coefTable], 0 \n\t"
+    "bltz %[shift], 2f \n\t"
+    " negu %[shift1], %[shift] \n\t"
+    "1: \n\t"
+    "lh %[tmp1], 0(%[load_ptr]) \n\t"
+    "lh %[tmp2], 0(%[hann]) \n\t"
+    "lh %[tmp3], 128(%[load_ptr]) \n\t"
+    "lh %[tmp4], 0(%[hann1]) \n\t"
+    "addiu %[i], %[i], -1 \n\t"
+    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
+    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
+    "lh %[f_coef], 0(%[coefs]) \n\t"
+    "lh %[s_coef], 2(%[coefs]) \n\t"
+    "addiu %[load_ptr], %[load_ptr], 2 \n\t"
+    "addiu %[hann], %[hann], 2 \n\t"
+    "addiu
%[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "sllv %[tmp1], %[tmp1], %[shift] \n\t" + "sllv %[tmp3], %[tmp3], %[shift] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "b 3f \n\t" + " nop \n\t" + "2: \n\t" + "lh %[tmp1], 0(%[load_ptr]) \n\t" + "lh %[tmp2], 0(%[hann]) \n\t" + "lh %[tmp3], 128(%[load_ptr]) \n\t" + "lh %[tmp4], 0(%[hann1]) \n\t" + "addiu %[i], %[i], -1 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "lh %[f_coef], 0(%[coefs]) \n\t" + "lh %[s_coef], 2(%[coefs]) \n\t" + "addiu %[load_ptr], %[load_ptr], 2 \n\t" + "addiu %[hann], %[hann], 2 \n\t" + "addiu %[hann1], %[hann1], -2 \n\t" + "addu %[store_ptr1], %[fft], %[f_coef] \n\t" + "addu %[store_ptr2], %[fft], %[s_coef] \n\t" + "srav %[tmp1], %[tmp1], %[shift1] \n\t" + "srav %[tmp3], %[tmp3], %[shift1] \n\t" + "sh %[tmp1], 0(%[store_ptr1]) \n\t" + "sh %[tmp3], 0(%[store_ptr2]) \n\t" + "bgtz %[i], 2b \n\t" + " addiu %[coefs], %[coefs], 4 \n\t" + "3: \n\t" + ".set pop \n\t" + : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann), + [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs), + [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef), + [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1), + [store_ptr2] "=&r" (store_ptr2) + : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable), + [time_signal_scaling] "r" (time_signal_scaling), + [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft) + : "memory", "hi", "lo" + ); + + WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); + pfrfi = fft; + pfreq_signal = freq_signal; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[j], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pfrfi]) \n\t" + "lh %[tmp2], 2(%[pfrfi]) \n\t" + "lh %[tmp3], 4(%[pfrfi]) \n\t" + "lh %[tmp4], 6(%[pfrfi]) \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 0(%[pfreq_signal]) \n\t" + "sh %[tmp2], 2(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 4(%[pfreq_signal]) \n\t" + "sh %[tmp4], 6(%[pfreq_signal]) \n\t" + "lh %[tmp1], 8(%[pfrfi]) \n\t" + "lh %[tmp2], 10(%[pfrfi]) \n\t" + "lh %[tmp3], 12(%[pfrfi]) \n\t" + "lh %[tmp4], 14(%[pfrfi]) \n\t" + "addiu %[j], %[j], -8 \n\t" + "subu %[tmp2], $zero, %[tmp2] \n\t" + "sh %[tmp1], 8(%[pfreq_signal]) \n\t" + "sh %[tmp2], 10(%[pfreq_signal]) \n\t" + "subu %[tmp4], $zero, %[tmp4] \n\t" + "sh %[tmp3], 12(%[pfreq_signal]) \n\t" + "sh %[tmp4], 14(%[pfreq_signal]) \n\t" + "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t" + "bgtz %[j], 1b \n\t" + " addiu %[pfrfi], %[pfrfi], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3), + [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal), + [tmp4] "=&r" (tmp4) + : + : "memory" + ); +} + +static void InverseFFTAndWindow(AecmCore* aecm, + int16_t* fft, + ComplexInt16* efw, + int16_t* output, + const int16_t* nearendClean) { + int i, outCFFT; + int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im; + int16_t* pcoefTable_ifft = coefTable_ifft; + int16_t* pfft = fft; + int16_t* ppfft = fft; + ComplexInt16* pefw = efw; + int32_t out_aecm; + int16_t* paecm_buf = aecm->outBuf; + const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning; + const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN]; + 
int16_t* output1 = output; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 0(%[pefw]) \n\t" + "lh %[tmp_im], 2(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 4(%[pefw]) \n\t" + "lh %[tmp_im], 6(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 8(%[pefw]) \n\t" + "lh %[tmp_im], 10(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t" + "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t" + "lh %[tmp_re], 12(%[pefw]) \n\t" + "lh %[tmp_im], 14(%[pefw]) \n\t" + "addu %[pfft], %[fft], %[tmp2] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addu %[pfft], %[fft], %[tmp1] \n\t" + "sh %[tmp_re], 0(%[pfft]) \n\t" + "subu %[tmp_im], $zero, %[tmp_im] \n\t" + "sh %[tmp_im], 2(%[pfft]) \n\t" + "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t" + "addiu %[i], %[i], -4 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pefw], %[pefw], 16 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im), + [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft), + [fft] "+r" (fft) + : + : "memory" + ); + + fft[2] = efw[PART_LEN].real; + fft[3] = -efw[PART_LEN].imag; + + outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + pfft = fft; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 128 \n\t" + "1: \n\t" + "lh %[tmp1], 0(%[ppfft]) \n\t" + "lh %[tmp2], 4(%[ppfft]) \n\t" + "lh %[tmp3], 8(%[ppfft]) \n\t" + "lh %[tmp4], 12(%[ppfft]) \n\t" + "addiu %[i], %[i], -4 \n\t" + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp2], 2(%[pfft]) \n\t" + "sh %[tmp3], 4(%[pfft]) \n\t" + "sh %[tmp4], 6(%[pfft]) \n\t" + "addiu %[ppfft], %[ppfft], 16 \n\t" + "bgtz %[i], 1b \n\t" + " addiu %[pfft], %[pfft], 8 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [ppfft] "+r" (ppfft) + : + : "memory" + ); + + pfft = fft; + out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain); + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "addiu %[i], $zero, 64 \n\t" + "11: \n\t" + "lh %[tmp1], 0(%[pfft]) \n\t" + "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t" + "addiu %[i], %[i], -2 \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 2(%[pfft]) \n\t" + "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "addiu %[tmp1], %[tmp1], 8192 \n\t" + "sra %[tmp1], %[tmp1], 14 \n\t" + "addiu %[tmp3], %[tmp3], 8192 
\n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 1f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "b 2f \n\t" + " srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "1: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "2: \n\t" + "lh %[tmp4], 0(%[paecm_buf]) \n\t" + "lh %[tmp2], 2(%[paecm_buf]) \n\t" + "addu %[tmp3], %[tmp3], %[tmp2] \n\t" + "addu %[tmp1], %[tmp1], %[tmp4] \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 3f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "3: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 4f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "4: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[pfft]) \n\t" + "sh %[tmp1], 0(%[output1]) \n\t" + "sh %[tmp3], 2(%[pfft]) \n\t" + "sh %[tmp3], 2(%[output1]) \n\t" + "lh %[tmp1], 128(%[pfft]) \n\t" + "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t" + "mul %[tmp1], %[tmp1], %[tmp2] \n\t" + "lh %[tmp3], 130(%[pfft]) \n\t" + "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t" + "mul %[tmp3], %[tmp3], %[tmp4] \n\t" + "sra %[tmp1], %[tmp1], 14 \n\t" + "sra %[tmp3], %[tmp3], 14 \n\t" + "bgez %[out_aecm], 5f \n\t" + " negu %[tmp2], %[out_aecm] \n\t" + "srav %[tmp3], %[tmp3], %[tmp2] \n\t" + "b 6f \n\t" + " srav %[tmp1], %[tmp1], %[tmp2] \n\t" + "5: \n\t" + "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t" + "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t" + "6: \n\t" +#if defined(MIPS_DSP_R1_LE) + "shll_s.w %[tmp1], %[tmp1], 16 \n\t" + "sra %[tmp1], %[tmp1], 16 \n\t" + "shll_s.w %[tmp3], %[tmp3], 16 \n\t" + "sra %[tmp3], %[tmp3], 16 \n\t" +#else // #if defined(MIPS_DSP_R1_LE) + "sra %[tmp4], %[tmp1], 31 \n\t" + "sra %[tmp2], %[tmp1], 15 \n\t" + "beq %[tmp4], %[tmp2], 7f \n\t" + " ori %[tmp2], $zero, 0x7fff \n\t" + "xor %[tmp1], %[tmp2], %[tmp4] \n\t" + "7: \n\t" + "sra %[tmp2], %[tmp3], 31 \n\t" + "sra %[tmp4], %[tmp3], 15 \n\t" + "beq %[tmp2], %[tmp4], 8f \n\t" + " ori %[tmp4], $zero, 0x7fff \n\t" + "xor %[tmp3], %[tmp4], %[tmp2] \n\t" + "8: \n\t" +#endif // #if defined(MIPS_DSP_R1_LE) + "sh %[tmp1], 0(%[paecm_buf]) \n\t" + "sh %[tmp3], 2(%[paecm_buf]) \n\t" + "addiu %[output1], %[output1], 4 \n\t" + "addiu %[paecm_buf], %[paecm_buf], 4 \n\t" + "addiu %[pfft], %[pfft], 4 \n\t" + "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t" + "bgtz %[i], 11b \n\t" + " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t" + ".set pop \n\t" + : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft), + [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4), + [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i), + [pp_kSqrtHanning] "+r" (pp_kSqrtHanning), + [p_kSqrtHanning] "+r" (p_kSqrtHanning) + : [out_aecm] "r" (out_aecm), + [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning) + : "hi", "lo","memory" + ); + + // Copy the current block to the old position + // (aecm->outBuf is shifted elsewhere) + memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy, + aecm->dBufNoisy + PART_LEN, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean, + aecm->dBufClean + PART_LEN, + sizeof(int16_t) * PART_LEN); + } +} + 
+void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int i; + uint32_t par1 = (*far_energy); + uint32_t par2 = (*echo_energy_adapt); + uint32_t par3 = (*echo_energy_stored); + int16_t* ch_stored_p = &(aecm->channelStored[0]); + int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]); + uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0])); + int32_t* echo_p = &(echo_est[0]); + int32_t temp0, stored0, echo0, adept0, spectrum0; + int32_t stored1, adept1, spectrum1, echo1, temp1; + + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + for (i = 0; i < PART_LEN; i+= 4) { + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[stored0], 0(%[ch_stored_p]) \n\t" + "lhu %[adept0], 0(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 0(%[spectrum_p]) \n\t" + "lh %[stored1], 2(%[ch_stored_p]) \n\t" + "lhu %[adept1], 2(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 2(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[echo_p], %[echo_p], 16 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -16(%[echo_p]) \n\t" + "usw %[echo1], -12(%[echo_p]) \n\t" + "lh %[stored0], 4(%[ch_stored_p]) \n\t" + "lhu %[adept0], 4(%[ch_adapt_p]) \n\t" + "lhu %[spectrum0], 4(%[spectrum_p]) \n\t" + "lh %[stored1], 6(%[ch_stored_p]) \n\t" + "lhu %[adept1], 6(%[ch_adapt_p]) \n\t" + "lhu %[spectrum1], 6(%[spectrum_p]) \n\t" + "mul %[echo0], %[stored0], %[spectrum0] \n\t" + "mul %[temp0], %[adept0], %[spectrum0] \n\t" + "mul %[echo1], %[stored1], %[spectrum1] \n\t" + "mul %[temp1], %[adept1], %[spectrum1] \n\t" + "addu %[par1], %[par1], %[spectrum0] \n\t" + "addu %[par1], %[par1], %[spectrum1] \n\t" + "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t" + "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t" + "addiu %[spectrum_p], %[spectrum_p], 8 \n\t" + "addu %[par3], %[par3], %[echo0] \n\t" + "addu %[par2], %[par2], %[temp0] \n\t" + "addu %[par3], %[par3], %[echo1] \n\t" + "addu %[par2], %[par2], %[temp1] \n\t" + "usw %[echo0], -8(%[echo_p]) \n\t" + "usw %[echo1], -4(%[echo_p]) \n\t" + ".set pop \n\t" + : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0), + [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0), + [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3), + [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1), + [adept1] "=&r" (adept1), [echo1] "=&r" (echo1), + [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1), + [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p), + [spectrum_p] "+r" (spectrum_p) + : + : "hi", "lo", "memory" + ); + } + + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + par1 += (uint32_t)(far_spectrum[PART_LEN]); + par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; + par3 += (uint32_t)echo_est[PART_LEN]; + + (*far_energy) = par1; + (*echo_energy_adapt) = par2; + (*echo_energy_stored) = par3; +} + +#if defined(MIPS_DSP_R1_LE) +void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm, + const uint16_t* far_spectrum, + 
int32_t* echo_est) { + int i; + int16_t* temp1; + uint16_t* temp8; + int32_t temp0, temp2, temp3, temp4, temp5, temp6; + int32_t* temp7 = &(echo_est[0]); + temp1 = &(aecm->channelStored[0]); + temp8 = (uint16_t*)(&far_spectrum[0]); + + // During startup we store the channel every block. + memcpy(aecm->channelStored, aecm->channelAdapt16, + sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp0], 0(%[temp8]) \n\t" + "ulw %[temp2], 0(%[temp1]) \n\t" + "ulw %[temp4], 4(%[temp8]) \n\t" + "ulw %[temp5], 4(%[temp1]) \n\t" + "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t" + "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t" + "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t" + "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t" + "addiu %[temp7], %[temp7], 16 \n\t" + "addiu %[temp1], %[temp1], 8 \n\t" + "addiu %[temp8], %[temp8], 8 \n\t" + "sra %[temp3], %[temp3], 1 \n\t" + "sra %[temp0], %[temp0], 1 \n\t" + "sra %[temp6], %[temp6], 1 \n\t" + "sra %[temp4], %[temp4], 1 \n\t" + "usw %[temp3], -12(%[temp7]) \n\t" + "usw %[temp0], -16(%[temp7]) \n\t" + "usw %[temp6], -4(%[temp7]) \n\t" + "usw %[temp4], -8(%[temp7]) \n\t" + : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7) + : + : "hi", "lo", "memory" + ); + } + echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + far_spectrum[i]); +} + +void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm) { + int i; + int32_t* temp3; + int16_t* temp0; + int32_t temp1, temp2, temp4, temp5; + + temp0 = &(aecm->channelStored[0]); + temp3 = &(aecm->channelAdapt32[0]); + + // The stored channel has a significantly lower MSE than the adaptive one for + // two consecutive calculations. Reset the adaptive channel. + memcpy(aecm->channelAdapt16, + aecm->channelStored, + sizeof(int16_t) * PART_LEN1); + + // Restore the W32 channel + for (i = 0; i < PART_LEN; i += 4) { + __asm __volatile ( + "ulw %[temp1], 0(%[temp0]) \n\t" + "ulw %[temp4], 4(%[temp0]) \n\t" + "preceq.w.phl %[temp2], %[temp1] \n\t" + "preceq.w.phr %[temp1], %[temp1] \n\t" + "preceq.w.phl %[temp5], %[temp4] \n\t" + "preceq.w.phr %[temp4], %[temp4] \n\t" + "addiu %[temp0], %[temp0], 8 \n\t" + "usw %[temp2], 4(%[temp3]) \n\t" + "usw %[temp1], 0(%[temp3]) \n\t" + "usw %[temp5], 12(%[temp3]) \n\t" + "usw %[temp4], 8(%[temp3]) \n\t" + "addiu %[temp3], %[temp3], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), + [temp3] "+r" (temp3), [temp0] "+r" (temp0) + : + : "memory" + ); + } + + aecm->channelAdapt32[i] = (int32_t)aecm->channelStored[i] << 16; +} +#endif // #if defined(MIPS_DSP_R1_LE) + +// Transforms a time domain signal into the frequency domain, outputting the +// complex valued signal, absolute value and sum of absolute values. 
+// +// time_signal [in] Pointer to time domain signal +// freq_signal_real [out] Pointer to real part of frequency domain array +// freq_signal_imag [out] Pointer to imaginary part of frequency domain +// array +// freq_signal_abs [out] Pointer to absolute value of frequency domain +// array +// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in +// the frequency domain array +// return value The Q-domain of current frequency values +// +static int TimeToFrequencyDomain(AecmCore* aecm, + const int16_t* time_signal, + ComplexInt16* freq_signal, + uint16_t* freq_signal_abs, + uint32_t* freq_signal_sum_abs) { + int i = 0; + int time_signal_scaling = 0; + + // In fft_buf, +16 for 32-byte alignment. + int16_t fft_buf[PART_LEN4 + 16]; + int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31); + + int16_t tmp16no1; +#if !defined(MIPS_DSP_R2_LE) + int32_t tmp32no1; + int32_t tmp32no2; + int16_t tmp16no2; +#else + int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13; + int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23; + int16_t* freqp; + uint16_t* freqabsp; + uint32_t freqt0, freqt1, freqt2, freqt3; + uint32_t freqs; +#endif + +#ifdef AECM_DYNAMIC_Q + tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2); + time_signal_scaling = WebRtcSpl_NormW16(tmp16no1); +#endif + + WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling); + + // Extract imaginary and real part, + // calculate the magnitude for all frequency bins + freq_signal[0].imag = 0; + freq_signal[PART_LEN].imag = 0; + freq_signal[PART_LEN].real = fft[PART_LEN2]; + freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real); + freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[PART_LEN].real); + (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + +#if !defined(MIPS_DSP_R2_LE) + for (i = 1; i < PART_LEN; i++) { + if (freq_signal[i].real == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].imag); + } + else if (freq_signal[i].imag == 0) + { + freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16( + freq_signal[i].real); + } + else + { + // Approximation for magnitude of complex fft output + // magn = sqrt(real^2 + imag^2) + // magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|) + // + // The parameters alpha and beta are stored in Q15 + tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real); + tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag); + tmp32no1 = tmp16no1 * tmp16no1; + tmp32no2 = tmp16no2 * tmp16no2; + tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2); + tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2); + + freq_signal_abs[i] = (uint16_t)tmp32no1; + } + (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i]; + } +#else // #if !defined(MIPS_DSP_R2_LE) + freqs = (uint32_t)(freq_signal_abs[0]) + + (uint32_t)(freq_signal_abs[PART_LEN]); + freqp = &(freq_signal[1].real); + + __asm __volatile ( + "lw %[freqt0], 0(%[freqp]) \n\t" + "lw %[freqt1], 4(%[freqp]) \n\t" + "lw %[freqt2], 8(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "addiu %[freqp], %[freqp], 12 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp), + 
[tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + freq_signal_abs[1] = (uint16_t)tmp32no10; + freq_signal_abs[2] = (uint16_t)tmp32no11; + freq_signal_abs[3] = (uint16_t)tmp32no12; + freqs += (uint32_t)tmp32no10; + freqs += (uint32_t)tmp32no11; + freqs += (uint32_t)tmp32no12; + freqabsp = &(freq_signal_abs[4]); + for (i = 4; i < PART_LEN; i+=4) + { + __asm __volatile ( + "ulw %[freqt0], 0(%[freqp]) \n\t" + "ulw %[freqt1], 4(%[freqp]) \n\t" + "ulw %[freqt2], 8(%[freqp]) \n\t" + "ulw %[freqt3], 12(%[freqp]) \n\t" + "mult $ac0, $zero, $zero \n\t" + "mult $ac1, $zero, $zero \n\t" + "mult $ac2, $zero, $zero \n\t" + "mult $ac3, $zero, $zero \n\t" + "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t" + "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t" + "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t" + "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t" + "addiu %[freqp], %[freqp], 16 \n\t" + "addiu %[freqabsp], %[freqabsp], 8 \n\t" + "extr.w %[tmp32no20], $ac0, 1 \n\t" + "extr.w %[tmp32no21], $ac1, 1 \n\t" + "extr.w %[tmp32no22], $ac2, 1 \n\t" + "extr.w %[tmp32no23], $ac3, 1 \n\t" + : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1), + [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3), + [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21), + [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23), + [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp) + : + : "memory", "hi", "lo", "$ac1hi", "$ac1lo", + "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo" + ); + + tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20); + tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21); + tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22); + tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23); + + __asm __volatile ( + "sh %[tmp32no10], -8(%[freqabsp]) \n\t" + "sh %[tmp32no11], -6(%[freqabsp]) \n\t" + "sh %[tmp32no12], -4(%[freqabsp]) \n\t" + "sh %[tmp32no13], -2(%[freqabsp]) \n\t" + "addu %[freqs], %[freqs], %[tmp32no10] \n\t" + "addu %[freqs], %[freqs], %[tmp32no11] \n\t" + "addu %[freqs], %[freqs], %[tmp32no12] \n\t" + "addu %[freqs], %[freqs], %[tmp32no13] \n\t" + : [freqs] "+r" (freqs) + : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11), + [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13), + [freqabsp] "r" (freqabsp) + : "memory" + ); + } + + (*freq_signal_sum_abs) = freqs; +#endif + + return time_signal_scaling; +} + +int WebRtcAecm_ProcessBlock(AecmCore* aecm, + const int16_t* farend, + const int16_t* nearendNoisy, + const int16_t* nearendClean, + int16_t* output) { + int i; + uint32_t xfaSum; + uint32_t dfaNoisySum; + uint32_t dfaCleanSum; + uint32_t echoEst32Gained; + uint32_t tmpU32; + int32_t tmp32no1; + + uint16_t xfa[PART_LEN1]; + uint16_t dfaNoisy[PART_LEN1]; + uint16_t dfaClean[PART_LEN1]; + uint16_t* ptrDfaClean = dfaClean; + const uint16_t* far_spectrum_ptr = NULL; + + // 32 byte aligned buffers (with +8 or +16). + int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. 
+ int32_t echoEst32_buf[PART_LEN1 + 8]; + int32_t dfw_buf[PART_LEN2 + 8]; + int32_t efw_buf[PART_LEN2 + 8]; + + int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31); + int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31); + ComplexInt16* dfw = (ComplexInt16*)(((uint32_t)dfw_buf + 31) & ~31); + ComplexInt16* efw = (ComplexInt16*)(((uint32_t)efw_buf + 31) & ~31); + + int16_t hnl[PART_LEN1]; + int16_t numPosCoef = 0; + int delay; + int16_t tmp16no1; + int16_t tmp16no2; + int16_t mu; + int16_t supGain; + int16_t zeros32, zeros16; + int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; + int far_q; + int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; + + const int kMinPrefBand = 4; + const int kMaxPrefBand = 24; + int32_t avgHnl32 = 0; + + int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; + int16_t* ptr; + int16_t* ptr1; + int16_t* er_ptr; + int16_t* dr_ptr; + + ptr = &hnl[0]; + ptr1 = &hnl[0]; + er_ptr = &efw[0].real; + dr_ptr = &dfw[0].real; + + // Determine startup state. There are three states: + // (0) the first CONV_LEN blocks + // (1) another CONV_LEN blocks + // (2) the rest + + if (aecm->startupState < 2) { + aecm->startupState = (aecm->totCount >= CONV_LEN) + + (aecm->totCount >= CONV_LEN2); + } + // END: Determine startup state + + // Buffer near and far end signals + memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); + memcpy(aecm->dBufNoisy + PART_LEN, + nearendNoisy, + sizeof(int16_t) * PART_LEN); + if (nearendClean != NULL) { + memcpy(aecm->dBufClean + PART_LEN, + nearendClean, + sizeof(int16_t) * PART_LEN); + } + + // Transform far end signal from time domain to frequency domain. + far_q = TimeToFrequencyDomain(aecm, + aecm->xBuf, + dfw, + xfa, + &xfaSum); + + // Transform noisy near end signal from time domain to frequency domain. + zerosDBufNoisy = TimeToFrequencyDomain(aecm, + aecm->dBufNoisy, + dfw, + dfaNoisy, + &dfaNoisySum); + aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; + aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; + + if (nearendClean == NULL) { + ptrDfaClean = dfaNoisy; + aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; + aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; + dfaCleanSum = dfaNoisySum; + } else { + // Transform clean near end signal from time domain to frequency domain. + zerosDBufClean = TimeToFrequencyDomain(aecm, + aecm->dBufClean, + dfw, + dfaClean, + &dfaCleanSum); + aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; + aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; + } + + // Get the delay + // Save far-end history and estimate delay + WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); + + if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, + far_q) == -1) { + return -1; + } + delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, + dfaNoisy, + PART_LEN1, + zerosDBufNoisy); + if (delay == -1) { + return -1; + } + else if (delay == -2) { + // If the delay is unknown, we assume zero. + // NOTE: this will have to be adjusted if we ever add lookahead. 
+    delay = 0;
+  }
+
+  if (aecm->fixedDelay >= 0) {
+    // Use fixed delay
+    delay = aecm->fixedDelay;
+  }
+
+  // Get aligned far end spectrum
+  far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
+  zerosXBuf = (int16_t) far_q;
+
+  if (far_spectrum_ptr == NULL) {
+    return -1;
+  }
+
+  // Calculate log(energy) and update energy threshold levels
+  WebRtcAecm_CalcEnergies(aecm,
+                          far_spectrum_ptr,
+                          zerosXBuf,
+                          dfaNoisySum,
+                          echoEst32);
+  // Calculate stepsize
+  mu = WebRtcAecm_CalcStepSize(aecm);
+
+  // Update counters
+  aecm->totCount++;
+
+  // This is the channel estimation algorithm.
+  // It is based on NLMS but has a variable step length,
+  // which was calculated above.
+  WebRtcAecm_UpdateChannel(aecm,
+                           far_spectrum_ptr,
+                           zerosXBuf,
+                           dfaNoisy,
+                           mu,
+                           echoEst32);
+
+  supGain = WebRtcAecm_CalcSuppressionGain(aecm);
+
+  // Calculate Wiener filter hnl[]
+  for (i = 0; i < PART_LEN1; i++) {
+    // Far end signal through channel estimate in Q8
+    // How much can we shift right to preserve resolution
+    tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
+    aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
+
+    zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
+    zeros16 = WebRtcSpl_NormW16(supGain) + 1;
+    if (zeros32 + zeros16 > 16) {
+      // Multiplication is safe
+      // Result in
+      // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff])
+      echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
+                                              (uint16_t)supGain);
+      resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+    } else {
+      tmp16no1 = 17 - zeros32 - zeros16;
+      resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
+                       RESOLUTION_SUPGAIN;
+      resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
+      if (zeros32 > tmp16no1) {
+        echoEst32Gained = WEBRTC_SPL_UMUL_32_16(
+            (uint32_t)aecm->echoFilt[i],
+            supGain >> tmp16no1);
+      } else {
+        // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
+        echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
+      }
+    }
+
+    zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
+    assert(zeros16 >= 0);  // |zeros16| is a norm, hence non-negative.
+    dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
+    if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
+      tmp16no1 = aecm->nearFilt[i] << zeros16;
+      qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
+      tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
+    } else {
+      tmp16no1 = dfa_clean_q_domain_diff < 0
+          ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
+          : aecm->nearFilt[i] << dfa_clean_q_domain_diff;
+      qDomainDiff = 0;
+      tmp16no2 = ptrDfaClean[i];
+    }
+
+    tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
+    tmp16no2 = (int16_t)(tmp32no1 >> 4);
+    tmp16no2 += tmp16no1;
+    zeros16 = WebRtcSpl_NormW16(tmp16no2);
+    if ((tmp16no2) & (-qDomainDiff > zeros16)) {
+      aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
+    } else {
+      aecm->nearFilt[i] = qDomainDiff < 0 ?
tmp16no2 << -qDomainDiff + : tmp16no2 >> qDomainDiff; + } + + // Wiener filter coefficients, resulting hnl in Q14 + if (echoEst32Gained == 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else if (aecm->nearFilt[i] == 0) { + hnl[i] = 0; + } else { + // Multiply the suppression gain + // Rounding + echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); + tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, + (uint16_t)aecm->nearFilt[i]); + + // Current resolution is + // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN + // - max(0, 17 - zeros16 - zeros32)) + // Make sure we are in Q14 + tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); + if (tmp32no1 > ONE_Q14) { + hnl[i] = 0; + } else if (tmp32no1 < 0) { + hnl[i] = ONE_Q14; + numPosCoef++; + } else { + // 1-echoEst/dfa + hnl[i] = ONE_Q14 - (int16_t)tmp32no1; + if (hnl[i] <= 0) { + hnl[i] = 0; + } else { + numPosCoef++; + } + } + } + } + + // Only in wideband. Prevent the gain in upper band from being larger than + // in lower band. + if (aecm->mult == 2) { + // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause + // speech distortion in double-talk. + for (i = 0; i < (PART_LEN1 >> 3); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "lh %[temp2], 2(%[ptr1]) \n\t" + "lh %[temp3], 4(%[ptr1]) \n\t" + "lh %[temp4], 6(%[ptr1]) \n\t" + "lh %[temp5], 8(%[ptr1]) \n\t" + "lh %[temp6], 10(%[ptr1]) \n\t" + "lh %[temp7], 12(%[ptr1]) \n\t" + "lh %[temp8], 14(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "mul %[temp2], %[temp2], %[temp2] \n\t" + "mul %[temp3], %[temp3], %[temp3] \n\t" + "mul %[temp4], %[temp4], %[temp4] \n\t" + "mul %[temp5], %[temp5], %[temp5] \n\t" + "mul %[temp6], %[temp6], %[temp6] \n\t" + "mul %[temp7], %[temp7], %[temp7] \n\t" + "mul %[temp8], %[temp8], %[temp8] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "sra %[temp4], %[temp4], 14 \n\t" + "sra %[temp5], %[temp5], 14 \n\t" + "sra %[temp6], %[temp6], 14 \n\t" + "sra %[temp7], %[temp7], 14 \n\t" + "sra %[temp8], %[temp8], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "sh %[temp2], 2(%[ptr1]) \n\t" + "sh %[temp3], 4(%[ptr1]) \n\t" + "sh %[temp4], 6(%[ptr1]) \n\t" + "sh %[temp5], 8(%[ptr1]) \n\t" + "sh %[temp6], 10(%[ptr1]) \n\t" + "sh %[temp7], 12(%[ptr1]) \n\t" + "sh %[temp8], 14(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 16 \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), + [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + for(i = 0; i < (PART_LEN1 & 7); i++) { + __asm __volatile ( + "lh %[temp1], 0(%[ptr1]) \n\t" + "mul %[temp1], %[temp1], %[temp1] \n\t" + "sra %[temp1], %[temp1], 14 \n\t" + "sh %[temp1], 0(%[ptr1]) \n\t" + "addiu %[ptr1], %[ptr1], 2 \n\t" + : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1) + : + : "memory", "hi", "lo" + ); + } + + for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { + avgHnl32 += (int32_t)hnl[i]; + } + + assert(kMaxPrefBand - kMinPrefBand + 1 > 0); + avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); + + for (i = kMaxPrefBand; i < PART_LEN1; i++) { + if (hnl[i] > (int16_t)avgHnl32) { + hnl[i] = (int16_t)avgHnl32; + } + } + } + + // Calculate NLP gain, result is in Q14 + if (aecm->nlpFlag) { + if (numPosCoef < 3) { + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = 0; + efw[i].imag = 0; + hnl[i] = 0; + } + } else { + for (i = 0; i < PART_LEN1; i++) { +#if defined(MIPS_DSP_R1_LE) + __asm 
__volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "shra_r.w %[temp2], %[temp2], 14 \n\t" + "shra_r.w %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#else + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "lh %[temp1], 0(%[ptr]) \n\t" + "lh %[temp2], 0(%[dr_ptr]) \n\t" + "slti %[temp4], %[temp1], 0x4001 \n\t" + "beqz %[temp4], 3f \n\t" + " lh %[temp3], 2(%[dr_ptr]) \n\t" + "slti %[temp5], %[temp1], 3277 \n\t" + "bnez %[temp5], 2f \n\t" + " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" + "mul %[temp2], %[temp2], %[temp1] \n\t" + "mul %[temp3], %[temp3], %[temp1] \n\t" + "addiu %[temp2], %[temp2], 0x2000 \n\t" + "addiu %[temp3], %[temp3], 0x2000 \n\t" + "sra %[temp2], %[temp2], 14 \n\t" + "sra %[temp3], %[temp3], 14 \n\t" + "b 4f \n\t" + " nop \n\t" + "2: \n\t" + "addu %[temp1], $zero, $zero \n\t" + "addu %[temp2], $zero, $zero \n\t" + "addu %[temp3], $zero, $zero \n\t" + "b 1f \n\t" + " nop \n\t" + "3: \n\t" + "addiu %[temp1], $0, 0x4000 \n\t" + "1: \n\t" + "sh %[temp1], 0(%[ptr]) \n\t" + "4: \n\t" + "sh %[temp2], 0(%[er_ptr]) \n\t" + "sh %[temp3], 2(%[er_ptr]) \n\t" + "addiu %[ptr], %[ptr], 2 \n\t" + "addiu %[er_ptr], %[er_ptr], 4 \n\t" + ".set pop \n\t" + : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), + [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), + [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) + : + : "memory", "hi", "lo" + ); +#endif + } + } + } + else { + // multiply with Wiener coefficients + for (i = 0; i < PART_LEN1; i++) { + efw[i].real = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, + hnl[i], + 14)); + efw[i].imag = (int16_t) + (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, + hnl[i], + 14)); + } + } + + if (aecm->cngMode == AecmTrue) { + ComfortNoise(aecm, ptrDfaClean, efw, hnl); + } + + InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); + + return 0; +} + +// Generate comfort noise and add to output signal. 
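// A scalar reference for the noise synthesis lives in the generic
// ComfortNoise earlier in this patch (aecm_core.c): each bin draws a random
// phase index into the Q13 cos/sin tables and scales it by the shaped noise
// magnitude, roughly:
//
//   tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
//   uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >> 13);
//   uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >> 13);
//
// The MIPS version below folds the noise-floor tracking into inline assembly
// and processes two bins per loop iteration.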
+static void ComfortNoise(AecmCore* aecm, + const uint16_t* dfa, + ComplexInt16* out, + const int16_t* lambda) { + int16_t i; + int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; + int32_t tmp32, tmp321, tnoise, tnoise1; + int32_t tmp322, tmp323, *tmp1; + int16_t* dfap; + int16_t* lambdap; + const int32_t c2049 = 2049; + const int32_t c359 = 359; + const int32_t c114 = ONE_Q14; + + int16_t randW16[PART_LEN]; + int16_t uReal[PART_LEN1]; + int16_t uImag[PART_LEN1]; + int32_t outLShift32; + + int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; + int16_t minTrackShift = 9; + + assert(shiftFromNearToNoise >= 0); + assert(shiftFromNearToNoise < 16); + + if (aecm->noiseEstCtr < 100) { + // Track the minimum more quickly initially. + aecm->noiseEstCtr++; + minTrackShift = 6; + } + + // Generate a uniform random array on [0 2^15-1]. + WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); + int16_t* randW16p = (int16_t*)randW16; +#if defined (MIPS_DSP_R1_LE) + int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; + int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; +#endif // #if defined(MIPS_DSP_R1_LE) + tmp1 = (int32_t*)aecm->noiseEst + 1; + dfap = (int16_t*)dfa + 1; + lambdap = (int16_t*)lambda + 1; + // Estimate noise power. + for (i = 1; i < PART_LEN1; i+=2) { + // Shift to the noise domain. + __asm __volatile ( + "lh %[tmp32], 0(%[dfap]) \n\t" + "lw %[tnoise], 0(%[tmp1]) \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32), + [tnoise] "=&r" (tnoise) + : [tmp1] "r" (tmp1), [dfap] "r" (dfap), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i] = 0; + // Track the minimum. + if (tnoise < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i]++; + if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { + tnoise--; + aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise >> 19) <= 0) { + if ((tnoise >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + "sra %[tnoise], %[tnoise], 11 \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i]++; + if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise], 9 \n\t" + "addi %[tnoise], %[tnoise], 1 \n\t" + "addu %[tnoise], %[tnoise], %[tmp32] \n\t" + : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. 
Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise], %[tnoise], 11 \n\t" + "mul %[tnoise], %[tnoise], %[c2049] \n\t" + : [tnoise] "+r" (tnoise) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + // Shift to the noise domain. + __asm __volatile ( + "lh %[tmp32], 2(%[dfap]) \n\t" + "lw %[tnoise1], 4(%[tmp1]) \n\t" + "addiu %[dfap], %[dfap], 4 \n\t" + "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" + : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap), + [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1) + : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (outLShift32 < tnoise1) { + // Reset "too low" counter + aecm->noiseEstTooLowCtr[i + 1] = 0; + // Track the minimum. + if (tnoise1 < (1 << minTrackShift)) { + // For small values, decrease noiseEst[i] every + // |kNoiseEstIncCount| block. The regular approach below can not + // go further down due to truncation. + aecm->noiseEstTooHighCtr[i + 1]++; + if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { + tnoise1--; + aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter + } + } else { + __asm __volatile ( + "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" + "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" + "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1) + : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) + ); + } + } else { + // Reset "too high" counter + aecm->noiseEstTooHighCtr[i + 1] = 0; + // Ramp slowly upwards until we hit the minimum again. + if ((tnoise1 >> 19) <= 0) { + if ((tnoise1 >> 11) > 0) { + // Large enough for relative increase + __asm __volatile ( + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + "sra %[tnoise1], %[tnoise1], 11 \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } else { + // Make incremental increases based on size every + // |kNoiseEstIncCount| block + aecm->noiseEstTooLowCtr[i + 1]++; + if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { + __asm __volatile ( + "sra %[tmp32], %[tnoise1], 9 \n\t" + "addi %[tnoise1], %[tnoise1], 1 \n\t" + "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" + : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32) + : + ); + aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter + } + } + } else { + // Avoid overflow. + // Multiplication with 2049 will cause wrap around. 
Scale + // down first and then multiply + __asm __volatile ( + "sra %[tnoise1], %[tnoise1], 11 \n\t" + "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" + : [tnoise1] "+r" (tnoise1) + : [c2049] "r" (c2049) + : "hi", "lo" + ); + } + } + + __asm __volatile ( + "lh %[tmp16], 0(%[lambdap]) \n\t" + "lh %[tmp161], 2(%[lambdap]) \n\t" + "sw %[tnoise], 0(%[tmp1]) \n\t" + "sw %[tnoise1], 4(%[tmp1]) \n\t" + "subu %[tmp16], %[c114], %[tmp16] \n\t" + "subu %[tmp161], %[c114], %[tmp161] \n\t" + "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" + "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" + "addiu %[lambdap], %[lambdap], 4 \n\t" + "addiu %[tmp1], %[tmp1], 8 \n\t" + : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1), + [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap) + : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114), + [shiftFromNearToNoise] "r" (shiftFromNearToNoise) + : "memory" + ); + + if (tmp32 > 32767) { + tmp32 = 32767; + aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise; + } + if (tmp321 > 32767) { + tmp321 = 32767; + aecm->noiseEst[i+1] = tmp321 << shiftFromNearToNoise; + } + + __asm __volatile ( + "mul %[tmp32], %[tmp32], %[tmp16] \n\t" + "mul %[tmp321], %[tmp321], %[tmp161] \n\t" + "sra %[nrsh1], %[tmp32], 14 \n\t" + "sra %[nrsh2], %[tmp321], 14 \n\t" + : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2) + : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32), + [tmp321] "r" (tmp321) + : "memory", "hi", "lo" + ); + + __asm __volatile ( + "lh %[tmp32], 0(%[randW16p]) \n\t" + "lh %[tmp321], 2(%[randW16p]) \n\t" + "addiu %[randW16p], %[randW16p], 4 \n\t" + "mul %[tmp32], %[tmp32], %[c359] \n\t" + "mul %[tmp321], %[tmp321], %[c359] \n\t" + "sra %[tmp16], %[tmp32], 15 \n\t" + "sra %[tmp161], %[tmp321], 15 \n\t" + : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32), + [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321) + : [c359] "r" (c359) + : "memory", "hi", "lo" + ); + +#if !defined(MIPS_DSP_R1_LE) + tmp32 = WebRtcAecm_kCosTable[tmp16]; + tmp321 = WebRtcAecm_kSinTable[tmp16]; + tmp322 = WebRtcAecm_kCosTable[tmp161]; + tmp323 = WebRtcAecm_kSinTable[tmp161]; +#else + __asm __volatile ( + "sll %[tmp16], %[tmp16], 1 \n\t" + "sll %[tmp161], %[tmp161], 1 \n\t" + "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t" + "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t" + "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t" + "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t" + : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), + [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323) + : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16), + [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep) + : "memory" + ); +#endif + __asm __volatile ( + "mul %[tmp32], %[tmp32], %[nrsh1] \n\t" + "negu %[tmp162], %[nrsh1] \n\t" + "mul %[tmp322], %[tmp322], %[nrsh2] \n\t" + "negu %[tmp163], %[nrsh2] \n\t" + "sra %[tmp32], %[tmp32], 13 \n\t" + "mul %[tmp321], %[tmp321], %[tmp162] \n\t" + "sra %[tmp322], %[tmp322], 13 \n\t" + "mul %[tmp323], %[tmp323], %[tmp163] \n\t" + "sra %[tmp321], %[tmp321], 13 \n\t" + "sra %[tmp323], %[tmp323], 13 \n\t" + : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162), + [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163) + : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2) + : "hi", "lo" + ); + // Tables are in Q13. 
+    uReal[i] = (int16_t)tmp32;
+    uImag[i] = (int16_t)tmp321;
+    uReal[i + 1] = (int16_t)tmp322;
+    uImag[i + 1] = (int16_t)tmp323;
+  }
+
+  int32_t tt, sgn;
+  tt = out[0].real;
+  sgn = ((int)tt) >> 31;
+  out[0].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[0].imag;
+  sgn = ((int)tt) >> 31;
+  out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  for (i = 1; i < PART_LEN; i++) {
+    tt = out[i].real + uReal[i];
+    sgn = ((int)tt) >> 31;
+    out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+    tt = out[i].imag + uImag[i];
+    sgn = ((int)tt) >> 31;
+    out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  }
+  tt = out[PART_LEN].real + uReal[PART_LEN];
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+  tt = out[PART_LEN].imag;
+  sgn = ((int)tt) >> 31;
+  out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn);
+}
+
diff --git a/webrtc/modules/audio_processing/aecm/aecm_core_neon.c b/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
index 86ced1e..1751fcf 100644
--- a/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
+++ b/webrtc/modules/audio_processing/aecm/aecm_core_neon.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -7,308 +7,206 @@
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
-#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)
-#include "aecm_core.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
 
 #include <arm_neon.h>
 #include <assert.h>
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+
+// TODO(kma): Re-write the corresponding assembly file, the offset
+// generating script and makefile, to replace these C functions.
 
 // Square root of Hanning window in Q14.
-static const WebRtc_Word16 kSqrtHanningReversed[] __attribute__ ((aligned (8))) = { - 16384, 16373, 16354, 16325, - 16286, 16237, 16179, 16111, - 16034, 15947, 15851, 15746, - 15631, 15506, 15373, 15231, - 15079, 14918, 14749, 14571, - 14384, 14189, 13985, 13773, - 13553, 13325, 13089, 12845, - 12594, 12335, 12068, 11795, - 11514, 11227, 10933, 10633, - 10326, 10013, 9695, 9370, - 9040, 8705, 8364, 8019, - 7668, 7313, 6954, 6591, - 6224, 5853, 5478, 5101, - 4720, 4337, 3951, 3562, - 3172, 2780, 2386, 1990, - 1594, 1196, 798, 399 +const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = { + 0, + 399, 798, 1196, 1594, 1990, 2386, 2780, 3172, + 3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224, + 6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040, + 9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514, + 11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553, + 13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079, + 15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034, + 16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384 }; -void WebRtcAecm_WindowAndFFT(WebRtc_Word16* fft, - const WebRtc_Word16* time_signal, - complex16_t* freq_signal, - int time_signal_scaling) -{ - int i, j; - - int16x4_t tmp16x4_scaling = vdup_n_s16(time_signal_scaling); - __asm__("vmov.i16 d21, #0" ::: "d21"); - - for(i = 0, j = 0; i < PART_LEN; i += 4, j += 8) - { - int16x4_t tmp16x4_0; - int16x4_t tmp16x4_1; - int32x4_t tmp32x4_0; - - /* Window near end */ - // fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((time_signal[i] - // << time_signal_scaling), WebRtcAecm_kSqrtHanning[i], 14); - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&time_signal[i])); - tmp16x4_0 = vshl_s16(tmp16x4_0, tmp16x4_scaling); - - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&WebRtcAecm_kSqrtHanning[i])); - tmp32x4_0 = vmull_s16(tmp16x4_0, tmp16x4_1); - - __asm__("vshrn.i32 d20, %q0, #14" : : "w"(tmp32x4_0) : "d20"); - __asm__("vst2.16 {d20, d21}, [%0, :128]" : : "r"(&fft[j]) : "q10"); - - // fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - // (time_signal[PART_LEN + i] << time_signal_scaling), - // WebRtcAecm_kSqrtHanning[PART_LEN - i], 14); - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&time_signal[i + PART_LEN])); - tmp16x4_0 = vshl_s16(tmp16x4_0, tmp16x4_scaling); - - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&kSqrtHanningReversed[i])); - tmp32x4_0 = vmull_s16(tmp16x4_0, tmp16x4_1); - - __asm__("vshrn.i32 d20, %q0, #14" : : "w"(tmp32x4_0) : "d20"); - __asm__("vst2.16 {d20, d21}, [%0, :128]" : : "r"(&fft[PART_LEN2 + j]) : "q10"); - } - - WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); - WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1); - - // Take only the first PART_LEN2 samples, and switch the sign of the imaginary part. 
- for(i = 0, j = 0; j < PART_LEN2; i += 8, j += 16) - { - __asm__("vld2.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&fft[j]) : "q10", "q11"); - __asm__("vneg.s16 d22, d22" : : : "q10"); - __asm__("vneg.s16 d23, d23" : : : "q11"); - __asm__("vst2.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&freq_signal[i].real): "q10", "q11"); - } +static inline void AddLanes(uint32_t* ptr, uint32x4_t v) { +#if defined(WEBRTC_ARCH_ARM64) + *(ptr) = vaddvq_u32(v); +#else + uint32x2_t tmp_v; + tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v)); + tmp_v = vpadd_u32(tmp_v, tmp_v); + *(ptr) = vget_lane_u32(tmp_v, 0); +#endif } -void WebRtcAecm_InverseFFTAndWindow(AecmCore_t* aecm, - WebRtc_Word16* fft, - complex16_t* efw, - WebRtc_Word16* output, - const WebRtc_Word16* nearendClean) -{ - int i, j, outCFFT; - WebRtc_Word32 tmp32no1; +void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est, + uint32_t* far_energy, + uint32_t* echo_energy_adapt, + uint32_t* echo_energy_stored) { + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt_p = aecm->channelAdapt16; + int32_t* echo_est_p = echo_est; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + const uint16_t* far_spectrum_p = far_spectrum; + int16x8_t store_v, adapt_v; + uint16x8_t spectrum_v; + uint32x4_t echo_est_v_low, echo_est_v_high; + uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v; - // Synthesis - for(i = 0, j = 0; i < PART_LEN; i += 4, j += 8) - { - // We overwrite two more elements in fft[], but it's ok. - __asm__("vld2.16 {d20, d21}, [%0, :128]" : : "r"(&(efw[i].real)) : "q10"); - __asm__("vmov q11, q10" : : : "q10", "q11"); + far_energy_v = vdupq_n_u32(0); + echo_adapt_v = vdupq_n_u32(0); + echo_stored_v = vdupq_n_u32(0); - __asm__("vneg.s16 d23, d23" : : : "q11"); - __asm__("vst2.16 {d22, d23}, [%0, :128]" : : "r"(&fft[j]): "q11"); + // Get energy for the delayed far end signal and estimated + // echo using both stored and adapted channels. + // The C code: + // for (i = 0; i < PART_LEN1; i++) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // (*far_energy) += (uint32_t)(far_spectrum[i]); + // *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i]; + // (*echo_energy_stored) += (uint32_t)echo_est[i]; + // } + while (start_stored_p < end_stored_p) { + spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); + store_v = vld1q_s16(start_stored_p); - __asm__("vrev64.16 q10, q10" : : : "q10"); - __asm__("vst2.16 {d20, d21}, [%0]" : : "r"(&fft[PART_LEN4 - j - 6]): "q10"); - } + far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v)); + far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v)); - fft[PART_LEN2] = efw[PART_LEN].real; - fft[PART_LEN2 + 1] = -efw[PART_LEN].imag; + echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)), + vget_low_u16(spectrum_v)); + echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)), + vget_high_u16(spectrum_v)); + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); - // Inverse FFT, result should be scaled with outCFFT. - WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT); - outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1); + echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v); + echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v); - // Take only the real values and scale with outCFFT. 
- for (i = 0, j = 0; i < PART_LEN2; i += 8, j+= 16) - { - __asm__("vld2.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&fft[j]) : "q10", "q11"); - __asm__("vst1.16 {d20, d21}, [%0, :128]" : : "r"(&fft[i]): "q10"); - } + echo_adapt_v = vmlal_u16(echo_adapt_v, + vreinterpret_u16_s16(vget_low_s16(adapt_v)), + vget_low_u16(spectrum_v)); + echo_adapt_v = vmlal_u16(echo_adapt_v, + vreinterpret_u16_s16(vget_high_s16(adapt_v)), + vget_high_u16(spectrum_v)); - int32x4_t tmp32x4_2; - __asm__("vdup.32 %q0, %1" : "=w"(tmp32x4_2) : "r"((WebRtc_Word32) - (outCFFT - aecm->dfaCleanQDomain))); - for (i = 0; i < PART_LEN; i += 4) - { - int16x4_t tmp16x4_0; - int16x4_t tmp16x4_1; - int32x4_t tmp32x4_0; - int32x4_t tmp32x4_1; + start_stored_p += 8; + start_adapt_p += 8; + far_spectrum_p += 8; + echo_est_p += 8; + } - // fft[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - // fft[i], WebRtcAecm_kSqrtHanning[i], 14); - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&fft[i])); - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&WebRtcAecm_kSqrtHanning[i])); - __asm__("vmull.s16 %q0, %P1, %P2" : "=w"(tmp32x4_0) : "w"(tmp16x4_0), "w"(tmp16x4_1)); - __asm__("vrshr.s32 %q0, %q1, #14" : "=w"(tmp32x4_0) : "0"(tmp32x4_0)); + AddLanes(far_energy, far_energy_v); + AddLanes(echo_energy_stored, echo_stored_v); + AddLanes(echo_energy_adapt, echo_adapt_v); - // tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)fft[i], - // outCFFT - aecm->dfaCleanQDomain); - __asm__("vshl.s32 %q0, %q1, %q2" : "=w"(tmp32x4_0) : "0"(tmp32x4_0), "w"(tmp32x4_2)); - - // fft[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, - // tmp32no1 + outBuf[i], WEBRTC_SPL_WORD16_MIN); - // output[i] = fft[i]; - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&aecm->outBuf[i])); - __asm__("vmovl.s16 %q0, %P1" : "=w"(tmp32x4_1) : "w"(tmp16x4_0)); - __asm__("vadd.i32 %q0, %q1" : : "w"(tmp32x4_0), "w"(tmp32x4_1)); - __asm__("vqshrn.s32 %P0, %q1, #0" : "=w"(tmp16x4_0) : "w"(tmp32x4_0)); - __asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&fft[i])); - __asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&output[i])); - - // tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT( - // fft[PART_LEN + i], WebRtcAecm_kSqrtHanning[PART_LEN - i], 14); - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&fft[PART_LEN + i])); - __asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&kSqrtHanningReversed[i])); - __asm__("vmull.s16 %q0, %P1, %P2" : "=w"(tmp32x4_0) : "w"(tmp16x4_0), "w"(tmp16x4_1)); - __asm__("vshr.s32 %q0, %q1, #14" : "=w"(tmp32x4_0) : "0"(tmp32x4_0)); - - // tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, outCFFT - aecm->dfaCleanQDomain); - __asm__("vshl.s32 %q0, %q1, %q2" : "=w"(tmp32x4_0) : "0"(tmp32x4_0), "w"(tmp32x4_2)); - // outBuf[i] = (WebRtc_Word16)WEBRTC_SPL_SAT( - // WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN); - __asm__("vqshrn.s32 %P0, %q1, #0" : "=w"(tmp16x4_0) : "w"(tmp32x4_0)); - __asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&aecm->outBuf[i])); - } - - // Copy the current block to the old position (outBuf is shifted elsewhere). 
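/* The deleted loops below move the newer half of each 2 * PART_LEN sample
 * history buffer over the older half, 16 samples per iteration. Since source
 * and destination do not overlap, each loop reduces to a single memcpy; a
 * sketch under that assumption (helper name illustrative, not from the patch):
 */
#include <string.h>
#include <stdint.h>
static void ShiftHistoryHalf(int16_t* buf) {
  /* buf holds 2 * 64 (= 2 * PART_LEN) samples; slide the top half down. */
  memcpy(buf, buf + 64, 64 * sizeof(int16_t));
}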
- for (i = 0; i < PART_LEN; i += 16) - { - __asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&aecm->xBuf[i + PART_LEN]) : "q10"); - __asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&aecm->xBuf[i]): "q10"); - } - for (i = 0; i < PART_LEN; i += 16) - { - __asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&aecm->dBufNoisy[i + PART_LEN]) : "q10"); - __asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&aecm->dBufNoisy[i]): "q10"); - } - if (nearendClean != NULL) { - for (i = 0; i < PART_LEN; i += 16) - { - __asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&aecm->dBufClean[i + PART_LEN]) : "q10"); - __asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&aecm->dBufClean[i]): "q10"); - } - } + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); + *echo_energy_stored += (uint32_t)echo_est[PART_LEN]; + *far_energy += (uint32_t)far_spectrum[PART_LEN]; + *echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN]; } -void WebRtcAecm_CalcLinearEnergies(AecmCore_t* aecm, - const WebRtc_UWord16* far_spectrum, - WebRtc_Word32* echo_est, - WebRtc_UWord32* far_energy, - WebRtc_UWord32* echo_energy_adapt, - WebRtc_UWord32* echo_energy_stored) -{ - int i; +void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm, + const uint16_t* far_spectrum, + int32_t* echo_est) { + assert((uintptr_t)echo_est % 32 == 0); + assert((uintptr_t)(aecm->channelStored) % 16 == 0); + assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0); - register WebRtc_UWord32 far_energy_r; - register WebRtc_UWord32 echo_energy_stored_r; - register WebRtc_UWord32 echo_energy_adapt_r; - uint32x4_t tmp32x4_0; + // This is C code of following optimized code. + // During startup we store the channel every block. + // memcpy(aecm->channelStored, + // aecm->channelAdapt16, + // sizeof(int16_t) * PART_LEN1); + // Recalculate echo estimate + // for (i = 0; i < PART_LEN; i += 4) { + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + // echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1], + // far_spectrum[i + 1]); + // echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2], + // far_spectrum[i + 2]); + // echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3], + // far_spectrum[i + 3]); + // } + // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], + // far_spectrum[i]); + const uint16_t* far_spectrum_p = far_spectrum; + int16_t* start_adapt_p = aecm->channelAdapt16; + int16_t* start_stored_p = aecm->channelStored; + const int16_t* end_stored_p = aecm->channelStored + PART_LEN; + int32_t* echo_est_p = echo_est; - __asm__("vmov.i32 q14, #0" : : : "q14"); // far_energy - __asm__("vmov.i32 q8, #0" : : : "q8"); // echo_energy_stored - __asm__("vmov.i32 q9, #0" : : : "q9"); // echo_energy_adapt + uint16x8_t far_spectrum_v; + int16x8_t adapt_v; + uint32x4_t echo_est_v_low, echo_est_v_high; - for(i = 0; i < PART_LEN -7; i += 8) - { - // far_energy += (WebRtc_UWord32)(far_spectrum[i]); - __asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&far_spectrum[i]) : "q13"); - __asm__("vaddw.u16 q14, q14, d26" : : : "q14", "q13"); - __asm__("vaddw.u16 q14, q14, d27" : : : "q14", "q13"); + while (start_stored_p < end_stored_p) { + far_spectrum_v = vld1q_u16(far_spectrum_p); + adapt_v = vld1q_s16(start_adapt_p); - // Get estimated echo energies for adaptive channel and stored channel. 
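/* Each vmull_u16 pair above computes eight per-bin echo estimates at once.
 * The scalar operation is WEBRTC_SPL_MUL_16_U16: a signed Q-domain channel
 * coefficient widened against an unsigned spectrum magnitude. The NEON code
 * can reinterpret the channel bits as unsigned because the channel estimates
 * stay non-negative in practice (our reading, not stated by the patch): */
#include <stdint.h>
static int32_t EchoEstimateBin(int16_t channel, uint16_t far_spectrum) {
  /* Equivalent to WEBRTC_SPL_MUL_16_U16(channel, far_spectrum). */
  return (int32_t)channel * (int32_t)far_spectrum;
}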
- // echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); - __asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelStored[i]) : "q12"); - __asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10"); - __asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11"); - __asm__("vst1.32 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&echo_est[i]): - "q10", "q11"); + vst1q_s16(start_stored_p, adapt_v); - // echo_energy_stored += (WebRtc_UWord32)echoEst[i]; - __asm__("vadd.u32 q8, q10" : : : "q10", "q8"); - __asm__("vadd.u32 q8, q11" : : : "q11", "q8"); + echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v), + vget_low_u16(vreinterpretq_u16_s16(adapt_v))); + echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v), + vget_high_u16(vreinterpretq_u16_s16(adapt_v))); - // echo_energy_adapt += WEBRTC_SPL_UMUL_16_16( - // aecm->channelAdapt16[i], far_spectrum[i]); - __asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelAdapt16[i]) : "q12"); - __asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10"); - __asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11"); - __asm__("vadd.u32 q9, q10" : : : "q9", "q15"); - __asm__("vadd.u32 q9, q11" : : : "q9", "q11"); - } + vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low)); + vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high)); - __asm__("vadd.u32 d28, d29" : : : "q14"); - __asm__("vpadd.u32 d28, d28" : : : "q14"); - __asm__("vmov.32 %0, d28[0]" : "=r"(far_energy_r): : "q14"); - - __asm__("vadd.u32 d18, d19" : : : "q9"); - __asm__("vpadd.u32 d18, d18" : : : "q9"); - __asm__("vmov.32 %0, d18[0]" : "=r"(echo_energy_adapt_r): : "q9"); - - __asm__("vadd.u32 d16, d17" : : : "q8"); - __asm__("vpadd.u32 d16, d16" : : : "q8"); - __asm__("vmov.32 %0, d16[0]" : "=r"(echo_energy_stored_r): : "q8"); - - // Get estimated echo energies for adaptive channel and stored channel. - echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); - *echo_energy_stored = echo_energy_stored_r + (WebRtc_UWord32)echo_est[i]; - *far_energy = far_energy_r + (WebRtc_UWord32)(far_spectrum[i]); - *echo_energy_adapt = echo_energy_adapt_r + WEBRTC_SPL_UMUL_16_16( - aecm->channelAdapt16[i], far_spectrum[i]); + far_spectrum_p += 8; + start_adapt_p += 8; + start_stored_p += 8; + echo_est_p += 8; + } + aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN]; + echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN], + far_spectrum[PART_LEN]); } -void WebRtcAecm_StoreAdaptiveChannel(AecmCore_t* aecm, - const WebRtc_UWord16* far_spectrum, - WebRtc_Word32* echo_est) -{ - int i; +void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) { + assert((uintptr_t)(aecm->channelStored) % 16 == 0); + assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0); + assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0); - // During startup we store the channel every block. - // Recalculate echo estimate. 
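/* Both the deleted `i < PART_LEN - 7` counter loops and the new pointer-range
 * while loops visit the same 64 bins, eight per iteration; bin PART_LEN (the
 * 65th of the PART_LEN1 spectrum) is always finished in scalar code. A sketch
 * of that shared shape (the inner loop stands in for the SIMD body; names are
 * illustrative, not part of the patch): */
#include <stdint.h>
#define SKETCH_LEN 64 /* PART_LEN */
static uint32_t FarEnergySketch(const uint16_t* far_spectrum) {
  uint32_t energy = 0;
  int i;
  for (i = 0; i < SKETCH_LEN - 7; i += 8) {  /* i = 0, 8, ..., 56 */
    for (int k = 0; k < 8; ++k)
      energy += far_spectrum[i + k];
  }
  /* i == SKETCH_LEN here; the 65th bin is accumulated separately. */
  return energy + far_spectrum[SKETCH_LEN];
}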
- for(i = 0; i < PART_LEN -7; i += 8) - { - // aecm->channelStored[i] = acem->channelAdapt16[i]; - // echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); - __asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&far_spectrum[i]) : "q13"); - __asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelAdapt16[i]) : "q12"); - __asm__("vst1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelStored[i]) : "q12"); - __asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10"); - __asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11"); - __asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&echo_est[i]) : "q10", "q11"); - } - aecm->channelStored[i] = aecm->channelAdapt16[i]; - echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]); + // The C code of following optimized code. + // for (i = 0; i < PART_LEN1; i++) { + // aecm->channelAdapt16[i] = aecm->channelStored[i]; + // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( + // (int32_t)aecm->channelStored[i], 16); + // } + + int16_t* start_stored_p = aecm->channelStored; + int16_t* start_adapt16_p = aecm->channelAdapt16; + int32_t* start_adapt32_p = aecm->channelAdapt32; + const int16_t* end_stored_p = start_stored_p + PART_LEN; + + int16x8_t stored_v; + int32x4_t adapt32_v_low, adapt32_v_high; + + while (start_stored_p < end_stored_p) { + stored_v = vld1q_s16(start_stored_p); + vst1q_s16(start_adapt16_p, stored_v); + + adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16); + adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16); + + vst1q_s32(start_adapt32_p, adapt32_v_low); + vst1q_s32(start_adapt32_p + 4, adapt32_v_high); + + start_stored_p += 8; + start_adapt16_p += 8; + start_adapt32_p += 8; + } + aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN]; + aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16; } - -void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm) -{ - int i; - - for(i = 0; i < PART_LEN -7; i += 8) - { - // aecm->channelAdapt16[i] = aecm->channelStored[i]; - // aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32) - // aecm->channelStored[i], 16); - __asm__("vld1.16 {d24, d25}, [%0, :128]" : : - "r"(&aecm->channelStored[i]) : "q12"); - __asm__("vst1.16 {d24, d25}, [%0, :128]" : : - "r"(&aecm->channelAdapt16[i]) : "q12"); - __asm__("vshll.s16 q10, d24, #16" : : : "q12", "q13", "q10"); - __asm__("vshll.s16 q11, d25, #16" : : : "q12", "q13", "q11"); - __asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : : - "r"(&aecm->channelAdapt32[i]): "q10", "q11"); - } - aecm->channelAdapt16[i] = aecm->channelStored[i]; - aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32( - (WebRtc_Word32)aecm->channelStored[i], 16); -} - -#endif // #if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON) diff --git a/webrtc/modules/audio_processing/aecm/aecm_defines.h b/webrtc/modules/audio_processing/aecm/aecm_defines.h new file mode 100644 index 0000000..6d63990 --- /dev/null +++ b/webrtc/modules/audio_processing/aecm/aecm_defines.h @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
+
+#define AECM_DYNAMIC_Q          /* Turn on/off dynamic Q-domain. */
+
+/* Algorithm parameters */
+#define FRAME_LEN       80      /* Total frame length, 10 ms. */
+
+#define PART_LEN        64      /* Length of partition. */
+#define PART_LEN_SHIFT  7       /* Length of (PART_LEN * 2) in base 2. */
+
+#define PART_LEN1       (PART_LEN + 1)   /* Unique fft coefficients. */
+#define PART_LEN2       (PART_LEN << 1)  /* Length of partition * 2. */
+#define PART_LEN4       (PART_LEN << 2)  /* Length of partition * 4. */
+#define FAR_BUF_LEN     PART_LEN4        /* Length of buffers. */
+#define MAX_DELAY       100
+
+/* Counter parameters */
+#define CONV_LEN        512         /* Convergence length used at startup. */
+#define CONV_LEN2       (CONV_LEN << 1)  /* Used at startup. */
+
+/* Energy parameters */
+#define MAX_BUF_LEN     64          /* History length of energy signals. */
+#define FAR_ENERGY_MIN  1025        /* Lowest Far energy level: At least 2 */
+                                    /* in energy. */
+#define FAR_ENERGY_DIFF 929         /* Allowed difference between max */
+                                    /* and min. */
+#define ENERGY_DEV_OFFSET 0         /* The energy error offset in Q8. */
+#define ENERGY_DEV_TOL  400         /* The energy estimation tolerance (Q8). */
+#define FAR_ENERGY_VAD_REGION 230   /* Far VAD tolerance region. */
+
+/* Stepsize parameters */
+#define MU_MIN          10          /* Min stepsize 2^-MU_MIN (far end energy */
+                                    /* dependent). */
+#define MU_MAX          1           /* Max stepsize 2^-MU_MAX (far end energy */
+                                    /* dependent). */
+#define MU_DIFF         9           /* MU_MIN - MU_MAX */
+
+/* Channel parameters */
+#define MIN_MSE_COUNT   20  /* Min number of consecutive blocks with enough */
+                            /* far end energy to compare channel estimates. */
+#define MIN_MSE_DIFF    29  /* The ratio between adapted and stored channel to */
+                            /* accept a new storage (0.8 in Q-MSE_RESOLUTION). */
+#define MSE_RESOLUTION  5           /* MSE parameter resolution. */
+#define RESOLUTION_CHANNEL16 12     /* W16 Channel in Q-RESOLUTION_CHANNEL16. */
+#define RESOLUTION_CHANNEL32 28     /* W32 Channel in Q-RESOLUTION_CHANNEL. */
+#define CHANNEL_VAD     16          /* Minimum energy in frequency band */
+                                    /* to update channel. */
+
+/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */
+#define RESOLUTION_SUPGAIN 8        /* Channel in Q-(RESOLUTION_SUPGAIN). */
+#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN)  /* Default. */
+#define SUPGAIN_ERROR_PARAM_A 3072  /* Estimation error parameter */
+                                    /* (Maximum gain) (8 in Q8). */
+#define SUPGAIN_ERROR_PARAM_B 1536  /* Estimation error parameter */
+                                    /* (Gain before going down). */
+#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT  /* Estimation error parameter */
+                            /* (Should be the same as Default) (1 in Q8). */
+#define SUPGAIN_EPC_DT  200         /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */
+
+/* Defines for "check delay estimation" */
+#define CORR_WIDTH      31          /* Number of samples to correlate over. */
+#define CORR_MAX        16          /* Maximum correlation offset. */
+#define CORR_MAX_BUF    63
+#define CORR_DEV        4
+#define CORR_MAX_LEVEL  20
+#define CORR_MAX_LOW    4
+#define CORR_BUF_LEN    ((CORR_MAX << 1) + 1)
+/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */
+
+#define ONE_Q14 (1 << 14)
+
+/* NLP defines */
+#define NLP_COMP_LOW    3277        /* 0.2 in Q14 */
+#define NLP_COMP_HIGH   ONE_Q14     /* 1 in Q14 */
+
+#endif
diff --git a/webrtc/modules/audio_processing/aecm/echo_control_mobile.c b/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
index dc5c926..83781e9 100644
--- a/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
+++ b/webrtc/modules/audio_processing/aecm/echo_control_mobile.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -8,22 +8,16 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#include <stdlib.h>
-//#include <string.h>
+#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"

-#include "echo_control_mobile.h"
-#include "aecm_core.h"
-#include "ring_buffer.h"
 #ifdef AEC_DEBUG
 #include <stdio.h>
 #endif
-#ifdef MAC_IPHONE_PRINT
-#include <sys/time.h>
-#include <stdio.h>
-#elif defined ARM_WINM_LOG
-#include "windows.h"
-extern HANDLE logFile;
-#endif
+
+#include <stdlib.h>
+
+#include "webrtc/common_audio/ring_buffer.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/aecm/aecm_core.h"

 #define BUF_SIZE_FRAMES 50 // buffer size (frames)
 // Maximum length of resampled signal. Must be an integer multiple of frames
@@ -31,7 +25,7 @@ extern HANDLE logFile;
 // The factor of 2 handles wb, and the + 1 is as a safety margin
 #define MAX_RESAMP_LEN (5 * FRAME_LEN)

-static const int kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
+static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
 static const int kSampMsNb = 8; // samples per ms in nb
 // Target suppression levels for nlp modes
 // log{0.001, 0.00001, 0.00000001}
@@ -63,7 +57,7 @@ typedef struct
     int delayChange;
     short lastDelayDiff;

-    WebRtc_Word16 echoMode;
+    int16_t echoMode;

 #ifdef AEC_DEBUG
     FILE *bufFile;
@@ -72,47 +66,37 @@ typedef struct
     FILE *preCompFile;
     FILE *postCompFile;
 #endif // AEC_DEBUG
     // Structures
-    void *farendBuf;
+    RingBuffer *farendBuf;

     int lastError;

-    AecmCore_t *aecmCore;
-} aecmob_t;
+    AecmCore* aecmCore;
+} AecMobile;

 // Estimates delay to set the position of the farend buffer read pointer
 // (controlled by knownDelay)
-static int WebRtcAecm_EstBufDelay(aecmob_t *aecmInst, short msInSndCardBuf);
+static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf);

 // Stuffs the farend buffer if the estimated delay is too large
-static int WebRtcAecm_DelayComp(aecmob_t *aecmInst);
+static int WebRtcAecm_DelayComp(AecMobile* aecmInst);

-WebRtc_Word32 WebRtcAecm_Create(void **aecmInst)
-{
-    aecmob_t *aecm;
-    if (aecmInst == NULL)
-    {
-        return -1;
+void* WebRtcAecm_Create() {
+    AecMobile* aecm = malloc(sizeof(AecMobile));
+
+    WebRtcSpl_Init();
+
+    aecm->aecmCore = WebRtcAecm_CreateCore();
+    if (!aecm->aecmCore) {
+        WebRtcAecm_Free(aecm);
+        return NULL;
     }

-    aecm = malloc(sizeof(aecmob_t));
-    *aecmInst = aecm;
-    if (aecm == NULL)
-    {
-        return -1;
-    }
-
-    if (WebRtcAecm_CreateCore(&aecm->aecmCore) == -1)
+    aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp,
+                                          sizeof(int16_t));
+    if (!aecm->farendBuf)
     {
         WebRtcAecm_Free(aecm);
-        aecm = NULL;
-        return -1;
-    }
-
-    if (WebRtcApm_CreateBuffer(&aecm->farendBuf, kBufSizeSamp) == -1)
-    {
-        WebRtcAecm_Free(aecm);
-        aecm = NULL;
-        return -1;
+        return NULL;
     }

     aecm->initFlag = 0;
@@ 
-129,16 +113,14 @@ WebRtc_Word32 WebRtcAecm_Create(void **aecmInst) aecm->preCompFile = fopen("preComp.pcm", "wb"); aecm->postCompFile = fopen("postComp.pcm", "wb"); #endif // AEC_DEBUG - return 0; + return aecm; } -WebRtc_Word32 WebRtcAecm_Free(void *aecmInst) -{ - aecmob_t *aecm = aecmInst; +void WebRtcAecm_Free(void* aecmInst) { + AecMobile* aecm = aecmInst; - if (aecm == NULL) - { - return -1; + if (aecm == NULL) { + return; } #ifdef AEC_DEBUG @@ -153,15 +135,13 @@ WebRtc_Word32 WebRtcAecm_Free(void *aecmInst) fclose(aecm->postCompFile); #endif // AEC_DEBUG WebRtcAecm_FreeCore(aecm->aecmCore); - WebRtcApm_FreeBuffer(aecm->farendBuf); + WebRtc_FreeBuffer(aecm->farendBuf); free(aecm); - - return 0; } -WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq) +int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq) { - aecmob_t *aecm = aecmInst; + AecMobile* aecm = aecmInst; AecmConfig aecConfig; if (aecm == NULL) @@ -184,11 +164,7 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq) } // Initialize farend buffer - if (WebRtcApm_InitBuffer(aecm->farendBuf) == -1) - { - aecm->lastError = AECM_UNSPECIFIED_ERROR; - return -1; - } + WebRtc_InitBuffer(aecm->farendBuf); aecm->initFlag = kInitCheck; // indicates that initialization has been done @@ -222,11 +198,11 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq) return 0; } -WebRtc_Word32 WebRtcAecm_BufferFarend(void *aecmInst, const WebRtc_Word16 *farend, - WebRtc_Word16 nrOfSamples) +int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend, + size_t nrOfSamples) { - aecmob_t *aecm = aecmInst; - WebRtc_Word32 retVal = 0; + AecMobile* aecm = aecmInst; + int32_t retVal = 0; if (aecm == NULL) { @@ -257,38 +233,25 @@ WebRtc_Word32 WebRtcAecm_BufferFarend(void *aecmInst, const WebRtc_Word16 *faren WebRtcAecm_DelayComp(aecm); } - WebRtcApm_WriteBuffer(aecm->farendBuf, farend, nrOfSamples); + WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples); return retVal; } -WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoisy, - const WebRtc_Word16 *nearendClean, WebRtc_Word16 *out, - WebRtc_Word16 nrOfSamples, WebRtc_Word16 msInSndCardBuf) +int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy, + const int16_t *nearendClean, int16_t *out, + size_t nrOfSamples, int16_t msInSndCardBuf) { - aecmob_t *aecm = aecmInst; - WebRtc_Word32 retVal = 0; - short i; - short farend[FRAME_LEN]; + AecMobile* aecm = aecmInst; + int32_t retVal = 0; + size_t i; short nmbrOfFilledBuffers; - short nBlocks10ms; - short nFrames; + size_t nBlocks10ms; + size_t nFrames; #ifdef AEC_DEBUG short msInAECBuf; #endif -#ifdef ARM_WINM_LOG - __int64 freq, start, end, diff; - unsigned int milliseconds; - DWORD temp; -#elif defined MAC_IPHONE_PRINT - // double endtime = 0, starttime = 0; - struct timeval starttime; - struct timeval endtime; - static long int timeused = 0; - static int timecount = 0; -#endif - if (aecm == NULL) { return -1; @@ -339,13 +302,17 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi { if (nearendClean == NULL) { - memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples); - } else + if (out != nearendNoisy) + { + memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples); + } + } else if (out != nearendClean) { memcpy(out, nearendClean, sizeof(short) * nrOfSamples); } - nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN; + nmbrOfFilledBuffers = + (short) WebRtc_available_read(aecm->farendBuf) / 
FRAME_LEN; // The AECM is in the start up mode // AECM is disabled until the soundcard buffer and farend buffers are OK @@ -407,10 +374,9 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi aecm->ECstartup = 0; // Enable the AECM } else if (nmbrOfFilledBuffers > aecm->bufSizeStart) { - WebRtcApm_FlushBuffer( - aecm->farendBuf, - WebRtcApm_get_buffer_size(aecm->farendBuf) - - aecm->bufSizeStart * FRAME_LEN); + WebRtc_MoveReadPtr(aecm->farendBuf, + (int) WebRtc_available_read(aecm->farendBuf) + - (int) aecm->bufSizeStart * FRAME_LEN); aecm->ECstartup = 0; } } @@ -422,20 +388,27 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi // Note only 1 block supported for nb and 2 blocks for wb for (i = 0; i < nFrames; i++) { - nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN; + int16_t farend[FRAME_LEN]; + const int16_t* farend_ptr = NULL; + + nmbrOfFilledBuffers = + (short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN; // Check that there is data in the far end buffer if (nmbrOfFilledBuffers > 0) { // Get the next 80 samples from the farend buffer - WebRtcApm_ReadBuffer(aecm->farendBuf, farend, FRAME_LEN); + WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend, + FRAME_LEN); // Always store the last frame for use when we run out of data - memcpy(&(aecm->farendOld[i][0]), farend, FRAME_LEN * sizeof(short)); + memcpy(&(aecm->farendOld[i][0]), farend_ptr, + FRAME_LEN * sizeof(short)); } else { // We have no data so we use the last played frame memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short)); + farend_ptr = farend; } // Call buffer delay estimator when all data is extracted, @@ -445,77 +418,23 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf); } -#ifdef ARM_WINM_LOG - // measure tick start - QueryPerformanceFrequency((LARGE_INTEGER*)&freq); - QueryPerformanceCounter((LARGE_INTEGER*)&start); -#elif defined MAC_IPHONE_PRINT - // starttime = clock()/(double)CLOCKS_PER_SEC; - gettimeofday(&starttime, NULL); -#endif // Call the AECM /*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i], &out[FRAME_LEN * i], aecm->knownDelay);*/ - if (nearendClean == NULL) - { - if (WebRtcAecm_ProcessFrame(aecm->aecmCore, - farend, - &nearendNoisy[FRAME_LEN * i], - NULL, - &out[FRAME_LEN * i]) == -1) - { - return -1; - } - } else - { - if (WebRtcAecm_ProcessFrame(aecm->aecmCore, - farend, - &nearendNoisy[FRAME_LEN * i], - &nearendClean[FRAME_LEN * i], - &out[FRAME_LEN * i]) == -1) - { - return -1; - } - } - -#ifdef ARM_WINM_LOG - - // measure tick end - QueryPerformanceCounter((LARGE_INTEGER*)&end); - - if(end > start) - { - diff = ((end - start) * 1000) / (freq/1000); - milliseconds = (unsigned int)(diff & 0xffffffff); - WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL); - } -#elif defined MAC_IPHONE_PRINT - // endtime = clock()/(double)CLOCKS_PER_SEC; - // printf("%f\n", endtime - starttime); - - gettimeofday(&endtime, NULL); - - if( endtime.tv_usec > starttime.tv_usec) - { - timeused += endtime.tv_usec - starttime.tv_usec; - } else - { - timeused += endtime.tv_usec + 1000000 - starttime.tv_usec; - } - - if(++timecount == 1000) - { - timecount = 0; - printf("AEC: %ld\n", timeused); - timeused = 0; - } -#endif - + if (WebRtcAecm_ProcessFrame(aecm->aecmCore, + farend_ptr, + &nearendNoisy[FRAME_LEN * i], + (nearendClean + ? 
&nearendClean[FRAME_LEN * i] + : NULL), + &out[FRAME_LEN * i]) == -1) + return -1; } } #ifdef AEC_DEBUG - msInAECBuf = WebRtcApm_get_buffer_size(aecm->farendBuf) / (kSampMsNb*aecm->aecmCore->mult); + msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) / + (kSampMsNb * aecm->aecmCore->mult); fwrite(&msInAECBuf, 2, 1, aecm->bufFile); fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile); #endif @@ -523,9 +442,9 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi return retVal; } -WebRtc_Word32 WebRtcAecm_set_config(void *aecmInst, AecmConfig config) +int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config) { - aecmob_t *aecm = aecmInst; + AecMobile* aecm = aecmInst; if (aecm == NULL) { @@ -605,9 +524,9 @@ WebRtc_Word32 WebRtcAecm_set_config(void *aecmInst, AecmConfig config) return 0; } -WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, AecmConfig *config) +int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config) { - aecmob_t *aecm = aecmInst; + AecMobile* aecm = aecmInst; if (aecm == NULL) { @@ -632,17 +551,19 @@ WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, AecmConfig *config) return 0; } -WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst, - const void* echo_path, - size_t size_bytes) +int32_t WebRtcAecm_InitEchoPath(void* aecmInst, + const void* echo_path, + size_t size_bytes) { - aecmob_t *aecm = aecmInst; - const WebRtc_Word16* echo_path_ptr = echo_path; + AecMobile* aecm = aecmInst; + const int16_t* echo_path_ptr = echo_path; - if ((aecm == NULL) || (echo_path == NULL)) - { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; + if (aecmInst == NULL) { + return -1; + } + if (echo_path == NULL) { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; } if (size_bytes != WebRtcAecm_echo_path_size_bytes()) { @@ -661,17 +582,19 @@ WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst, return 0; } -WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst, - void* echo_path, - size_t size_bytes) +int32_t WebRtcAecm_GetEchoPath(void* aecmInst, + void* echo_path, + size_t size_bytes) { - aecmob_t *aecm = aecmInst; - WebRtc_Word16* echo_path_ptr = echo_path; + AecMobile* aecm = aecmInst; + int16_t* echo_path_ptr = echo_path; - if ((aecm == NULL) || (echo_path == NULL)) - { - aecm->lastError = AECM_NULL_POINTER_ERROR; - return -1; + if (aecmInst == NULL) { + return -1; + } + if (echo_path == NULL) { + aecm->lastError = AECM_NULL_POINTER_ERROR; + return -1; } if (size_bytes != WebRtcAecm_echo_path_size_bytes()) { @@ -691,31 +614,12 @@ WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst, size_t WebRtcAecm_echo_path_size_bytes() { - return (PART_LEN1 * sizeof(WebRtc_Word16)); + return (PART_LEN1 * sizeof(int16_t)); } -WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len) +int32_t WebRtcAecm_get_error_code(void *aecmInst) { - const char version[] = "AECM 1.2.0"; - const short versionLen = (short)strlen(version) + 1; // +1 for null-termination - - if (versionStr == NULL) - { - return -1; - } - - if (versionLen > len) - { - return -1; - } - - strncpy(versionStr, version, versionLen); - return 0; -} - -WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst) -{ - aecmob_t *aecm = aecmInst; + AecMobile* aecm = aecmInst; if (aecm == NULL) { @@ -725,19 +629,18 @@ WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst) return aecm->lastError; } -static int WebRtcAecm_EstBufDelay(aecmob_t *aecm, short msInSndCardBuf) -{ - short delayNew, nSampFar, nSampSndCard; +static int 
WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
+    short delayNew, nSampSndCard;
+    short nSampFar = (short) WebRtc_available_read(aecm->farendBuf);
     short diff;

-    nSampFar = WebRtcApm_get_buffer_size(aecm->farendBuf);
     nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;

     delayNew = nSampSndCard - nSampFar;

     if (delayNew < FRAME_LEN)
     {
-        WebRtcApm_FlushBuffer(aecm->farendBuf, FRAME_LEN);
+        WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
         delayNew += FRAME_LEN;
     }

@@ -775,12 +678,11 @@ static int WebRtcAecm_EstBufDelay(aecmob_t *aecm, short msInSndCardBuf)
     return 0;
 }

-static int WebRtcAecm_DelayComp(aecmob_t *aecm)
-{
-    int nSampFar, nSampSndCard, delayNew, nSampAdd;
+static int WebRtcAecm_DelayComp(AecMobile* aecm) {
+    int nSampFar = (int) WebRtc_available_read(aecm->farendBuf);
+    int nSampSndCard, delayNew, nSampAdd;
     const int maxStuffSamp = 10 * FRAME_LEN;

-    nSampFar = WebRtcApm_get_buffer_size(aecm->farendBuf);
     nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
     delayNew = nSampSndCard - nSampFar;

@@ -792,7 +694,7 @@ static int WebRtcAecm_DelayComp(aecmob_t *aecm)
                 FRAME_LEN));
     nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp);

-    WebRtcApm_StuffBuffer(aecm->farendBuf, nSampAdd);
+    WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd);
     aecm->delayChange = 1; // the delay needs to be updated
 }

diff --git a/webrtc/modules/audio_processing/aecm/interface/echo_control_mobile.h b/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
similarity index 51%
rename from webrtc/modules/audio_processing/aecm/interface/echo_control_mobile.h
rename to webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
index 30bea7a..7ae15c2 100644
--- a/webrtc/modules/audio_processing/aecm/interface/echo_control_mobile.h
+++ b/webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -8,10 +8,12 @@
 * be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_

-#include "typedefs.h"
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"

 enum {
     AecmFalse = 0,
@@ -29,8 +31,8 @@ enum {
 #define AECM_BAD_PARAMETER_WARNING 12100

 typedef struct {
-    WebRtc_Word16 cngMode;   // AECM_FALSE, AECM_TRUE (default)
-    WebRtc_Word16 echoMode;  // 0, 1, 2, 3 (default), 4
+    int16_t cngMode;   // AECM_FALSE, AECM_TRUE (default)
+    int16_t echoMode;  // 0, 1, 2, 3 (default), 4
 } AecmConfig;

 #ifdef __cplusplus
@@ -40,133 +42,116 @@ extern "C" {

 /*
 * Allocates the memory needed by the AECM. The memory needs to be
 * initialized separately using the WebRtcAecm_Init() function.
- *
- * Inputs                       Description
- * -------------------------------------------------------------------
- * void **aecmInst              Pointer to the AECM instance to be
- *                              created and initialized
- *
- * Outputs                      Description
- * -------------------------------------------------------------------
- * WebRtc_Word32 return         0: OK
- *                             -1: error
+ * Returns a pointer to the instance, or NULL on failure.
 */
-WebRtc_Word32 WebRtcAecm_Create(void **aecmInst);
+void* WebRtcAecm_Create();

 /*
 * This function releases the memory allocated by WebRtcAecm_Create()
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
- * void *aecmInst               Pointer to the AECM instance
- *
- * Outputs                      Description
- * -------------------------------------------------------------------
- * WebRtc_Word32 return         0: OK
- *                             -1: error
+ * void* aecmInst               Pointer to the AECM instance
 */
-WebRtc_Word32 WebRtcAecm_Free(void *aecmInst);
+void WebRtcAecm_Free(void* aecmInst);

 /*
 * Initializes an AECM instance.
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
- * void *aecmInst               Pointer to the AECM instance
- * WebRtc_Word32 sampFreq       Sampling frequency of data
+ * void* aecmInst               Pointer to the AECM instance
+ * int32_t sampFreq             Sampling frequency of data
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
- * WebRtc_Word32 return         0: OK
+ * int32_t return               0: OK
 *                             -1: error
 */
-WebRtc_Word32 WebRtcAecm_Init(void* aecmInst,
-                              WebRtc_Word32 sampFreq);
+int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);

 /*
 * Inserts an 80 or 160 sample block of data into the farend buffer.
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
- * void *aecmInst               Pointer to the AECM instance
- * WebRtc_Word16 *farend        In buffer containing one frame of
+ * void* aecmInst               Pointer to the AECM instance
+ * int16_t* farend              In buffer containing one frame of
 *                              farend signal
- * WebRtc_Word16 nrOfSamples    Number of samples in farend buffer
+ * size_t nrOfSamples           Number of samples in farend buffer
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
- * WebRtc_Word32 return         0: OK
+ * int32_t return               0: OK
 *                             -1: error
 */
-WebRtc_Word32 WebRtcAecm_BufferFarend(void* aecmInst,
-                                      const WebRtc_Word16* farend,
-                                      WebRtc_Word16 nrOfSamples);
+int32_t WebRtcAecm_BufferFarend(void* aecmInst,
+                                const int16_t* farend,
+                                size_t nrOfSamples);

 /*
 * Runs the AECM on an 80 or 160 sample block of data.
 *
- * Inputs                       Description
+ * Inputs                       Description
 * -------------------------------------------------------------------
- * void *aecmInst               Pointer to the AECM instance
- * WebRtc_Word16 *nearendNoisy  In buffer containing one frame of
+ * void* aecmInst               Pointer to the AECM instance
+ * int16_t* nearendNoisy        In buffer containing one frame of
 *                              reference nearend+echo signal. If
 *                              noise reduction is active, provide
 *                              the noisy signal here.
- * WebRtc_Word16 *nearendClean  In buffer containing one frame of
+ * int16_t* nearendClean        In buffer containing one frame of
 *                              nearend+echo signal. If noise
 *                              reduction is active, provide the
 *                              clean signal here. Otherwise pass a
 *                              NULL pointer.
- * WebRtc_Word16 nrOfSamples    Number of samples in nearend buffer
- * WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and
+ * size_t nrOfSamples           Number of samples in nearend buffer
+ * int16_t msInSndCardBuf       Delay estimate for sound card and
 *                              system buffers
 *
- * Outputs                      Description
+ * Outputs                      Description
 * -------------------------------------------------------------------
- * WebRtc_Word16 *out           Out buffer, one frame of processed nearend
- * WebRtc_Word32 return         0: OK
- *                             -1: error
+ * int16_t* out                 Out buffer, one frame of processed nearend
+ * int32_t return               0: OK
+ *                             -1: error
 */
-WebRtc_Word32 WebRtcAecm_Process(void* aecmInst,
-                                 const WebRtc_Word16* nearendNoisy,
-                                 const WebRtc_Word16* nearendClean,
-                                 WebRtc_Word16* out,
-                                 WebRtc_Word16 nrOfSamples,
-                                 WebRtc_Word16 msInSndCardBuf);
+int32_t WebRtcAecm_Process(void* aecmInst,
+                           const int16_t* nearendNoisy,
+                           const int16_t* nearendClean,
+                           int16_t* out,
+                           size_t nrOfSamples,
+                           int16_t msInSndCardBuf);

 /*
 * This function enables the user to set certain parameters on-the-fly
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
- * void *aecmInst               Pointer to the AECM instance
- * AecmConfig config            Config instance that contains all
+ * void* aecmInst               Pointer to the AECM instance
+ * AecmConfig config            Config instance that contains all
 *                              properties to be set
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
- * WebRtc_Word32 return         0: OK
+ * int32_t return               0: OK
 *                             -1: error
 */
-WebRtc_Word32 WebRtcAecm_set_config(void* aecmInst,
-                                    AecmConfig config);
+int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);

 /*
 * This function enables the user to get certain parameters on-the-fly
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
- * void *aecmInst               Pointer to the AECM instance
+ * void* aecmInst               Pointer to the AECM instance
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
- * AecmConfig *config           Pointer to the config instance that
+ * AecmConfig* config           Pointer to the config instance that
 *                              all properties will be written to
- * WebRtc_Word32 return         0: OK
+ * int32_t return               0: OK
 *                             -1: error
 */
-WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst,
-                                    AecmConfig *config);
+int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config);

 /*
 * This function enables the user to set the echo path on-the-fly.
@@ -179,12 +164,12 @@ WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, * * Outputs Description * ------------------------------------------------------------------- - * WebRtc_Word32 return 0: OK + * int32_t return 0: OK * -1: error */ -WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst, - const void* echo_path, - size_t size_bytes); +int32_t WebRtcAecm_InitEchoPath(void* aecmInst, + const void* echo_path, + size_t size_bytes); /* * This function enables the user to get the currently used echo path @@ -198,19 +183,19 @@ WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst, * * Outputs Description * ------------------------------------------------------------------- - * WebRtc_Word32 return 0: OK + * int32_t return 0: OK * -1: error */ -WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst, - void* echo_path, - size_t size_bytes); +int32_t WebRtcAecm_GetEchoPath(void* aecmInst, + void* echo_path, + size_t size_bytes); /* * This function enables the user to get the echo path size in bytes * * Outputs Description * ------------------------------------------------------------------- - * size_t return : size in bytes + * size_t return Size in bytes */ size_t WebRtcAecm_echo_path_size_bytes(); @@ -219,32 +204,15 @@ size_t WebRtcAecm_echo_path_size_bytes(); * * Inputs Description * ------------------------------------------------------------------- - * void *aecmInst Pointer to the AECM instance + * void* aecmInst Pointer to the AECM instance * * Outputs Description * ------------------------------------------------------------------- - * WebRtc_Word32 return 11000-11100: error code + * int32_t return 11000-11100: error code */ -WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst); - -/* - * Gets a version string - * - * Inputs Description - * ------------------------------------------------------------------- - * char *versionStr Pointer to a string array - * WebRtc_Word16 len The maximum length of the string - * - * Outputs Description - * ------------------------------------------------------------------- - * WebRtc_Word8 *versionStr Pointer to a string array - * WebRtc_Word32 return 0: OK - * -1: error - */ -WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr, - WebRtc_Word16 len); +int32_t WebRtcAecm_get_error_code(void *aecmInst); #ifdef __cplusplus } #endif -#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_ */ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_ diff --git a/webrtc/modules/audio_processing/agc/Makefile.am b/webrtc/modules/audio_processing/agc/Makefile.am deleted file mode 100644 index e73f2e3..0000000 --- a/webrtc/modules/audio_processing/agc/Makefile.am +++ /dev/null @@ -1,10 +0,0 @@ -noinst_LTLIBRARIES = libagc.la - -libagc_la_SOURCES = interface/gain_control.h \ - analog_agc.c \ - analog_agc.h \ - digital_agc.c \ - digital_agc.h -libagc_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \ - -I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \ - -I$(top_srcdir)/src/modules/audio_processing/utility diff --git a/webrtc/modules/audio_processing/agc/agc.cc b/webrtc/modules/audio_processing/agc/agc.cc new file mode 100644 index 0000000..706b963 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc.cc @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. 
An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/agc.h"
+
+#include <cassert>
+#include <cmath>
+
+#include <algorithm>
+#include <vector>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+#include "webrtc/modules/audio_processing/agc/utility.h"
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+namespace {
+
+const int kDefaultLevelDbfs = -18;
+const int kNumAnalysisFrames = 100;
+const double kActivityThreshold = 0.3;
+
+}  // namespace
+
+Agc::Agc()
+    : target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
+      target_level_dbfs_(kDefaultLevelDbfs),
+      histogram_(Histogram::Create(kNumAnalysisFrames)),
+      inactive_histogram_(Histogram::Create()) {
+  }
+
+Agc::~Agc() {}
+
+float Agc::AnalyzePreproc(const int16_t* audio, size_t length) {
+  assert(length > 0);
+  size_t num_clipped = 0;
+  for (size_t i = 0; i < length; ++i) {
+    if (audio[i] == 32767 || audio[i] == -32768)
+      ++num_clipped;
+  }
+  return 1.0f * num_clipped / length;
+}
+
+int Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) {
+  vad_.ProcessChunk(audio, length, sample_rate_hz);
+  const std::vector<double>& rms = vad_.chunkwise_rms();
+  const std::vector<double>& probabilities =
+      vad_.chunkwise_voice_probabilities();
+  RTC_DCHECK_EQ(rms.size(), probabilities.size());
+  for (size_t i = 0; i < rms.size(); ++i) {
+    histogram_->Update(rms[i], probabilities[i]);
+  }
+  return 0;
+}
+
+bool Agc::GetRmsErrorDb(int* error) {
+  if (!error) {
+    assert(false);
+    return false;
+  }
+
+  if (histogram_->num_updates() < kNumAnalysisFrames) {
+    // We haven't yet received enough frames.
+    return false;
+  }
+
+  if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
+    // We are likely in an inactive segment.
+    return false;
+  }
+
+  double loudness = Linear2Loudness(histogram_->CurrentRms());
+  *error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
+  histogram_->Reset();
+  return true;
+}
+
+void Agc::Reset() {
+  histogram_->Reset();
+}
+
+int Agc::set_target_level_dbfs(int level) {
+  // TODO(turajs): just some arbitrary sanity check. We can come up with better
+  // limits. The upper limit should be chosen such that the risk of clipping is
+  // low. The lower limit should not result in a too quiet signal.
+  if (level >= 0 || level <= -100)
+    return -1;
+  target_level_dbfs_ = level;
+  target_level_loudness_ = Dbfs2Loudness(level);
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/agc.gypi b/webrtc/modules/audio_processing/agc/agc.gypi
deleted file mode 100644
index 44e7d24..0000000
--- a/webrtc/modules/audio_processing/agc/agc.gypi
+++ /dev/null
@@ -1,34 +0,0 @@
-# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
-#
-# Use of this source code is governed by a BSD-style license
-# that can be found in the LICENSE file in the root of the source
-# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS. All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-{
-  'targets': [
-    {
-      'target_name': 'agc',
-      'type': '<(library)',
-      'dependencies': [
-        '<(webrtc_root)/common_audio/common_audio.gyp:spl',
-      ],
-      'include_dirs': [
-        'interface',
-      ],
-      'direct_dependent_settings': {
-        'include_dirs': [
-          'interface',
-        ],
-      },
-      'sources': [
-        'interface/gain_control.h',
-        'analog_agc.c',
-        'analog_agc.h',
-        'digital_agc.c',
-        'digital_agc.h',
-      ],
-    },
-  ],
-}
diff --git a/webrtc/modules/audio_processing/agc/agc.h b/webrtc/modules/audio_processing/agc/agc.h
new file mode 100644
index 0000000..08c287f
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class Histogram;
+
+class Agc {
+ public:
+  Agc();
+  virtual ~Agc();
+
+  // Returns the proportion of samples in the buffer which are at full-scale
+  // (and presumably clipped).
+  virtual float AnalyzePreproc(const int16_t* audio, size_t length);
+  // |audio| must be mono; in a multi-channel stream, provide the first (usually
+  // left) channel.
+  virtual int Process(const int16_t* audio, size_t length, int sample_rate_hz);
+
+  // Retrieves the difference between the target RMS level and the current
+  // signal RMS level in dB. Returns true if an update is available and false
+  // otherwise, in which case |error| should be ignored and no action taken.
+  virtual bool GetRmsErrorDb(int* error);
+  virtual void Reset();
+
+  virtual int set_target_level_dbfs(int level);
+  virtual int target_level_dbfs() const { return target_level_dbfs_; }
+
+  virtual float voice_probability() const {
+    return vad_.last_voice_probability();
+  }
+
+ private:
+  double target_level_loudness_;
+  int target_level_dbfs_;
+  rtc::scoped_ptr<Histogram> histogram_;
+  rtc::scoped_ptr<Histogram> inactive_histogram_;
+  VoiceActivityDetector vad_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
diff --git a/webrtc/modules/audio_processing/agc/agc_manager_direct.cc b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc
new file mode 100644
index 0000000..48ce2f8
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/agc_manager_direct.cc
@@ -0,0 +1,442 @@
+/*
+ * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
+
+#include <cassert>
+#include <cmath>
+
+#ifdef WEBRTC_AGC_DEBUG_DUMP
+#include <cstdio>
+#endif
+
+#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
+#include "webrtc/modules/audio_processing/gain_control_impl.h"
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/system_wrappers/interface/logging.h"
+
+namespace webrtc {
+
+namespace {
+
+// Lowest level the microphone can be lowered to due to clipping.
+const int kClippedLevelMin = 170;
+// Amount the microphone level is lowered with every clipping event.
+const int kClippedLevelStep = 15;
+// Proportion of clipped samples required to declare a clipping event.
+const float kClippedRatioThreshold = 0.1f;
+// Time in frames to wait after a clipping event before checking again.
+const int kClippedWaitFrames = 300;
+
+// Amount of error we tolerate in the microphone level (presumably due to OS
+// quantization) before we assume the user has manually adjusted the microphone.
+const int kLevelQuantizationSlack = 25;
+
+const int kDefaultCompressionGain = 7;
+const int kMaxCompressionGain = 12;
+const int kMinCompressionGain = 2;
+// Controls the rate of compression changes towards the target.
+const float kCompressionGainStep = 0.05f;
+
+const int kMaxMicLevel = 255;
+static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
+const int kMinMicLevel = 12;
+
+// Prevent very large microphone level changes.
+const int kMaxResidualGainChange = 15;
+
+// Maximum additional gain allowed to compensate for microphone level
+// restrictions from clipping events.
+const int kSurplusCompressionGain = 6;
+
+int ClampLevel(int mic_level) {
+  return std::min(std::max(kMinMicLevel, mic_level), kMaxMicLevel);
+}
+
+int LevelFromGainError(int gain_error, int level) {
+  assert(level >= 0 && level <= kMaxMicLevel);
+  if (gain_error == 0) {
+    return level;
+  }
+  // TODO(ajm): Could be made more efficient with a binary search.
+  int new_level = level;
+  if (gain_error > 0) {
+    while (kGainMap[new_level] - kGainMap[level] < gain_error &&
+           new_level < kMaxMicLevel) {
+      ++new_level;
+    }
+  } else {
+    while (kGainMap[new_level] - kGainMap[level] > gain_error &&
+           new_level > kMinMicLevel) {
+      --new_level;
+    }
+  }
+  return new_level;
+}
+
+}  // namespace
+
+// Facility for dumping debug audio files. All methods are no-ops in the
+// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
+class DebugFile {
+#ifdef WEBRTC_AGC_DEBUG_DUMP
+ public:
+  explicit DebugFile(const char* filename)
+      : file_(fopen(filename, "wb")) {
+    assert(file_);
+  }
+  ~DebugFile() {
+    fclose(file_);
+  }
+  void Write(const int16_t* data, size_t length_samples) {
+    fwrite(data, 1, length_samples * sizeof(int16_t), file_);
+  }
+ private:
+  FILE* file_;
+#else
+ public:
+  explicit DebugFile(const char* filename) {
+  }
+  ~DebugFile() {
+  }
+  void Write(const int16_t* data, size_t length_samples) {
+  }
+#endif  // WEBRTC_AGC_DEBUG_DUMP
+};
+
+AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
+                                   VolumeCallbacks* volume_callbacks,
+                                   int startup_min_level)
+    : agc_(new Agc()),
+      gctrl_(gctrl),
+      volume_callbacks_(volume_callbacks),
+      frames_since_clipped_(kClippedWaitFrames),
+      level_(0),
+      max_level_(kMaxMicLevel),
+      max_compression_gain_(kMaxCompressionGain),
+      target_compression_(kDefaultCompressionGain),
+      compression_(target_compression_),
+      compression_accumulator_(compression_),
+      capture_muted_(false),
+      check_volume_on_next_process_(true),  // Check at startup.
+ startup_(true), + startup_min_level_(ClampLevel(startup_min_level)), + file_preproc_(new DebugFile("agc_preproc.pcm")), + file_postproc_(new DebugFile("agc_postproc.pcm")) { +} + +AgcManagerDirect::AgcManagerDirect(Agc* agc, + GainControl* gctrl, + VolumeCallbacks* volume_callbacks, + int startup_min_level) + : agc_(agc), + gctrl_(gctrl), + volume_callbacks_(volume_callbacks), + frames_since_clipped_(kClippedWaitFrames), + level_(0), + max_level_(kMaxMicLevel), + max_compression_gain_(kMaxCompressionGain), + target_compression_(kDefaultCompressionGain), + compression_(target_compression_), + compression_accumulator_(compression_), + capture_muted_(false), + check_volume_on_next_process_(true), // Check at startup. + startup_(true), + startup_min_level_(ClampLevel(startup_min_level)), + file_preproc_(new DebugFile("agc_preproc.pcm")), + file_postproc_(new DebugFile("agc_postproc.pcm")) { +} + +AgcManagerDirect::~AgcManagerDirect() {} + +int AgcManagerDirect::Initialize() { + max_level_ = kMaxMicLevel; + max_compression_gain_ = kMaxCompressionGain; + target_compression_ = kDefaultCompressionGain; + compression_ = target_compression_; + compression_accumulator_ = compression_; + capture_muted_ = false; + check_volume_on_next_process_ = true; + // TODO(bjornv): Investigate if we need to reset |startup_| as well. For + // example, what happens when we change devices. + + if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) { + LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital); + return -1; + } + if (gctrl_->set_target_level_dbfs(2) != 0) { + LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2); + return -1; + } + if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) { + LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain); + return -1; + } + if (gctrl_->enable_limiter(true) != 0) { + LOG_FERR1(LS_ERROR, enable_limiter, true); + return -1; + } + return 0; +} + +void AgcManagerDirect::AnalyzePreProcess(int16_t* audio, + int num_channels, + size_t samples_per_channel) { + size_t length = num_channels * samples_per_channel; + if (capture_muted_) { + return; + } + + file_preproc_->Write(audio, length); + + if (frames_since_clipped_ < kClippedWaitFrames) { + ++frames_since_clipped_; + return; + } + + // Check for clipped samples, as the AGC has difficulty detecting pitch + // under clipping distortion. We do this in the preprocessing phase in order + // to catch clipped echo as well. + // + // If we find a sufficiently clipped frame, drop the current microphone level + // and enforce a new maximum level, dropped the same amount from the current + // maximum. This harsh treatment is an effort to avoid repeated clipped echo + // events. As compensation for this restriction, the maximum compression + // gain is increased, through SetMaxLevel(). + float clipped_ratio = agc_->AnalyzePreproc(audio, length); + if (clipped_ratio > kClippedRatioThreshold) { + LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio=" + << clipped_ratio; + // Always decrease the maximum level, even if the current level is below + // threshold. + SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep)); + if (level_ > kClippedLevelMin) { + // Don't try to adjust the level if we're already below the limit. As + // a consequence, if the user has brought the level above the limit, we + // will still not react until the postproc updates the level. + SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep)); + // Reset the AGC since the level has changed. 
+ agc_->Reset(); + } + frames_since_clipped_ = 0; + } +} + +void AgcManagerDirect::Process(const int16_t* audio, + size_t length, + int sample_rate_hz) { + if (capture_muted_) { + return; + } + + if (check_volume_on_next_process_) { + check_volume_on_next_process_ = false; + // We have to wait until the first process call to check the volume, + // because Chromium doesn't guarantee it to be valid any earlier. + CheckVolumeAndReset(); + } + + if (agc_->Process(audio, length, sample_rate_hz) != 0) { + LOG_FERR0(LS_ERROR, Agc::Process); + assert(false); + } + + UpdateGain(); + UpdateCompressor(); + + file_postproc_->Write(audio, length); +} + +void AgcManagerDirect::SetLevel(int new_level) { + int voe_level = volume_callbacks_->GetMicVolume(); + if (voe_level < 0) { + return; + } + if (voe_level == 0) { + LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action."; + return; + } + if (voe_level > kMaxMicLevel) { + LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level; + return; + } + + if (voe_level > level_ + kLevelQuantizationSlack || + voe_level < level_ - kLevelQuantizationSlack) { + LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating " + << "stored level from " << level_ << " to " << voe_level; + level_ = voe_level; + // Always allow the user to increase the volume. + if (level_ > max_level_) { + SetMaxLevel(level_); + } + // Take no action in this case, since we can't be sure when the volume + // was manually adjusted. The compressor will still provide some of the + // desired gain change. + agc_->Reset(); + return; + } + + new_level = std::min(new_level, max_level_); + if (new_level == level_) { + return; + } + + volume_callbacks_->SetMicVolume(new_level); + LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", " + << "level_=" << level_ << ", " + << "new_level=" << new_level; + level_ = new_level; +} + +void AgcManagerDirect::SetMaxLevel(int level) { + assert(level >= kClippedLevelMin); + max_level_ = level; + // Scale the |kSurplusCompressionGain| linearly across the restricted + // level range. + max_compression_gain_ = kMaxCompressionGain + std::floor( + (1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin) * + kSurplusCompressionGain + 0.5f); + LOG(LS_INFO) << "[agc] max_level_=" << max_level_ + << ", max_compression_gain_=" << max_compression_gain_; +} + +void AgcManagerDirect::SetCaptureMuted(bool muted) { + if (capture_muted_ == muted) { + return; + } + capture_muted_ = muted; + + if (!muted) { + // When we unmute, we should reset things to be safe. + check_volume_on_next_process_ = true; + } +} + +float AgcManagerDirect::voice_probability() { + return agc_->voice_probability(); +} + +int AgcManagerDirect::CheckVolumeAndReset() { + int level = volume_callbacks_->GetMicVolume(); + if (level < 0) { + return -1; + } + // Reasons for taking action at startup: + // 1) A person starting a call is expected to be heard. + // 2) Independent of interpretation of |level| == 0 we should raise it so the + // AGC can do its job properly. + if (level == 0 && !startup_) { + LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action."; + return 0; + } + if (level > kMaxMicLevel) { + LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level; + return -1; + } + LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level; + + int minLevel = startup_ ? 
startup_min_level_ : kMinMicLevel; + if (level < minLevel) { + level = minLevel; + LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level; + volume_callbacks_->SetMicVolume(level); + } + agc_->Reset(); + level_ = level; + startup_ = false; + return 0; +} + +// Requests the RMS error from AGC and distributes the required gain change +// between the digital compression stage and volume slider. We use the +// compressor first, providing a slack region around the current slider +// position to reduce movement. +// +// If the slider needs to be moved, we check first if the user has adjusted +// it, in which case we take no action and cache the updated level. +void AgcManagerDirect::UpdateGain() { + int rms_error = 0; + if (!agc_->GetRmsErrorDb(&rms_error)) { + // No error update ready. + return; + } + // The compressor will always add at least kMinCompressionGain. In effect, + // this adjusts our target gain upward by the same amount and rms_error + // needs to reflect that. + rms_error += kMinCompressionGain; + + // Handle as much error as possible with the compressor first. + int raw_compression = std::max(std::min(rms_error, max_compression_gain_), + kMinCompressionGain); + // Deemphasize the compression gain error. Move halfway between the current + // target and the newly received target. This serves to soften perceptible + // intra-talkspurt adjustments, at the cost of some adaptation speed. + if ((raw_compression == max_compression_gain_ && + target_compression_ == max_compression_gain_ - 1) || + (raw_compression == kMinCompressionGain && + target_compression_ == kMinCompressionGain + 1)) { + // Special case to allow the target to reach the endpoints of the + // compression range. The deemphasis would otherwise halt it at 1 dB shy. + target_compression_ = raw_compression; + } else { + target_compression_ = (raw_compression - target_compression_) / 2 + + target_compression_; + } + + // Residual error will be handled by adjusting the volume slider. Use the + // raw rather than deemphasized compression here as we would otherwise + // shrink the amount of slack the compressor provides. + int residual_gain = rms_error - raw_compression; + residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange), + kMaxResidualGainChange); + LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", " + << "target_compression=" << target_compression_ << ", " + << "residual_gain=" << residual_gain; + if (residual_gain == 0) + return; + + SetLevel(LevelFromGainError(residual_gain, level_)); +} + +void AgcManagerDirect::UpdateCompressor() { + if (compression_ == target_compression_) { + return; + } + + // Adapt the compression gain slowly towards the target, in order to avoid + // highly perceptible changes. + if (target_compression_ > compression_) { + compression_accumulator_ += kCompressionGainStep; + } else { + compression_accumulator_ -= kCompressionGainStep; + } + + // The compressor accepts integer gains in dB. Adjust the gain when + // we've come within half a stepsize of the nearest integer. (We don't + // check for equality due to potential floating point imprecision). + int new_compression = compression_; + int nearest_neighbor = std::floor(compression_accumulator_ + 0.5); + if (std::fabs(compression_accumulator_ - nearest_neighbor) < + kCompressionGainStep / 2) { + new_compression = nearest_neighbor; + } + + // Set the new compression gain. 
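+  // Note that |compression_| and |compression_accumulator_| are re-synced
+  // below, so the slow ramp restarts from the integer gain that is actually
+  // applied to GainControl.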
+ if (new_compression != compression_) { + compression_ = new_compression; + compression_accumulator_ = new_compression; + if (gctrl_->set_compression_gain_db(compression_) != 0) { + LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_); + } + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/agc/agc_manager_direct.h b/webrtc/modules/audio_processing/agc/agc_manager_direct.h new file mode 100644 index 0000000..6edb0f7 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/agc_manager_direct.h @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_ + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/modules/audio_processing/agc/agc.h" + +namespace webrtc { + +class AudioFrame; +class DebugFile; +class GainControl; + +// Callbacks that need to be injected into AgcManagerDirect to read and control +// the volume values. This is done to remove the VoiceEngine dependency in +// AgcManagerDirect. +// TODO(aluebs): Remove VolumeCallbacks. +class VolumeCallbacks { + public: + virtual ~VolumeCallbacks() {} + virtual void SetMicVolume(int volume) = 0; + virtual int GetMicVolume() = 0; +}; + +// Direct interface to use AGC to set volume and compression values. +// AudioProcessing uses this interface directly to integrate the callback-less +// AGC. +// +// This class is not thread-safe. +class AgcManagerDirect final { + public: + // AgcManagerDirect will configure GainControl internally. The user is + // responsible for processing the audio using it after the call to Process. + // The operating range of startup_min_level is [12, 255] and any input value + // outside that range will be clamped. + AgcManagerDirect(GainControl* gctrl, + VolumeCallbacks* volume_callbacks, + int startup_min_level); + // Dependency injection for testing. Don't delete |agc| as the memory is owned + // by the manager. + AgcManagerDirect(Agc* agc, + GainControl* gctrl, + VolumeCallbacks* volume_callbacks, + int startup_min_level); + ~AgcManagerDirect(); + + int Initialize(); + void AnalyzePreProcess(int16_t* audio, + int num_channels, + size_t samples_per_channel); + void Process(const int16_t* audio, size_t length, int sample_rate_hz); + + // Call when the capture stream has been muted/unmuted. This causes the + // manager to disregard all incoming audio; chances are good it's background + // noise to which we'd like to avoid adapting. + void SetCaptureMuted(bool muted); + bool capture_muted() { return capture_muted_; } + + float voice_probability(); + + private: + // Sets a new microphone level, after first checking that it hasn't been + // updated by the user, in which case no action is taken. + void SetLevel(int new_level); + + // Set the maximum level the AGC is allowed to apply. Also updates the + // maximum compression gain to compensate. The level must be at least + // |kClippedLevelMin|. 
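+  // (As implemented in the .cc file, the compensation scales
+  // |kSurplusCompressionGain| linearly over the range
+  // [kClippedLevelMin, kMaxMicLevel]: the further the maximum level is pulled
+  // down, the more extra digital gain is allowed on top of
+  // |kMaxCompressionGain|.)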
+  void SetMaxLevel(int level);
+
+  int CheckVolumeAndReset();
+  void UpdateGain();
+  void UpdateCompressor();
+
+  rtc::scoped_ptr<Agc> agc_;
+  GainControl* gctrl_;
+  VolumeCallbacks* volume_callbacks_;
+
+  int frames_since_clipped_;
+  int level_;
+  int max_level_;
+  int max_compression_gain_;
+  int target_compression_;
+  int compression_;
+  float compression_accumulator_;
+  bool capture_muted_;
+  bool check_volume_on_next_process_;
+  bool startup_;
+  int startup_min_level_;
+
+  rtc::scoped_ptr<DebugFile> file_preproc_;
+  rtc::scoped_ptr<DebugFile> file_postproc_;
+
+  RTC_DISALLOW_COPY_AND_ASSIGN(AgcManagerDirect);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
diff --git a/webrtc/modules/audio_processing/agc/analog_agc.h b/webrtc/modules/audio_processing/agc/analog_agc.h
deleted file mode 100644
index b32ac65..0000000
--- a/webrtc/modules/audio_processing/agc/analog_agc.h
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
-
-#include "typedefs.h"
-#include "gain_control.h"
-#include "digital_agc.h"
-
-//#define AGC_DEBUG
-//#define MIC_LEVEL_FEEDBACK
-#ifdef AGC_DEBUG
-#include <stdio.h>
-#endif
-
-/* Analog Automatic Gain Control variables:
- * Constant declarations (inner limits inside which no changes are done)
- * In the beginning the range is narrower to widen as soon as the measure
- * 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
- * and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
- * go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
- * The limits are created by running the AGC with a file having the desired
- * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
- * by out=10*log10(in/260537279.7); Set the target level to the average level
- * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
- * Q(-7).
(Example matlab code: round(db2pow(-21.2)*16/2^7) ) - */ -#define RXX_BUFFER_LEN 10 - -static const WebRtc_Word16 kMsecSpeechInner = 520; -static const WebRtc_Word16 kMsecSpeechOuter = 340; - -static const WebRtc_Word16 kNormalVadThreshold = 400; - -static const WebRtc_Word16 kAlphaShortTerm = 6; // 1 >> 6 = 0.0156 -static const WebRtc_Word16 kAlphaLongTerm = 10; // 1 >> 10 = 0.000977 - -typedef struct -{ - // Configurable parameters/variables - WebRtc_UWord32 fs; // Sampling frequency - WebRtc_Word16 compressionGaindB; // Fixed gain level in dB - WebRtc_Word16 targetLevelDbfs; // Target level in -dBfs of envelope (default -3) - WebRtc_Word16 agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig) - WebRtc_UWord8 limiterEnable; // Enabling limiter (on/off (default off)) - WebRtcAgc_config_t defaultConfig; - WebRtcAgc_config_t usedConfig; - - // General variables - WebRtc_Word16 initFlag; - WebRtc_Word16 lastError; - - // Target level parameters - // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7) - WebRtc_Word32 analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs - WebRtc_Word32 startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs - WebRtc_Word32 startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs - WebRtc_Word32 upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs - WebRtc_Word32 lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs - WebRtc_Word32 upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs - WebRtc_Word32 lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs - WebRtc_UWord16 targetIdx; // Table index for corresponding target level -#ifdef MIC_LEVEL_FEEDBACK - WebRtc_UWord16 targetIdxOffset; // Table index offset for level compensation -#endif - WebRtc_Word16 analogTarget; // Digital reference level in ENV scale - - // Analog AGC specific variables - WebRtc_Word32 filterState[8]; // For downsampling wb to nb - WebRtc_Word32 upperLimit; // Upper limit for mic energy - WebRtc_Word32 lowerLimit; // Lower limit for mic energy - WebRtc_Word32 Rxx160w32; // Average energy for one frame - WebRtc_Word32 Rxx16_LPw32; // Low pass filtered subframe energies - WebRtc_Word32 Rxx160_LPw32; // Low pass filtered frame energies - WebRtc_Word32 Rxx16_LPw32Max; // Keeps track of largest energy subframe - WebRtc_Word32 Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies - WebRtc_Word32 Rxx16w32_array[2][5];// Energy values of microphone signal - WebRtc_Word32 env[2][10]; // Envelope values of subframes - - WebRtc_Word16 Rxx16pos; // Current position in the Rxx16_vectorw32 - WebRtc_Word16 envSum; // Filtered scaled envelope in subframes - WebRtc_Word16 vadThreshold; // Threshold for VAD decision - WebRtc_Word16 inActive; // Inactive time in milliseconds - WebRtc_Word16 msTooLow; // Milliseconds of speech at a too low level - WebRtc_Word16 msTooHigh; // Milliseconds of speech at a too high level - WebRtc_Word16 changeToSlowMode; // Change to slow mode after some time at target - WebRtc_Word16 firstCall; // First call to the process-function - WebRtc_Word16 msZero; // Milliseconds of zero input - WebRtc_Word16 msecSpeechOuterChange;// Min ms of speech between volume changes - WebRtc_Word16 msecSpeechInnerChange;// Min ms of speech between volume changes - WebRtc_Word16 activeSpeech; // Milliseconds of active speech - WebRtc_Word16 muteGuardMs; // Counter to prevent mute action - WebRtc_Word16 inQueue; // 10 ms batch indicator - - // Microphone level variables - WebRtc_Word32 micRef; // Remember ref. 
mic level for virtual mic - WebRtc_UWord16 gainTableIdx; // Current position in virtual gain table - WebRtc_Word32 micGainIdx; // Gain index of mic level to increase slowly - WebRtc_Word32 micVol; // Remember volume between frames - WebRtc_Word32 maxLevel; // Max possible vol level, incl dig gain - WebRtc_Word32 maxAnalog; // Maximum possible analog volume level - WebRtc_Word32 maxInit; // Initial value of "max" - WebRtc_Word32 minLevel; // Minimum possible volume level - WebRtc_Word32 minOutput; // Minimum output volume level - WebRtc_Word32 zeroCtrlMax; // Remember max gain => don't amp low input - - WebRtc_Word16 scale; // Scale factor for internal volume levels -#ifdef MIC_LEVEL_FEEDBACK - WebRtc_Word16 numBlocksMicLvlSat; - WebRtc_UWord8 micLvlSat; -#endif - // Structs for VAD and digital_agc - AgcVad_t vadMic; - DigitalAgc_t digitalAgc; - -#ifdef AGC_DEBUG - FILE* fpt; - FILE* agcLog; - WebRtc_Word32 fcount; -#endif - - WebRtc_Word16 lowLevelSignal; -} Agc_t; - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_ diff --git a/webrtc/modules/audio_processing/agc/digital_agc.h b/webrtc/modules/audio_processing/agc/digital_agc.h deleted file mode 100644 index 240b220..0000000 --- a/webrtc/modules/audio_processing/agc/digital_agc.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_ - -#ifdef AGC_DEBUG -#include -#endif -#include "typedefs.h" -#include "signal_processing_library.h" - -// the 32 most significant bits of A(19) * B(26) >> 13 -#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 )) -// C + the 32 most significant bits of A * B -#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 )) - -typedef struct -{ - WebRtc_Word32 downState[8]; - WebRtc_Word16 HPstate; - WebRtc_Word16 counter; - WebRtc_Word16 logRatio; // log( P(active) / P(inactive) ) (Q10) - WebRtc_Word16 meanLongTerm; // Q10 - WebRtc_Word32 varianceLongTerm; // Q8 - WebRtc_Word16 stdLongTerm; // Q10 - WebRtc_Word16 meanShortTerm; // Q10 - WebRtc_Word32 varianceShortTerm; // Q8 - WebRtc_Word16 stdShortTerm; // Q10 -} AgcVad_t; // total = 54 bytes - -typedef struct -{ - WebRtc_Word32 capacitorSlow; - WebRtc_Word32 capacitorFast; - WebRtc_Word32 gain; - WebRtc_Word32 gainTable[32]; - WebRtc_Word16 gatePrevious; - WebRtc_Word16 agcMode; - AgcVad_t vadNearend; - AgcVad_t vadFarend; -#ifdef AGC_DEBUG - FILE* logFile; - int frameCounter; -#endif -} DigitalAgc_t; - -WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, WebRtc_Word16 agcMode); - -WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inNear, - const WebRtc_Word16 *inNear_H, WebRtc_Word16 *out, - WebRtc_Word16 *out_H, WebRtc_UWord32 FS, - WebRtc_Word16 lowLevelSignal); - -WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inFar, - WebRtc_Word16 nrSamples); - -void WebRtcAgc_InitVad(AgcVad_t *vadInst); - -WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *vadInst, // (i) 
VAD state - const WebRtc_Word16 *in, // (i) Speech signal - WebRtc_Word16 nrSamples); // (i) number of samples - -WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 - WebRtc_Word16 compressionGaindB, // Q0 (in dB) - WebRtc_Word16 targetLevelDbfs,// Q0 (in dB) - WebRtc_UWord8 limiterEnable, WebRtc_Word16 analogTarget); - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_ diff --git a/webrtc/modules/audio_processing/agc/gain_map_internal.h b/webrtc/modules/audio_processing/agc/gain_map_internal.h new file mode 100644 index 0000000..53c71c1 --- /dev/null +++ b/webrtc/modules/audio_processing/agc/gain_map_internal.h @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ + +static const int kGainMapSize = 256; +// Uses parameters: si = 2, sf = 0.25, D = 8/256 +static const int kGainMap[kGainMapSize] = { + -56, + -54, + -52, + -50, + -48, + -47, + -45, + -43, + -42, + -40, + -38, + -37, + -35, + -34, + -33, + -31, + -30, + -29, + -27, + -26, + -25, + -24, + -23, + -22, + -20, + -19, + -18, + -17, + -16, + -15, + -14, + -14, + -13, + -12, + -11, + -10, + -9, + -8, + -8, + -7, + -6, + -5, + -5, + -4, + -3, + -2, + -2, + -1, + 0, + 0, + 1, + 1, + 2, + 3, + 3, + 4, + 4, + 5, + 5, + 6, + 6, + 7, + 7, + 8, + 8, + 9, + 9, + 10, + 10, + 11, + 11, + 12, + 12, + 13, + 13, + 13, + 14, + 14, + 15, + 15, + 15, + 16, + 16, + 17, + 17, + 17, + 18, + 18, + 18, + 19, + 19, + 19, + 20, + 20, + 21, + 21, + 21, + 22, + 22, + 22, + 23, + 23, + 23, + 24, + 24, + 24, + 24, + 25, + 25, + 25, + 26, + 26, + 26, + 27, + 27, + 27, + 28, + 28, + 28, + 28, + 29, + 29, + 29, + 30, + 30, + 30, + 30, + 31, + 31, + 31, + 32, + 32, + 32, + 32, + 33, + 33, + 33, + 33, + 34, + 34, + 34, + 35, + 35, + 35, + 35, + 36, + 36, + 36, + 36, + 37, + 37, + 37, + 38, + 38, + 38, + 38, + 39, + 39, + 39, + 39, + 40, + 40, + 40, + 40, + 41, + 41, + 41, + 41, + 42, + 42, + 42, + 42, + 43, + 43, + 43, + 44, + 44, + 44, + 44, + 45, + 45, + 45, + 45, + 46, + 46, + 46, + 46, + 47, + 47, + 47, + 47, + 48, + 48, + 48, + 48, + 49, + 49, + 49, + 49, + 50, + 50, + 50, + 50, + 51, + 51, + 51, + 51, + 52, + 52, + 52, + 52, + 53, + 53, + 53, + 53, + 54, + 54, + 54, + 54, + 55, + 55, + 55, + 55, + 56, + 56, + 56, + 56, + 57, + 57, + 57, + 57, + 58, + 58, + 58, + 58, + 59, + 59, + 59, + 59, + 60, + 60, + 60, + 60, + 61, + 61, + 61, + 61, + 62, + 62, + 62, + 62, + 63, + 63, + 63, + 63, + 64 +}; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/agc/histogram.cc b/webrtc/modules/audio_processing/agc/histogram.cc new file mode 100644 index 0000000..1d3035f --- /dev/null +++ b/webrtc/modules/audio_processing/agc/histogram.cc @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/agc/histogram.h"
+
+#include <cmath>
+#include <cstring>
+
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+static const double kHistBinCenters[] = {
+    7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
+    1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
+    2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
+    3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
+    5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
+    1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
+    1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
+    2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
+    4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
+    7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
+    1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
+    2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
+    3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
+    6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
+    1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
+    1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
+    2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
+    4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
+    8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
+    1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
+    2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
+    3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
+    6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
+    1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
+    1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
+    3.00339145144454e+04, 3.56647189489147e+04};
+
+static const double kProbQDomain = 1024.0;
+// Loudness of -15 dB (smallest expected loudness) in log domain,
+// loudness_db = 13.5 * log10(rms);
+static const double kLogDomainMinBinCenter = -2.57752062648587;
+// Loudness step of 1 dB in log domain
+static const double kLogDomainStepSizeInverse = 5.81954605750359;
+
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static const int kLowProbThresholdQ10 = static_cast<int>(
+    kLowProbabilityThreshold * kProbQDomain);
+
+Histogram::Histogram()
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(),
+      hist_bin_index_(),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(0),
+      len_high_activity_(0) {
+  static_assert(
+      kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]),
+      "histogram bin centers incorrect size");
+}
+
+Histogram::Histogram(int window_size)
+    : num_updates_(0),
+      audio_content_q10_(0),
+      bin_count_q10_(),
+      activity_probability_(new int[window_size]),
+      hist_bin_index_(new int[window_size]),
+      buffer_index_(0),
+      buffer_is_full_(false),
+      len_circular_buffer_(window_size),
+      len_high_activity_(0) {}
+
+Histogram::~Histogram() {}
+
+void Histogram::Update(double rms, double activity_probability) {
+  // If the circular histogram is activated then remove the oldest entry.
+  if (len_circular_buffer_ > 0)
+    RemoveOldestEntryAndUpdate();
+
+  // Find the corresponding bin.
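+  // (GetBinIndex() quantizes |rms| onto the log-spaced kHistBinCenters grid;
+  // the activity probability is converted to Q10 by scaling with
+  // kProbQDomain = 1024, so e.g. a probability of 0.5 becomes
+  // floor(0.5 * 1024) = 512.)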
+  int hist_index = GetBinIndex(rms);
+  // To Q10 domain.
+  int prob_q10 = static_cast<int>(floor(activity_probability *
+                                        kProbQDomain));
+  InsertNewestEntryAndUpdate(prob_q10, hist_index);
+}
+
+// Does nothing if the buffer is not full yet.
+void Histogram::RemoveOldestEntryAndUpdate() {
+  assert(len_circular_buffer_ > 0);
+  // Do nothing if circular buffer is not full.
+  if (!buffer_is_full_)
+    return;
+
+  int oldest_prob = activity_probability_[buffer_index_];
+  int oldest_hist_index = hist_bin_index_[buffer_index_];
+  UpdateHist(-oldest_prob, oldest_hist_index);
+}
+
+void Histogram::RemoveTransient() {
+  // Don't expect to be here if high-activity region is longer than
+  // |kTransientWidthThreshold| or there has not been any transient.
+  assert(len_high_activity_ <= kTransientWidthThreshold);
+  int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
+      len_circular_buffer_ - 1;
+  while (len_high_activity_ > 0) {
+    UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
+    activity_probability_[index] = 0;
+    index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
+    len_high_activity_--;
+  }
+}
+
+void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
+                                           int hist_index) {
+  // Update the circular buffer if it is enabled.
+  if (len_circular_buffer_ > 0) {
+    // Removing transient.
+    if (activity_prob_q10 <= kLowProbThresholdQ10) {
+      // Lower than threshold probability, set it to zero.
+      activity_prob_q10 = 0;
+      // Check if this has been a transient.
+      if (len_high_activity_ <= kTransientWidthThreshold)
+        RemoveTransient();  // Remove this transient.
+      len_high_activity_ = 0;
+    } else if (len_high_activity_ <= kTransientWidthThreshold) {
+      len_high_activity_++;
+    }
+    // Updating the circular buffer.
+    activity_probability_[buffer_index_] = activity_prob_q10;
+    hist_bin_index_[buffer_index_] = hist_index;
+    // Increment the buffer index and check for wrap-around.
+    buffer_index_++;
+    if (buffer_index_ >= len_circular_buffer_) {
+      buffer_index_ = 0;
+      buffer_is_full_ = true;
+    }
+  }
+
+  num_updates_++;
+  if (num_updates_ < 0)
+    num_updates_--;
+
+  UpdateHist(activity_prob_q10, hist_index);
+}
+
+void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
+  bin_count_q10_[hist_index] += activity_prob_q10;
+  audio_content_q10_ += activity_prob_q10;
+}
+
+double Histogram::AudioContent() const {
+  return audio_content_q10_ / kProbQDomain;
+}
+
+Histogram* Histogram::Create() {
+  return new Histogram;
+}
+
+Histogram* Histogram::Create(int window_size) {
+  if (window_size < 0)
+    return NULL;
+  return new Histogram(window_size);
+}
+
+void Histogram::Reset() {
+  // Reset the histogram, audio-content and number of updates.
+  memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
+  audio_content_q10_ = 0;
+  num_updates_ = 0;
+  // Empty the circular buffer.
+  buffer_index_ = 0;
+  buffer_is_full_ = false;
+  len_high_activity_ = 0;
+}
+
+int Histogram::GetBinIndex(double rms) {
+  // First exclude overload cases.
+  if (rms <= kHistBinCenters[0]) {
+    return 0;
+  } else if (rms >= kHistBinCenters[kHistSize - 1]) {
+    return kHistSize - 1;
+  } else {
+    // The quantizer is uniform in log domain. Alternatively we could do binary
+    // search in linear domain.
+    double rms_log = log(rms);
+
+    int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
+                                       kLogDomainStepSizeInverse));
+    // The final decision is in linear domain.
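+    // The floor() above snaps to the bin center at or below |rms| in the log
+    // domain; comparing against the midpoint of the two adjacent centers
+    // rounds to the nearer bin in the linear domain.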
+    double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
+    if (rms > b) {
+      return index + 1;
+    }
+    return index;
+  }
+}
+
+double Histogram::CurrentRms() const {
+  double p;
+  double mean_val = 0;
+  if (audio_content_q10_ > 0) {
+    double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
+    for (int n = 0; n < kHistSize; n++) {
+      p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
+      mean_val += p * kHistBinCenters[n];
+    }
+  } else {
+    mean_val = kHistBinCenters[0];
+  }
+  return mean_val;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/agc/histogram.h b/webrtc/modules/audio_processing/agc/histogram.h
new file mode 100644
index 0000000..a8706bb
--- /dev/null
+++ b/webrtc/modules/audio_processing/agc/histogram.h
@@ -0,0 +1,91 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
+
+#include <string.h>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// This class implements the histogram of loudness with circular buffers so
+// that the histogram tracks the last T seconds of the loudness.
+class Histogram {
+ public:
+  // Create a non-sliding Histogram.
+  static Histogram* Create();
+
+  // Create a sliding Histogram, i.e. the histogram represents the last
+  // |window_size| samples.
+  static Histogram* Create(int window_size);
+  ~Histogram();
+
+  // Insert RMS and the corresponding activity probability.
+  void Update(double rms, double activity_probability);
+
+  // Reset the histogram, forget the past.
+  void Reset();
+
+  // Current loudness, which is actually the mean of histogram in loudness
+  // domain.
+  double CurrentRms() const;
+
+  // Sum of the histogram content.
+  double AudioContent() const;
+
+  // Number of times the histogram has been updated.
+  int num_updates() const { return num_updates_; }
+
+ private:
+  Histogram();
+  explicit Histogram(int window);
+
+  // Find the histogram bin associated with the given |rms|.
+  int GetBinIndex(double rms);
+
+  void RemoveOldestEntryAndUpdate();
+  void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
+  void UpdateHist(int activity_prob_q10, int hist_index);
+  void RemoveTransient();
+
+  // Number of histogram bins.
+  static const int kHistSize = 77;
+
+  // Number of times the histogram is updated.
+  int num_updates_;
+  // Audio content, this should be equal to the sum of the components of
+  // |bin_count_q10_|.
+  int64_t audio_content_q10_;
+
+  // Histogram of input RMS in Q10 with |kHistSize_| bins. In each 'Update(),'
+  // we increment the associated histogram-bin with the given probability. The
+  // increment is implemented in Q10 to avoid rounding errors.
+  int64_t bin_count_q10_[kHistSize];
+
+  // Circular buffer for probabilities.
+  rtc::scoped_ptr<int[]> activity_probability_;
+  // Circular buffer for histogram-indices of probabilities.
+  rtc::scoped_ptr<int[]> hist_bin_index_;
+  // Current index of circular buffer, where the newest data will be written
+  // to, therefore, pointing to the oldest data if buffer is full.
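+  // (For example, with a hypothetical window_size of 3, writes go to slots
+  // 0, 1, 2, 0, 1, ... and once the buffer has wrapped around, the slot at
+  // this index holds the entry about to be overwritten, i.e. the oldest one.)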
+ int buffer_index_; + // Indicating if buffer is full and we had a wrap around. + int buffer_is_full_; + // Size of circular buffer. + int len_circular_buffer_; + int len_high_activity_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_ diff --git a/webrtc/modules/audio_processing/agc/analog_agc.c b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c similarity index 62% rename from webrtc/modules/audio_processing/agc/analog_agc.c rename to webrtc/modules/audio_processing/agc/legacy/analog_agc.c index 0c2ccee..be644d9 100644 --- a/webrtc/modules/audio_processing/agc/analog_agc.c +++ b/webrtc/modules/audio_processing/agc/legacy/analog_agc.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -17,29 +17,31 @@ * */ +#include "webrtc/modules/audio_processing/agc/legacy/analog_agc.h" + #include #include -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP #include #endif -#include "analog_agc.h" /* The slope of in Q13*/ -static const WebRtc_Word16 kSlope1[8] = {21793, 12517, 7189, 4129, 2372, 1362, 472, 78}; +static const int16_t kSlope1[8] = {21793, 12517, 7189, 4129, 2372, 1362, 472, 78}; /* The offset in Q14 */ -static const WebRtc_Word16 kOffset1[8] = {25395, 23911, 22206, 20737, 19612, 18805, 17951, +static const int16_t kOffset1[8] = {25395, 23911, 22206, 20737, 19612, 18805, 17951, 17367}; /* The slope of in Q13*/ -static const WebRtc_Word16 kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337}; +static const int16_t kSlope2[8] = {2063, 1731, 1452, 1218, 1021, 857, 597, 337}; /* The offset in Q14 */ -static const WebRtc_Word16 kOffset2[8] = {18432, 18379, 18290, 18177, 18052, 17920, 17670, +static const int16_t kOffset2[8] = {18432, 18379, 18290, 18177, 18052, 17920, 17670, 17286}; -static const WebRtc_Word16 kMuteGuardTimeMs = 8000; -static const WebRtc_Word16 kInitCheck = 42; +static const int16_t kMuteGuardTimeMs = 8000; +static const int16_t kInitCheck = 42; +static const size_t kNumSubframes = 10; /* Default settings if config is not used */ #define AGC_DEFAULT_TARGET_LEVEL 3 @@ -72,12 +74,12 @@ static const WebRtc_Word16 kInitCheck = 42; * fprintf(1, '\t%i, %i, %i, %i,\n', round(10.^(linspace(0,10,32)/20) * 2^12)); */ /* Q12 */ -static const WebRtc_UWord16 kGainTableAnalog[GAIN_TBL_LEN] = {4096, 4251, 4412, 4579, 4752, +static const uint16_t kGainTableAnalog[GAIN_TBL_LEN] = {4096, 4251, 4412, 4579, 4752, 4932, 5118, 5312, 5513, 5722, 5938, 6163, 6396, 6638, 6889, 7150, 7420, 7701, 7992, 8295, 8609, 8934, 9273, 9623, 9987, 10365, 10758, 11165, 11587, 12025, 12480, 12953}; /* Gain/Suppression tables for virtual Mic (in Q10) */ -static const WebRtc_UWord16 kGainTableVirtualMic[128] = {1052, 1081, 1110, 1141, 1172, 1204, +static const uint16_t kGainTableVirtualMic[128] = {1052, 1081, 1110, 1141, 1172, 1204, 1237, 1271, 1305, 1341, 1378, 1416, 1454, 1494, 1535, 1577, 1620, 1664, 1710, 1757, 1805, 1854, 1905, 1957, 2010, 2065, 2122, 2180, 2239, 2301, 2364, 2428, 2495, 2563, 2633, 2705, 2779, 2855, 2933, 3013, 3096, 3180, 3267, 3357, 3449, 3543, 3640, 3739, @@ -88,7 +90,7 @@ static const WebRtc_UWord16 kGainTableVirtualMic[128] = {1052, 1081, 1110, 1141, 16055, 16494, 16945, 17409, 17885, 18374, 18877, 19393, 19923, 20468, 21028, 21603, 22194, 22801, 23425, 24065, 24724, 25400, 26095, 26808, 
27541, 28295, 29069, 29864, 30681, 31520, 32382}; -static const WebRtc_UWord16 kSuppressionTableVirtualMic[128] = {1024, 1006, 988, 970, 952, +static const uint16_t kSuppressionTableVirtualMic[128] = {1024, 1006, 988, 970, 952, 935, 918, 902, 886, 870, 854, 839, 824, 809, 794, 780, 766, 752, 739, 726, 713, 700, 687, 675, 663, 651, 639, 628, 616, 605, 594, 584, 573, 563, 553, 543, 533, 524, 514, 505, 496, 487, 478, 470, 461, 453, 445, 437, 429, 421, 414, 406, 399, 392, 385, 378, @@ -102,7 +104,7 @@ static const WebRtc_UWord16 kSuppressionTableVirtualMic[128] = {1024, 1006, 988, * Matlab code * targetLevelTable = fprintf('%d,\t%d,\t%d,\t%d,\n', round((32767*10.^(-(0:63)'/20)).^2*16/2^7) */ -static const WebRtc_Word32 kTargetLevelTable[64] = {134209536, 106606424, 84680493, 67264106, +static const int32_t kTargetLevelTable[64] = {134209536, 106606424, 84680493, 67264106, 53429779, 42440782, 33711911, 26778323, 21270778, 16895980, 13420954, 10660642, 8468049, 6726411, 5342978, 4244078, 3371191, 2677832, 2127078, 1689598, 1342095, 1066064, 846805, 672641, 534298, 424408, 337119, 267783, 212708, 168960, 134210, @@ -110,101 +112,41 @@ static const WebRtc_Word32 kTargetLevelTable[64] = {134209536, 106606424, 846804 6726, 5343, 4244, 3371, 2678, 2127, 1690, 1342, 1066, 847, 673, 534, 424, 337, 268, 213, 169, 134, 107, 85, 67}; -int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H, - WebRtc_Word16 samples) +int WebRtcAgc_AddMic(void *state, int16_t* const* in_mic, size_t num_bands, + size_t samples) { - WebRtc_Word32 nrg, max_nrg, sample, tmp32; - WebRtc_Word32 *ptr; - WebRtc_UWord16 targetGainIdx, gain; - WebRtc_Word16 i, n, L, M, subFrames, tmp16, tmp_speech[16]; - Agc_t *stt; - stt = (Agc_t *)state; + int32_t nrg, max_nrg, sample, tmp32; + int32_t *ptr; + uint16_t targetGainIdx, gain; + size_t i; + int16_t n, L, tmp16, tmp_speech[16]; + LegacyAgc* stt; + stt = (LegacyAgc*)state; - //default/initial values corresponding to 10ms for wb and swb - M = 10; - L = 16; - subFrames = 160; - - if (stt->fs == 8000) - { - if (samples == 80) - { - subFrames = 80; - M = 10; - L = 8; - } else if (samples == 160) - { - subFrames = 80; - M = 20; - L = 8; - } else - { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->add_mic, frame %d: Invalid number of samples\n\n", - (stt->fcount + 1)); -#endif + if (stt->fs == 8000) { + L = 8; + if (samples != 80) { return -1; } - } else if (stt->fs == 16000) - { - if (samples == 160) - { - subFrames = 160; - M = 10; - L = 16; - } else if (samples == 320) - { - subFrames = 160; - M = 20; - L = 16; - } else - { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->add_mic, frame %d: Invalid number of samples\n\n", - (stt->fcount + 1)); -#endif + } else { + L = 16; + if (samples != 160) { return -1; } - } else if (stt->fs == 32000) - { - /* SWB is processed as 160 sample for L and H bands */ - if (samples == 160) - { - subFrames = 160; - M = 10; - L = 16; - } else - { -#ifdef AGC_DEBUG - fprintf(stt->fpt, - "AGC->add_mic, frame %d: Invalid sample rate\n\n", - (stt->fcount + 1)); -#endif - return -1; - } - } - - /* Check for valid pointers based on sampling rate */ - if ((stt->fs == 32000) && (in_mic_H == NULL)) - { - return -1; - } - /* Check for valid pointer for low band */ - if (in_mic == NULL) - { - return -1; } /* apply slowly varying digital gain */ if (stt->micVol > stt->maxAnalog) { + /* |maxLevel| is strictly >= |micVol|, so this condition should be + * satisfied here, ensuring there is no divide-by-zero. 
*/ + assert(stt->maxLevel > stt->maxAnalog); + /* Q1 */ - tmp16 = (WebRtc_Word16)(stt->micVol - stt->maxAnalog); - tmp32 = WEBRTC_SPL_MUL_16_16(GAIN_TBL_LEN - 1, tmp16); - tmp16 = (WebRtc_Word16)(stt->maxLevel - stt->maxAnalog); - targetGainIdx = (WebRtc_UWord16)WEBRTC_SPL_DIV(tmp32, tmp16); + tmp16 = (int16_t)(stt->micVol - stt->maxAnalog); + tmp32 = (GAIN_TBL_LEN - 1) * tmp16; + tmp16 = (int16_t)(stt->maxLevel - stt->maxAnalog); + targetGainIdx = tmp32 / tmp16; assert(targetGainIdx < GAIN_TBL_LEN); /* Increment through the table towards the target gain. @@ -223,34 +165,19 @@ int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H for (i = 0; i < samples; i++) { - // For lower band - tmp32 = WEBRTC_SPL_MUL_16_U16(in_mic[i], gain); - sample = WEBRTC_SPL_RSHIFT_W32(tmp32, 12); - if (sample > 32767) + size_t j; + for (j = 0; j < num_bands; ++j) { - in_mic[i] = 32767; - } else if (sample < -32768) - { - in_mic[i] = -32768; - } else - { - in_mic[i] = (WebRtc_Word16)sample; - } - - // For higher band - if (stt->fs == 32000) - { - tmp32 = WEBRTC_SPL_MUL_16_U16(in_mic_H[i], gain); - sample = WEBRTC_SPL_RSHIFT_W32(tmp32, 12); + sample = (in_mic[j][i] * gain) >> 12; if (sample > 32767) { - in_mic_H[i] = 32767; + in_mic[j][i] = 32767; } else if (sample < -32768) { - in_mic_H[i] = -32768; + in_mic[j][i] = -32768; } else { - in_mic_H[i] = (WebRtc_Word16)sample; + in_mic[j][i] = (int16_t)sample; } } } @@ -260,7 +187,7 @@ int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H } /* compute envelope */ - if ((M == 10) && (stt->inQueue > 0)) + if (stt->inQueue > 0) { ptr = stt->env[1]; } else @@ -268,13 +195,13 @@ int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H ptr = stt->env[0]; } - for (i = 0; i < M; i++) + for (i = 0; i < kNumSubframes; i++) { /* iterate over samples */ max_nrg = 0; for (n = 0; n < L; n++) { - nrg = WEBRTC_SPL_MUL_16_16(in_mic[i * L + n], in_mic[i * L + n]); + nrg = in_mic[0][i * L + n] * in_mic[0][i * L + n]; if (nrg > max_nrg) { max_nrg = nrg; @@ -284,7 +211,7 @@ int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H } /* compute energy */ - if ((M == 10) && (stt->inQueue > 0)) + if (stt->inQueue > 0) { ptr = stt->Rxx16w32_array[1]; } else @@ -292,21 +219,24 @@ int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H ptr = stt->Rxx16w32_array[0]; } - for (i = 0; i < WEBRTC_SPL_RSHIFT_W16(M, 1); i++) + for (i = 0; i < kNumSubframes / 2; i++) { if (stt->fs == 16000) { - WebRtcSpl_DownsampleBy2(&in_mic[i * 32], 32, tmp_speech, stt->filterState); + WebRtcSpl_DownsampleBy2(&in_mic[0][i * 32], + 32, + tmp_speech, + stt->filterState); } else { - memcpy(tmp_speech, &in_mic[i * 16], 16 * sizeof(short)); + memcpy(tmp_speech, &in_mic[0][i * 16], 16 * sizeof(short)); } /* Compute energy in blocks of 16 samples */ ptr[i] = WebRtcSpl_DotProductWithScale(tmp_speech, tmp_speech, 16, 4); } /* update queue information */ - if ((stt->inQueue == 0) && (M == 10)) + if (stt->inQueue == 0) { stt->inQueue = 1; } else @@ -315,20 +245,15 @@ int WebRtcAgc_AddMic(void *state, WebRtc_Word16 *in_mic, WebRtc_Word16 *in_mic_H } /* call VAD (use low band only) */ - for (i = 0; i < samples; i += subFrames) - { - WebRtcAgc_ProcessVad(&stt->vadMic, &in_mic[i], subFrames); - } + WebRtcAgc_ProcessVad(&stt->vadMic, in_mic[0], samples); return 0; } -int WebRtcAgc_AddFarend(void *state, const WebRtc_Word16 *in_far, WebRtc_Word16 samples) +int WebRtcAgc_AddFarend(void *state, const int16_t 
*in_far, size_t samples) { - WebRtc_Word32 errHandle = 0; - WebRtc_Word16 i, subFrames; - Agc_t *stt; - stt = (Agc_t *)state; + LegacyAgc* stt; + stt = (LegacyAgc*)state; if (stt == NULL) { @@ -337,76 +262,42 @@ int WebRtcAgc_AddFarend(void *state, const WebRtc_Word16 *in_far, WebRtc_Word16 if (stt->fs == 8000) { - if ((samples != 80) && (samples != 160)) + if (samples != 80) { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->add_far_end, frame %d: Invalid number of samples\n\n", - stt->fcount); -#endif return -1; } - subFrames = 80; - } else if (stt->fs == 16000) + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { - if ((samples != 160) && (samples != 320)) + if (samples != 160) { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->add_far_end, frame %d: Invalid number of samples\n\n", - stt->fcount); -#endif return -1; } - subFrames = 160; - } else if (stt->fs == 32000) - { - if ((samples != 160) && (samples != 320)) - { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->add_far_end, frame %d: Invalid number of samples\n\n", - stt->fcount); -#endif - return -1; - } - subFrames = 160; } else { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->add_far_end, frame %d: Invalid sample rate\n\n", - stt->fcount + 1); -#endif return -1; } - for (i = 0; i < samples; i += subFrames) - { - errHandle += WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, &in_far[i], subFrames); - } - - return errHandle; + return WebRtcAgc_AddFarendToDigital(&stt->digitalAgc, in_far, samples); } -int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *in_near_H, - WebRtc_Word16 samples, WebRtc_Word32 micLevelIn, - WebRtc_Word32 *micLevelOut) +int WebRtcAgc_VirtualMic(void *agcInst, int16_t* const* in_near, + size_t num_bands, size_t samples, int32_t micLevelIn, + int32_t *micLevelOut) { - WebRtc_Word32 tmpFlt, micLevelTmp, gainIdx; - WebRtc_UWord16 gain; - WebRtc_Word16 ii; - Agc_t *stt; + int32_t tmpFlt, micLevelTmp, gainIdx; + uint16_t gain; + size_t ii, j; + LegacyAgc* stt; - WebRtc_UWord32 nrg; - WebRtc_Word16 sampleCntr; - WebRtc_UWord32 frameNrg = 0; - WebRtc_UWord32 frameNrgLimit = 5500; - WebRtc_Word16 numZeroCrossing = 0; - const WebRtc_Word16 kZeroCrossingLowLim = 15; - const WebRtc_Word16 kZeroCrossingHighLim = 20; + uint32_t nrg; + size_t sampleCntr; + uint32_t frameNrg = 0; + uint32_t frameNrgLimit = 5500; + int16_t numZeroCrossing = 0; + const int16_t kZeroCrossingLowLim = 15; + const int16_t kZeroCrossingHighLim = 20; - stt = (Agc_t *)agcInst; + stt = (LegacyAgc*)agcInst; /* * Before applying gain decide if this is a low-level signal. 
@@ -418,7 +309,7 @@ int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *i frameNrgLimit = frameNrgLimit << 1; } - frameNrg = WEBRTC_SPL_MUL_16_16(in_near[0], in_near[0]); + frameNrg = (uint32_t)(in_near[0][0] * in_near[0][0]); for (sampleCntr = 1; sampleCntr < samples; sampleCntr++) { @@ -426,12 +317,13 @@ int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *i // the correct value of the energy is not important if (frameNrg < frameNrgLimit) { - nrg = WEBRTC_SPL_MUL_16_16(in_near[sampleCntr], in_near[sampleCntr]); - frameNrg += nrg; + nrg = (uint32_t)(in_near[0][sampleCntr] * in_near[0][sampleCntr]); + frameNrg += nrg; } // Count the zero crossings - numZeroCrossing += ((in_near[sampleCntr] ^ in_near[sampleCntr - 1]) < 0); + numZeroCrossing += + ((in_near[0][sampleCntr] ^ in_near[0][sampleCntr - 1]) < 0); } if ((frameNrg < 500) || (numZeroCrossing <= 5)) @@ -451,7 +343,7 @@ int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *i stt->lowLevelSignal = 0; } - micLevelTmp = WEBRTC_SPL_LSHIFT_W32(micLevelIn, stt->scale); + micLevelTmp = micLevelIn << stt->scale; /* Set desired level */ gainIdx = stt->micVol; if (stt->micVol > stt->maxAnalog) @@ -478,7 +370,7 @@ int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *i } for (ii = 0; ii < samples; ii++) { - tmpFlt = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_U16(in_near[ii], gain), 10); + tmpFlt = (in_near[0][ii] * gain) >> 10; if (tmpFlt > 32767) { tmpFlt = 32767; @@ -503,11 +395,10 @@ int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *i gain = kSuppressionTableVirtualMic[127 - gainIdx]; } } - in_near[ii] = (WebRtc_Word16)tmpFlt; - if (stt->fs == 32000) + in_near[0][ii] = (int16_t)tmpFlt; + for (j = 1; j < num_bands; ++j) { - tmpFlt = WEBRTC_SPL_MUL_16_U16(in_near_H[ii], gain); - tmpFlt = WEBRTC_SPL_RSHIFT_W32(tmpFlt, 10); + tmpFlt = (in_near[j][ii] * gain) >> 10; if (tmpFlt > 32767) { tmpFlt = 32767; @@ -516,25 +407,23 @@ int WebRtcAgc_VirtualMic(void *agcInst, WebRtc_Word16 *in_near, WebRtc_Word16 *i { tmpFlt = -32768; } - in_near_H[ii] = (WebRtc_Word16)tmpFlt; + in_near[j][ii] = (int16_t)tmpFlt; } } /* Set the level we (finally) used */ stt->micGainIdx = gainIdx; // *micLevelOut = stt->micGainIdx; - *micLevelOut = WEBRTC_SPL_RSHIFT_W32(stt->micGainIdx, stt->scale); + *micLevelOut = stt->micGainIdx >> stt->scale; /* Add to Mic as if it was the output from a true microphone */ - if (WebRtcAgc_AddMic(agcInst, in_near, in_near_H, samples) != 0) + if (WebRtcAgc_AddMic(agcInst, in_near, num_bands, samples) != 0) { return -1; } return 0; } -void WebRtcAgc_UpdateAgcThresholds(Agc_t *stt) -{ - - WebRtc_Word16 tmp16; +void WebRtcAgc_UpdateAgcThresholds(LegacyAgc* stt) { + int16_t tmp16; #ifdef MIC_LEVEL_FEEDBACK int zeros; @@ -542,13 +431,13 @@ void WebRtcAgc_UpdateAgcThresholds(Agc_t *stt) { /* Lower the analog target level since we have reached its maximum */ zeros = WebRtcSpl_NormW32(stt->Rxx160_LPw32); - stt->targetIdxOffset = WEBRTC_SPL_RSHIFT_W16((3 * zeros) - stt->targetIdx - 2, 2); + stt->targetIdxOffset = (3 * zeros - stt->targetIdx - 2) / 4; } #endif /* Set analog target level in envelope dBOv scale */ tmp16 = (DIFF_REF_TO_ANALOG * stt->compressionGaindB) + ANALOG_TARGET_LEVEL_2; - tmp16 = WebRtcSpl_DivW32W16ResW16((WebRtc_Word32)tmp16, ANALOG_TARGET_LEVEL); + tmp16 = WebRtcSpl_DivW32W16ResW16((int32_t)tmp16, ANALOG_TARGET_LEVEL); stt->analogTarget = DIGITAL_REF_AT_0_COMP_GAIN + tmp16; if (stt->analogTarget < 
DIGITAL_REF_AT_0_COMP_GAIN) { @@ -583,14 +472,15 @@ void WebRtcAgc_UpdateAgcThresholds(Agc_t *stt) stt->lowerLimit = stt->startLowerLimit; } -void WebRtcAgc_SaturationCtrl(Agc_t *stt, WebRtc_UWord8 *saturated, WebRtc_Word32 *env) -{ - WebRtc_Word16 i, tmpW16; +void WebRtcAgc_SaturationCtrl(LegacyAgc* stt, + uint8_t* saturated, + int32_t* env) { + int16_t i, tmpW16; /* Check if the signal is saturated */ for (i = 0; i < 10; i++) { - tmpW16 = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(env[i], 20); + tmpW16 = (int16_t)(env[i] >> 20); if (tmpW16 > 875) { stt->envSum += tmpW16; @@ -604,15 +494,13 @@ void WebRtcAgc_SaturationCtrl(Agc_t *stt, WebRtc_UWord8 *saturated, WebRtc_Word3 } /* stt->envSum *= 0.99; */ - stt->envSum = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(stt->envSum, - (WebRtc_Word16)32440, 15); + stt->envSum = (int16_t)((stt->envSum * 32440) >> 15); } -void WebRtcAgc_ZeroCtrl(Agc_t *stt, WebRtc_Word32 *inMicLevel, WebRtc_Word32 *env) -{ - WebRtc_Word16 i; - WebRtc_Word32 tmp32 = 0; - WebRtc_Word32 midVal; +void WebRtcAgc_ZeroCtrl(LegacyAgc* stt, int32_t* inMicLevel, int32_t* env) { + int16_t i; + int32_t tmp32 = 0; + int32_t midVal; /* Is the input signal zero? */ for (i = 0; i < 10; i++) @@ -641,22 +529,23 @@ void WebRtcAgc_ZeroCtrl(Agc_t *stt, WebRtc_Word32 *inMicLevel, WebRtc_Word32 *en stt->msZero = 0; /* Increase microphone level only if it's less than 50% */ - midVal = WEBRTC_SPL_RSHIFT_W32(stt->maxAnalog + stt->minLevel + 1, 1); + midVal = (stt->maxAnalog + stt->minLevel + 1) / 2; if (*inMicLevel < midVal) { /* *inMicLevel *= 1.1; */ - tmp32 = WEBRTC_SPL_MUL(1126, *inMicLevel); - *inMicLevel = WEBRTC_SPL_RSHIFT_W32(tmp32, 10); + *inMicLevel = (1126 * *inMicLevel) >> 10; /* Reduces risk of a muted mic repeatedly triggering excessive levels due * to zero signal detection. */ *inMicLevel = WEBRTC_SPL_MIN(*inMicLevel, stt->zeroCtrlMax); stt->micVol = *inMicLevel; } -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, - "\t\tAGC->zeroCntrl, frame %d: 500 ms under threshold, micVol:\n", - stt->fcount, stt->micVol); + "\t\tAGC->zeroCntrl, frame %d: 500 ms under threshold," + " micVol: %d\n", + stt->fcount, + stt->micVol); #endif stt->activeSpeech = 0; @@ -669,8 +558,7 @@ void WebRtcAgc_ZeroCtrl(Agc_t *stt, WebRtc_Word32 *inMicLevel, WebRtc_Word32 *en } } -void WebRtcAgc_SpeakerInactiveCtrl(Agc_t *stt) -{ +void WebRtcAgc_SpeakerInactiveCtrl(LegacyAgc* stt) { /* Check if the near end speaker is inactive. * If that is the case the VAD threshold is * increased since the VAD speech model gets @@ -678,8 +566,8 @@ void WebRtcAgc_SpeakerInactiveCtrl(Agc_t *stt) * silence. 
*/ - WebRtc_Word32 tmp32; - WebRtc_Word16 vadThresh; + int32_t tmp32; + int16_t vadThresh; if (stt->vadMic.stdLongTerm < 2500) { @@ -690,17 +578,16 @@ void WebRtcAgc_SpeakerInactiveCtrl(Agc_t *stt) if (stt->vadMic.stdLongTerm < 4500) { /* Scale between min and max threshold */ - vadThresh += WEBRTC_SPL_RSHIFT_W16(4500 - stt->vadMic.stdLongTerm, 1); + vadThresh += (4500 - stt->vadMic.stdLongTerm) / 2; } /* stt->vadThreshold = (31 * stt->vadThreshold + vadThresh) / 32; */ - tmp32 = (WebRtc_Word32)vadThresh; - tmp32 += WEBRTC_SPL_MUL_16_16((WebRtc_Word16)31, stt->vadThreshold); - stt->vadThreshold = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 5); + tmp32 = vadThresh + 31 * stt->vadThreshold; + stt->vadThreshold = (int16_t)(tmp32 >> 5); } } -void WebRtcAgc_ExpCurve(WebRtc_Word16 volume, WebRtc_Word16 *index) +void WebRtcAgc_ExpCurve(int16_t volume, int16_t *index) { // volume in Q14 // index in [0-7] @@ -750,40 +637,44 @@ void WebRtcAgc_ExpCurve(WebRtc_Word16 volume, WebRtc_Word16 *index) } } -WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, - WebRtc_Word32 *outMicLevel, - WebRtc_Word16 vadLogRatio, - WebRtc_Word16 echo, WebRtc_UWord8 *saturationWarning) +int32_t WebRtcAgc_ProcessAnalog(void *state, int32_t inMicLevel, + int32_t *outMicLevel, + int16_t vadLogRatio, + int16_t echo, uint8_t *saturationWarning) { - WebRtc_UWord32 tmpU32; - WebRtc_Word32 Rxx16w32, tmp32; - WebRtc_Word32 inMicLevelTmp, lastMicVol; - WebRtc_Word16 i; - WebRtc_UWord8 saturated = 0; - Agc_t *stt; + uint32_t tmpU32; + int32_t Rxx16w32, tmp32; + int32_t inMicLevelTmp, lastMicVol; + int16_t i; + uint8_t saturated = 0; + LegacyAgc* stt; - stt = (Agc_t *)state; - inMicLevelTmp = WEBRTC_SPL_LSHIFT_W32(inMicLevel, stt->scale); + stt = (LegacyAgc*)state; + inMicLevelTmp = inMicLevel << stt->scale; if (inMicLevelTmp > stt->maxAnalog) { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl > maxAnalog\n", stt->fcount); +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: micLvl > maxAnalog\n", + stt->fcount); #endif return -1; } else if (inMicLevelTmp < stt->minLevel) { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel\n", stt->fcount); +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel\n", + stt->fcount); #endif return -1; } if (stt->firstCall == 0) { - WebRtc_Word32 tmpVol; + int32_t tmpVol; stt->firstCall = 1; - tmp32 = WEBRTC_SPL_RSHIFT_W32((stt->maxLevel - stt->minLevel) * (WebRtc_Word32)51, 9); + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; tmpVol = (stt->minLevel + tmp32); /* If the mic level is very low at start, increase it! */ @@ -803,25 +694,32 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, /* If the mic level was manually changed to a very low value raise it! 
*/ if ((inMicLevelTmp != stt->micVol) && (inMicLevelTmp < stt->minOutput)) { - tmp32 = WEBRTC_SPL_RSHIFT_W32((stt->maxLevel - stt->minLevel) * (WebRtc_Word32)51, 9); + tmp32 = ((stt->maxLevel - stt->minLevel) * 51) >> 9; inMicLevelTmp = (stt->minLevel + tmp32); stt->micVol = inMicLevelTmp; #ifdef MIC_LEVEL_FEEDBACK //stt->numBlocksMicLvlSat = 0; #endif -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, - "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel by manual decrease, raise vol\n", + "\tAGC->ProcessAnalog, frame %d: micLvl < minLevel by manual" + " decrease, raise vol\n", stt->fcount); #endif } if (inMicLevelTmp != stt->micVol) { - // Incoming level mismatch; update our level. - // This could be the case if the volume is changed manually, or if the - // sound device has a low volume resolution. - stt->micVol = inMicLevelTmp; + if (inMicLevel == stt->lastInMicLevel) { + // We requested a volume adjustment, but it didn't occur. This is + // probably due to a coarse quantization of the volume slider. + // Restore the requested value to prevent getting stuck. + inMicLevelTmp = stt->micVol; + } + else { + // As long as the value changed, update to match. + stt->micVol = inMicLevelTmp; + } } if (inMicLevelTmp > stt->maxLevel) @@ -831,6 +729,7 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, } // Store last value here, after we've taken care of manual updates etc. + stt->lastInMicLevel = inMicLevel; lastMicVol = stt->micVol; /* Checks if the signal is saturated. Also a check if individual samples @@ -846,24 +745,25 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, * Rxx160_LP is adjusted down because it is so slow it could * cause the AGC to make wrong decisions. */ /* stt->Rxx160_LPw32 *= 0.875; */ - stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 3), 7); + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 8) * 7; stt->zeroCtrlMax = stt->micVol; /* stt->micVol *= 0.903; */ tmp32 = inMicLevelTmp - stt->minLevel; - tmpU32 = WEBRTC_SPL_UMUL(29591, (WebRtc_UWord32)(tmp32)); - stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 15) + stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(29591, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; if (stt->micVol > lastMicVol - 2) { stt->micVol = lastMicVol - 2; } inMicLevelTmp = stt->micVol; -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: saturated, micVol = %d\n", - stt->fcount, stt->micVol); + stt->fcount, + stt->micVol); #endif if (stt->micVol < stt->minOutput) @@ -916,7 +816,7 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, Rxx16w32 = stt->Rxx16w32_array[0][i]; /* Rxx160w32 in Q(-7) */ - tmp32 = WEBRTC_SPL_RSHIFT_W32(Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos], 3); + tmp32 = (Rxx16w32 - stt->Rxx16_vectorw32[stt->Rxx16pos]) >> 3; stt->Rxx160w32 = stt->Rxx160w32 + tmp32; stt->Rxx16_vectorw32[stt->Rxx16pos] = Rxx16w32; @@ -928,7 +828,7 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, } /* Rxx16_LPw32 in Q(-4) */ - tmp32 = WEBRTC_SPL_RSHIFT_W32(Rxx16w32 - stt->Rxx16_LPw32, kAlphaShortTerm); + tmp32 = (Rxx16w32 - stt->Rxx16_LPw32) >> kAlphaShortTerm; stt->Rxx16_LPw32 = (stt->Rxx16_LPw32) + tmp32; if (vadLogRatio > stt->vadThreshold) @@ -950,11 +850,11 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, } else if (stt->activeSpeech == 250) { stt->activeSpeech += 2; - tmp32 = 
WEBRTC_SPL_RSHIFT_W32(stt->Rxx16_LPw32Max, 3); - stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, RXX_BUFFER_LEN); + tmp32 = stt->Rxx16_LPw32Max >> 3; + stt->Rxx160_LPw32 = tmp32 * RXX_BUFFER_LEN; } - tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160w32 - stt->Rxx160_LPw32, kAlphaLongTerm); + tmp32 = (stt->Rxx160w32 - stt->Rxx160_LPw32) >> kAlphaLongTerm; stt->Rxx160_LPw32 = stt->Rxx160_LPw32 + tmp32; if (stt->Rxx160_LPw32 > stt->upperSecondaryLimit) @@ -969,23 +869,21 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, /* Lower the recording level */ /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ - tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); - stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 53); + tmp32 = stt->Rxx160_LPw32 >> 6; + stt->Rxx160_LPw32 = tmp32 * 53; /* Reduce the max gain to avoid excessive oscillation * (but never drop below the maximum analog level). - * stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; */ - tmp32 = (15 * stt->maxLevel) + stt->micVol; - stt->maxLevel = WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); stt->zeroCtrlMax = stt->micVol; /* 0.95 in Q15 */ tmp32 = inMicLevelTmp - stt->minLevel; - tmpU32 = WEBRTC_SPL_UMUL(31130, (WebRtc_UWord32)(tmp32)); - stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 15) + stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31130, (uint32_t)(tmp32)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; if (stt->micVol > lastMicVol - 1) { stt->micVol = lastMicVol - 1; @@ -1000,10 +898,13 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, #ifdef MIC_LEVEL_FEEDBACK //stt->numBlocksMicLvlSat = 0; #endif -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, - "\tAGC->ProcessAnalog, frame %d: measure > 2ndUpperLim, micVol = %d, maxLevel = %d\n", - stt->fcount, stt->micVol, stt->maxLevel); + "\tAGC->ProcessAnalog, frame %d: measure >" + " 2ndUpperLim, micVol = %d, maxLevel = %d\n", + stt->fcount, + stt->micVol, + stt->maxLevel); #endif } } else if (stt->Rxx160_LPw32 > stt->upperLimit) @@ -1017,23 +918,20 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, /* Lower the recording level */ stt->msTooHigh = 0; /* Multiply by 0.828125 which corresponds to decreasing ~0.8dB */ - tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); - stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 53); + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 53; /* Reduce the max gain to avoid excessive oscillation * (but never drop below the maximum analog level). 
- * stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; */ - tmp32 = (15 * stt->maxLevel) + stt->micVol; - stt->maxLevel = WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + stt->maxLevel = (15 * stt->maxLevel + stt->micVol) / 16; stt->maxLevel = WEBRTC_SPL_MAX(stt->maxLevel, stt->maxAnalog); stt->zeroCtrlMax = stt->micVol; /* 0.965 in Q15 */ tmp32 = inMicLevelTmp - stt->minLevel; - tmpU32 = WEBRTC_SPL_UMUL(31621, (WebRtc_UWord32)(inMicLevelTmp - stt->minLevel)); - stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 15) + stt->minLevel; + tmpU32 = WEBRTC_SPL_UMUL(31621, (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 15) + stt->minLevel; if (stt->micVol > lastMicVol - 1) { stt->micVol = lastMicVol - 1; @@ -1043,10 +941,13 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, #ifdef MIC_LEVEL_FEEDBACK //stt->numBlocksMicLvlSat = 0; #endif -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, - "\tAGC->ProcessAnalog, frame %d: measure > UpperLim, micVol = %d, maxLevel = %d\n", - stt->fcount, stt->micVol, stt->maxLevel); + "\tAGC->ProcessAnalog, frame %d: measure >" + " UpperLim, micVol = %d, maxLevel = %d\n", + stt->fcount, + stt->micVol, + stt->maxLevel); #endif } } else if (stt->Rxx160_LPw32 < stt->lowerSecondaryLimit) @@ -1058,34 +959,31 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, if (stt->msTooLow > stt->msecSpeechOuterChange) { /* Raise the recording level */ - WebRtc_Word16 index, weightFIX; - WebRtc_Word16 volNormFIX = 16384; // =1 in Q14. + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. stt->msTooLow = 0; /* Normalize the volume level */ - tmp32 = WEBRTC_SPL_LSHIFT_W32(inMicLevelTmp - stt->minLevel, 14); + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; if (stt->maxInit != stt->minLevel) { - volNormFIX = (WebRtc_Word16)WEBRTC_SPL_DIV(tmp32, - (stt->maxInit - stt->minLevel)); + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); } /* Find correct curve */ WebRtcAgc_ExpCurve(volNormFIX, &index); /* Compute weighting factor for the volume increase, 32^(-2*X)/2+1.05 */ - weightFIX = kOffset1[index] - - (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kSlope1[index], - volNormFIX, 13); + weightFIX = kOffset1[index] - + (int16_t)((kSlope1[index] * volNormFIX) >> 13); /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ - tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); - stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 67); + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; tmp32 = inMicLevelTmp - stt->minLevel; - tmpU32 = ((WebRtc_UWord32)weightFIX * (WebRtc_UWord32)(inMicLevelTmp - stt->minLevel)); - stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 14) + stt->minLevel; + tmpU32 = ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; if (stt->micVol < lastMicVol + 2) { stt->micVol = lastMicVol + 2; @@ -1103,10 +1001,12 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat); } #endif -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, - "\tAGC->ProcessAnalog, frame %d: measure < 2ndLowerLim, micVol = %d\n", - stt->fcount, stt->micVol); + "\tAGC->ProcessAnalog, frame %d: measure <" + " 2ndLowerLim, micVol = %d\n", + stt->fcount, + stt->micVol); #endif } } else if (stt->Rxx160_LPw32 < stt->lowerLimit) @@ -1118,34 +1018,31 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, if 
(stt->msTooLow > stt->msecSpeechInnerChange) { /* Raise the recording level */ - WebRtc_Word16 index, weightFIX; - WebRtc_Word16 volNormFIX = 16384; // =1 in Q14. + int16_t index, weightFIX; + int16_t volNormFIX = 16384; // =1 in Q14. stt->msTooLow = 0; /* Normalize the volume level */ - tmp32 = WEBRTC_SPL_LSHIFT_W32(inMicLevelTmp - stt->minLevel, 14); + tmp32 = (inMicLevelTmp - stt->minLevel) << 14; if (stt->maxInit != stt->minLevel) { - volNormFIX = (WebRtc_Word16)WEBRTC_SPL_DIV(tmp32, - (stt->maxInit - stt->minLevel)); + volNormFIX = tmp32 / (stt->maxInit - stt->minLevel); } /* Find correct curve */ WebRtcAgc_ExpCurve(volNormFIX, &index); /* Compute weighting factor for the volume increase, (3.^(-2.*X))/8+1 */ - weightFIX = kOffset2[index] - - (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(kSlope2[index], - volNormFIX, 13); + weightFIX = kOffset2[index] - + (int16_t)((kSlope2[index] * volNormFIX) >> 13); /* stt->Rxx160_LPw32 *= 1.047 [~0.2 dB]; */ - tmp32 = WEBRTC_SPL_RSHIFT_W32(stt->Rxx160_LPw32, 6); - stt->Rxx160_LPw32 = WEBRTC_SPL_MUL(tmp32, 67); + stt->Rxx160_LPw32 = (stt->Rxx160_LPw32 / 64) * 67; tmp32 = inMicLevelTmp - stt->minLevel; - tmpU32 = ((WebRtc_UWord32)weightFIX * (WebRtc_UWord32)(inMicLevelTmp - stt->minLevel)); - stt->micVol = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(tmpU32, 14) + stt->minLevel; + tmpU32 = ((uint32_t)weightFIX * (uint32_t)(inMicLevelTmp - stt->minLevel)); + stt->micVol = (tmpU32 >> 14) + stt->minLevel; if (stt->micVol < lastMicVol + 1) { stt->micVol = lastMicVol + 1; @@ -1163,10 +1060,11 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, fprintf(stderr, "Sat mic Level: %d\n", stt->numBlocksMicLvlSat); } #endif -#ifdef AGC_DEBUG //test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, "\tAGC->ProcessAnalog, frame %d: measure < LowerLim, micVol = %d\n", - stt->fcount, stt->micVol); + stt->fcount, + stt->micVol); #endif } @@ -1229,27 +1127,20 @@ WebRtc_Word32 WebRtcAgc_ProcessAnalog(void *state, WebRtc_Word32 inMicLevel, stt->micVol = stt->minOutput; } - *outMicLevel = WEBRTC_SPL_RSHIFT_W32(stt->micVol, stt->scale); - if (*outMicLevel > WEBRTC_SPL_RSHIFT_W32(stt->maxAnalog, stt->scale)) - { - *outMicLevel = WEBRTC_SPL_RSHIFT_W32(stt->maxAnalog, stt->scale); - } + *outMicLevel = WEBRTC_SPL_MIN(stt->micVol, stt->maxAnalog) >> stt->scale; return 0; } -int WebRtcAgc_Process(void *agcInst, const WebRtc_Word16 *in_near, - const WebRtc_Word16 *in_near_H, WebRtc_Word16 samples, - WebRtc_Word16 *out, WebRtc_Word16 *out_H, WebRtc_Word32 inMicLevel, - WebRtc_Word32 *outMicLevel, WebRtc_Word16 echo, - WebRtc_UWord8 *saturationWarning) +int WebRtcAgc_Process(void *agcInst, const int16_t* const* in_near, + size_t num_bands, size_t samples, + int16_t* const* out, int32_t inMicLevel, + int32_t *outMicLevel, int16_t echo, + uint8_t *saturationWarning) { - Agc_t *stt; - WebRtc_Word32 inMicLevelTmp; - WebRtc_Word16 subFrames, i; - WebRtc_UWord8 satWarningTmp = 0; + LegacyAgc* stt; - stt = (Agc_t *)agcInst; + stt = (LegacyAgc*)agcInst; // if (stt == NULL) @@ -1261,53 +1152,17 @@ int WebRtcAgc_Process(void *agcInst, const WebRtc_Word16 *in_near, if (stt->fs == 8000) { - if ((samples != 80) && (samples != 160)) + if (samples != 80) { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->Process, frame %d: Invalid number of samples\n\n", stt->fcount); -#endif return -1; } - subFrames = 80; - } else if (stt->fs == 16000) + } else if (stt->fs == 16000 || stt->fs == 32000 || stt->fs == 48000) { - if ((samples != 160) && (samples != 320)) + if (samples != 
160) { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->Process, frame %d: Invalid number of samples\n\n", stt->fcount); -#endif return -1; } - subFrames = 160; - } else if (stt->fs == 32000) - { - if ((samples != 160) && (samples != 320)) - { -#ifdef AGC_DEBUG //test log - fprintf(stt->fpt, - "AGC->Process, frame %d: Invalid number of samples\n\n", stt->fcount); -#endif - return -1; - } - subFrames = 160; } else - { -#ifdef AGC_DEBUG// test log - fprintf(stt->fpt, - "AGC->Process, frame %d: Invalid sample rate\n\n", stt->fcount); -#endif - return -1; - } - - /* Check for valid pointers based on sampling rate */ - if (stt->fs == 32000 && in_near_H == NULL) - { - return -1; - } - /* Check for valid pointers for low band */ - if (in_near == NULL) { return -1; } @@ -1315,84 +1170,68 @@ int WebRtcAgc_Process(void *agcInst, const WebRtc_Word16 *in_near, *saturationWarning = 0; //TODO: PUT IN RANGE CHECKING FOR INPUT LEVELS *outMicLevel = inMicLevel; - inMicLevelTmp = inMicLevel; - // TODO(andrew): clearly we don't need input and output pointers... - // Change the interface to take a shared input/output. - if (in_near != out) - { - // Only needed if they don't already point to the same place. - memcpy(out, in_near, samples * sizeof(WebRtc_Word16)); - } - if (stt->fs == 32000) - { - if (in_near_H != out_H) - { - memcpy(out_H, in_near_H, samples * sizeof(WebRtc_Word16)); - } - } - -#ifdef AGC_DEBUG//test log +#ifdef WEBRTC_AGC_DEBUG_DUMP stt->fcount++; #endif - for (i = 0; i < samples; i += subFrames) + if (WebRtcAgc_ProcessDigital(&stt->digitalAgc, + in_near, + num_bands, + out, + stt->fs, + stt->lowLevelSignal) == -1) { - if (WebRtcAgc_ProcessDigital(&stt->digitalAgc, &in_near[i], &in_near_H[i], &out[i], &out_H[i], - stt->fs, stt->lowLevelSignal) == -1) - { -#ifdef AGC_DEBUG//test log - fprintf(stt->fpt, "AGC->Process, frame %d: Error from DigAGC\n\n", stt->fcount); +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "AGC->Process, frame %d: Error from DigAGC\n\n", + stt->fcount); #endif + return -1; + } + if (stt->agcMode < kAgcModeFixedDigital && + (stt->lowLevelSignal == 0 || stt->agcMode != kAgcModeAdaptiveDigital)) + { + if (WebRtcAgc_ProcessAnalog(agcInst, + inMicLevel, + outMicLevel, + stt->vadMic.logRatio, + echo, + saturationWarning) == -1) + { return -1; } - if ((stt->agcMode < kAgcModeFixedDigital) && ((stt->lowLevelSignal == 0) - || (stt->agcMode != kAgcModeAdaptiveDigital))) - { - if (WebRtcAgc_ProcessAnalog(agcInst, inMicLevelTmp, outMicLevel, - stt->vadMic.logRatio, echo, saturationWarning) == -1) - { - return -1; - } - } -#ifdef AGC_DEBUG//test log - fprintf(stt->agcLog, "%5d\t%d\t%d\t%d\n", stt->fcount, inMicLevelTmp, *outMicLevel, stt->maxLevel, stt->micVol); + } +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->agcLog, + "%5d\t%d\t%d\t%d\t%d\n", + stt->fcount, + inMicLevel, + *outMicLevel, + stt->maxLevel, + stt->micVol); #endif - /* update queue */ - if (stt->inQueue > 1) - { - memcpy(stt->env[0], stt->env[1], 10 * sizeof(WebRtc_Word32)); - memcpy(stt->Rxx16w32_array[0], stt->Rxx16w32_array[1], 5 * sizeof(WebRtc_Word32)); - } - - if (stt->inQueue > 0) - { - stt->inQueue--; - } - - /* If 20ms frames are used the input mic level must be updated so that - * the analog AGC does not think that there has been a manual volume - * change. */ - inMicLevelTmp = *outMicLevel; - - /* Store a positive saturation warning. 
*/ - if (*saturationWarning == 1) - { - satWarningTmp = 1; - } + /* update queue */ + if (stt->inQueue > 1) + { + memcpy(stt->env[0], stt->env[1], 10 * sizeof(int32_t)); + memcpy(stt->Rxx16w32_array[0], + stt->Rxx16w32_array[1], + 5 * sizeof(int32_t)); } - /* Trigger the saturation warning if displayed by any of the frames. */ - *saturationWarning = satWarningTmp; + if (stt->inQueue > 0) + { + stt->inQueue--; + } return 0; } -int WebRtcAgc_set_config(void *agcInst, WebRtcAgc_config_t agcConfig) -{ - Agc_t *stt; - stt = (Agc_t *)agcInst; +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig agcConfig) { + LegacyAgc* stt; + stt = (LegacyAgc*)agcInst; if (stt == NULL) { @@ -1432,12 +1271,14 @@ int WebRtcAgc_set_config(void *agcInst, WebRtcAgc_config_t agcConfig) if (WebRtcAgc_CalculateGainTable(&(stt->digitalAgc.gainTable[0]), stt->compressionGaindB, stt->targetLevelDbfs, stt->limiterEnable, stt->analogTarget) == -1) { -#ifdef AGC_DEBUG//test log - fprintf(stt->fpt, "AGC->set_config, frame %d: Error from calcGainTable\n\n", stt->fcount); +#ifdef WEBRTC_AGC_DEBUG_DUMP + fprintf(stt->fpt, + "AGC->set_config, frame %d: Error from calcGainTable\n\n", + stt->fcount); #endif return -1; } - /* Store the config in a WebRtcAgc_config_t */ + /* Store the config in a WebRtcAgcConfig */ stt->usedConfig.compressionGaindB = agcConfig.compressionGaindB; stt->usedConfig.limiterEnable = agcConfig.limiterEnable; stt->usedConfig.targetLevelDbfs = agcConfig.targetLevelDbfs; @@ -1445,10 +1286,9 @@ int WebRtcAgc_set_config(void *agcInst, WebRtcAgc_config_t agcConfig) return 0; } -int WebRtcAgc_get_config(void *agcInst, WebRtcAgc_config_t *config) -{ - Agc_t *stt; - stt = (Agc_t *)agcInst; +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config) { + LegacyAgc* stt; + stt = (LegacyAgc*)agcInst; if (stt == NULL) { @@ -1474,61 +1314,46 @@ int WebRtcAgc_get_config(void *agcInst, WebRtcAgc_config_t *config) return 0; } -int WebRtcAgc_Create(void **agcInst) -{ - Agc_t *stt; - if (agcInst == NULL) - { - return -1; - } - stt = (Agc_t *)malloc(sizeof(Agc_t)); +void* WebRtcAgc_Create() { + LegacyAgc* stt = malloc(sizeof(LegacyAgc)); - *agcInst = stt; - if (stt == NULL) - { - return -1; - } - -#ifdef AGC_DEBUG - stt->fpt = fopen("./agc_test_log.txt", "wt"); - stt->agcLog = fopen("./agc_debug_log.txt", "wt"); - stt->digitalAgc.logFile = fopen("./agc_log.txt", "wt"); +#ifdef WEBRTC_AGC_DEBUG_DUMP + stt->fpt = fopen("./agc_test_log.txt", "wt"); + stt->agcLog = fopen("./agc_debug_log.txt", "wt"); + stt->digitalAgc.logFile = fopen("./agc_log.txt", "wt"); #endif - stt->initFlag = 0; - stt->lastError = 0; + stt->initFlag = 0; + stt->lastError = 0; - return 0; + return stt; } -int WebRtcAgc_Free(void *state) -{ - Agc_t *stt; +void WebRtcAgc_Free(void *state) { + LegacyAgc* stt; - stt = (Agc_t *)state; -#ifdef AGC_DEBUG - fclose(stt->fpt); - fclose(stt->agcLog); - fclose(stt->digitalAgc.logFile); + stt = (LegacyAgc*)state; +#ifdef WEBRTC_AGC_DEBUG_DUMP + fclose(stt->fpt); + fclose(stt->agcLog); + fclose(stt->digitalAgc.logFile); #endif - free(stt); - - return 0; + free(stt); } /* minLevel - Minimum volume level * maxLevel - Maximum volume level */ -int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel, - WebRtc_Word16 agcMode, WebRtc_UWord32 fs) +int WebRtcAgc_Init(void *agcInst, int32_t minLevel, int32_t maxLevel, + int16_t agcMode, uint32_t fs) { - WebRtc_Word32 max_add, tmp32; - WebRtc_Word16 i; + int32_t max_add, tmp32; + int16_t i; int tmpNorm; - Agc_t *stt; + LegacyAgc* stt; /* typecast 
state pointer */ - stt = (Agc_t *)agcInst; + stt = (LegacyAgc*)agcInst; if (WebRtcAgc_InitDigital(&stt->digitalAgc, agcMode) != 0) { @@ -1544,13 +1369,13 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel * 2 - Digital Automatic Gain Control [-targetLevelDbfs (default -3 dBOv)] * 3 - Fixed Digital Gain [compressionGaindB (default 8 dB)] */ -#ifdef AGC_DEBUG//test log +#ifdef WEBRTC_AGC_DEBUG_DUMP stt->fcount = 0; fprintf(stt->fpt, "AGC->Init\n"); #endif if (agcMode < kAgcModeUnchanged || agcMode > kAgcModeFixedDigital) { -#ifdef AGC_DEBUG//test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, "AGC->Init: error, incorrect mode\n\n"); #endif return -1; @@ -1563,7 +1388,7 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel /* If the volume range is smaller than 0-256 then * the levels are shifted up to Q8-domain */ - tmpNorm = WebRtcSpl_NormU32((WebRtc_UWord32)maxLevel); + tmpNorm = WebRtcSpl_NormU32((uint32_t)maxLevel); stt->scale = tmpNorm - 23; if (stt->scale < 0) { @@ -1572,8 +1397,8 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel // TODO(bjornv): Investigate if we really need to scale up a small range now when we have // a guard against zero-increments. For now, we do not support scale up (scale = 0). stt->scale = 0; - maxLevel = WEBRTC_SPL_LSHIFT_W32(maxLevel, stt->scale); - minLevel = WEBRTC_SPL_LSHIFT_W32(minLevel, stt->scale); + maxLevel <<= stt->scale; + minLevel <<= stt->scale; /* Make minLevel and maxLevel static in AdaptiveDigital */ if (stt->agcMode == kAgcModeAdaptiveDigital) @@ -1584,7 +1409,7 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel } /* The maximum supplemental volume range is based on a vague idea * of how much lower the gain will be than the real analog gain. 
*/ - max_add = WEBRTC_SPL_RSHIFT_W32(maxLevel - minLevel, 2); + max_add = (maxLevel - minLevel) / 4; /* Minimum/maximum volume level that can be set */ stt->minLevel = minLevel; @@ -1593,6 +1418,7 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel stt->maxInit = stt->maxLevel; stt->zeroCtrlMax = stt->maxAnalog; + stt->lastInMicLevel = 0; /* Initialize micVol parameter */ stt->micVol = stt->maxAnalog; @@ -1606,14 +1432,16 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel stt->numBlocksMicLvlSat = 0; stt->micLvlSat = 0; #endif -#ifdef AGC_DEBUG//test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, "AGC->Init: minLevel = %d, maxAnalog = %d, maxLevel = %d\n", - stt->minLevel, stt->maxAnalog, stt->maxLevel); + stt->minLevel, + stt->maxAnalog, + stt->maxLevel); #endif /* Minimum output volume is 4% higher than the available lowest volume level */ - tmp32 = WEBRTC_SPL_RSHIFT_W32((stt->maxLevel - stt->minLevel) * (WebRtc_Word32)10, 8); + tmp32 = ((stt->maxLevel - stt->minLevel) * 10) >> 8; stt->minOutput = (stt->minLevel + tmp32); stt->msTooLow = 0; @@ -1635,20 +1463,21 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel for (i = 0; i < RXX_BUFFER_LEN; i++) { - stt->Rxx16_vectorw32[i] = (WebRtc_Word32)1000; /* -54dBm0 */ + stt->Rxx16_vectorw32[i] = (int32_t)1000; /* -54dBm0 */ } stt->Rxx160w32 = 125 * RXX_BUFFER_LEN; /* (stt->Rxx16_vectorw32[0]>>3) = 125 */ stt->Rxx16pos = 0; - stt->Rxx16_LPw32 = (WebRtc_Word32)16284; /* Q(-4) */ + stt->Rxx16_LPw32 = (int32_t)16284; /* Q(-4) */ for (i = 0; i < 5; i++) { stt->Rxx16w32_array[0][i] = 0; } - for (i = 0; i < 20; i++) + for (i = 0; i < 10; i++) { stt->env[0][i] = 0; + stt->env[1][i] = 0; } stt->inQueue = 0; @@ -1676,34 +1505,15 @@ int WebRtcAgc_Init(void *agcInst, WebRtc_Word32 minLevel, WebRtc_Word32 maxLevel /* Only positive values are allowed that are not too large */ if ((minLevel >= maxLevel) || (maxLevel & 0xFC000000)) { -#ifdef AGC_DEBUG//test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, "minLevel, maxLevel value(s) are invalid\n\n"); #endif return -1; } else { -#ifdef AGC_DEBUG//test log +#ifdef WEBRTC_AGC_DEBUG_DUMP fprintf(stt->fpt, "\n"); #endif return 0; } } - -int WebRtcAgc_Version(WebRtc_Word8 *versionStr, WebRtc_Word16 length) -{ - const WebRtc_Word8 version[] = "AGC 1.7.0"; - const WebRtc_Word16 versionLen = (WebRtc_Word16)strlen(version) + 1; - - if (versionStr == NULL) - { - return -1; - } - - if (versionLen > length) - { - return -1; - } - - strncpy(versionStr, version, versionLen); - return 0; -} diff --git a/webrtc/modules/audio_processing/agc/legacy/analog_agc.h b/webrtc/modules/audio_processing/agc/legacy/analog_agc.h new file mode 100644 index 0000000..820221a --- /dev/null +++ b/webrtc/modules/audio_processing/agc/legacy/analog_agc.h @@ -0,0 +1,133 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ + +//#define MIC_LEVEL_FEEDBACK +#ifdef WEBRTC_AGC_DEBUG_DUMP +#include <stdio.h> +#endif + +#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h" +#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h" +#include "webrtc/typedefs.h" + +/* Analog Automatic Gain Control variables: + * Constant declarations (inner limits inside which no changes are done) + * In the beginning the range is narrower to widen as soon as the measure + * 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0 + * and the final limits -22.2+/-2.5dBm0. These levels make the speech signal + * go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm + * The limits are created by running the AGC with a file having the desired + * signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined + * by out=10*log10(in/260537279.7); Set the target level to the average level + * of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in + * Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) ) + */ +#define RXX_BUFFER_LEN 10 + +static const int16_t kMsecSpeechInner = 520; +static const int16_t kMsecSpeechOuter = 340; + +static const int16_t kNormalVadThreshold = 400; + +static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156 +static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977 + +typedef struct +{ + // Configurable parameters/variables + uint32_t fs; // Sampling frequency + int16_t compressionGaindB; // Fixed gain level in dB + int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3) + int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig) + uint8_t limiterEnable; // Enabling limiter (on/off (default off)) + WebRtcAgcConfig defaultConfig; + WebRtcAgcConfig usedConfig; + + // General variables + int16_t initFlag; + int16_t lastError; + + // Target level parameters + // Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7) + int32_t analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs + int32_t startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs + int32_t startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs + int32_t upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs + int32_t lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs + int32_t upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs + int32_t lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs + uint16_t targetIdx; // Table index for corresponding target level +#ifdef MIC_LEVEL_FEEDBACK + uint16_t targetIdxOffset; // Table index offset for level compensation +#endif + int16_t analogTarget; // Digital reference level in ENV scale + + // Analog AGC specific variables + int32_t filterState[8]; // For downsampling wb to nb + int32_t upperLimit; // Upper limit for mic energy + int32_t lowerLimit; // Lower limit for mic energy + int32_t Rxx160w32; // Average energy for one frame + int32_t Rxx16_LPw32; // Low pass filtered subframe energies + int32_t Rxx160_LPw32; // Low pass filtered frame energies + int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe + int32_t Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies + int32_t Rxx16w32_array[2][5];// Energy values of microphone signal + int32_t env[2][10]; // Envelope values of subframes + + int16_t Rxx16pos; // Current position in the Rxx16_vectorw32 + int16_t envSum; // Filtered scaled envelope in subframes + int16_t vadThreshold; // Threshold for VAD decision + int16_t inActive; // Inactive time in milliseconds + int16_t msTooLow; // Milliseconds of speech at a too low level + int16_t msTooHigh; // Milliseconds of speech at a too high level + int16_t changeToSlowMode; // Change to slow mode after some time at target + int16_t firstCall; // First call to the process-function + int16_t msZero; // Milliseconds of zero input + int16_t msecSpeechOuterChange;// Min ms of speech between volume changes + int16_t msecSpeechInnerChange;// Min ms of speech between volume changes + int16_t activeSpeech; // Milliseconds of active speech + int16_t muteGuardMs; // Counter to prevent mute action + int16_t inQueue; // 10 ms batch indicator + + // Microphone level variables + int32_t micRef; // Remember ref. mic level for virtual mic + uint16_t gainTableIdx; // Current position in virtual gain table + int32_t micGainIdx; // Gain index of mic level to increase slowly + int32_t micVol; // Remember volume between frames + int32_t maxLevel; // Max possible vol level, incl dig gain + int32_t maxAnalog; // Maximum possible analog volume level + int32_t maxInit; // Initial value of "max" + int32_t minLevel; // Minimum possible volume level + int32_t minOutput; // Minimum output volume level + int32_t zeroCtrlMax; // Remember max gain => don't amp low input + int32_t lastInMicLevel; + + int16_t scale; // Scale factor for internal volume levels +#ifdef MIC_LEVEL_FEEDBACK + int16_t numBlocksMicLvlSat; + uint8_t micLvlSat; +#endif + // Structs for VAD and digital_agc + AgcVad vadMic; + DigitalAgc digitalAgc; + +#ifdef WEBRTC_AGC_DEBUG_DUMP + FILE* fpt; + FILE* agcLog; + int32_t fcount; +#endif + + int16_t lowLevelSignal; +} LegacyAgc; + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_ diff --git a/webrtc/modules/audio_processing/agc/digital_agc.c b/webrtc/modules/audio_processing/agc/legacy/digital_agc.c similarity index 52% rename from webrtc/modules/audio_processing/agc/digital_agc.c rename to webrtc/modules/audio_processing/agc/legacy/digital_agc.c index e303a92..aeafb65 100644 --- a/webrtc/modules/audio_processing/agc/digital_agc.c +++ b/webrtc/modules/audio_processing/agc/legacy/digital_agc.c @@ -12,12 +12,15 @@ * */ +#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h" + +#include <assert.h> #include <string.h> -#ifdef AGC_DEBUG +#ifdef WEBRTC_AGC_DEBUG_DUMP #include <stdio.h> #endif -#include "digital_agc.h" -#include "gain_control.h" + +#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h" // To generate the gaintable, copy&paste the following lines to a Matlab window: // MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1; @@ -33,7 +36,8 @@ // zoom on; // Generator table for y=log2(1+e^x) in Q8.
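// (Sanity check on the table: sampling y = log2(1 + e^x) at integer x gives kGenFuncTable[0] = log2(2) = 1.0 -> 256 in Q8 and kGenFuncTable[1] = log2(1 + e) ~ 1.894 -> 485 in Q8. WebRtcAgc_CalculateGainTable() below indexes the table with the integer part of a Q14 argument and interpolates linearly using the fractional part.)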
-static const WebRtc_UWord16 kGenFuncTable[128] = { +enum { kGenFuncTableSize = 128 }; +static const uint16_t kGenFuncTable[kGenFuncTableSize] = { 256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693, 4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756, 8125, 8495, @@ -52,29 +56,29 @@ static const WebRtc_UWord16 kGenFuncTable[128] = { 44320, 44689, 45058, 45428, 45797, 46166, 46536, 46905 }; -static const WebRtc_Word16 kAvgDecayTime = 250; // frames; < 3000 +static const int16_t kAvgDecayTime = 250; // frames; < 3000 -WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 - WebRtc_Word16 digCompGaindB, // Q0 - WebRtc_Word16 targetLevelDbfs,// Q0 - WebRtc_UWord8 limiterEnable, - WebRtc_Word16 analogTarget) // Q0 +int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16 + int16_t digCompGaindB, // Q0 + int16_t targetLevelDbfs,// Q0 + uint8_t limiterEnable, + int16_t analogTarget) // Q0 { // This function generates the compressor gain table used in the fixed digital part. - WebRtc_UWord32 tmpU32no1, tmpU32no2, absInLevel, logApprox; - WebRtc_Word32 inLevel, limiterLvl; - WebRtc_Word32 tmp32, tmp32no1, tmp32no2, numFIX, den, y32; - const WebRtc_UWord16 kLog10 = 54426; // log2(10) in Q14 - const WebRtc_UWord16 kLog10_2 = 49321; // 10*log10(2) in Q14 - const WebRtc_UWord16 kLogE_1 = 23637; // log2(e) in Q14 - WebRtc_UWord16 constMaxGain; - WebRtc_UWord16 tmpU16, intPart, fracPart; - const WebRtc_Word16 kCompRatio = 3; - const WebRtc_Word16 kSoftLimiterLeft = 1; - WebRtc_Word16 limiterOffset = 0; // Limiter offset - WebRtc_Word16 limiterIdx, limiterLvlX; - WebRtc_Word16 constLinApprox, zeroGainLvl, maxGain, diffGain; - WebRtc_Word16 i, tmp16, tmp16no1; + uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox; + int32_t inLevel, limiterLvl; + int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32; + const uint16_t kLog10 = 54426; // log2(10) in Q14 + const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14 + const uint16_t kLogE_1 = 23637; // log2(e) in Q14 + uint16_t constMaxGain; + uint16_t tmpU16, intPart, fracPart; + const int16_t kCompRatio = 3; + const int16_t kSoftLimiterLeft = 1; + int16_t limiterOffset = 0; // Limiter offset + int16_t limiterIdx, limiterLvlX; + int16_t constLinApprox, zeroGainLvl, maxGain, diffGain; + int16_t i, tmp16, tmp16no1; int zeros, zerosScale; // Constants @@ -83,11 +87,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 // kLog10_2 = 49321; // 10*log10(2) in Q14 // Calculate maximum digital gain and zero gain level - tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB - analogTarget, kCompRatio - 1); + tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1); tmp16no1 = analogTarget - targetLevelDbfs; tmp16no1 += WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio); maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs)); - tmp32no1 = WEBRTC_SPL_MUL_16_16(maxGain, kCompRatio); + tmp32no1 = maxGain * kCompRatio; zeroGainLvl = digCompGaindB; zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1), kCompRatio - 1); @@ -100,10 +104,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 // Calculate the difference between maximum gain and gain at 0dB0v: // diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio // = (compRatio-1)*digCompGaindB/compRatio - tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB, kCompRatio - 1); + tmp32no1 = digCompGaindB * (kCompRatio - 1); diffGain = WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio 
>> 1), kCompRatio); - if (diffGain < 0) + if (diffGain < 0 || diffGain >= kGenFuncTableSize) { + assert(0); return -1; } @@ -111,9 +116,8 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 // limiterLvlX = analogTarget - limiterOffset // limiterLvl = targetLevelDbfs + limiterOffset/compRatio limiterLvlX = analogTarget - limiterOffset; - limiterIdx = 2 - + WebRtcSpl_DivW32W16ResW16(WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)limiterLvlX, 13), - WEBRTC_SPL_RSHIFT_U16(kLog10_2, 1)); + limiterIdx = + 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX << 13, kLog10_2 / 2); tmp16no1 = WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio); limiterLvl = targetLevelDbfs + tmp16no1; @@ -134,23 +138,23 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 { // Calculate scaled input level (compressor): // inLevel = fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio) - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(kCompRatio - 1, i - 1); // Q0 + tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0 tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14 inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14 // Calculate diffGain-inLevel, to map using the genFuncTable - inLevel = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)diffGain, 14) - inLevel; // Q14 + inLevel = ((int32_t)diffGain << 14) - inLevel; // Q14 // Make calculations on abs(inLevel) and compensate for the sign afterwards. - absInLevel = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(inLevel); // Q14 + absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14 // LUT with interpolation - intPart = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(absInLevel, 14); - fracPart = (WebRtc_UWord16)(absInLevel & 0x00003FFF); // extract the fractional part + intPart = (uint16_t)(absInLevel >> 14); + fracPart = (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8 - tmpU32no1 = WEBRTC_SPL_UMUL_16_16(tmpU16, fracPart); // Q22 - tmpU32no1 += WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)kGenFuncTable[intPart], 14); // Q22 - logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 8); // Q14 + tmpU32no1 = tmpU16 * fracPart; // Q22 + tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22 + logApprox = tmpU32no1 >> 8; // Q14 // Compensate for negative exponent using the relation: // log2(1 + 2^-x) = log2(1 + 2^x) - x if (inLevel < 0) @@ -160,83 +164,89 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 if (zeros < 15) { // Not enough space for multiplication - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(absInLevel, 15 - zeros); // Q(zeros-1) + tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1) tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13) if (zeros < 9) { - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 9 - zeros); // Q(zeros+13) zerosScale = 9 - zeros; + tmpU32no1 >>= zerosScale; // Q(zeros+13) } else { - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, zeros - 9); // Q22 + tmpU32no2 >>= zeros - 9; // Q22 } } else { tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28 - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); // Q22 + tmpU32no2 >>= 6; // Q22 } logApprox = 0; if (tmpU32no2 < tmpU32no1) { - logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1 - tmpU32no2, 8 - zerosScale); //Q14 + logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); //Q14 } } - numFIX = WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_U16(maxGain, constMaxGain), 6); // Q14 - numFIX -= 
WEBRTC_SPL_MUL_32_16((WebRtc_Word32)logApprox, diffGain); // Q14 + numFIX = (maxGain * constMaxGain) << 6; // Q14 + numFIX -= (int32_t)logApprox * diffGain; // Q14 // Calculate ratio - // Shift numFIX as much as possible - zeros = WebRtcSpl_NormW32(numFIX); - numFIX = WEBRTC_SPL_LSHIFT_W32(numFIX, zeros); // Q(14+zeros) + // Shift |numFIX| as much as possible. + // Ensure we avoid wrap-around in |den| as well. + if (numFIX > (den >> 8)) // |den| is Q8. + { + zeros = WebRtcSpl_NormW32(numFIX); + } else + { + zeros = WebRtcSpl_NormW32(den) + 8; + } + numFIX <<= zeros; // Q(14+zeros) // Shift den so we end up in Qy1 tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 8); // Q(zeros) if (numFIX < 0) { - numFIX -= WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1); + numFIX -= tmp32no1 / 2; } else { - numFIX += WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1); + numFIX += tmp32no1 / 2; } - y32 = WEBRTC_SPL_DIV(numFIX, tmp32no1); // in Q14 + y32 = numFIX / tmp32no1; // in Q14 if (limiterEnable && (i < limiterIdx)) { tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14 - tmp32 -= WEBRTC_SPL_LSHIFT_W32(limiterLvl, 14); // Q14 + tmp32 -= limiterLvl << 14; // Q14 y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20); } if (y32 > 39000) { - tmp32 = WEBRTC_SPL_MUL(y32 >> 1, kLog10) + 4096; // in Q27 - tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 13); // in Q14 + tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27 + tmp32 >>= 13; // In Q14. } else { - tmp32 = WEBRTC_SPL_MUL(y32, kLog10) + 8192; // in Q28 - tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 14); // in Q14 + tmp32 = y32 * kLog10 + 8192; // in Q28 + tmp32 >>= 14; // In Q14. } - tmp32 += WEBRTC_SPL_LSHIFT_W32(16, 14); // in Q14 (Make sure final output is in Q16) + tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16) // Calculate power if (tmp32 > 0) { - intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 14); - fracPart = (WebRtc_UWord16)(tmp32 & 0x00003FFF); // in Q14 - if (WEBRTC_SPL_RSHIFT_W32(fracPart, 13)) + intPart = (int16_t)(tmp32 >> 14); + fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14 + if ((fracPart >> 13) != 0) { - tmp16 = WEBRTC_SPL_LSHIFT_W16(2, 14) - constLinApprox; - tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - fracPart; - tmp32no2 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16); - tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13); - tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - tmp32no2; + tmp16 = (2 << 14) - constLinApprox; + tmp32no2 = (1 << 14) - fracPart; + tmp32no2 *= tmp16; + tmp32no2 >>= 13; + tmp32no2 = (1 << 14) - tmp32no2; } else { - tmp16 = constLinApprox - WEBRTC_SPL_LSHIFT_W16(1, 14); - tmp32no2 = WEBRTC_SPL_MUL_32_16(fracPart, tmp16); - tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13); + tmp16 = constLinApprox - (1 << 14); + tmp32no2 = (fracPart * tmp16) >> 13; } - fracPart = (WebRtc_UWord16)tmp32no2; - gainTable[i] = WEBRTC_SPL_LSHIFT_W32(1, intPart) - + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); + fracPart = (uint16_t)tmp32no2; + gainTable[i] = + (1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14); } else { gainTable[i] = 0; @@ -246,9 +256,7 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16 return 0; } -WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode) -{ - +int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) { if (agcMode == kAgcModeFixedDigital) { // start at minimum to find correct gain faster @@ -256,13 +264,13 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode) } else { // start out with 0 dB gain - stt->capacitorSlow = 134217728; // (WebRtc_Word32)(0.125f * 32768.0f 
* 32768.0f); + stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f); } stt->capacitorFast = 0; stt->gain = 65536; stt->gatePrevious = 0; stt->agcMode = agcMode; -#ifdef AGC_DEBUG +#ifdef WEBRTC_AGC_DEBUG_DUMP stt->frameCounter = 0; #endif @@ -273,52 +281,45 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode) return 0; } -WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_far, - WebRtc_Word16 nrSamples) -{ - // Check for valid pointer - if (&stt->vadFarend == NULL) - { - return -1; - } - +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt, + const int16_t* in_far, + size_t nrSamples) { + assert(stt != NULL); // VAD for far end WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples); return 0; } -WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_near, - const WebRtc_Word16 *in_near_H, WebRtc_Word16 *out, - WebRtc_Word16 *out_H, WebRtc_UWord32 FS, - WebRtc_Word16 lowlevelSignal) -{ +int32_t WebRtcAgc_ProcessDigital(DigitalAgc* stt, + const int16_t* const* in_near, + size_t num_bands, + int16_t* const* out, + uint32_t FS, + int16_t lowlevelSignal) { // array for gains (one value per ms, incl start & end) - WebRtc_Word32 gains[11]; + int32_t gains[11]; - WebRtc_Word32 out_tmp, tmp32; - WebRtc_Word32 env[10]; - WebRtc_Word32 nrg, max_nrg; - WebRtc_Word32 cur_level; - WebRtc_Word32 gain32, delta; - WebRtc_Word16 logratio; - WebRtc_Word16 lower_thr, upper_thr; - WebRtc_Word16 zeros, zeros_fast, frac; - WebRtc_Word16 decay; - WebRtc_Word16 gate, gain_adj; - WebRtc_Word16 k, n; - WebRtc_Word16 L, L2; // samples/subframe + int32_t out_tmp, tmp32; + int32_t env[10]; + int32_t max_nrg; + int32_t cur_level; + int32_t gain32, delta; + int16_t logratio; + int16_t lower_thr, upper_thr; + int16_t zeros = 0, zeros_fast, frac = 0; + int16_t decay; + int16_t gate, gain_adj; + int16_t k; + size_t n, i, L; + int16_t L2; // samples/subframe // determine number of samples per ms if (FS == 8000) { L = 8; L2 = 3; - } else if (FS == 16000) - { - L = 16; - L2 = 4; - } else if (FS == 32000) + } else if (FS == 16000 || FS == 32000 || FS == 48000) { L = 16; L2 = 4; @@ -327,27 +328,22 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i return -1; } - // TODO(andrew): again, we don't need input and output pointers... - if (in_near != out) + for (i = 0; i < num_bands; ++i) { - // Only needed if they don't already point to the same place. - memcpy(out, in_near, 10 * L * sizeof(WebRtc_Word16)); - } - if (FS == 32000) - { - if (in_near_H != out_H) + if (in_near[i] != out[i]) { - memcpy(out_H, in_near_H, 10 * L * sizeof(WebRtc_Word16)); + // Only needed if they don't already point to the same place. 
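+ // (Each of the num_bands band pointers is handled independently here, so a fully in-place call, with in_near[i] == out[i] for every band, performs no copying at all.)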
+ memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0])); } } // VAD for near end - logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10); + logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10); // Account for far end VAD if (stt->vadFarend.counter > 10) { - tmp32 = WEBRTC_SPL_MUL_16_16(3, logratio); - logratio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 - stt->vadFarend.logRatio, 2); + tmp32 = 3 * logratio; + logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2); } // Determine decay factor depending on VAD @@ -364,11 +360,11 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i decay = 0; } else { - // decay = (WebRtc_Word16)(((lower_thr - logratio) + // decay = (int16_t)(((lower_thr - logratio) // * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10); // SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65 - tmp32 = WEBRTC_SPL_MUL_16_16((lower_thr - logratio), 65); - decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 10); + tmp32 = (lower_thr - logratio) * 65; + decay = (int16_t)(tmp32 >> 10); } // adjust decay factor for long silence (detected as low standard deviation) @@ -380,9 +376,9 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i decay = 0; } else if (stt->vadNearend.stdLongTerm < 8096) { - // decay = (WebRtc_Word16)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12); - tmp32 = WEBRTC_SPL_MUL_16_16((stt->vadNearend.stdLongTerm - 4000), decay); - decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 12); + // decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12); + tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay; + decay = (int16_t)(tmp32 >> 12); } if (lowlevelSignal != 0) @@ -390,9 +386,14 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i decay = 0; } } -#ifdef AGC_DEBUG +#ifdef WEBRTC_AGC_DEBUG_DUMP stt->frameCounter++; - fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100, logratio, decay, stt->vadNearend.stdLongTerm); + fprintf(stt->logFile, + "%5.2f\t%d\t%d\t%d\t", + (float)(stt->frameCounter) / 100, + logratio, + decay, + stt->vadNearend.stdLongTerm); #endif // Find max amplitude per sub frame // iterate over sub frames @@ -402,7 +403,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i max_nrg = 0; for (n = 0; n < L; n++) { - nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]); + int32_t nrg = out[0][k * L + n] * out[0][k * L + n]; if (nrg > max_nrg) { max_nrg = nrg; @@ -445,34 +446,39 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i } // Translate signal level into gain, using a piecewise linear approximation // find number of leading zeros - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)cur_level); + zeros = WebRtcSpl_NormU32((uint32_t)cur_level); if (cur_level == 0) { zeros = 31; } - tmp32 = (WEBRTC_SPL_LSHIFT_W32(cur_level, zeros) & 0x7FFFFFFF); - frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 19); // Q12 - tmp32 = WEBRTC_SPL_MUL((stt->gainTable[zeros-1] - stt->gainTable[zeros]), frac); - gains[k + 1] = stt->gainTable[zeros] + WEBRTC_SPL_RSHIFT_W32(tmp32, 12); -#ifdef AGC_DEBUG - if (k == 0) - { - fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level, stt->capacitorFast, stt->capacitorSlow, zeros); + tmp32 = (cur_level << zeros) & 0x7FFFFFFF; + frac = (int16_t)(tmp32 >> 19); // Q12. 
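+ // (|zeros| is the leading-zero count of |cur_level|, i.e. a coarse -log2 of the signal level, and |frac| holds the 12 bits just below the leading one; the next two lines interpolate linearly, in Q12, between the gain table entries for the two nearest powers of two.)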
+ tmp32 = (stt->gainTable[zeros-1] - stt->gainTable[zeros]) * frac; + gains[k + 1] = stt->gainTable[zeros] + (tmp32 >> 12); +#ifdef WEBRTC_AGC_DEBUG_DUMP + if (k == 0) { + fprintf(stt->logFile, + "%d\t%d\t%d\t%d\t%d\n", + env[0], + cur_level, + stt->capacitorFast, + stt->capacitorSlow, + zeros); } #endif } // Gate processing (lower gain during absence of speech) - zeros = WEBRTC_SPL_LSHIFT_W16(zeros, 9) - WEBRTC_SPL_RSHIFT_W16(frac, 3); + zeros = (zeros << 9) - (frac >> 3); // find number of leading zeros - zeros_fast = WebRtcSpl_NormU32((WebRtc_UWord32)stt->capacitorFast); + zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast); if (stt->capacitorFast == 0) { zeros_fast = 31; } - tmp32 = (WEBRTC_SPL_LSHIFT_W32(stt->capacitorFast, zeros_fast) & 0x7FFFFFFF); - zeros_fast = WEBRTC_SPL_LSHIFT_W16(zeros_fast, 9); - zeros_fast -= (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 22); + tmp32 = (stt->capacitorFast << zeros_fast) & 0x7FFFFFFF; + zeros_fast <<= 9; + zeros_fast -= (int16_t)(tmp32 >> 22); gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm; @@ -481,8 +487,8 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i stt->gatePrevious = 0; } else { - tmp32 = WEBRTC_SPL_MUL_16_16(stt->gatePrevious, 7); - gate = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)gate + tmp32, 3); + tmp32 = stt->gatePrevious * 7; + gate = (int16_t)((gate + tmp32) >> 3); stt->gatePrevious = gate; } // gate < 0 -> no gate @@ -491,7 +497,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i { if (gate < 2500) { - gain_adj = WEBRTC_SPL_RSHIFT_W16(2500 - gate, 5); + gain_adj = (2500 - gate) >> 5; } else { gain_adj = 0; @@ -501,12 +507,12 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i if ((gains[k + 1] - stt->gainTable[0]) > 8388608) { // To prevent wraparound - tmp32 = WEBRTC_SPL_RSHIFT_W32((gains[k+1] - stt->gainTable[0]), 8); - tmp32 = WEBRTC_SPL_MUL(tmp32, (178 + gain_adj)); + tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8; + tmp32 *= 178 + gain_adj; } else { - tmp32 = WEBRTC_SPL_MUL((gains[k+1] - stt->gainTable[0]), (178 + gain_adj)); - tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 8); + tmp32 = (gains[k+1] - stt->gainTable[0]) * (178 + gain_adj); + tmp32 >>= 8; } gains[k + 1] = stt->gainTable[0] + tmp32; } @@ -521,23 +527,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i { zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]); } - gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1; - gain32 = WEBRTC_SPL_MUL(gain32, gain32); + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; // check for overflow - while (AGC_MUL32(WEBRTC_SPL_RSHIFT_W32(env[k], 12) + 1, gain32) - > WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)32767, 2 * (1 - zeros + 10))) + while (AGC_MUL32((env[k] >> 12) + 1, gain32) + > WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) { // multiply by 253/256 ==> -0.1 dB if (gains[k + 1] > 8388607) { // Prevent wrap around - gains[k + 1] = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(gains[k+1], 8), 253); + gains[k + 1] = (gains[k+1] / 256) * 253; } else { - gains[k + 1] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(gains[k+1], 253), 8); + gains[k + 1] = (gains[k+1] * 253) / 256; } - gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1; - gain32 = WEBRTC_SPL_MUL(gain32, gain32); + gain32 = (gains[k + 1] >> zeros) + 1; + gain32 *= gain32; } } // gain reductions should be done 1 ms earlier than gain increases @@ -553,42 +559,25 @@ WebRtc_Word32 
WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i // Apply gain // handle first sub frame separately - delta = WEBRTC_SPL_LSHIFT_W32(gains[1] - gains[0], (4 - L2)); - gain32 = WEBRTC_SPL_LSHIFT_W32(gains[0], 4); + delta = (gains[1] - gains[0]) << (4 - L2); + gain32 = gains[0] << 4; // iterate over samples for (n = 0; n < L; n++) { - // For lower band - tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7)); - out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); - if (out_tmp > 4095) + for (i = 0; i < num_bands; ++i) { - out[n] = (WebRtc_Word16)32767; - } else if (out_tmp < -4096) - { - out[n] = (WebRtc_Word16)-32768; - } else - { - tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32, 4)); - out[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); - } - // For higher band - if (FS == 32000) - { - tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n], - WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7)); - out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + tmp32 = out[i][n] * ((gain32 + 127) >> 7); + out_tmp = tmp32 >> 16; if (out_tmp > 4095) { - out_H[n] = (WebRtc_Word16)32767; + out[i][n] = (int16_t)32767; } else if (out_tmp < -4096) { - out_H[n] = (WebRtc_Word16)-32768; + out[i][n] = (int16_t)-32768; } else { - tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n], - WEBRTC_SPL_RSHIFT_W32(gain32, 4)); - out_H[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + tmp32 = out[i][n] * (gain32 >> 4); + out[i][n] = (int16_t)(tmp32 >> 16); } } // @@ -598,21 +587,15 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i // iterate over subframes for (k = 1; k < 10; k++) { - delta = WEBRTC_SPL_LSHIFT_W32(gains[k+1] - gains[k], (4 - L2)); - gain32 = WEBRTC_SPL_LSHIFT_W32(gains[k], 4); + delta = (gains[k+1] - gains[k]) << (4 - L2); + gain32 = gains[k] << 4; // iterate over samples for (n = 0; n < L; n++) { - // For lower band - tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[k * L + n], - WEBRTC_SPL_RSHIFT_W32(gain32, 4)); - out[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); - // For higher band - if (FS == 32000) + for (i = 0; i < num_bands; ++i) { - tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[k * L + n], - WEBRTC_SPL_RSHIFT_W32(gain32, 4)); - out_H[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16); + tmp32 = out[i][k * L + n] * (gain32 >> 4); + out[i][k * L + n] = (int16_t)(tmp32 >> 16); } gain32 += delta; } @@ -621,24 +604,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i return 0; } -void WebRtcAgc_InitVad(AgcVad_t *state) -{ - WebRtc_Word16 k; +void WebRtcAgc_InitVad(AgcVad* state) { + int16_t k; state->HPstate = 0; // state of high pass filter state->logRatio = 0; // log( P(active) / P(inactive) ) // average input level (Q10) - state->meanLongTerm = WEBRTC_SPL_LSHIFT_W16(15, 10); + state->meanLongTerm = 15 << 10; // variance of input level (Q8) - state->varianceLongTerm = WEBRTC_SPL_LSHIFT_W32(500, 8); + state->varianceLongTerm = 500 << 8; state->stdLongTerm = 0; // standard deviation of input level in dB // short-term average input level (Q10) - state->meanShortTerm = WEBRTC_SPL_LSHIFT_W16(15, 10); + state->meanShortTerm = 15 << 10; // short-term variance of input level (Q8) - state->varianceShortTerm = WEBRTC_SPL_LSHIFT_W32(500, 8); + state->varianceShortTerm = 500 << 8; state->stdShortTerm = 0; // short-term standard deviation of input level in dB state->counter = 3; // counts updates @@ -649,17 +631,17 @@ void WebRtcAgc_InitVad(AgcVad_t *state) } } -WebRtc_Word16 
WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state - const WebRtc_Word16 *in, // (i) Speech signal - WebRtc_Word16 nrSamples) // (i) number of samples +int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples) // (i) number of samples { - WebRtc_Word32 out, nrg, tmp32, tmp32b; - WebRtc_UWord16 tmpU16; - WebRtc_Word16 k, subfr, tmp16; - WebRtc_Word16 buf1[8]; - WebRtc_Word16 buf2[4]; - WebRtc_Word16 HPstate; - WebRtc_Word16 zeros, dB; + int32_t out, nrg, tmp32, tmp32b; + uint16_t tmpU16; + int16_t k, subfr, tmp16; + int16_t buf1[8]; + int16_t buf2[4]; + int16_t HPstate; + int16_t zeros, dB; // process in 10 sub frames of 1 ms (to save on memory) nrg = 0; @@ -671,9 +653,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state { for (k = 0; k < 8; k++) { - tmp32 = (WebRtc_Word32)in[2 * k] + (WebRtc_Word32)in[2 * k + 1]; - tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 1); - buf1[k] = (WebRtc_Word16)tmp32; + tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1]; + tmp32 >>= 1; + buf1[k] = (int16_t)tmp32; } in += 16; @@ -688,10 +670,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state for (k = 0; k < 4; k++) { out = buf2[k] + HPstate; - tmp32 = WEBRTC_SPL_MUL(600, out); - HPstate = (WebRtc_Word16)(WEBRTC_SPL_RSHIFT_W32(tmp32, 10) - buf2[k]); - tmp32 = WEBRTC_SPL_MUL(out, out); - nrg += WEBRTC_SPL_RSHIFT_W32(tmp32, 6); + tmp32 = 600 * out; + HPstate = (int16_t)((tmp32 >> 10) - buf2[k]); + nrg += (out * out) >> 6; } } state->HPstate = HPstate; @@ -722,7 +703,7 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state } // energy level (range {-32..30}) (Q10) - dB = WEBRTC_SPL_LSHIFT_W16(15 - zeros, 11); + dB = (15 - zeros) << 11; // Update statistics @@ -733,44 +714,49 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state } // update short-term estimate of mean energy level (Q10) - tmp32 = (WEBRTC_SPL_MUL_16_16(state->meanShortTerm, 15) + (WebRtc_Word32)dB); - state->meanShortTerm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + tmp32 = state->meanShortTerm * 15 + dB; + state->meanShortTerm = (int16_t)(tmp32 >> 4); // update short-term estimate of variance in energy level (Q8) - tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12); - tmp32 += WEBRTC_SPL_MUL(state->varianceShortTerm, 15); - state->varianceShortTerm = WEBRTC_SPL_RSHIFT_W32(tmp32, 4); + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceShortTerm * 15; + state->varianceShortTerm = tmp32 / 16; // update short-term estimate of standard deviation in energy level (Q10) - tmp32 = WEBRTC_SPL_MUL_16_16(state->meanShortTerm, state->meanShortTerm); - tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceShortTerm, 12) - tmp32; - state->stdShortTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32); + tmp32 = state->meanShortTerm * state->meanShortTerm; + tmp32 = (state->varianceShortTerm << 12) - tmp32; + state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); // update long-term estimate of mean energy level (Q10) - tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->counter) + (WebRtc_Word32)dB; - state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(tmp32, - WEBRTC_SPL_ADD_SAT_W16(state->counter, 1)); + tmp32 = state->meanLongTerm * state->counter + dB; + state->meanLongTerm = WebRtcSpl_DivW32W16ResW16( + tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); // update long-term estimate of variance in energy level (Q8) - tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12); - tmp32 += 
WEBRTC_SPL_MUL(state->varianceLongTerm, state->counter); - state->varianceLongTerm = WebRtcSpl_DivW32W16(tmp32, - WEBRTC_SPL_ADD_SAT_W16(state->counter, 1)); + tmp32 = (dB * dB) >> 12; + tmp32 += state->varianceLongTerm * state->counter; + state->varianceLongTerm = WebRtcSpl_DivW32W16( + tmp32, WebRtcSpl_AddSatW16(state->counter, 1)); // update long-term estimate of standard deviation in energy level (Q10) - tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->meanLongTerm); - tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceLongTerm, 12) - tmp32; - state->stdLongTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32); + tmp32 = state->meanLongTerm * state->meanLongTerm; + tmp32 = (state->varianceLongTerm << 12) - tmp32; + state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32); // update voice activity measure (Q10) - tmp16 = WEBRTC_SPL_LSHIFT_W16(3, 12); - tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, (dB - state->meanLongTerm)); + tmp16 = 3 << 12; + // TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in + // ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16() + // was used, which did an intermediate cast to (int16_t), hence losing + // significant bits. This cause logRatio to max out positive, rather than + // negative. This is a bug, but has very little significance. + tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm); tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm); - tmpU16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)13, 12); + tmpU16 = (13 << 12); tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16); - tmp32 += WEBRTC_SPL_RSHIFT_W32(tmp32b, 10); + tmp32 += tmp32b >> 10; - state->logRatio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 6); + state->logRatio = (int16_t)(tmp32 >> 6); // limit if (state->logRatio > 2048) diff --git a/webrtc/modules/audio_processing/agc/legacy/digital_agc.h b/webrtc/modules/audio_processing/agc/legacy/digital_agc.h new file mode 100644 index 0000000..819844d --- /dev/null +++ b/webrtc/modules/audio_processing/agc/legacy/digital_agc.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ + +#ifdef WEBRTC_AGC_DEBUG_DUMP +#include <stdio.h> +#endif +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/typedefs.h" + +// the 32 most significant bits of A(19) * B(26) >> 13 +#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 )) +// C + the 32 most significant bits of A * B +#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 )) + +typedef struct +{ + int32_t downState[8]; + int16_t HPstate; + int16_t counter; + int16_t logRatio; // log( P(active) / P(inactive) ) (Q10) + int16_t meanLongTerm; // Q10 + int32_t varianceLongTerm; // Q8 + int16_t stdLongTerm; // Q10 + int16_t meanShortTerm; // Q10 + int32_t varianceShortTerm; // Q8 + int16_t stdShortTerm; // Q10 +} AgcVad; // total = 54 bytes + +typedef struct +{ + int32_t capacitorSlow; + int32_t capacitorFast; + int32_t gain; + int32_t gainTable[32]; + int16_t gatePrevious; + int16_t agcMode; + AgcVad vadNearend; + AgcVad vadFarend; +#ifdef WEBRTC_AGC_DEBUG_DUMP + FILE* logFile; + int frameCounter; +#endif +} DigitalAgc; + +int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode); + +int32_t WebRtcAgc_ProcessDigital(DigitalAgc* digitalAgcInst, + const int16_t* const* inNear, + size_t num_bands, + int16_t* const* out, + uint32_t FS, + int16_t lowLevelSignal); + +int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst, + const int16_t* inFar, + size_t nrSamples); + +void WebRtcAgc_InitVad(AgcVad* vadInst); + +int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state + const int16_t* in, // (i) Speech signal + size_t nrSamples); // (i) number of samples + +int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16 + int16_t compressionGaindB, // Q0 (in dB) + int16_t targetLevelDbfs,// Q0 (in dB) + uint8_t limiterEnable, + int16_t analogTarget); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_ diff --git a/webrtc/modules/audio_processing/agc/interface/gain_control.h b/webrtc/modules/audio_processing/agc/legacy/gain_control.h similarity index 51% rename from webrtc/modules/audio_processing/agc/interface/gain_control.h rename to webrtc/modules/audio_processing/agc/legacy/gain_control.h index 2893331..08c1988 100644 --- a/webrtc/modules/audio_processing/agc/interface/gain_control.h +++ b/webrtc/modules/audio_processing/agc/legacy/gain_control.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,10 +8,10 @@ * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ -#include "typedefs.h" +#include "webrtc/typedefs.h" // Errors #define AGC_UNSPECIFIED_ERROR 18000 @@ -39,10 +39,10 @@ enum typedef struct { - WebRtc_Word16 targetLevelDbfs; // default 3 (-3 dBOv) - WebRtc_Word16 compressionGaindB; // default 9 dB - WebRtc_UWord8 limiterEnable; // default kAgcTrue (on) -} WebRtcAgc_config_t; + int16_t targetLevelDbfs; // default 3 (-3 dBOv) + int16_t compressionGaindB; // default 9 dB + uint8_t limiterEnable; // default kAgcTrue (on) +} WebRtcAgcConfig; #if defined(__cplusplus) extern "C" @@ -50,14 +50,14 @@ extern "C" #endif /* - * This function processes a 10/20ms frame of far-end speech to determine - * if there is active speech. Far-end speech length can be either 10ms or - * 20ms. The length of the input speech vector must be given in samples - * (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000). + * This function processes a 10 ms frame of far-end speech to determine + * if there is active speech. The length of the input speech vector must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). * * Input: * - agcInst : AGC instance. - * - inFar : Far-end input speech vector (10 or 20ms) + * - inFar : Far-end input speech vector * - samples : Number of samples in input vector * * Return value: @@ -65,26 +65,23 @@ extern "C" * : -1 - Error */ int WebRtcAgc_AddFarend(void* agcInst, - const WebRtc_Word16* inFar, - WebRtc_Word16 samples); + const int16_t* inFar, + size_t samples); /* - * This function processes a 10/20ms frame of microphone speech to determine - * if there is active speech. Microphone speech length can be either 10ms or - * 20ms. The length of the input speech vector must be given in samples - * (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000). For very low - * input levels, the input signal is increased in level by multiplying and - * overwriting the samples in inMic[]. + * This function processes a 10 ms frame of microphone speech to determine + * if there is active speech. The length of the input speech vector must be + * given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or + * FS=48000). For very low input levels, the input signal is increased in level + * by multiplying and overwriting the samples in inMic[]. * * This function should be called before any further processing of the * near-end microphone signal. * * Input: * - agcInst : AGC instance. - * - inMic : Microphone input speech vector (10 or 20 ms) for - * L band - * - inMic_H : Microphone input speech vector (10 or 20 ms) for - * H band + * - inMic : Microphone input speech vector for each band + * - num_bands : Number of bands in input vector * - samples : Number of samples in input vector * * Return value: @@ -92,24 +89,21 @@ int WebRtcAgc_AddFarend(void* agcInst, * : -1 - Error */ int WebRtcAgc_AddMic(void* agcInst, - WebRtc_Word16* inMic, - WebRtc_Word16* inMic_H, - WebRtc_Word16 samples); + int16_t* const* inMic, + size_t num_bands, + size_t samples); /* * This function replaces the analog microphone with a virtual one. * It is a digital gain applied to the input signal and is used in the - * agcAdaptiveDigital mode where no microphone level is adjustable. 
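A hedged usage sketch for the updated far-end entry point documented above (instance setup elided; the helper name is invented):

#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"

// Feed one 10 ms far-end frame at FS=16000 (160 samples; 80 at FS=8000).
int FeedFarend(void* agc, const int16_t frame[160]) {
  return WebRtcAgc_AddFarend(agc, frame, 160);  // 0 on success, -1 on error.
}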
- * Microphone speech length can be either 10ms or 20ms. The length of the - * input speech vector must be given in samples (80/160 when FS=8000, and - * 160/320 when FS=16000 or FS=32000). + * agcAdaptiveDigital mode where no microphone level is adjustable. The length + * of the input speech vector must be given in samples (80 when FS=8000, and 160 + * when FS=16000, FS=32000 or FS=48000). * * Input: * - agcInst : AGC instance. - * - inMic : Microphone input speech vector for (10 or 20 ms) - * L band - * - inMic_H : Microphone input speech vector for (10 or 20 ms) - * H band + * - inMic : Microphone input speech vector for each band + * - num_bands : Number of bands in input vector * - samples : Number of samples in input vector * - micLevelIn : Input level of microphone (static) * @@ -123,30 +117,27 @@ int WebRtcAgc_AddMic(void* agcInst, * : -1 - Error */ int WebRtcAgc_VirtualMic(void* agcInst, - WebRtc_Word16* inMic, - WebRtc_Word16* inMic_H, - WebRtc_Word16 samples, - WebRtc_Word32 micLevelIn, - WebRtc_Word32* micLevelOut); + int16_t* const* inMic, + size_t num_bands, + size_t samples, + int32_t micLevelIn, + int32_t* micLevelOut); /* - * This function processes a 10/20ms frame and adjusts (normalizes) the gain - * both analog and digitally. The gain adjustments are done only during - * active periods of speech. The input speech length can be either 10ms or - * 20ms and the output is of the same length. The length of the speech - * vectors must be given in samples (80/160 when FS=8000, and 160/320 when - * FS=16000 or FS=32000). The echo parameter can be used to ensure the AGC will - * not adjust upward in the presence of echo. + * This function processes a 10 ms frame and adjusts (normalizes) the gain both + * analog and digitally. The gain adjustments are done only during active + * periods of speech. The length of the speech vectors must be given in samples + * (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo + * parameter can be used to ensure the AGC will not adjust upward in the + * presence of echo. * * This function should be called after processing the near-end microphone * signal, in any case after any echo cancellation. * * Input: * - agcInst : AGC instance - * - inNear : Near-end input speech vector (10 or 20 ms) for - * L band - * - inNear_H : Near-end input speech vector (10 or 20 ms) for - * H band + * - inNear : Near-end input speech vector for each band + * - num_bands : Number of bands in input/output vector * - samples : Number of samples in input/output vector * - inMicLevel : Current microphone volume level * - echo : Set to 0 if the signal passed to add_mic is @@ -156,9 +147,8 @@ int WebRtcAgc_VirtualMic(void* agcInst, * * Output: * - outMicLevel : Adjusted microphone volume level - * - out : Gain-adjusted near-end speech vector (L band) + * - out : Gain-adjusted near-end speech vector * : May be the same vector as the input. - * - out_H : Gain-adjusted near-end speech vector (H band) * - saturationWarning : A returned value of 1 indicates a saturation event * has occurred and the volume cannot be further * reduced. Otherwise will be set to 0. 
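The per-band pointer convention used by the new AddMic/VirtualMic signatures can be sketched as follows (hedged example; the zeroed band buffers stand in for a caller's splitting-filter output):

#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"

// At 32 kHz a 10 ms frame is passed as two 160-sample bands.
int FeedMic(void* agc) {
  int16_t low[160] = {0};   // 0-8 kHz band
  int16_t high[160] = {0};  // 8-16 kHz band
  int16_t* bands[2] = {low, high};
  return WebRtcAgc_AddMic(agc, bands, /*num_bands=*/2, /*samples=*/160);
}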
@@ -168,15 +158,14 @@ int WebRtcAgc_VirtualMic(void* agcInst, * : -1 - Error */ int WebRtcAgc_Process(void* agcInst, - const WebRtc_Word16* inNear, - const WebRtc_Word16* inNear_H, - WebRtc_Word16 samples, - WebRtc_Word16* out, - WebRtc_Word16* out_H, - WebRtc_Word32 inMicLevel, - WebRtc_Word32* outMicLevel, - WebRtc_Word16 echo, - WebRtc_UWord8* saturationWarning); + const int16_t* const* inNear, + size_t num_bands, + size_t samples, + int16_t* const* out, + int32_t inMicLevel, + int32_t* outMicLevel, + int16_t echo, + uint8_t* saturationWarning); /* * This function sets the config parameters (targetLevelDbfs, @@ -192,7 +181,7 @@ int WebRtcAgc_Process(void* agcInst, * : 0 - Normal operation. * : -1 - Error */ -int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config); +int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config); /* * This function returns the config parameters (targetLevelDbfs, @@ -208,27 +197,21 @@ int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config); * : 0 - Normal operation. * : -1 - Error */ -int WebRtcAgc_get_config(void* agcInst, WebRtcAgc_config_t* config); +int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config); /* - * This function creates an AGC instance, which will contain the state - * information for one (duplex) channel. - * - * Return value : AGC instance if successful - * : 0 (i.e., a NULL pointer) if unsuccessful + * This function creates and returns an AGC instance, which will contain the + * state information for one (duplex) channel. */ -int WebRtcAgc_Create(void **agcInst); +void* WebRtcAgc_Create(); /* * This function frees the AGC instance created at the beginning. * * Input: * - agcInst : AGC instance. - * - * Return value : 0 - Ok - * -1 - Error */ -int WebRtcAgc_Free(void *agcInst); +void WebRtcAgc_Free(void* agcInst); /* * This function initializes an AGC instance. @@ -247,27 +230,13 @@ int WebRtcAgc_Free(void *agcInst); * -1 - Error */ int WebRtcAgc_Init(void *agcInst, - WebRtc_Word32 minLevel, - WebRtc_Word32 maxLevel, - WebRtc_Word16 agcMode, - WebRtc_UWord32 fs); - -/* - * This function returns a text string containing the version. - * - * Input: - * - length : Length of the char array pointed to by version - * Output: - * - version : Pointer to a char array of to which the version - * : string will be copied. - * - * Return value : 0 - OK - * -1 - Error - */ -int WebRtcAgc_Version(WebRtc_Word8 *versionStr, WebRtc_Word16 length); + int32_t minLevel, + int32_t maxLevel, + int16_t agcMode, + uint32_t fs); #if defined(__cplusplus) } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_ diff --git a/webrtc/modules/audio_processing/agc/utility.cc b/webrtc/modules/audio_processing/agc/utility.cc new file mode 100644 index 0000000..48458ad --- /dev/null +++ b/webrtc/modules/audio_processing/agc/utility.cc @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
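With Create() now returning the instance directly and Free() returning void, the minimal lifecycle becomes the following (sketch; the agcMode value 2 is assumed to be the legacy adaptive-digital constant, which this hunk does not show):

#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"

int main() {
  void* agc = WebRtcAgc_Create();  // NULL on allocation failure.
  if (!agc) return 1;
  const int err = WebRtcAgc_Init(agc, 0, 255, /*agcMode=*/2, /*fs=*/16000);
  WebRtcAgc_Free(agc);  // void now; no error code to check.
  return err ? 1 : 0;
}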
+ */ + +#include "webrtc/modules/audio_processing/agc/utility.h" + +#include <math.h> + +static const double kLog10 = 2.30258509299; +static const double kLinear2DbScale = 20.0 / kLog10; +static const double kLinear2LoudnessScale = 13.4 / kLog10; + +double Loudness2Db(double loudness) { + return loudness * kLinear2DbScale / kLinear2LoudnessScale; +} + +double Linear2Loudness(double rms) { + if (rms == 0) + return -15; + return kLinear2LoudnessScale * log(rms); +} + +double Db2Loudness(double db) { + return db * kLinear2LoudnessScale / kLinear2DbScale; +} + +double Dbfs2Loudness(double dbfs) { + return Db2Loudness(90 + dbfs); +} diff --git a/webrtc/modules/audio_processing/agc/utility.h b/webrtc/modules/audio_processing/agc/utility.h new file mode 100644 index 0000000..df85c2e --- /dev/null +++ b/webrtc/modules/audio_processing/agc/utility.h @@ -0,0 +1,23 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ + +// TODO(turajs): Add description of function. +double Loudness2Db(double loudness); + +double Linear2Loudness(double rms); + +double Db2Loudness(double db); + +double Dbfs2Loudness(double dbfs); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_ diff --git a/webrtc/modules/audio_processing/audio_buffer.cc b/webrtc/modules/audio_processing/audio_buffer.cc index f7c55b4..81790a1 100644 --- a/webrtc/modules/audio_processing/audio_buffer.cc +++ b/webrtc/modules/audio_processing/audio_buffer.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,173 +8,331 @@ * be found in the AUTHORS file in the root of the source tree.
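The loudness helpers in utility.cc above are plain scalings (20/ln 10 for dB, 13.4/ln 10 for loudness, with a 90 dB reference offset for dBFS); a spot-check against those definitions:

#include <cassert>
#include <cmath>
#include "webrtc/modules/audio_processing/agc/utility.h"

int main() {
  // Db2Loudness and Loudness2Db are mutually inverse scalings by 13.4/20.
  assert(std::fabs(Loudness2Db(Db2Loudness(-12.0)) - (-12.0)) < 1e-9);
  // Dbfs2Loudness applies the 90 dB reference before converting.
  assert(std::fabs(Dbfs2Loudness(-30.0) - Db2Loudness(60.0)) < 1e-9);
  return 0;
}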
*/ -#include "audio_buffer.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" + +#include "webrtc/common_audio/include/audio_util.h" +#include "webrtc/common_audio/resampler/push_sinc_resampler.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/channel_buffer.h" +#include "webrtc/modules/audio_processing/common.h" namespace webrtc { namespace { -enum { - kSamplesPer8kHzChannel = 80, - kSamplesPer16kHzChannel = 160, - kSamplesPer32kHzChannel = 320 -}; +const size_t kSamplesPer16kHzChannel = 160; +const size_t kSamplesPer32kHzChannel = 320; +const size_t kSamplesPer48kHzChannel = 480; -void StereoToMono(const WebRtc_Word16* left, const WebRtc_Word16* right, - WebRtc_Word16* out, int samples_per_channel) { - WebRtc_Word32 data_int32 = 0; - for (int i = 0; i < samples_per_channel; i++) { - data_int32 = (left[i] + right[i]) >> 1; - if (data_int32 > 32767) { - data_int32 = 32767; - } else if (data_int32 < -32768) { - data_int32 = -32768; - } - - out[i] = static_cast<WebRtc_Word16>(data_int32); +int KeyboardChannelIndex(const StreamConfig& stream_config) { + if (!stream_config.has_keyboard()) { + assert(false); + return -1; } + + return stream_config.num_channels(); } + +size_t NumBandsFromSamplesPerChannel(size_t num_frames) { + size_t num_bands = 1; + if (num_frames == kSamplesPer32kHzChannel || + num_frames == kSamplesPer48kHzChannel) { + num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel); + } + return num_bands; +} + } // namespace -struct AudioChannel { - AudioChannel() { - memset(data, 0, sizeof(data)); - } - - WebRtc_Word16 data[kSamplesPer32kHzChannel]; -}; - -struct SplitAudioChannel { - SplitAudioChannel() { - memset(low_pass_data, 0, sizeof(low_pass_data)); - memset(high_pass_data, 0, sizeof(high_pass_data)); - memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1)); - memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2)); - memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1)); - memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2)); - } - - WebRtc_Word16 low_pass_data[kSamplesPer16kHzChannel]; - WebRtc_Word16 high_pass_data[kSamplesPer16kHzChannel]; - - WebRtc_Word32 analysis_filter_state1[6]; - WebRtc_Word32 analysis_filter_state2[6]; - WebRtc_Word32 synthesis_filter_state1[6]; - WebRtc_Word32 synthesis_filter_state2[6]; -}; - -// TODO(andrew): check range of input parameters?
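NumBandsFromSamplesPerChannel() above encodes the frame-length-to-band-count rule; restated standalone for clarity (illustrative):

#include <cassert>
#include <cstddef>

// 32 kHz and 48 kHz 10 ms frames split into 16 kHz-wide bands; other
// frame lengths stay single-band.
size_t NumBands(size_t num_frames) {
  return (num_frames == 320 || num_frames == 480) ? num_frames / 160 : 1;
}

int main() {
  assert(NumBands(80) == 1);   // 10 ms at 8 kHz
  assert(NumBands(160) == 1);  // 10 ms at 16 kHz
  assert(NumBands(320) == 2);  // 10 ms at 32 kHz
  assert(NumBands(480) == 3);  // 10 ms at 48 kHz
  return 0;
}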
-AudioBuffer::AudioBuffer(int max_num_channels, - int samples_per_channel) - : max_num_channels_(max_num_channels), - num_channels_(0), - num_mixed_channels_(0), - num_mixed_low_pass_channels_(0), - samples_per_channel_(samples_per_channel), - samples_per_split_channel_(samples_per_channel), +AudioBuffer::AudioBuffer(size_t input_num_frames, + int num_input_channels, + size_t process_num_frames, + int num_process_channels, + size_t output_num_frames) + : input_num_frames_(input_num_frames), + num_input_channels_(num_input_channels), + proc_num_frames_(process_num_frames), + num_proc_channels_(num_process_channels), + output_num_frames_(output_num_frames), + num_channels_(num_process_channels), + num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)), + num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)), + mixed_low_pass_valid_(false), reference_copied_(false), activity_(AudioFrame::kVadUnknown), - data_(NULL), - channels_(NULL), - split_channels_(NULL), - mixed_low_pass_channels_(NULL), - low_pass_reference_channels_(NULL) { - if (max_num_channels_ > 1) { - channels_ = new AudioChannel[max_num_channels_]; - mixed_low_pass_channels_ = new AudioChannel[max_num_channels_]; - } - low_pass_reference_channels_ = new AudioChannel[max_num_channels_]; + keyboard_data_(NULL), + data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) { + assert(input_num_frames_ > 0); + assert(proc_num_frames_ > 0); + assert(output_num_frames_ > 0); + assert(num_input_channels_ > 0); + assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_); - if (samples_per_channel_ == kSamplesPer32kHzChannel) { - split_channels_ = new SplitAudioChannel[max_num_channels_]; - samples_per_split_channel_ = kSamplesPer16kHzChannel; + if (input_num_frames_ != proc_num_frames_ || + output_num_frames_ != proc_num_frames_) { + // Create an intermediate buffer for resampling. + process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_, + num_proc_channels_)); + + if (input_num_frames_ != proc_num_frames_) { + for (int i = 0; i < num_proc_channels_; ++i) { + input_resamplers_.push_back( + new PushSincResampler(input_num_frames_, + proc_num_frames_)); + } + } + + if (output_num_frames_ != proc_num_frames_) { + for (int i = 0; i < num_proc_channels_; ++i) { + output_resamplers_.push_back( + new PushSincResampler(proc_num_frames_, + output_num_frames_)); + } + } + } + + if (num_bands_ > 1) { + split_data_.reset(new IFChannelBuffer(proc_num_frames_, + num_proc_channels_, + num_bands_)); + splitting_filter_.reset(new SplittingFilter(num_proc_channels_, + num_bands_, + proc_num_frames_)); } } -AudioBuffer::~AudioBuffer() { - if (channels_ != NULL) { - delete [] channels_; +AudioBuffer::~AudioBuffer() {} + +void AudioBuffer::CopyFrom(const float* const* data, + const StreamConfig& stream_config) { + assert(stream_config.num_frames() == input_num_frames_); + assert(stream_config.num_channels() == num_input_channels_); + InitForNewData(); + // Initialized lazily because there's a different condition in + // DeinterleaveFrom.
+ const bool need_to_downmix = + num_input_channels_ > 1 && num_proc_channels_ == 1; + if (need_to_downmix && !input_buffer_) { + input_buffer_.reset( + new IFChannelBuffer(input_num_frames_, num_proc_channels_)); } - if (mixed_low_pass_channels_ != NULL) { - delete [] mixed_low_pass_channels_; + if (stream_config.has_keyboard()) { + keyboard_data_ = data[KeyboardChannelIndex(stream_config)]; } - if (low_pass_reference_channels_ != NULL) { - delete [] low_pass_reference_channels_; + // Downmix. + const float* const* data_ptr = data; + if (need_to_downmix) { + DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_, + input_buffer_->fbuf()->channels()[0]); + data_ptr = input_buffer_->fbuf_const()->channels(); } - if (split_channels_ != NULL) { - delete [] split_channels_; + // Resample. + if (input_num_frames_ != proc_num_frames_) { + for (int i = 0; i < num_proc_channels_; ++i) { + input_resamplers_[i]->Resample(data_ptr[i], + input_num_frames_, + process_buffer_->channels()[i], + proc_num_frames_); + } + data_ptr = process_buffer_->channels(); + } + + // Convert to the S16 range. + for (int i = 0; i < num_proc_channels_; ++i) { + FloatToFloatS16(data_ptr[i], + proc_num_frames_, + data_->fbuf()->channels()[i]); } } -WebRtc_Word16* AudioBuffer::data(int channel) const { - assert(channel >= 0 && channel < num_channels_); - if (data_ != NULL) { - return data_; +void AudioBuffer::CopyTo(const StreamConfig& stream_config, + float* const* data) { + assert(stream_config.num_frames() == output_num_frames_); + assert(stream_config.num_channels() == num_channels_); + + // Convert to the float range. + float* const* data_ptr = data; + if (output_num_frames_ != proc_num_frames_) { + // Convert to an intermediate buffer for subsequent resampling. + data_ptr = process_buffer_->channels(); + } + for (int i = 0; i < num_channels_; ++i) { + FloatS16ToFloat(data_->fbuf()->channels()[i], + proc_num_frames_, + data_ptr[i]); } - return channels_[channel].data; + // Resample. + if (output_num_frames_ != proc_num_frames_) { + for (int i = 0; i < num_channels_; ++i) { + output_resamplers_[i]->Resample(data_ptr[i], + proc_num_frames_, + data[i], + output_num_frames_); + } + } } -WebRtc_Word16* AudioBuffer::low_pass_split_data(int channel) const { - assert(channel >= 0 && channel < num_channels_); - if (split_channels_ == NULL) { - return data(channel); +void AudioBuffer::InitForNewData() { + keyboard_data_ = NULL; + mixed_low_pass_valid_ = false; + reference_copied_ = false; + activity_ = AudioFrame::kVadUnknown; + num_channels_ = num_proc_channels_; +} + +const int16_t* const* AudioBuffer::channels_const() const { + return data_->ibuf_const()->channels(); +} + +int16_t* const* AudioBuffer::channels() { + mixed_low_pass_valid_ = false; + return data_->ibuf()->channels(); +} + +const int16_t* const* AudioBuffer::split_bands_const(int channel) const { + return split_data_.get() ? + split_data_->ibuf_const()->bands(channel) : + data_->ibuf_const()->bands(channel); +} + +int16_t* const* AudioBuffer::split_bands(int channel) { + mixed_low_pass_valid_ = false; + return split_data_.get() ? + split_data_->ibuf()->bands(channel) : + data_->ibuf()->bands(channel); +} + +const int16_t* const* AudioBuffer::split_channels_const(Band band) const { + if (split_data_.get()) { + return split_data_->ibuf_const()->channels(band); + } else { + return band == kBand0To8kHz ?
data_->ibuf_const()->channels() : nullptr; + } +} + +int16_t* const* AudioBuffer::split_channels(Band band) { + mixed_low_pass_valid_ = false; + if (split_data_.get()) { + return split_data_->ibuf()->channels(band); + } else { + return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr; + } +} + +ChannelBuffer<int16_t>* AudioBuffer::data() { + mixed_low_pass_valid_ = false; + return data_->ibuf(); +} + +const ChannelBuffer<int16_t>* AudioBuffer::data() const { + return data_->ibuf_const(); +} + +ChannelBuffer<int16_t>* AudioBuffer::split_data() { + mixed_low_pass_valid_ = false; + return split_data_.get() ? split_data_->ibuf() : data_->ibuf(); +} + +const ChannelBuffer<int16_t>* AudioBuffer::split_data() const { + return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const(); +} + +const float* const* AudioBuffer::channels_const_f() const { + return data_->fbuf_const()->channels(); +} + +float* const* AudioBuffer::channels_f() { + mixed_low_pass_valid_ = false; + return data_->fbuf()->channels(); +} + +const float* const* AudioBuffer::split_bands_const_f(int channel) const { + return split_data_.get() ? + split_data_->fbuf_const()->bands(channel) : + data_->fbuf_const()->bands(channel); +} + +float* const* AudioBuffer::split_bands_f(int channel) { + mixed_low_pass_valid_ = false; + return split_data_.get() ? + split_data_->fbuf()->bands(channel) : + data_->fbuf()->bands(channel); +} + +const float* const* AudioBuffer::split_channels_const_f(Band band) const { + if (split_data_.get()) { + return split_data_->fbuf_const()->channels(band); + } else { + return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr; + } +} + +float* const* AudioBuffer::split_channels_f(Band band) { + mixed_low_pass_valid_ = false; + if (split_data_.get()) { + return split_data_->fbuf()->channels(band); + } else { + return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr; + } +} + +ChannelBuffer<float>* AudioBuffer::data_f() { + mixed_low_pass_valid_ = false; + return data_->fbuf(); +} + +const ChannelBuffer<float>* AudioBuffer::data_f() const { + return data_->fbuf_const(); +} + +ChannelBuffer<float>* AudioBuffer::split_data_f() { + mixed_low_pass_valid_ = false; + return split_data_.get() ? split_data_->fbuf() : data_->fbuf(); +} + +const ChannelBuffer<float>* AudioBuffer::split_data_f() const { + return split_data_.get() ?
split_data_->fbuf_const() : data_->fbuf_const(); +} + +const int16_t* AudioBuffer::mixed_low_pass_data() { + if (num_proc_channels_ == 1) { + return split_bands_const(0)[kBand0To8kHz]; } - return split_channels_[channel].low_pass_data; -} + if (!mixed_low_pass_valid_) { + if (!mixed_low_pass_channels_.get()) { + mixed_low_pass_channels_.reset( + new ChannelBuffer<int16_t>(num_split_frames_, 1)); + } -WebRtc_Word16* AudioBuffer::high_pass_split_data(int channel) const { - assert(channel >= 0 && channel < num_channels_); - if (split_channels_ == NULL) { - return NULL; + DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz), + num_split_frames_, num_channels_, + mixed_low_pass_channels_->channels()[0]); + mixed_low_pass_valid_ = true; } - - return split_channels_[channel].high_pass_data; + return mixed_low_pass_channels_->channels()[0]; } -WebRtc_Word16* AudioBuffer::mixed_low_pass_data(int channel) const { - assert(channel >= 0 && channel < num_mixed_low_pass_channels_); - - return mixed_low_pass_channels_[channel].data; -} - -WebRtc_Word16* AudioBuffer::low_pass_reference(int channel) const { - assert(channel >= 0 && channel < num_channels_); +const int16_t* AudioBuffer::low_pass_reference(int channel) const { if (!reference_copied_) { return NULL; } - return low_pass_reference_channels_[channel].data; + return low_pass_reference_channels_->channels()[channel]; } -WebRtc_Word32* AudioBuffer::analysis_filter_state1(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].analysis_filter_state1; -} - -WebRtc_Word32* AudioBuffer::analysis_filter_state2(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].analysis_filter_state2; -} - -WebRtc_Word32* AudioBuffer::synthesis_filter_state1(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].synthesis_filter_state1; -} - -WebRtc_Word32* AudioBuffer::synthesis_filter_state2(int channel) const { - assert(channel >= 0 && channel < num_channels_); - return split_channels_[channel].synthesis_filter_state2; +const float* AudioBuffer::keyboard_data() const { + return keyboard_data_; } void AudioBuffer::set_activity(AudioFrame::VADActivity activity) { activity_ = activity; } -AudioFrame::VADActivity AudioBuffer::activity() { +AudioFrame::VADActivity AudioBuffer::activity() const { return activity_; } @@ -182,107 +340,123 @@ int AudioBuffer::num_channels() const { return num_channels_; } -int AudioBuffer::samples_per_channel() const { - return samples_per_channel_; +void AudioBuffer::set_num_channels(int num_channels) { + num_channels_ = num_channels; } -int AudioBuffer::samples_per_split_channel() const { - return samples_per_split_channel_; +size_t AudioBuffer::num_frames() const { + return proc_num_frames_; } -// TODO(andrew): Do deinterleaving and mixing in one step? +size_t AudioBuffer::num_frames_per_band() const { + return num_split_frames_; +} + +size_t AudioBuffer::num_keyboard_frames() const { + // We don't resample the keyboard channel. + return input_num_frames_; +} + +size_t AudioBuffer::num_bands() const { + return num_bands_; +} + +// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) { - assert(frame->_audioChannel <= max_num_channels_); - assert(frame->_payloadDataLengthInSamples == samples_per_channel_); + assert(frame->num_channels_ == num_input_channels_); + assert(frame->samples_per_channel_ == input_num_frames_); + InitForNewData(); + // Initialized lazily because there's a different condition in CopyFrom. + if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) { + input_buffer_.reset( + new IFChannelBuffer(input_num_frames_, num_proc_channels_)); + } + activity_ = frame->vad_activity_; - num_channels_ = frame->_audioChannel; - num_mixed_channels_ = 0; - num_mixed_low_pass_channels_ = 0; - reference_copied_ = false; - activity_ = frame->_vadActivity; + int16_t* const* deinterleaved; + if (input_num_frames_ == proc_num_frames_) { + deinterleaved = data_->ibuf()->channels(); + } else { + deinterleaved = input_buffer_->ibuf()->channels(); + } + if (num_proc_channels_ == 1) { + // Downmix and deinterleave simultaneously. + DownmixInterleavedToMono(frame->data_, input_num_frames_, + num_input_channels_, deinterleaved[0]); + } else { + assert(num_proc_channels_ == num_input_channels_); + Deinterleave(frame->data_, + input_num_frames_, + num_proc_channels_, + deinterleaved); + } - if (num_channels_ == 1) { - // We can get away with a pointer assignment in this case. - data_ = frame->_payloadData; + // Resample. + if (input_num_frames_ != proc_num_frames_) { + for (int i = 0; i < num_proc_channels_; ++i) { + input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i], + input_num_frames_, + data_->fbuf()->channels()[i], + proc_num_frames_); + } + } +} + +void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) { + frame->vad_activity_ = activity_; + if (!data_changed) { return; } - WebRtc_Word16* interleaved = frame->_payloadData; - for (int i = 0; i < num_channels_; i++) { - WebRtc_Word16* deinterleaved = channels_[i].data; - int interleaved_idx = i; - for (int j = 0; j < samples_per_channel_; j++) { - deinterleaved[j] = interleaved[interleaved_idx]; - interleaved_idx += num_channels_; + assert(frame->num_channels_ == num_channels_ || num_channels_ == 1); + assert(frame->samples_per_channel_ == output_num_frames_); + + // Resample if necessary. + IFChannelBuffer* data_ptr = data_.get(); + if (proc_num_frames_ != output_num_frames_) { + if (!output_buffer_) { + output_buffer_.reset( + new IFChannelBuffer(output_num_frames_, num_channels_)); } - } -} - -void AudioBuffer::InterleaveTo(AudioFrame* frame) const { - assert(frame->_audioChannel == num_channels_); - assert(frame->_payloadDataLengthInSamples == samples_per_channel_); - frame->_vadActivity = activity_; - - if (num_channels_ == 1) { - if (num_mixed_channels_ == 1) { - memcpy(frame->_payloadData, - channels_[0].data, - sizeof(WebRtc_Word16) * samples_per_channel_); - } else { - // These should point to the same buffer in this case. 
- assert(data_ == frame->_payloadData); + for (int i = 0; i < num_channels_; ++i) { + output_resamplers_[i]->Resample( + data_->fbuf()->channels()[i], proc_num_frames_, + output_buffer_->fbuf()->channels()[i], output_num_frames_); } - - return; + data_ptr = output_buffer_.get(); } - WebRtc_Word16* interleaved = frame->_payloadData; - for (int i = 0; i < num_channels_; i++) { - WebRtc_Word16* deinterleaved = channels_[i].data; - int interleaved_idx = i; - for (int j = 0; j < samples_per_channel_; j++) { - interleaved[interleaved_idx] = deinterleaved[j]; - interleaved_idx += num_channels_; - } + if (frame->num_channels_ == num_channels_) { + Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_, + frame->data_); + } else { + UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_, + frame->num_channels_, frame->data_); } } -// TODO(andrew): would be good to support the no-mix case with pointer -// assignment. -// TODO(andrew): handle mixing to multiple channels? -void AudioBuffer::Mix(int num_mixed_channels) { - // We currently only support the stereo to mono case. - assert(num_channels_ == 2); - assert(num_mixed_channels == 1); - - StereoToMono(channels_[0].data, - channels_[1].data, - channels_[0].data, - samples_per_channel_); - - num_channels_ = num_mixed_channels; - num_mixed_channels_ = num_mixed_channels; -} - -void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) { - // We currently only support the stereo to mono case. - assert(num_channels_ == 2); - assert(num_mixed_channels == 1); - - StereoToMono(low_pass_split_data(0), - low_pass_split_data(1), - mixed_low_pass_channels_[0].data, - samples_per_split_channel_); - - num_mixed_low_pass_channels_ = num_mixed_channels; -} - void AudioBuffer::CopyLowPassToReference() { reference_copied_ = true; - for (int i = 0; i < num_channels_; i++) { - memcpy(low_pass_reference_channels_[i].data, - low_pass_split_data(i), - sizeof(WebRtc_Word16) * samples_per_split_channel_); + if (!low_pass_reference_channels_.get() || + low_pass_reference_channels_->num_channels() != num_channels_) { + low_pass_reference_channels_.reset( + new ChannelBuffer<int16_t>(num_split_frames_, + num_proc_channels_)); + } + for (int i = 0; i < num_proc_channels_; i++) { + memcpy(low_pass_reference_channels_->channels()[i], + split_bands_const(i)[kBand0To8kHz], + low_pass_reference_channels_->num_frames_per_band() * + sizeof(split_bands_const(i)[kBand0To8kHz][0])); } } + +void AudioBuffer::SplitIntoFrequencyBands() { + splitting_filter_->Analysis(data_.get(), split_data_.get()); +} + +void AudioBuffer::MergeFrequencyBands() { + splitting_filter_->Synthesis(split_data_.get(), data_.get()); +} + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/audio_buffer.h b/webrtc/modules/audio_processing/audio_buffer.h index 1bdd3c7..f82ab61 100644 --- a/webrtc/modules/audio_processing/audio_buffer.h +++ b/webrtc/modules/audio_processing/audio_buffer.h @@ -8,64 +8,156 @@ * be found in the AUTHORS file in the root of the source tree.
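The hand-rolled loops removed above implement the usual interleaving index map (sample j of channel ch lives at j * num_channels + ch); the patch delegates this to the shared Interleave()/Deinterleave() helpers. A standalone restatement of the index math (illustrative):

#include <cassert>
#include <cstdint>

void DeinterleaveCopy(const int16_t* interleaved, int num_channels,
                      int samples_per_channel, int16_t* const* deinterleaved) {
  for (int ch = 0; ch < num_channels; ++ch) {
    for (int j = 0; j < samples_per_channel; ++j) {
      deinterleaved[ch][j] = interleaved[j * num_channels + ch];
    }
  }
}

int main() {
  const int16_t interleaved[6] = {1, 100, 2, 200, 3, 300};  // L/R pairs
  int16_t left[3], right[3];
  int16_t* chans[2] = {left, right};
  DeinterleaveCopy(interleaved, 2, 3, chans);
  assert(left[2] == 3 && right[0] == 100);
  return 0;
}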
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ -#include "module_common_types.h" -#include "typedefs.h" +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/channel_buffer.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/splitting_filter.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/scoped_vector.h" +#include "webrtc/typedefs.h" namespace webrtc { -struct AudioChannel; -struct SplitAudioChannel; +class PushSincResampler; +class IFChannelBuffer; + +enum Band { + kBand0To8kHz = 0, + kBand8To16kHz = 1, + kBand16To24kHz = 2 +}; class AudioBuffer { public: - AudioBuffer(int max_num_channels, int samples_per_channel); + // TODO(ajm): Switch to take ChannelLayouts. + AudioBuffer(size_t input_num_frames, + int num_input_channels, + size_t process_num_frames, + int num_process_channels, + size_t output_num_frames); virtual ~AudioBuffer(); int num_channels() const; - int samples_per_channel() const; - int samples_per_split_channel() const; + void set_num_channels(int num_channels); + size_t num_frames() const; + size_t num_frames_per_band() const; + size_t num_keyboard_frames() const; + size_t num_bands() const; - WebRtc_Word16* data(int channel) const; - WebRtc_Word16* low_pass_split_data(int channel) const; - WebRtc_Word16* high_pass_split_data(int channel) const; - WebRtc_Word16* mixed_low_pass_data(int channel) const; - WebRtc_Word16* low_pass_reference(int channel) const; + // Returns a pointer array to the full-band channels. + // Usage: + // channels()[channel][sample]. + // Where: + // 0 <= channel < |num_proc_channels_| + // 0 <= sample < |proc_num_frames_| + int16_t* const* channels(); + const int16_t* const* channels_const() const; + float* const* channels_f(); + const float* const* channels_const_f() const; - WebRtc_Word32* analysis_filter_state1(int channel) const; - WebRtc_Word32* analysis_filter_state2(int channel) const; - WebRtc_Word32* synthesis_filter_state1(int channel) const; - WebRtc_Word32* synthesis_filter_state2(int channel) const; + // Returns a pointer array to the bands for a specific channel. + // Usage: + // split_bands(channel)[band][sample]. + // Where: + // 0 <= channel < |num_proc_channels_| + // 0 <= band < |num_bands_| + // 0 <= sample < |num_split_frames_| + int16_t* const* split_bands(int channel); + const int16_t* const* split_bands_const(int channel) const; + float* const* split_bands_f(int channel); + const float* const* split_bands_const_f(int channel) const; + + // Returns a pointer array to the channels for a specific band. + // Usage: + // split_channels(band)[channel][sample]. + // Where: + // 0 <= band < |num_bands_| + // 0 <= channel < |num_proc_channels_| + // 0 <= sample < |num_split_frames_| + int16_t* const* split_channels(Band band); + const int16_t* const* split_channels_const(Band band) const; + float* const* split_channels_f(Band band); + const float* const* split_channels_const_f(Band band) const; + + // Returns a pointer to the ChannelBuffer that encapsulates the full-band + // data. + ChannelBuffer<int16_t>* data(); + const ChannelBuffer<int16_t>* data() const; + ChannelBuffer<float>* data_f(); + const ChannelBuffer<float>* data_f() const; + + // Returns a pointer to the ChannelBuffer that encapsulates the split data.
+ ChannelBuffer<int16_t>* split_data(); + const ChannelBuffer<int16_t>* split_data() const; + ChannelBuffer<float>* split_data_f(); + const ChannelBuffer<float>* split_data_f() const; + + // Returns a pointer to the low-pass data downmixed to mono. If this data + // isn't already available it re-calculates it. + const int16_t* mixed_low_pass_data(); + const int16_t* low_pass_reference(int channel) const; + + const float* keyboard_data() const; void set_activity(AudioFrame::VADActivity activity); - AudioFrame::VADActivity activity(); + AudioFrame::VADActivity activity() const; + // Use for int16 interleaved data. void DeinterleaveFrom(AudioFrame* audioFrame); - void InterleaveTo(AudioFrame* audioFrame) const; - void Mix(int num_mixed_channels); - void CopyAndMixLowPass(int num_mixed_channels); + // If |data_changed| is false, only the non-audio data members will be copied + // to |frame|. + void InterleaveTo(AudioFrame* frame, bool data_changed); + + // Use for float deinterleaved data. + void CopyFrom(const float* const* data, const StreamConfig& stream_config); + void CopyTo(const StreamConfig& stream_config, float* const* data); void CopyLowPassToReference(); + // Splits the signal into different bands. + void SplitIntoFrequencyBands(); + // Recombine the different bands into one signal. + void MergeFrequencyBands(); + private: - const int max_num_channels_; + // Called from DeinterleaveFrom() and CopyFrom(). + void InitForNewData(); + + // The audio is passed into DeinterleaveFrom() or CopyFrom() with input + // format (samples per channel and number of channels). + const size_t input_num_frames_; + const int num_input_channels_; + // The audio is stored by DeinterleaveFrom() or CopyFrom() with processing + // format. + const size_t proc_num_frames_; + const int num_proc_channels_; + // The audio is returned by InterleaveTo() and CopyTo() with output samples + // per channels and the current number of channels. This last one can be + // changed at any time using set_num_channels(). + const size_t output_num_frames_; int num_channels_; - int num_mixed_channels_; - int num_mixed_low_pass_channels_; - const int samples_per_channel_; - int samples_per_split_channel_; + + size_t num_bands_; + size_t num_split_frames_; + bool mixed_low_pass_valid_; bool reference_copied_; AudioFrame::VADActivity activity_; - WebRtc_Word16* data_; - // TODO(andrew): use vectors here. - AudioChannel* channels_; - SplitAudioChannel* split_channels_; - // TODO(andrew): improve this, we don't need the full 32 kHz space here. - AudioChannel* mixed_low_pass_channels_; - AudioChannel* low_pass_reference_channels_; + const float* keyboard_data_; + rtc::scoped_ptr<IFChannelBuffer> data_; + rtc::scoped_ptr<IFChannelBuffer> split_data_; + rtc::scoped_ptr<SplittingFilter> splitting_filter_; + rtc::scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_; + rtc::scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_; + rtc::scoped_ptr<IFChannelBuffer> input_buffer_; + rtc::scoped_ptr<IFChannelBuffer> output_buffer_; + rtc::scoped_ptr<ChannelBuffer<float> > process_buffer_; + ScopedVector<PushSincResampler> input_resamplers_; + ScopedVector<PushSincResampler> output_resamplers_; }; + } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_ diff --git a/webrtc/modules/audio_processing/audio_processing_impl.cc b/webrtc/modules/audio_processing/audio_processing_impl.cc index ed81f3d..f3ee0a3 100644 --- a/webrtc/modules/audio_processing/audio_processing_impl.cc +++ b/webrtc/modules/audio_processing/audio_processing_impl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
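Taken together, the accessor conventions documented in this header can be exercised like so (hedged sketch, not from the patch; the rates are chosen for illustration):

#include "webrtc/modules/audio_processing/audio_buffer.h"

// Stereo 48 kHz capture, mono 32 kHz processing, 48 kHz output; frame
// counts are samples per channel in one 10 ms chunk.
void Sketch() {
  webrtc::AudioBuffer audio(480, 2,   // input: 10 ms at 48 kHz, stereo
                            320, 1,   // processing: 10 ms at 32 kHz, mono
                            480);     // output: back at 48 kHz
  audio.SplitIntoFrequencyBands();
  // split_bands(channel)[band][sample]: zero the 8-16 kHz band of channel 0.
  int16_t* high = audio.split_bands(0)[webrtc::kBand8To16kHz];
  for (size_t j = 0; j < audio.num_frames_per_band(); ++j) high[j] = 0;
  audio.MergeFrequencyBands();
}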
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,41 +8,168 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "audio_processing_impl.h" +#include "webrtc/modules/audio_processing/audio_processing_impl.h" #include <assert.h> +#include <algorithm> -#include "audio_buffer.h" -#include "critical_section_wrapper.h" -#include "echo_cancellation_impl.h" -#include "echo_control_mobile_impl.h" -#ifndef NDEBUG -#include "file_wrapper.h" -#endif -#include "high_pass_filter_impl.h" -#include "gain_control_impl.h" -#include "level_estimator_impl.h" -#include "module_common_types.h" -#include "noise_suppression_impl.h" -#include "processing_component.h" -#include "splitting_filter.h" -#include "voice_detection_impl.h" -#ifndef NDEBUG -#ifdef WEBRTC_ANDROID -#include "external/webrtc/src/modules/audio_processing/main/source/debug.pb.h" +#include "webrtc/base/checks.h" +#include "webrtc/base/platform_file.h" +#include "webrtc/common_audio/audio_converter.h" +#include "webrtc/common_audio/channel_buffer.h" +#include "webrtc/common_audio/include/audio_util.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +extern "C" { +#include "webrtc/modules/audio_processing/aec/aec_core.h" +} +#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" +#include "webrtc/modules/audio_processing/common.h" +#include "webrtc/modules/audio_processing/echo_cancellation_impl.h" +#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" +#include "webrtc/modules/audio_processing/gain_control_impl.h" +#include "webrtc/modules/audio_processing/high_pass_filter_impl.h" +#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h" +#include "webrtc/modules/audio_processing/level_estimator_impl.h" +#include "webrtc/modules/audio_processing/noise_suppression_impl.h" +#include "webrtc/modules/audio_processing/processing_component.h" +#include "webrtc/modules/audio_processing/transient/transient_suppressor.h" +#include "webrtc/modules/audio_processing/voice_detection_impl.h" +#include "webrtc/modules/interface/module_common_types.h" +#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" +#include "webrtc/system_wrappers/interface/file_wrapper.h" +#include "webrtc/system_wrappers/interface/logging.h" +#include "webrtc/system_wrappers/interface/metrics.h" + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP +// Files generated at build-time by the protobuf compiler.
+#ifdef WEBRTC_ANDROID_PLATFORM_BUILD +#include "external/webrtc/webrtc/modules/audio_processing/debug.pb.h" #else #include "webrtc/audio_processing/debug.pb.h" #endif -#endif /* NDEBUG */ +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP + +#define RETURN_ON_ERR(expr) \ + do { \ + int err = (expr); \ + if (err != kNoError) { \ + return err; \ + } \ + } while (0) namespace webrtc { -AudioProcessing* AudioProcessing::Create(int id) { - /*WEBRTC_TRACE(webrtc::kTraceModuleCall, - webrtc::kTraceAudioProcessing, - id, - "AudioProcessing::Create()");*/ +namespace { - AudioProcessingImpl* apm = new AudioProcessingImpl(id); +static bool LayoutHasKeyboard(AudioProcessing::ChannelLayout layout) { + switch (layout) { + case AudioProcessing::kMono: + case AudioProcessing::kStereo: + return false; + case AudioProcessing::kMonoAndKeyboard: + case AudioProcessing::kStereoAndKeyboard: + return true; + } + + assert(false); + return false; +} + +} // namespace + +// Throughout webrtc, it's assumed that success is represented by zero. +static_assert(AudioProcessing::kNoError == 0, "kNoError must be zero"); + +// This class has two main functionalities: +// +// 1) It is returned instead of the real GainControl after the new AGC has been +// enabled in order to prevent an outside user from overriding compression +// settings. It doesn't do anything in its implementation, except for +// delegating the const methods and Enable calls to the real GainControl, so +// AGC can still be disabled. +// +// 2) It is injected into AgcManagerDirect and implements volume callbacks for +// getting and setting the volume level. It just caches this value to be used +// in VoiceEngine later. +class GainControlForNewAgc : public GainControl, public VolumeCallbacks { + public: + explicit GainControlForNewAgc(GainControlImpl* gain_control) + : real_gain_control_(gain_control), volume_(0) {} + + // GainControl implementation. + int Enable(bool enable) override { + return real_gain_control_->Enable(enable); + } + bool is_enabled() const override { return real_gain_control_->is_enabled(); } + int set_stream_analog_level(int level) override { + volume_ = level; + return AudioProcessing::kNoError; + } + int stream_analog_level() override { return volume_; } + int set_mode(Mode mode) override { return AudioProcessing::kNoError; } + Mode mode() const override { return GainControl::kAdaptiveAnalog; } + int set_target_level_dbfs(int level) override { + return AudioProcessing::kNoError; + } + int target_level_dbfs() const override { + return real_gain_control_->target_level_dbfs(); + } + int set_compression_gain_db(int gain) override { + return AudioProcessing::kNoError; + } + int compression_gain_db() const override { + return real_gain_control_->compression_gain_db(); + } + int enable_limiter(bool enable) override { return AudioProcessing::kNoError; } + bool is_limiter_enabled() const override { + return real_gain_control_->is_limiter_enabled(); + } + int set_analog_level_limits(int minimum, int maximum) override { + return AudioProcessing::kNoError; + } + int analog_level_minimum() const override { + return real_gain_control_->analog_level_minimum(); + } + int analog_level_maximum() const override { + return real_gain_control_->analog_level_maximum(); + } + bool stream_is_saturated() const override { + return real_gain_control_->stream_is_saturated(); + } + + // VolumeCallbacks implementation. 
+ void SetMicVolume(int volume) override { volume_ = volume; } + int GetMicVolume() override { return volume_; } + + private: + GainControl* real_gain_control_; + int volume_; +}; + +const int AudioProcessing::kNativeSampleRatesHz[] = { + AudioProcessing::kSampleRate8kHz, + AudioProcessing::kSampleRate16kHz, + AudioProcessing::kSampleRate32kHz, + AudioProcessing::kSampleRate48kHz}; +const size_t AudioProcessing::kNumNativeSampleRates = + arraysize(AudioProcessing::kNativeSampleRatesHz); +const int AudioProcessing::kMaxNativeSampleRateHz = AudioProcessing:: + kNativeSampleRatesHz[AudioProcessing::kNumNativeSampleRates - 1]; +const int AudioProcessing::kMaxAECMSampleRateHz = kSampleRate16kHz; + +AudioProcessing* AudioProcessing::Create() { + Config config; + return Create(config, nullptr); +} + +AudioProcessing* AudioProcessing::Create(const Config& config) { + return Create(config, nullptr); +} + +AudioProcessing* AudioProcessing::Create(const Config& config, + Beamformer<float>* beamformer) { + AudioProcessingImpl* apm = new AudioProcessingImpl(config, beamformer); if (apm->Initialize() != kNoError) { delete apm; apm = NULL; @@ -51,131 +178,186 @@ AudioProcessing* AudioProcessing::Create(int id) { return apm; } -void AudioProcessing::Destroy(AudioProcessing* apm) { - delete static_cast<AudioProcessingImpl*>(apm); -} +AudioProcessingImpl::AudioProcessingImpl(const Config& config) + : AudioProcessingImpl(config, nullptr) {} -AudioProcessingImpl::AudioProcessingImpl(int id) - : id_(id), - echo_cancellation_(NULL), +AudioProcessingImpl::AudioProcessingImpl(const Config& config, + Beamformer<float>* beamformer) + : echo_cancellation_(NULL), echo_control_mobile_(NULL), gain_control_(NULL), high_pass_filter_(NULL), level_estimator_(NULL), noise_suppression_(NULL), voice_detection_(NULL), -#ifndef NDEBUG + crit_(CriticalSectionWrapper::CreateCriticalSection()), +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP debug_file_(FileWrapper::Create()), event_msg_(new audioproc::Event()), #endif - crit_(CriticalSectionWrapper::CreateCriticalSection()), - render_audio_(NULL), - capture_audio_(NULL), - sample_rate_hz_(kSampleRate16kHz), - split_sample_rate_hz_(kSampleRate16kHz), - samples_per_channel_(sample_rate_hz_ / 100), + api_format_({{{kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}, + {kSampleRate16kHz, 1, false}}}), + fwd_proc_format_(kSampleRate16kHz), + rev_proc_format_(kSampleRate16kHz, 1), + split_rate_(kSampleRate16kHz), stream_delay_ms_(0), + delay_offset_ms_(0), was_stream_delay_set_(false), - num_reverse_channels_(1), - num_input_channels_(1), - num_output_channels_(1) { - - echo_cancellation_ = new EchoCancellationImpl(this); + last_stream_delay_ms_(0), + last_aec_system_delay_ms_(0), + stream_delay_jumps_(-1), + aec_system_delay_jumps_(-1), + output_will_be_muted_(false), + key_pressed_(false), +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) + use_new_agc_(false), +#else + use_new_agc_(config.Get<ExperimentalAgc>().enabled), +#endif + agc_startup_min_volume_(config.Get<ExperimentalAgc>().startup_min_volume), +#if defined(WEBRTC_ANDROID) || defined(WEBRTC_IOS) + transient_suppressor_enabled_(false), +#else + transient_suppressor_enabled_(config.Get<ExperimentalNs>().enabled), +#endif + beamformer_enabled_(config.Get<Beamforming>().enabled), + beamformer_(beamformer), + array_geometry_(config.Get<Beamforming>().array_geometry), + intelligibility_enabled_(config.Get<Intelligibility>().enabled) { + echo_cancellation_ = new EchoCancellationImpl(this, crit_); component_list_.push_back(echo_cancellation_); - echo_control_mobile_ = new EchoControlMobileImpl(this); + echo_control_mobile_ =
new EchoControlMobileImpl(this, crit_); component_list_.push_back(echo_control_mobile_); - gain_control_ = new GainControlImpl(this); + gain_control_ = new GainControlImpl(this, crit_); component_list_.push_back(gain_control_); - high_pass_filter_ = new HighPassFilterImpl(this); + high_pass_filter_ = new HighPassFilterImpl(this, crit_); component_list_.push_back(high_pass_filter_); - level_estimator_ = new LevelEstimatorImpl(this); + level_estimator_ = new LevelEstimatorImpl(this, crit_); component_list_.push_back(level_estimator_); - noise_suppression_ = new NoiseSuppressionImpl(this); + noise_suppression_ = new NoiseSuppressionImpl(this, crit_); component_list_.push_back(noise_suppression_); - voice_detection_ = new VoiceDetectionImpl(this); + voice_detection_ = new VoiceDetectionImpl(this, crit_); component_list_.push_back(voice_detection_); + + gain_control_for_new_agc_.reset(new GainControlForNewAgc(gain_control_)); + + SetExtraOptions(config); } AudioProcessingImpl::~AudioProcessingImpl() { - while (!component_list_.empty()) { - ProcessingComponent* component = component_list_.front(); - component->Destroy(); - delete component; - component_list_.pop_front(); - } + { + CriticalSectionScoped crit_scoped(crit_); + // Depends on gain_control_ and gain_control_for_new_agc_. + agc_manager_.reset(); + // Depends on gain_control_. + gain_control_for_new_agc_.reset(); + while (!component_list_.empty()) { + ProcessingComponent* component = component_list_.front(); + component->Destroy(); + delete component; + component_list_.pop_front(); + } -#ifndef NDEBUG - if (debug_file_->Open()) { - debug_file_->CloseFile(); - } - delete debug_file_; - debug_file_ = NULL; - - delete event_msg_; - event_msg_ = NULL; +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + if (debug_file_->Open()) { + debug_file_->CloseFile(); + } #endif - + } delete crit_; crit_ = NULL; - - if (render_audio_) { - delete render_audio_; - render_audio_ = NULL; - } - - if (capture_audio_) { - delete capture_audio_; - capture_audio_ = NULL; - } -} - -CriticalSectionWrapper* AudioProcessingImpl::crit() const { - return crit_; -} - -int AudioProcessingImpl::split_sample_rate_hz() const { - return split_sample_rate_hz_; } int AudioProcessingImpl::Initialize() { - CriticalSectionScoped crit_scoped(*crit_); + CriticalSectionScoped crit_scoped(crit_); return InitializeLocked(); } +int AudioProcessingImpl::Initialize(int input_sample_rate_hz, + int output_sample_rate_hz, + int reverse_sample_rate_hz, + ChannelLayout input_layout, + ChannelLayout output_layout, + ChannelLayout reverse_layout) { + const ProcessingConfig processing_config = { + {{input_sample_rate_hz, + ChannelsFromLayout(input_layout), + LayoutHasKeyboard(input_layout)}, + {output_sample_rate_hz, + ChannelsFromLayout(output_layout), + LayoutHasKeyboard(output_layout)}, + {reverse_sample_rate_hz, + ChannelsFromLayout(reverse_layout), + LayoutHasKeyboard(reverse_layout)}, + {reverse_sample_rate_hz, + ChannelsFromLayout(reverse_layout), + LayoutHasKeyboard(reverse_layout)}}}; + + return Initialize(processing_config); +} + +int AudioProcessingImpl::Initialize(const ProcessingConfig& processing_config) { + CriticalSectionScoped crit_scoped(crit_); + return InitializeLocked(processing_config); +} + int AudioProcessingImpl::InitializeLocked() { - if (render_audio_ != NULL) { - delete render_audio_; - render_audio_ = NULL; + const int fwd_audio_buffer_channels = + beamformer_enabled_ ? 
api_format_.input_stream().num_channels() + : api_format_.output_stream().num_channels(); + const int rev_audio_buffer_out_num_frames = + api_format_.reverse_output_stream().num_frames() == 0 + ? rev_proc_format_.num_frames() + : api_format_.reverse_output_stream().num_frames(); + if (api_format_.reverse_input_stream().num_channels() > 0) { + render_audio_.reset(new AudioBuffer( + api_format_.reverse_input_stream().num_frames(), + api_format_.reverse_input_stream().num_channels(), + rev_proc_format_.num_frames(), rev_proc_format_.num_channels(), + rev_audio_buffer_out_num_frames)); + if (rev_conversion_needed()) { + render_converter_ = AudioConverter::Create( + api_format_.reverse_input_stream().num_channels(), + api_format_.reverse_input_stream().num_frames(), + api_format_.reverse_output_stream().num_channels(), + api_format_.reverse_output_stream().num_frames()); + } else { + render_converter_.reset(nullptr); + } + } else { + render_audio_.reset(nullptr); + render_converter_.reset(nullptr); } - - if (capture_audio_ != NULL) { - delete capture_audio_; - capture_audio_ = NULL; - } - - render_audio_ = new AudioBuffer(num_reverse_channels_, - samples_per_channel_); - capture_audio_ = new AudioBuffer(num_input_channels_, - samples_per_channel_); - - was_stream_delay_set_ = false; + capture_audio_.reset(new AudioBuffer( + api_format_.input_stream().num_frames(), + api_format_.input_stream().num_channels(), fwd_proc_format_.num_frames(), + fwd_audio_buffer_channels, api_format_.output_stream().num_frames())); // Initialize all components. - std::list<ProcessingComponent*>::iterator it; - for (it = component_list_.begin(); it != component_list_.end(); it++) { - int err = (*it)->Initialize(); + for (auto item : component_list_) { + int err = item->Initialize(); if (err != kNoError) { return err; } } -#ifndef NDEBUG + InitializeExperimentalAgc(); + + InitializeTransient(); + + InitializeBeamformer(); + + InitializeIntelligibility(); + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { int err = WriteInitMessage(); if (err != kNoError) { @@ -187,294 +369,514 @@ int AudioProcessingImpl::InitializeLocked() { return kNoError; } -int AudioProcessingImpl::set_sample_rate_hz(int rate) { - CriticalSectionScoped crit_scoped(*crit_); - if (rate != kSampleRate8kHz && - rate != kSampleRate16kHz && - rate != kSampleRate32kHz) { - return kBadParameterError; +int AudioProcessingImpl::InitializeLocked(const ProcessingConfig& config) { + for (const auto& stream : config.streams) { + if (stream.num_channels() < 0) { + return kBadNumberChannelsError; + } + if (stream.num_channels() > 0 && stream.sample_rate_hz() <= 0) { + return kBadSampleRateError; + } } - sample_rate_hz_ = rate; - samples_per_channel_ = rate / 100; + const int num_in_channels = config.input_stream().num_channels(); + const int num_out_channels = config.output_stream().num_channels(); - if (sample_rate_hz_ == kSampleRate32kHz) { - split_sample_rate_hz_ = kSampleRate16kHz; - } else { - split_sample_rate_hz_ = sample_rate_hz_; - } - - return InitializeLocked(); -} - -int AudioProcessingImpl::sample_rate_hz() const { - return sample_rate_hz_; -} - -int AudioProcessingImpl::set_num_reverse_channels(int channels) { - CriticalSectionScoped crit_scoped(*crit_); - // Only stereo supported currently.
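For reference, the forward-rate selection implemented by InitializeLocked(const ProcessingConfig&) just below reduces to this rule (standalone restatement, illustrative only):

#include <algorithm>
#include <cassert>

// Pick the lowest native rate >= min(input, output), capped at 16 kHz
// while AECM is enabled.
int ChooseFwdProcRate(int in_hz, int out_hz, bool aecm_enabled) {
  static const int kNativeRates[] = {8000, 16000, 32000, 48000};
  const int min_rate = std::min(in_hz, out_hz);
  int rate = kNativeRates[3];
  for (int candidate : kNativeRates) {
    if (candidate >= min_rate) { rate = candidate; break; }
  }
  if (aecm_enabled && min_rate > 16000) rate = 16000;
  return rate;
}

int main() {
  assert(ChooseFwdProcRate(44100, 48000, false) == 48000);
  assert(ChooseFwdProcRate(48000, 16000, false) == 16000);
  assert(ChooseFwdProcRate(48000, 48000, true) == 16000);
  return 0;
}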
- if (channels > 2 || channels < 1) { - return kBadParameterError; - } - - num_reverse_channels_ = channels; - - return InitializeLocked(); -} - -int AudioProcessingImpl::num_reverse_channels() const { - return num_reverse_channels_; -} - -int AudioProcessingImpl::set_num_channels( - int input_channels, - int output_channels) { - CriticalSectionScoped crit_scoped(*crit_); - if (output_channels > input_channels) { - return kBadParameterError; - } - - // Only stereo supported currently. - if (input_channels > 2 || input_channels < 1) { - return kBadParameterError; - } - - if (output_channels > 2 || output_channels < 1) { - return kBadParameterError; - } - - num_input_channels_ = input_channels; - num_output_channels_ = output_channels; - - return InitializeLocked(); -} - -int AudioProcessingImpl::num_input_channels() const { - return num_input_channels_; -} - -int AudioProcessingImpl::num_output_channels() const { - return num_output_channels_; -} - -int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { - CriticalSectionScoped crit_scoped(*crit_); - int err = kNoError; - - if (frame == NULL) { - return kNullPointerError; - } - - if (frame->_frequencyInHz != sample_rate_hz_) { - return kBadSampleRateError; - } - - if (frame->_audioChannel != num_input_channels_) { + // Need at least one input channel. + // Need either one output channel or as many outputs as there are inputs. + if (num_in_channels == 0 || + !(num_out_channels == 1 || num_out_channels == num_in_channels)) { return kBadNumberChannelsError; } - if (frame->_payloadDataLengthInSamples != samples_per_channel_) { + if (beamformer_enabled_ && + (static_cast<size_t>(num_in_channels) != array_geometry_.size() || + num_out_channels > 1)) { + return kBadNumberChannelsError; + } + + api_format_ = config; + + // We process at the closest native rate >= min(input rate, output rate)... + const int min_proc_rate = + std::min(api_format_.input_stream().sample_rate_hz(), + api_format_.output_stream().sample_rate_hz()); + int fwd_proc_rate; + for (size_t i = 0; i < kNumNativeSampleRates; ++i) { + fwd_proc_rate = kNativeSampleRatesHz[i]; + if (fwd_proc_rate >= min_proc_rate) { + break; + } + } + // ...with one exception. + if (echo_control_mobile_->is_enabled() && + min_proc_rate > kMaxAECMSampleRateHz) { + fwd_proc_rate = kMaxAECMSampleRateHz; + } + + fwd_proc_format_ = StreamConfig(fwd_proc_rate); + + // We normally process the reverse stream at 16 kHz. Unless... + int rev_proc_rate = kSampleRate16kHz; + if (fwd_proc_format_.sample_rate_hz() == kSampleRate8kHz) { + // ...the forward stream is at 8 kHz. + rev_proc_rate = kSampleRate8kHz; + } else { + if (api_format_.reverse_input_stream().sample_rate_hz() == + kSampleRate32kHz) { + // ...or the input is at 32 kHz, in which case we use the splitting + // filter rather than the resampler. + rev_proc_rate = kSampleRate32kHz; + } + } + + // Always downmix the reverse stream to mono for analysis. This has been + // demonstrated to work well for AEC in most practical scenarios. + rev_proc_format_ = StreamConfig(rev_proc_rate, 1); + + if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz || + fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) { + split_rate_ = kSampleRate16kHz; + } else { + split_rate_ = fwd_proc_format_.sample_rate_hz(); + } + + return InitializeLocked(); +} + +// Calls InitializeLocked() if any of the audio parameters have changed from +// their current values.
+int AudioProcessingImpl::MaybeInitializeLocked(
+    const ProcessingConfig& processing_config) {
+  if (processing_config == api_format_) {
+    return kNoError;
+  }
+  return InitializeLocked(processing_config);
+}
+
+void AudioProcessingImpl::SetExtraOptions(const Config& config) {
+  CriticalSectionScoped crit_scoped(crit_);
+  for (auto item : component_list_) {
+    item->SetExtraOptions(config);
+  }
+
+  if (transient_suppressor_enabled_ != config.Get<ExperimentalNs>().enabled) {
+    transient_suppressor_enabled_ = config.Get<ExperimentalNs>().enabled;
+    InitializeTransient();
+  }
+}
+
+int AudioProcessingImpl::proc_sample_rate_hz() const {
+  return fwd_proc_format_.sample_rate_hz();
+}
+
+int AudioProcessingImpl::proc_split_sample_rate_hz() const {
+  return split_rate_;
+}
+
+int AudioProcessingImpl::num_reverse_channels() const {
+  return rev_proc_format_.num_channels();
+}
+
+int AudioProcessingImpl::num_input_channels() const {
+  return api_format_.input_stream().num_channels();
+}
+
+int AudioProcessingImpl::num_output_channels() const {
+  return api_format_.output_stream().num_channels();
+}
+
+void AudioProcessingImpl::set_output_will_be_muted(bool muted) {
+  CriticalSectionScoped lock(crit_);
+  output_will_be_muted_ = muted;
+  if (agc_manager_.get()) {
+    agc_manager_->SetCaptureMuted(output_will_be_muted_);
+  }
+}
+
+int AudioProcessingImpl::ProcessStream(const float* const* src,
+                                       size_t samples_per_channel,
+                                       int input_sample_rate_hz,
+                                       ChannelLayout input_layout,
+                                       int output_sample_rate_hz,
+                                       ChannelLayout output_layout,
+                                       float* const* dest) {
+  CriticalSectionScoped crit_scoped(crit_);
+  StreamConfig input_stream = api_format_.input_stream();
+  input_stream.set_sample_rate_hz(input_sample_rate_hz);
+  input_stream.set_num_channels(ChannelsFromLayout(input_layout));
+  input_stream.set_has_keyboard(LayoutHasKeyboard(input_layout));
+
+  StreamConfig output_stream = api_format_.output_stream();
+  output_stream.set_sample_rate_hz(output_sample_rate_hz);
+  output_stream.set_num_channels(ChannelsFromLayout(output_layout));
+  output_stream.set_has_keyboard(LayoutHasKeyboard(output_layout));
+
+  if (samples_per_channel != input_stream.num_frames()) {
+    return kBadDataLengthError;
+  }
+  return ProcessStream(src, input_stream, output_stream, dest);
+}
+
+int AudioProcessingImpl::ProcessStream(const float* const* src,
+                                       const StreamConfig& input_config,
+                                       const StreamConfig& output_config,
+                                       float* const* dest) {
+  CriticalSectionScoped crit_scoped(crit_);
+  if (!src || !dest) {
+    return kNullPointerError;
+  }
+
+  ProcessingConfig processing_config = api_format_;
+  processing_config.input_stream() = input_config;
+  processing_config.output_stream() = output_config;
+
+  RETURN_ON_ERR(MaybeInitializeLocked(processing_config));
+  assert(processing_config.input_stream().num_frames() ==
+         api_format_.input_stream().num_frames());
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  if (debug_file_->Open()) {
+    RETURN_ON_ERR(WriteConfigMessage(false));
+
+    event_msg_->set_type(audioproc::Event::STREAM);
+    audioproc::Stream* msg = event_msg_->mutable_stream();
+    const size_t channel_size =
+        sizeof(float) * api_format_.input_stream().num_frames();
+    for (int i = 0; i < api_format_.input_stream().num_channels(); ++i)
+      msg->add_input_channel(src[i], channel_size);
+  }
+#endif
+
+  capture_audio_->CopyFrom(src, api_format_.input_stream());
+  RETURN_ON_ERR(ProcessStreamLocked());
+  capture_audio_->CopyTo(api_format_.output_stream(), dest);
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  if (debug_file_->Open()) {
+    audioproc::Stream* msg =
event_msg_->mutable_stream(); + const size_t channel_size = + sizeof(float) * api_format_.output_stream().num_frames(); + for (int i = 0; i < api_format_.output_stream().num_channels(); ++i) + msg->add_output_channel(dest[i], channel_size); + RETURN_ON_ERR(WriteMessageToDebugFile()); + } +#endif + + return kNoError; +} + +int AudioProcessingImpl::ProcessStream(AudioFrame* frame) { + CriticalSectionScoped crit_scoped(crit_); + if (!frame) { + return kNullPointerError; + } + // Must be a native rate. + if (frame->sample_rate_hz_ != kSampleRate8kHz && + frame->sample_rate_hz_ != kSampleRate16kHz && + frame->sample_rate_hz_ != kSampleRate32kHz && + frame->sample_rate_hz_ != kSampleRate48kHz) { + return kBadSampleRateError; + } + if (echo_control_mobile_->is_enabled() && + frame->sample_rate_hz_ > kMaxAECMSampleRateHz) { + LOG(LS_ERROR) << "AECM only supports 16 or 8 kHz sample rates"; + return kUnsupportedComponentError; + } + + // TODO(ajm): The input and output rates and channels are currently + // constrained to be identical in the int16 interface. + ProcessingConfig processing_config = api_format_; + processing_config.input_stream().set_sample_rate_hz(frame->sample_rate_hz_); + processing_config.input_stream().set_num_channels(frame->num_channels_); + processing_config.output_stream().set_sample_rate_hz(frame->sample_rate_hz_); + processing_config.output_stream().set_num_channels(frame->num_channels_); + + RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + if (frame->samples_per_channel_ != api_format_.input_stream().num_frames()) { return kBadDataLengthError; } -#ifndef NDEBUG +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { event_msg_->set_type(audioproc::Event::STREAM); audioproc::Stream* msg = event_msg_->mutable_stream(); - const size_t data_size = sizeof(WebRtc_Word16) * - frame->_payloadDataLengthInSamples * - frame->_audioChannel; - msg->set_input_data(frame->_payloadData, data_size); - msg->set_delay(stream_delay_ms_); - msg->set_drift(echo_cancellation_->stream_drift_samples()); - msg->set_level(gain_control_->stream_analog_level()); + const size_t data_size = + sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; + msg->set_input_data(frame->data_, data_size); } #endif capture_audio_->DeinterleaveFrom(frame); + RETURN_ON_ERR(ProcessStreamLocked()); + capture_audio_->InterleaveTo(frame, output_copy_needed(is_data_processed())); - // TODO(ajm): experiment with mixing and AEC placement. - if (num_output_channels_ < num_input_channels_) { - capture_audio_->Mix(num_output_channels_); - - frame->_audioChannel = num_output_channels_; - } - - if (sample_rate_hz_ == kSampleRate32kHz) { - for (int i = 0; i < num_input_channels_; i++) { - // Split into a low and high band. 
- SplittingFilterAnalysis(capture_audio_->data(i), - capture_audio_->low_pass_split_data(i), - capture_audio_->high_pass_split_data(i), - capture_audio_->analysis_filter_state1(i), - capture_audio_->analysis_filter_state2(i)); - } - } - - err = high_pass_filter_->ProcessCaptureAudio(capture_audio_); - if (err != kNoError) { - return err; - } - - err = gain_control_->AnalyzeCaptureAudio(capture_audio_); - if (err != kNoError) { - return err; - } - - err = echo_cancellation_->ProcessCaptureAudio(capture_audio_); - if (err != kNoError) { - return err; - } - - if (echo_control_mobile_->is_enabled() && - noise_suppression_->is_enabled()) { - capture_audio_->CopyLowPassToReference(); - } - - err = noise_suppression_->ProcessCaptureAudio(capture_audio_); - if (err != kNoError) { - return err; - } - - err = echo_control_mobile_->ProcessCaptureAudio(capture_audio_); - if (err != kNoError) { - return err; - } - - err = voice_detection_->ProcessCaptureAudio(capture_audio_); - if (err != kNoError) { - return err; - } - - err = gain_control_->ProcessCaptureAudio(capture_audio_); - if (err != kNoError) { - return err; - } - - //err = level_estimator_->ProcessCaptureAudio(capture_audio_); - //if (err != kNoError) { - // return err; - //} - - if (sample_rate_hz_ == kSampleRate32kHz) { - for (int i = 0; i < num_output_channels_; i++) { - // Recombine low and high bands. - SplittingFilterSynthesis(capture_audio_->low_pass_split_data(i), - capture_audio_->high_pass_split_data(i), - capture_audio_->data(i), - capture_audio_->synthesis_filter_state1(i), - capture_audio_->synthesis_filter_state2(i)); - } - } - - capture_audio_->InterleaveTo(frame); - -#ifndef NDEBUG +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { audioproc::Stream* msg = event_msg_->mutable_stream(); - const size_t data_size = sizeof(WebRtc_Word16) * - frame->_payloadDataLengthInSamples * - frame->_audioChannel; - msg->set_output_data(frame->_payloadData, data_size); - err = WriteMessageToDebugFile(); - if (err != kNoError) { - return err; - } + const size_t data_size = + sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; + msg->set_output_data(frame->data_, data_size); + RETURN_ON_ERR(WriteMessageToDebugFile()); } #endif return kNoError; } +int AudioProcessingImpl::ProcessStreamLocked() { +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + if (debug_file_->Open()) { + audioproc::Stream* msg = event_msg_->mutable_stream(); + msg->set_delay(stream_delay_ms_); + msg->set_drift(echo_cancellation_->stream_drift_samples()); + msg->set_level(gain_control()->stream_analog_level()); + msg->set_keypress(key_pressed_); + } +#endif + + MaybeUpdateHistograms(); + + AudioBuffer* ca = capture_audio_.get(); // For brevity. 
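+  // (Editorial summary of the fixed capture-side order implemented below,
+  // hedged and not part of the original patch: band split -> beamformer ->
+  // high-pass -> AEC/AECM and NS -> VAD -> AGC -> band merge -> transient
+  // suppression -> level estimation.)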
+ + if (use_new_agc_ && gain_control_->is_enabled()) { + agc_manager_->AnalyzePreProcess(ca->channels()[0], ca->num_channels(), + fwd_proc_format_.num_frames()); + } + + bool data_processed = is_data_processed(); + if (analysis_needed(data_processed)) { + ca->SplitIntoFrequencyBands(); + } + + if (intelligibility_enabled_) { + intelligibility_enhancer_->AnalyzeCaptureAudio( + ca->split_channels_f(kBand0To8kHz), split_rate_, ca->num_channels()); + } + + if (beamformer_enabled_) { + beamformer_->ProcessChunk(*ca->split_data_f(), ca->split_data_f()); + ca->set_num_channels(1); + } + + RETURN_ON_ERR(high_pass_filter_->ProcessCaptureAudio(ca)); + RETURN_ON_ERR(gain_control_->AnalyzeCaptureAudio(ca)); + RETURN_ON_ERR(noise_suppression_->AnalyzeCaptureAudio(ca)); + RETURN_ON_ERR(echo_cancellation_->ProcessCaptureAudio(ca)); + + if (echo_control_mobile_->is_enabled() && noise_suppression_->is_enabled()) { + ca->CopyLowPassToReference(); + } + RETURN_ON_ERR(noise_suppression_->ProcessCaptureAudio(ca)); + RETURN_ON_ERR(echo_control_mobile_->ProcessCaptureAudio(ca)); + RETURN_ON_ERR(voice_detection_->ProcessCaptureAudio(ca)); + + if (use_new_agc_ && gain_control_->is_enabled() && + (!beamformer_enabled_ || beamformer_->is_target_present())) { + agc_manager_->Process(ca->split_bands_const(0)[kBand0To8kHz], + ca->num_frames_per_band(), split_rate_); + } + RETURN_ON_ERR(gain_control_->ProcessCaptureAudio(ca)); + + if (synthesis_needed(data_processed)) { + ca->MergeFrequencyBands(); + } + + // TODO(aluebs): Investigate if the transient suppression placement should be + // before or after the AGC. + if (transient_suppressor_enabled_) { + float voice_probability = + agc_manager_.get() ? agc_manager_->voice_probability() : 1.f; + + transient_suppressor_->Suppress( + ca->channels_f()[0], ca->num_frames(), ca->num_channels(), + ca->split_bands_const_f(0)[kBand0To8kHz], ca->num_frames_per_band(), + ca->keyboard_data(), ca->num_keyboard_frames(), voice_probability, + key_pressed_); + } + + // The level estimator operates on the recombined data. 
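+  // (Editorial note, not part of the original patch: placing it after
+  // MergeFrequencyBands() and the transient suppressor means it measures the
+  // signal that is actually emitted, not an intermediate band.)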
+ RETURN_ON_ERR(level_estimator_->ProcessStream(ca)); + + was_stream_delay_set_ = false; + return kNoError; +} + +int AudioProcessingImpl::AnalyzeReverseStream(const float* const* data, + size_t samples_per_channel, + int rev_sample_rate_hz, + ChannelLayout layout) { + const StreamConfig reverse_config = { + rev_sample_rate_hz, ChannelsFromLayout(layout), LayoutHasKeyboard(layout), + }; + if (samples_per_channel != reverse_config.num_frames()) { + return kBadDataLengthError; + } + return AnalyzeReverseStream(data, reverse_config, reverse_config); +} + +int AudioProcessingImpl::ProcessReverseStream( + const float* const* src, + const StreamConfig& reverse_input_config, + const StreamConfig& reverse_output_config, + float* const* dest) { + RETURN_ON_ERR( + AnalyzeReverseStream(src, reverse_input_config, reverse_output_config)); + if (is_rev_processed()) { + render_audio_->CopyTo(api_format_.reverse_output_stream(), dest); + } else if (rev_conversion_needed()) { + render_converter_->Convert(src, reverse_input_config.num_samples(), dest, + reverse_output_config.num_samples()); + } else { + CopyAudioIfNeeded(src, reverse_input_config.num_frames(), + reverse_input_config.num_channels(), dest); + } + + return kNoError; +} + +int AudioProcessingImpl::AnalyzeReverseStream( + const float* const* src, + const StreamConfig& reverse_input_config, + const StreamConfig& reverse_output_config) { + CriticalSectionScoped crit_scoped(crit_); + if (src == NULL) { + return kNullPointerError; + } + + if (reverse_input_config.num_channels() <= 0) { + return kBadNumberChannelsError; + } + + ProcessingConfig processing_config = api_format_; + processing_config.reverse_input_stream() = reverse_input_config; + processing_config.reverse_output_stream() = reverse_output_config; + + RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + assert(reverse_input_config.num_frames() == + api_format_.reverse_input_stream().num_frames()); + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + if (debug_file_->Open()) { + event_msg_->set_type(audioproc::Event::REVERSE_STREAM); + audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); + const size_t channel_size = + sizeof(float) * api_format_.reverse_input_stream().num_frames(); + for (int i = 0; i < api_format_.reverse_input_stream().num_channels(); ++i) + msg->add_channel(src[i], channel_size); + RETURN_ON_ERR(WriteMessageToDebugFile()); + } +#endif + + render_audio_->CopyFrom(src, api_format_.reverse_input_stream()); + return ProcessReverseStreamLocked(); +} + +int AudioProcessingImpl::ProcessReverseStream(AudioFrame* frame) { + RETURN_ON_ERR(AnalyzeReverseStream(frame)); + if (is_rev_processed()) { + render_audio_->InterleaveTo(frame, true); + } + + return kNoError; +} + int AudioProcessingImpl::AnalyzeReverseStream(AudioFrame* frame) { - CriticalSectionScoped crit_scoped(*crit_); - int err = kNoError; - + CriticalSectionScoped crit_scoped(crit_); if (frame == NULL) { return kNullPointerError; } - - if (frame->_frequencyInHz != sample_rate_hz_) { + // Must be a native rate. + if (frame->sample_rate_hz_ != kSampleRate8kHz && + frame->sample_rate_hz_ != kSampleRate16kHz && + frame->sample_rate_hz_ != kSampleRate32kHz && + frame->sample_rate_hz_ != kSampleRate48kHz) { + return kBadSampleRateError; + } + // This interface does not tolerate different forward and reverse rates. 
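+  // (Illustrative example, not part of the original patch: if the forward
+  // path was last configured at 48 kHz, passing a 16 kHz reverse frame here
+  // fails the check below and returns kBadSampleRateError.)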
+ if (frame->sample_rate_hz_ != api_format_.input_stream().sample_rate_hz()) { return kBadSampleRateError; } - if (frame->_audioChannel != num_reverse_channels_) { + if (frame->num_channels_ <= 0) { return kBadNumberChannelsError; } - if (frame->_payloadDataLengthInSamples != samples_per_channel_) { + ProcessingConfig processing_config = api_format_; + processing_config.reverse_input_stream().set_sample_rate_hz( + frame->sample_rate_hz_); + processing_config.reverse_input_stream().set_num_channels( + frame->num_channels_); + processing_config.reverse_output_stream().set_sample_rate_hz( + frame->sample_rate_hz_); + processing_config.reverse_output_stream().set_num_channels( + frame->num_channels_); + + RETURN_ON_ERR(MaybeInitializeLocked(processing_config)); + if (frame->samples_per_channel_ != + api_format_.reverse_input_stream().num_frames()) { return kBadDataLengthError; } -#ifndef NDEBUG +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP if (debug_file_->Open()) { event_msg_->set_type(audioproc::Event::REVERSE_STREAM); audioproc::ReverseStream* msg = event_msg_->mutable_reverse_stream(); - const size_t data_size = sizeof(WebRtc_Word16) * - frame->_payloadDataLengthInSamples * - frame->_audioChannel; - msg->set_data(frame->_payloadData, data_size); - err = WriteMessageToDebugFile(); - if (err != kNoError) { - return err; - } + const size_t data_size = + sizeof(int16_t) * frame->samples_per_channel_ * frame->num_channels_; + msg->set_data(frame->data_, data_size); + RETURN_ON_ERR(WriteMessageToDebugFile()); } #endif - render_audio_->DeinterleaveFrom(frame); + return ProcessReverseStreamLocked(); +} - // TODO(ajm): turn the splitting filter into a component? - if (sample_rate_hz_ == kSampleRate32kHz) { - for (int i = 0; i < num_reverse_channels_; i++) { - // Split into low and high band. - SplittingFilterAnalysis(render_audio_->data(i), - render_audio_->low_pass_split_data(i), - render_audio_->high_pass_split_data(i), - render_audio_->analysis_filter_state1(i), - render_audio_->analysis_filter_state2(i)); - } +int AudioProcessingImpl::ProcessReverseStreamLocked() { + AudioBuffer* ra = render_audio_.get(); // For brevity. + if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz) { + ra->SplitIntoFrequencyBands(); } - // TODO(ajm): warnings possible from components? - err = echo_cancellation_->ProcessRenderAudio(render_audio_); - if (err != kNoError) { - return err; + if (intelligibility_enabled_) { + intelligibility_enhancer_->ProcessRenderAudio( + ra->split_channels_f(kBand0To8kHz), split_rate_, ra->num_channels()); } - err = echo_control_mobile_->ProcessRenderAudio(render_audio_); - if (err != kNoError) { - return err; + RETURN_ON_ERR(echo_cancellation_->ProcessRenderAudio(ra)); + RETURN_ON_ERR(echo_control_mobile_->ProcessRenderAudio(ra)); + if (!use_new_agc_) { + RETURN_ON_ERR(gain_control_->ProcessRenderAudio(ra)); } - err = gain_control_->ProcessRenderAudio(render_audio_); - if (err != kNoError) { - return err; + if (rev_proc_format_.sample_rate_hz() == kSampleRate32kHz && + is_rev_processed()) { + ra->MergeFrequencyBands(); } - //err = level_estimator_->AnalyzeReverseStream(render_audio_); - //if (err != kNoError) { - // return err; - //} - - was_stream_delay_set_ = false; - return err; // TODO(ajm): this is for returning warnings; necessary? 
+ return kNoError; } int AudioProcessingImpl::set_stream_delay_ms(int delay) { + Error retval = kNoError; was_stream_delay_set_ = true; + delay += delay_offset_ms_; + if (delay < 0) { - return kBadParameterError; + delay = 0; + retval = kBadStreamParameterWarning; } // TODO(ajm): the max is rather arbitrarily chosen; investigate. if (delay > 500) { - stream_delay_ms_ = 500; - return kBadStreamParameterWarning; + delay = 500; + retval = kBadStreamParameterWarning; } stream_delay_ms_ = delay; - return kNoError; + return retval; } int AudioProcessingImpl::stream_delay_ms() const { @@ -485,16 +887,29 @@ bool AudioProcessingImpl::was_stream_delay_set() const { return was_stream_delay_set_; } +void AudioProcessingImpl::set_stream_key_pressed(bool key_pressed) { + key_pressed_ = key_pressed; +} + +void AudioProcessingImpl::set_delay_offset_ms(int offset) { + CriticalSectionScoped crit_scoped(crit_); + delay_offset_ms_ = offset; +} + +int AudioProcessingImpl::delay_offset_ms() const { + return delay_offset_ms_; +} + int AudioProcessingImpl::StartDebugRecording( const char filename[AudioProcessing::kMaxFilenameSize]) { -#ifndef NDEBUG - CriticalSectionScoped crit_scoped(*crit_); - assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize); + CriticalSectionScoped crit_scoped(crit_); + static_assert(kMaxFilenameSize == FileWrapper::kMaxFileNameSize, ""); if (filename == NULL) { return kNullPointerError; } +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP // Stop any ongoing recording. if (debug_file_->Open()) { if (debug_file_->CloseFile() == -1) { @@ -507,27 +922,61 @@ int AudioProcessingImpl::StartDebugRecording( return kFileError; } - int err = WriteInitMessage(); - if (err != kNoError) { - return err; - } -#endif - + RETURN_ON_ERR(WriteConfigMessage(true)); + RETURN_ON_ERR(WriteInitMessage()); return kNoError; +#else + return kUnsupportedFunctionError; +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +int AudioProcessingImpl::StartDebugRecording(FILE* handle) { + CriticalSectionScoped crit_scoped(crit_); + + if (handle == NULL) { + return kNullPointerError; + } + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP + // Stop any ongoing recording. + if (debug_file_->Open()) { + if (debug_file_->CloseFile() == -1) { + return kFileError; + } + } + + if (debug_file_->OpenFromFileHandle(handle, true, false) == -1) { + return kFileError; + } + + RETURN_ON_ERR(WriteConfigMessage(true)); + RETURN_ON_ERR(WriteInitMessage()); + return kNoError; +#else + return kUnsupportedFunctionError; +#endif // WEBRTC_AUDIOPROC_DEBUG_DUMP +} + +int AudioProcessingImpl::StartDebugRecordingForPlatformFile( + rtc::PlatformFile handle) { + FILE* stream = rtc::FdopenPlatformFileForWriting(handle); + return StartDebugRecording(stream); } int AudioProcessingImpl::StopDebugRecording() { -#ifndef NDEBUG - CriticalSectionScoped crit_scoped(*crit_); + CriticalSectionScoped crit_scoped(crit_); + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP // We just return if recording hasn't started. 
if (debug_file_->Open()) {
     if (debug_file_->CloseFile() == -1) {
       return kFileError;
     }
   }
-#endif
-  return kNoError;
+#else
+  return kUnsupportedFunctionError;
+#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP
 }

 EchoCancellation* AudioProcessingImpl::echo_cancellation() const {
@@ -539,6 +988,9 @@ EchoControlMobile* AudioProcessingImpl::echo_control_mobile() const {
 }

 GainControl* AudioProcessingImpl::gain_control() const {
+  if (use_new_agc_) {
+    return gain_control_for_new_agc_.get();
+  }
   return gain_control_;
 }

@@ -558,82 +1010,184 @@ VoiceDetection* AudioProcessingImpl::voice_detection() const {
   return voice_detection_;
 }

-WebRtc_Word32 AudioProcessingImpl::Version(WebRtc_Word8* version,
-    WebRtc_UWord32& bytes_remaining, WebRtc_UWord32& position) const {
-  if (version == NULL) {
-    /*WEBRTC_TRACE(webrtc::kTraceError,
-                 webrtc::kTraceAudioProcessing,
-                 -1,
-                 "Null version pointer");*/
-    return kNullPointerError;
-  }
-  memset(&version[position], 0, bytes_remaining);
-
-  char my_version[] = "AudioProcessing 1.0.0";
-  // Includes null termination.
-  WebRtc_UWord32 length = static_cast<WebRtc_UWord32>(strlen(my_version));
-  if (bytes_remaining < length) {
-    /*WEBRTC_TRACE(webrtc::kTraceError,
-                 webrtc::kTraceAudioProcessing,
-                 -1,
-                 "Buffer of insufficient length");*/
-    return kBadParameterError;
-  }
-  memcpy(&version[position], my_version, length);
-  bytes_remaining -= length;
-  position += length;
-
-  std::list<ProcessingComponent*>::const_iterator it;
-  for (it = component_list_.begin(); it != component_list_.end(); it++) {
-    char component_version[256];
-    strcpy(component_version, "\n");
-    int err = (*it)->get_version(&component_version[1],
-                                 sizeof(component_version) - 1);
-    if (err != kNoError) {
-      return err;
-    }
-    if (strncmp(&component_version[1], "\0", 1) == 0) {
-      // Assume empty if first byte is NULL.
-      continue;
-    }
-
-    length = static_cast<WebRtc_UWord32>(strlen(component_version));
-    if (bytes_remaining < length) {
-      /*WEBRTC_TRACE(webrtc::kTraceError,
-                   webrtc::kTraceAudioProcessing,
-                   -1,
-                   "Buffer of insufficient length");*/
-      return kBadParameterError;
-    }
-    memcpy(&version[position], component_version, length);
-    bytes_remaining -= length;
-    position += length;
+bool AudioProcessingImpl::is_data_processed() const {
+  if (beamformer_enabled_) {
+    return true;
   }

-  return kNoError;
+  int enabled_count = 0;
+  for (auto item : component_list_) {
+    if (item->is_component_enabled()) {
+      enabled_count++;
+    }
+  }
+
+  // Data is unchanged if no components are enabled, or if only level_estimator_
+  // or voice_detection_ is enabled.
+  if (enabled_count == 0) {
+    return false;
+  } else if (enabled_count == 1) {
+    if (level_estimator_->is_enabled() || voice_detection_->is_enabled()) {
+      return false;
+    }
+  } else if (enabled_count == 2) {
+    if (level_estimator_->is_enabled() && voice_detection_->is_enabled()) {
+      return false;
+    }
+  }
+  return true;
 }

-WebRtc_Word32 AudioProcessingImpl::ChangeUniqueId(const WebRtc_Word32 id) {
-  CriticalSectionScoped crit_scoped(*crit_);
-  /*WEBRTC_TRACE(webrtc::kTraceModuleCall,
-               webrtc::kTraceAudioProcessing,
-               id_,
-               "ChangeUniqueId(new id = %d)",
-               id);*/
-  id_ = id;
-
-  return kNoError;
+bool AudioProcessingImpl::output_copy_needed(bool is_data_processed) const {
+  // Check if we've upmixed or downmixed the audio.
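+  // (Editorial note, not part of the original patch: a fresh output copy is
+  // also required whenever any component rewrote the data or the transient
+  // suppressor is active, per the expression below.)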
+  return ((api_format_.output_stream().num_channels() !=
+           api_format_.input_stream().num_channels()) ||
+          is_data_processed || transient_suppressor_enabled_);
 }

-#ifndef NDEBUG
+bool AudioProcessingImpl::synthesis_needed(bool is_data_processed) const {
+  return (is_data_processed &&
+          (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz ||
+           fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz));
+}
+
+bool AudioProcessingImpl::analysis_needed(bool is_data_processed) const {
+  if (!is_data_processed && !voice_detection_->is_enabled() &&
+      !transient_suppressor_enabled_) {
+    // Only level_estimator_ is enabled.
+    return false;
+  } else if (fwd_proc_format_.sample_rate_hz() == kSampleRate32kHz ||
+             fwd_proc_format_.sample_rate_hz() == kSampleRate48kHz) {
+    // Something besides level_estimator_ is enabled, and we have super-wb.
+    return true;
+  }
+  return false;
+}
+
+bool AudioProcessingImpl::is_rev_processed() const {
+  return intelligibility_enabled_ && intelligibility_enhancer_->active();
+}
+
+bool AudioProcessingImpl::rev_conversion_needed() const {
+  return (api_format_.reverse_input_stream() !=
+          api_format_.reverse_output_stream());
+}
+
+void AudioProcessingImpl::InitializeExperimentalAgc() {
+  if (use_new_agc_) {
+    if (!agc_manager_.get()) {
+      agc_manager_.reset(new AgcManagerDirect(gain_control_,
+                                              gain_control_for_new_agc_.get(),
+                                              agc_startup_min_volume_));
+    }
+    agc_manager_->Initialize();
+    agc_manager_->SetCaptureMuted(output_will_be_muted_);
+  }
+}
+
+void AudioProcessingImpl::InitializeTransient() {
+  if (transient_suppressor_enabled_) {
+    if (!transient_suppressor_.get()) {
+      transient_suppressor_.reset(new TransientSuppressor());
+    }
+    transient_suppressor_->Initialize(
+        fwd_proc_format_.sample_rate_hz(), split_rate_,
+        api_format_.output_stream().num_channels());
+  }
+}
+
+void AudioProcessingImpl::InitializeBeamformer() {
+  if (beamformer_enabled_) {
+    if (!beamformer_) {
+      beamformer_.reset(new NonlinearBeamformer(array_geometry_));
+    }
+    beamformer_->Initialize(kChunkSizeMs, split_rate_);
+  }
+}
+
+void AudioProcessingImpl::InitializeIntelligibility() {
+  if (intelligibility_enabled_) {
+    IntelligibilityEnhancer::Config config;
+    config.sample_rate_hz = split_rate_;
+    config.num_capture_channels = capture_audio_->num_channels();
+    config.num_render_channels = render_audio_->num_channels();
+    intelligibility_enhancer_.reset(new IntelligibilityEnhancer(config));
+  }
+}
+
+void AudioProcessingImpl::MaybeUpdateHistograms() {
+  static const int kMinDiffDelayMs = 60;
+
+  if (echo_cancellation()->is_enabled()) {
+    // Activate delay_jumps_ counters if we know echo_cancellation is running.
+    // If a stream has echo we know that echo cancellation is in effect.
+    if (stream_delay_jumps_ == -1 && echo_cancellation()->stream_has_echo()) {
+      stream_delay_jumps_ = 0;
+    }
+    if (aec_system_delay_jumps_ == -1 &&
+        echo_cancellation()->stream_has_echo()) {
+      aec_system_delay_jumps_ = 0;
+    }
+
+    // Detect a jump in platform reported system delay and log the difference.
+    const int diff_stream_delay_ms = stream_delay_ms_ - last_stream_delay_ms_;
+    if (diff_stream_delay_ms > kMinDiffDelayMs && last_stream_delay_ms_ != 0) {
+      RTC_HISTOGRAM_COUNTS("WebRTC.Audio.PlatformReportedStreamDelayJump",
+                           diff_stream_delay_ms, kMinDiffDelayMs, 1000, 100);
+      if (stream_delay_jumps_ == -1) {
+        stream_delay_jumps_ = 0;  // Activate counter if needed.
+      }
+      stream_delay_jumps_++;
+    }
+    last_stream_delay_ms_ = stream_delay_ms_;
+
+    // Detect a jump in AEC system delay and log the difference.
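+    // (Illustrative arithmetic, not part of the original patch: with
+    // split_rate_ == 16000, frames_per_ms is 16, so an AEC system delay of
+    // 1024 samples converts to 64 ms before the jump comparison below.)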
+ const int frames_per_ms = rtc::CheckedDivExact(split_rate_, 1000); + const int aec_system_delay_ms = + WebRtcAec_system_delay(echo_cancellation()->aec_core()) / frames_per_ms; + const int diff_aec_system_delay_ms = + aec_system_delay_ms - last_aec_system_delay_ms_; + if (diff_aec_system_delay_ms > kMinDiffDelayMs && + last_aec_system_delay_ms_ != 0) { + RTC_HISTOGRAM_COUNTS("WebRTC.Audio.AecSystemDelayJump", + diff_aec_system_delay_ms, kMinDiffDelayMs, 1000, + 100); + if (aec_system_delay_jumps_ == -1) { + aec_system_delay_jumps_ = 0; // Activate counter if needed. + } + aec_system_delay_jumps_++; + } + last_aec_system_delay_ms_ = aec_system_delay_ms; + } +} + +void AudioProcessingImpl::UpdateHistogramsOnCallEnd() { + CriticalSectionScoped crit_scoped(crit_); + if (stream_delay_jumps_ > -1) { + RTC_HISTOGRAM_ENUMERATION( + "WebRTC.Audio.NumOfPlatformReportedStreamDelayJumps", + stream_delay_jumps_, 51); + } + stream_delay_jumps_ = -1; + last_stream_delay_ms_ = 0; + + if (aec_system_delay_jumps_ > -1) { + RTC_HISTOGRAM_ENUMERATION("WebRTC.Audio.NumOfAecSystemDelayJumps", + aec_system_delay_jumps_, 51); + } + aec_system_delay_jumps_ = -1; + last_aec_system_delay_ms_ = 0; +} + +#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP int AudioProcessingImpl::WriteMessageToDebugFile() { int32_t size = event_msg_->ByteSize(); if (size <= 0) { return kUnspecifiedError; } -#if defined(WEBRTC_BIG_ENDIAN) - // TODO(ajm): Use little-endian "on the wire". For the moment, we can be - // pretty safe in assuming little-endian. +#if defined(WEBRTC_ARCH_BIG_ENDIAN) +// TODO(ajm): Use little-endian "on the wire". For the moment, we can be +// pretty safe in assuming little-endian. #endif if (!event_msg_->SerializeToString(&event_str_)) { @@ -650,24 +1204,70 @@ int AudioProcessingImpl::WriteMessageToDebugFile() { event_msg_->Clear(); - return 0; + return kNoError; } int AudioProcessingImpl::WriteInitMessage() { event_msg_->set_type(audioproc::Event::INIT); audioproc::Init* msg = event_msg_->mutable_init(); - msg->set_sample_rate(sample_rate_hz_); - msg->set_device_sample_rate(echo_cancellation_->device_sample_rate_hz()); - msg->set_num_input_channels(num_input_channels_); - msg->set_num_output_channels(num_output_channels_); - msg->set_num_reverse_channels(num_reverse_channels_); - - int err = WriteMessageToDebugFile(); - if (err != kNoError) { - return err; - } + msg->set_sample_rate(api_format_.input_stream().sample_rate_hz()); + msg->set_num_input_channels(api_format_.input_stream().num_channels()); + msg->set_num_output_channels(api_format_.output_stream().num_channels()); + msg->set_num_reverse_channels( + api_format_.reverse_input_stream().num_channels()); + msg->set_reverse_sample_rate( + api_format_.reverse_input_stream().sample_rate_hz()); + msg->set_output_sample_rate(api_format_.output_stream().sample_rate_hz()); + // TODO(ekmeyerson): Add reverse output fields to event_msg_. 
+  RETURN_ON_ERR(WriteMessageToDebugFile());
   return kNoError;
 }

-#endif
+
+int AudioProcessingImpl::WriteConfigMessage(bool forced) {
+  audioproc::Config config;
+
+  config.set_aec_enabled(echo_cancellation_->is_enabled());
+  config.set_aec_delay_agnostic_enabled(
+      echo_cancellation_->is_delay_agnostic_enabled());
+  config.set_aec_drift_compensation_enabled(
+      echo_cancellation_->is_drift_compensation_enabled());
+  config.set_aec_extended_filter_enabled(
+      echo_cancellation_->is_extended_filter_enabled());
+  config.set_aec_suppression_level(
+      static_cast<int>(echo_cancellation_->suppression_level()));
+
+  config.set_aecm_enabled(echo_control_mobile_->is_enabled());
+  config.set_aecm_comfort_noise_enabled(
+      echo_control_mobile_->is_comfort_noise_enabled());
+  config.set_aecm_routing_mode(
+      static_cast<int>(echo_control_mobile_->routing_mode()));
+
+  config.set_agc_enabled(gain_control_->is_enabled());
+  config.set_agc_mode(static_cast<int>(gain_control_->mode()));
+  config.set_agc_limiter_enabled(gain_control_->is_limiter_enabled());
+  config.set_noise_robust_agc_enabled(use_new_agc_);
+
+  config.set_hpf_enabled(high_pass_filter_->is_enabled());
+
+  config.set_ns_enabled(noise_suppression_->is_enabled());
+  config.set_ns_level(static_cast<int>(noise_suppression_->level()));
+
+  config.set_transient_suppression_enabled(transient_suppressor_enabled_);
+
+  std::string serialized_config = config.SerializeAsString();
+  if (!forced && last_serialized_config_ == serialized_config) {
+    return kNoError;
+  }
+
+  last_serialized_config_ = serialized_config;
+
+  event_msg_->set_type(audioproc::Event::CONFIG);
+  event_msg_->mutable_config()->CopyFrom(config);
+
+  RETURN_ON_ERR(WriteMessageToDebugFile());
+  return kNoError;
+}
+#endif  // WEBRTC_AUDIOPROC_DEBUG_DUMP

 }  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/audio_processing_impl.h b/webrtc/modules/audio_processing/audio_processing_impl.h
index fc35937..bf29bf3 100644
--- a/webrtc/modules/audio_processing/audio_processing_impl.h
+++ b/webrtc/modules/audio_processing/audio_processing_impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  * Use of this source code is governed by a BSD-style license
  * that can be found in the LICENSE file in the root of the source
@@ -8,81 +8,140 @@
  * be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_

 #include <list>
 #include <string>
+#include <vector>

-#include "audio_processing.h"
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/base/thread_annotations.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"

 namespace webrtc {
-namespace audioproc {
-class Event;
-}  // audioproc
+
+class AgcManagerDirect;
 class AudioBuffer;
+class AudioConverter;
+
+template<typename T>
+class Beamformer;
+
 class CriticalSectionWrapper;
 class EchoCancellationImpl;
 class EchoControlMobileImpl;
 class FileWrapper;
 class GainControlImpl;
+class GainControlForNewAgc;
 class HighPassFilterImpl;
 class LevelEstimatorImpl;
 class NoiseSuppressionImpl;
 class ProcessingComponent;
+class TransientSuppressor;
 class VoiceDetectionImpl;
+class IntelligibilityEnhancer;
+
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+namespace audioproc {
+
+class Event;
+
+}  // namespace audioproc
+#endif

 class AudioProcessingImpl : public AudioProcessing {
  public:
-  enum {
-    kSampleRate8kHz = 8000,
-    kSampleRate16kHz = 16000,
-    kSampleRate32kHz = 32000
-  };
+  explicit AudioProcessingImpl(const Config& config);

-  explicit AudioProcessingImpl(int id);
+  // AudioProcessingImpl takes ownership of beamformer.
+  AudioProcessingImpl(const Config& config, Beamformer<float>* beamformer);
   virtual ~AudioProcessingImpl();

-  CriticalSectionWrapper* crit() const;
-
-  int split_sample_rate_hz() const;
-  bool was_stream_delay_set() const;
-
-  // AudioProcessing methods.
-  virtual int Initialize();
-  virtual int InitializeLocked();
-  virtual int set_sample_rate_hz(int rate);
-  virtual int sample_rate_hz() const;
-  virtual int set_num_channels(int input_channels, int output_channels);
-  virtual int num_input_channels() const;
-  virtual int num_output_channels() const;
-  virtual int set_num_reverse_channels(int channels);
-  virtual int num_reverse_channels() const;
-  virtual int ProcessStream(AudioFrame* frame);
-  virtual int AnalyzeReverseStream(AudioFrame* frame);
-  virtual int set_stream_delay_ms(int delay);
-  virtual int stream_delay_ms() const;
-  virtual int StartDebugRecording(const char filename[kMaxFilenameSize]);
-  virtual int StopDebugRecording();
-  virtual EchoCancellation* echo_cancellation() const;
-  virtual EchoControlMobile* echo_control_mobile() const;
-  virtual GainControl* gain_control() const;
-  virtual HighPassFilter* high_pass_filter() const;
-  virtual LevelEstimator* level_estimator() const;
-  virtual NoiseSuppression* noise_suppression() const;
-  virtual VoiceDetection* voice_detection() const;
+  int Initialize() override;
+  int Initialize(int input_sample_rate_hz,
+                 int output_sample_rate_hz,
+                 int reverse_sample_rate_hz,
+                 ChannelLayout input_layout,
+                 ChannelLayout output_layout,
+                 ChannelLayout reverse_layout) override;
+  int Initialize(const ProcessingConfig& processing_config) override;
+  void SetExtraOptions(const Config& config) override;
+  int proc_sample_rate_hz() const override;
+  int proc_split_sample_rate_hz() const override;
+  int num_input_channels() const override;
+  int num_output_channels() const override;
+  int num_reverse_channels() const override;
+  void set_output_will_be_muted(bool muted) override;
+  int ProcessStream(AudioFrame* frame) override;
+  int ProcessStream(const float* const* src,
+                    size_t samples_per_channel,
+                    int input_sample_rate_hz,
+                    ChannelLayout input_layout,
+                    int output_sample_rate_hz,
+                    ChannelLayout output_layout,
+                    float* const* dest) override;
+  int ProcessStream(const float* const* src,
+                    const StreamConfig& input_config,
+                    const StreamConfig& output_config,
+                    float* const* dest) override;
+  int AnalyzeReverseStream(AudioFrame* frame) override;
+  int ProcessReverseStream(AudioFrame* frame) override;
+  int AnalyzeReverseStream(const float* const* data,
+                           size_t samples_per_channel,
+                           int sample_rate_hz,
+                           ChannelLayout layout) override;
+  int ProcessReverseStream(const float* const* src,
+                           const StreamConfig& reverse_input_config,
+                           const StreamConfig& reverse_output_config,
+                           float* const* dest) override;
+  int set_stream_delay_ms(int delay) override;
+  int stream_delay_ms() const override;
+  bool was_stream_delay_set() const override;
+  void set_delay_offset_ms(int offset) override;
+  int delay_offset_ms() const override;
+  void set_stream_key_pressed(bool key_pressed) override;
+  int StartDebugRecording(const char filename[kMaxFilenameSize]) override;
+  int StartDebugRecording(FILE* handle) override;
+  int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) override;
+  int StopDebugRecording() override;
+  void UpdateHistogramsOnCallEnd() override;
+  EchoCancellation* echo_cancellation() const override;
+  EchoControlMobile* echo_control_mobile() const override;
+  GainControl* gain_control() const override;
+  HighPassFilter* high_pass_filter() const override;
+  LevelEstimator* level_estimator() const override;
+  NoiseSuppression* noise_suppression() const override;
+  VoiceDetection* voice_detection() const override;

-  // Module methods.
-  virtual WebRtc_Word32 Version(WebRtc_Word8* version,
-                                WebRtc_UWord32& remainingBufferInBytes,
-                                WebRtc_UWord32& position) const;
-  virtual WebRtc_Word32 ChangeUniqueId(const WebRtc_Word32 id);
+ protected:
+  // Overridden in a mock.
+  virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);

  private:
-  int WriteMessageToDebugFile();
-  int WriteInitMessage();
+  int InitializeLocked(const ProcessingConfig& config)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  int MaybeInitializeLocked(const ProcessingConfig& config)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  // TODO(ekm): Remove once all clients updated to new interface.
+  int AnalyzeReverseStream(const float* const* src,
+                           const StreamConfig& input_config,
+                           const StreamConfig& output_config);
+  int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);

-  int id_;
+  bool is_data_processed() const;
+  bool output_copy_needed(bool is_data_processed) const;
+  bool synthesis_needed(bool is_data_processed) const;
+  bool analysis_needed(bool is_data_processed) const;
+  bool is_rev_processed() const;
+  bool rev_conversion_needed() const;
+  void InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void InitializeBeamformer() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void InitializeIntelligibility() EXCLUSIVE_LOCKS_REQUIRED(crit_);
+  void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_);

   EchoCancellationImpl* echo_cancellation_;
   EchoControlMobileImpl* echo_control_mobile_;
@@ -91,27 +150,69 @@ class AudioProcessingImpl : public AudioProcessing {
   LevelEstimatorImpl* level_estimator_;
   NoiseSuppressionImpl* noise_suppression_;
   VoiceDetectionImpl* voice_detection_;
+  rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc_;
   std::list<ProcessingComponent*> component_list_;
-
-  FileWrapper* debug_file_;
-  audioproc::Event* event_msg_;  // Protobuf message.
-  std::string event_str_;  // Memory for protobuf serialization.
   CriticalSectionWrapper* crit_;
+  rtc::scoped_ptr<AudioBuffer> render_audio_;
+  rtc::scoped_ptr<AudioBuffer> capture_audio_;
+  rtc::scoped_ptr<AudioConverter> render_converter_;
+#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
+  // TODO(andrew): make this more graceful. Ideally we would split this stuff
+  // out into a separate class with an "enabled" and "disabled" implementation.
+  int WriteMessageToDebugFile();
+  int WriteInitMessage();

-  AudioBuffer* render_audio_;
-  AudioBuffer* capture_audio_;
+  // Writes Config message. If not |forced|, only writes the current config if
+  // it is different from the last saved one; if |forced|, writes the config
+  // regardless of the last saved.
+  int WriteConfigMessage(bool forced);
+
+  rtc::scoped_ptr<FileWrapper> debug_file_;
+  rtc::scoped_ptr<audioproc::Event> event_msg_;  // Protobuf message.
+  std::string event_str_;  // Memory for protobuf serialization.
+
+  // Serialized string of last saved APM configuration.
+  std::string last_serialized_config_;
+#endif
+
+  // Format of processing streams at input/output call sites.
+  ProcessingConfig api_format_;
+
+  // Only the rate and samples fields of fwd_proc_format_ are used because the
+  // forward processing number of channels is mutable and is tracked by the
+  // capture_audio_.
+  StreamConfig fwd_proc_format_;
+  StreamConfig rev_proc_format_;
+  int split_rate_;

-  int sample_rate_hz_;
-  int split_sample_rate_hz_;
-  int samples_per_channel_;
   int stream_delay_ms_;
+  int delay_offset_ms_;
   bool was_stream_delay_set_;
+  int last_stream_delay_ms_;
+  int last_aec_system_delay_ms_;
+  int stream_delay_jumps_;
+  int aec_system_delay_jumps_;

-  int num_reverse_channels_;
-  int num_input_channels_;
-  int num_output_channels_;
+  bool output_will_be_muted_ GUARDED_BY(crit_);
+
+  bool key_pressed_;
+
+  // Only set through the constructor's Config parameter.
+  const bool use_new_agc_;
+  rtc::scoped_ptr<AgcManagerDirect> agc_manager_ GUARDED_BY(crit_);
+  int agc_startup_min_volume_;
+
+  bool transient_suppressor_enabled_;
+  rtc::scoped_ptr<TransientSuppressor> transient_suppressor_;
+  const bool beamformer_enabled_;
+  rtc::scoped_ptr<Beamformer<float>> beamformer_;
+  const std::vector<Point> array_geometry_;
+
+  bool intelligibility_enabled_;
+  rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer_;
 };
+
 }  // namespace webrtc

-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
diff --git a/webrtc/modules/audio_processing/beamformer/array_util.h b/webrtc/modules/audio_processing/beamformer/array_util.h
new file mode 100644
index 0000000..f7598c0
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/array_util.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
+
+#include <cmath>
+
+namespace webrtc {
+
+// Coordinates in meters.
+template <typename T>
+struct CartesianPoint {
+  CartesianPoint(T x, T y, T z) {
+    c[0] = x;
+    c[1] = y;
+    c[2] = z;
+  }
+  T x() const { return c[0]; }
+  T y() const { return c[1]; }
+  T z() const { return c[2]; }
+  T c[3];
+};
+
+using Point = CartesianPoint<float>;
+
+template <typename T>
+float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
+  return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
+                   (a.y() - b.y()) * (a.y() - b.y()) +
+                   (a.z() - b.z()) * (a.z() - b.z()));
+}
+
+template <typename T>
+struct SphericalPoint {
+  SphericalPoint(T azimuth, T elevation, T radius) {
+    s[0] = azimuth;
+    s[1] = elevation;
+    s[2] = radius;
+  }
+  T azimuth() const { return s[0]; }
+  T elevation() const { return s[1]; }
+  T distance() const { return s[2]; }
+  T s[3];
+};
+
+using SphericalPointf = SphericalPoint<float>;
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
diff --git a/webrtc/modules/audio_processing/beamformer/beamformer.h b/webrtc/modules/audio_processing/beamformer/beamformer.h
new file mode 100644
index 0000000..54734dd
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/beamformer.h
@@ -0,0 +1,45 @@
+/*
+ * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
+
+#include "webrtc/common_audio/channel_buffer.h"
+#include "webrtc/modules/audio_processing/beamformer/array_util.h"
+
+namespace webrtc {
+
+template<typename T>
+class Beamformer {
+ public:
+  virtual ~Beamformer() {}
+
+  // Process one time-domain chunk of audio. The audio is expected to be split
+  // into frequency bands inside the ChannelBuffer. The number of frames and
+  // channels must correspond to the constructor parameters. The same
+  // ChannelBuffer can be passed in as |input| and |output|.
+  virtual void ProcessChunk(const ChannelBuffer<T>& input,
+                            ChannelBuffer<T>* output) = 0;
+
+  // Sample rate corresponds to the lower band.
+  // Needs to be called before the Beamformer can be used.
+  virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
+
+  // Indicates whether a given point is inside of the beam.
+  virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }
+
+  // Returns true if the current data contains the target signal.
+  // Which signals are considered "targets" is implementation dependent.
+  virtual bool is_target_present() = 0;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
diff --git a/webrtc/modules/audio_processing/beamformer/complex_matrix.h b/webrtc/modules/audio_processing/beamformer/complex_matrix.h
new file mode 100644
index 0000000..bfa3563
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/complex_matrix.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
+
+#include <complex>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/audio_processing/beamformer/matrix.h"
+
+namespace webrtc {
+
+using std::complex;
+
+// An extension of Matrix for operations that only work on a complex type.
+template <typename T>
+class ComplexMatrix : public Matrix<complex<T> > {
+ public:
+  ComplexMatrix() : Matrix<complex<T> >() {}
+
+  ComplexMatrix(int num_rows, int num_columns)
+      : Matrix<complex<T> >(num_rows, num_columns) {}
+
+  ComplexMatrix(const complex<T>* data, int num_rows, int num_columns)
+      : Matrix<complex<T> >(data, num_rows, num_columns) {}
+
+  // Complex Matrix operations.
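+  // (Illustrative usage, assuming only the interface declared here and not
+  // part of the original patch:
+  //   ComplexMatrix<float> m(2, 2);
+  //   m.PointwiseConjugate();   // conjugate every element in place
+  //   m.ConjugateTranspose();   // Hermitian transpose via the scratch copy
+  //   m.ZeroImag();             // drop the imaginary parts
+  // )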
+  ComplexMatrix& PointwiseConjugate() {
+    complex<T>* const data = this->data();
+    size_t size = this->num_rows() * this->num_columns();
+    for (size_t i = 0; i < size; ++i) {
+      data[i] = conj(data[i]);
+    }
+
+    return *this;
+  }
+
+  ComplexMatrix& PointwiseConjugate(const ComplexMatrix& operand) {
+    this->CopyFrom(operand);
+    return PointwiseConjugate();
+  }
+
+  ComplexMatrix& ConjugateTranspose() {
+    this->CopyDataToScratch();
+    int num_rows = this->num_rows();
+    this->SetNumRows(this->num_columns());
+    this->SetNumColumns(num_rows);
+    this->Resize();
+    return ConjugateTranspose(this->scratch_elements());
+  }
+
+  ComplexMatrix& ConjugateTranspose(const ComplexMatrix& operand) {
+    RTC_CHECK_EQ(operand.num_rows(), this->num_columns());
+    RTC_CHECK_EQ(operand.num_columns(), this->num_rows());
+    return ConjugateTranspose(operand.elements());
+  }
+
+  ComplexMatrix& ZeroImag() {
+    complex<T>* const data = this->data();
+    size_t size = this->num_rows() * this->num_columns();
+    for (size_t i = 0; i < size; ++i) {
+      data[i] = complex<T>(data[i].real(), 0);
+    }
+
+    return *this;
+  }
+
+  ComplexMatrix& ZeroImag(const ComplexMatrix& operand) {
+    this->CopyFrom(operand);
+    return ZeroImag();
+  }
+
+ private:
+  ComplexMatrix& ConjugateTranspose(const complex<T>* const* src) {
+    complex<T>* const* elements = this->elements();
+    for (int i = 0; i < this->num_rows(); ++i) {
+      for (int j = 0; j < this->num_columns(); ++j) {
+        elements[i][j] = conj(src[j][i]);
+      }
+    }
+
+    return *this;
+  }
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
diff --git a/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc b/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc
new file mode 100644
index 0000000..efc5b0f
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc
@@ -0,0 +1,102 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#define _USE_MATH_DEFINES
+
+#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
+
+#include <cmath>
+
+namespace {
+
+float BesselJ0(float x) {
+#if WEBRTC_WIN
+  return _j0(x);
+#else
+  return j0(x);
+#endif
+}
+
+}  // namespace
+
+namespace webrtc {
+
+void CovarianceMatrixGenerator::UniformCovarianceMatrix(
+    float wave_number,
+    const std::vector<Point>& geometry,
+    ComplexMatrix<float>* mat) {
+  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
+  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
+
+  complex<float>* const* mat_els = mat->elements();
+  for (size_t i = 0; i < geometry.size(); ++i) {
+    for (size_t j = 0; j < geometry.size(); ++j) {
+      if (wave_number > 0.f) {
+        mat_els[i][j] =
+            BesselJ0(wave_number * Distance(geometry[i], geometry[j]));
+      } else {
+        mat_els[i][j] = i == j ? 1.f : 0.f;
+      }
+    }
+  }
+}
+
+void CovarianceMatrixGenerator::AngledCovarianceMatrix(
+    float sound_speed,
+    float angle,
+    size_t frequency_bin,
+    size_t fft_size,
+    size_t num_freq_bins,
+    int sample_rate,
+    const std::vector<Point>& geometry,
+    ComplexMatrix<float>* mat) {
+  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
+  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
+
+  ComplexMatrix<float> interf_cov_vector(1, geometry.size());
+  ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1);
+  PhaseAlignmentMasks(frequency_bin,
+                      fft_size,
+                      sample_rate,
+                      sound_speed,
+                      geometry,
+                      angle,
+                      &interf_cov_vector);
+  interf_cov_vector_transposed.Transpose(interf_cov_vector);
+  interf_cov_vector.PointwiseConjugate();
+  mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);
+}
+
+void CovarianceMatrixGenerator::PhaseAlignmentMasks(
+    size_t frequency_bin,
+    size_t fft_size,
+    int sample_rate,
+    float sound_speed,
+    const std::vector<Point>& geometry,
+    float angle,
+    ComplexMatrix<float>* mat) {
+  RTC_CHECK_EQ(1, mat->num_rows());
+  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
+
+  float freq_in_hertz =
+      (static_cast<float>(frequency_bin) / fft_size) * sample_rate;
+
+  complex<float>* const* mat_els = mat->elements();
+  for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) {
+    float distance = std::cos(angle) * geometry[c_ix].x() +
+                     std::sin(angle) * geometry[c_ix].y();
+    float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;
+
+    // Euler's formula for mat[0][c_ix] = e^(j * phase_shift).
+    mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift));
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h b/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h
new file mode 100644
index 0000000..5375518
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
+
+#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
+#include "webrtc/modules/audio_processing/beamformer/array_util.h"
+
+namespace webrtc {
+
+// Helper class for Beamformer in charge of generating covariance matrices. For
+// each function, the passed-in ComplexMatrix is expected to be of size
+// |num_input_channels| x |num_input_channels|.
+class CovarianceMatrixGenerator {
+ public:
+  // A uniform covariance matrix with a gap at the target location. WARNING:
+  // The target angle is assumed to be 0.
+  static void UniformCovarianceMatrix(float wave_number,
+                                      const std::vector<Point>& geometry,
+                                      ComplexMatrix<float>* mat);
+
+  // The covariance matrix of a source at the given angle.
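+  // (Editorial note derived from the .cc definition above, not part of the
+  // original patch: the result is built as v^T * conj(v), where v is the
+  // 1 x N phase-alignment row vector produced by PhaseAlignmentMasks().)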
+  static void AngledCovarianceMatrix(float sound_speed,
+                                     float angle,
+                                     size_t frequency_bin,
+                                     size_t fft_size,
+                                     size_t num_freq_bins,
+                                     int sample_rate,
+                                     const std::vector<Point>& geometry,
+                                     ComplexMatrix<float>* mat);
+
+  // Calculates phase shifts that, when applied to a multichannel signal and
+  // added together, cause constructive interference for sources located at
+  // the given angle.
+  static void PhaseAlignmentMasks(size_t frequency_bin,
+                                  size_t fft_size,
+                                  int sample_rate,
+                                  float sound_speed,
+                                  const std::vector<Point>& geometry,
+                                  float angle,
+                                  ComplexMatrix<float>* mat);
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
diff --git a/webrtc/modules/audio_processing/beamformer/matrix.h b/webrtc/modules/audio_processing/beamformer/matrix.h
new file mode 100644
index 0000000..162aef1
--- /dev/null
+++ b/webrtc/modules/audio_processing/beamformer/matrix.h
@@ -0,0 +1,368 @@
+/*
+ * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
+
+#include <string.h>
+
+#include <sstream>
+#include <string>
+#include <vector>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/base/scoped_ptr.h"
+
+namespace {
+
+// Wrappers to get around the compiler warning resulting from the fact that
+// there's no std::sqrt overload for ints. We cast all non-complex types to
+// a double for the sqrt method.
+template <typename T>
+T sqrt_wrapper(T x) {
+  return sqrt(static_cast<double>(x));
+}
+
+template <typename T>
+std::complex<T> sqrt_wrapper(std::complex<T> x) {
+  return sqrt(x);
+}
+}  // namespace
+
+namespace webrtc {
+
+// Matrix is a class for doing standard matrix operations on 2 dimensional
+// matrices of any size. Results of matrix operations are stored in the
+// calling object. Function overloads exist for both in-place (the calling
+// object is used as both an operand and the result) and out-of-place (all
+// operands are passed in as parameters) operations. If operand dimensions
+// mismatch, the program crashes. Out-of-place operations change the size of
+// the calling object, if necessary, before operating.
+//
+// 'In-place' operations that inherently change the size of the matrix (eg.
+// Transpose, Multiply on different-sized matrices) must make temporary copies
+// (|scratch_elements_| and |scratch_data_|) of existing data to complete the
+// operations.
+//
+// The data is stored contiguously. Data can be accessed internally as a flat
+// array, |data_|, or as an array of row pointers, |elements_|, but is
+// available to users only as an array of row pointers through |elements()|.
+// Memory for storage is allocated when a matrix is resized only if the new
+// size overflows capacity. Memory needed temporarily for any operations is
+// similarly resized only if the new size overflows capacity.
+//
+// If you pass in storage through the ctor, that storage is copied into the
+// matrix. TODO(claguna): albeit tricky, allow for data to be referenced
+// instead of copied, and owned by the user.
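+// (Illustrative usage, assuming only the interface declared below and not
+// part of the original patch:
+//   Matrix<float> a(2, 3);     // zero-initialized 2x3
+//   a.elements()[0][1] = 5.f;  // row-pointer element access
+//   Matrix<float> b(3, 2);
+//   b.Transpose(a);            // out-of-place: b = a^T
+// )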
+template <typename T> +class Matrix { + public: + Matrix() : num_rows_(0), num_columns_(0) {} + + // Allocates space for the elements and initializes all values to zero. + Matrix(int num_rows, int num_columns) + : num_rows_(num_rows), num_columns_(num_columns) { + Resize(); + scratch_data_.resize(num_rows_ * num_columns_); + scratch_elements_.resize(num_rows_); + } + + // Copies |data| into the new Matrix. + Matrix(const T* data, int num_rows, int num_columns) + : num_rows_(0), num_columns_(0) { + CopyFrom(data, num_rows, num_columns); + scratch_data_.resize(num_rows_ * num_columns_); + scratch_elements_.resize(num_rows_); + } + + virtual ~Matrix() {} + + // Deep copy an existing matrix. + void CopyFrom(const Matrix& other) { + CopyFrom(&other.data_[0], other.num_rows_, other.num_columns_); + } + + // Copy |data| into the Matrix. The current data is lost. + void CopyFrom(const T* const data, int num_rows, int num_columns) { + Resize(num_rows, num_columns); + memcpy(&data_[0], data, num_rows_ * num_columns_ * sizeof(data_[0])); + } + + Matrix& CopyFromColumn(const T* const* src, + size_t column_index, + int num_rows) { + Resize(1, num_rows); + for (int i = 0; i < num_columns_; ++i) { + data_[i] = src[i][column_index]; + } + + return *this; + } + + void Resize(int num_rows, int num_columns) { + if (num_rows != num_rows_ || num_columns != num_columns_) { + num_rows_ = num_rows; + num_columns_ = num_columns; + Resize(); + } + } + + // Accessors and mutators. + int num_rows() const { return num_rows_; } + int num_columns() const { return num_columns_; } + T* const* elements() { return &elements_[0]; } + const T* const* elements() const { return &elements_[0]; } + + T Trace() { + RTC_CHECK_EQ(num_rows_, num_columns_); + + T trace = 0; + for (int i = 0; i < num_rows_; ++i) { + trace += elements_[i][i]; + } + return trace; + } + + // Matrix Operations. Returns *this to support method chaining.
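Because every operation returns *this, the operations declared next compose by chaining, and the out-of-place overloads resize the calling object before writing into it. A small usage sketch (the function name and values are hypothetical):

```cpp
#include "webrtc/modules/audio_processing/beamformer/matrix.h"

// Sketch: result = ((a + b) * 0.5), squared element-wise, via chaining.
// Add(lhs, rhs) copies |a| into |result| (resizing it if needed) before
// adding |b|, so |result| may start out empty.
void AverageThenSquare(const webrtc::Matrix<float>& a,
                       const webrtc::Matrix<float>& b,
                       webrtc::Matrix<float>* result) {
  result->Add(a, b)         // result = a + b
      .Scale(0.5f)          // result = (a + b) / 2
      .PointwiseSquare();   // result[i][j] = result[i][j]^2, in place
}
```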
+ Matrix& Transpose() { + CopyDataToScratch(); + Resize(num_columns_, num_rows_); + return Transpose(scratch_elements()); + } + + Matrix& Transpose(const Matrix& operand) { + RTC_CHECK_EQ(operand.num_rows_, num_columns_); + RTC_CHECK_EQ(operand.num_columns_, num_rows_); + + return Transpose(operand.elements()); + } + + template <typename S> + Matrix& Scale(const S& scalar) { + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] *= scalar; + } + + return *this; + } + + template <typename S> + Matrix& Scale(const Matrix& operand, const S& scalar) { + CopyFrom(operand); + return Scale(scalar); + } + + Matrix& Add(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] += operand.data_[i]; + } + + return *this; + } + + Matrix& Add(const Matrix& lhs, const Matrix& rhs) { + CopyFrom(lhs); + return Add(rhs); + } + + Matrix& Subtract(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] -= operand.data_[i]; + } + + return *this; + } + + Matrix& Subtract(const Matrix& lhs, const Matrix& rhs) { + CopyFrom(lhs); + return Subtract(rhs); + } + + Matrix& PointwiseMultiply(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] *= operand.data_[i]; + } + + return *this; + } + + Matrix& PointwiseMultiply(const Matrix& lhs, const Matrix& rhs) { + CopyFrom(lhs); + return PointwiseMultiply(rhs); + } + + Matrix& PointwiseDivide(const Matrix& operand) { + RTC_CHECK_EQ(num_rows_, operand.num_rows_); + RTC_CHECK_EQ(num_columns_, operand.num_columns_); + + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] /= operand.data_[i]; + } + + return *this; + } + + Matrix& PointwiseDivide(const Matrix& lhs, const Matrix& rhs) { + CopyFrom(lhs); + return PointwiseDivide(rhs); + } + + Matrix& PointwiseSquareRoot() { + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] = sqrt_wrapper(data_[i]); + } + + return *this; + } + + Matrix& PointwiseSquareRoot(const Matrix& operand) { + CopyFrom(operand); + return PointwiseSquareRoot(); + } + + Matrix& PointwiseAbsoluteValue() { + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] = abs(data_[i]); + } + + return *this; + } + + Matrix& PointwiseAbsoluteValue(const Matrix& operand) { + CopyFrom(operand); + return PointwiseAbsoluteValue(); + } + + Matrix& PointwiseSquare() { + for (size_t i = 0; i < data_.size(); ++i) { + data_[i] *= data_[i]; + } + + return *this; + } + + Matrix& PointwiseSquare(const Matrix& operand) { + CopyFrom(operand); + return PointwiseSquare(); + } + + Matrix& Multiply(const Matrix& lhs, const Matrix& rhs) { + RTC_CHECK_EQ(lhs.num_columns_, rhs.num_rows_); + RTC_CHECK_EQ(num_rows_, lhs.num_rows_); + RTC_CHECK_EQ(num_columns_, rhs.num_columns_); + + return Multiply(lhs.elements(), rhs.num_rows_, rhs.elements()); + } + + Matrix& Multiply(const Matrix& rhs) { + RTC_CHECK_EQ(num_columns_, rhs.num_rows_); + + CopyDataToScratch(); + Resize(num_rows_, rhs.num_columns_); + return Multiply(scratch_elements(), rhs.num_rows_, rhs.elements()); + } + + std::string ToString() const { + std::ostringstream ss; + ss << std::endl << "Matrix" << std::endl; + + for (int i = 0; i < num_rows_; ++i) { + for (int j = 0; j < num_columns_; ++j) { + ss << elements_[i][j] << " "; + } + ss << std::endl; + } + ss << std::endl; +
+ return ss.str(); + } + + protected: + void SetNumRows(const int num_rows) { num_rows_ = num_rows; } + void SetNumColumns(const int num_columns) { num_columns_ = num_columns; } + T* data() { return &data_[0]; } + const T* data() const { return &data_[0]; } + const T* const* scratch_elements() const { return &scratch_elements_[0]; } + + // Resize the matrix. If an increase in capacity is required, the current + // data is lost. + void Resize() { + size_t size = num_rows_ * num_columns_; + data_.resize(size); + elements_.resize(num_rows_); + + for (int i = 0; i < num_rows_; ++i) { + elements_[i] = &data_[i * num_columns_]; + } + } + + // Copies data_ into scratch_data_ and updates scratch_elements_ accordingly. + void CopyDataToScratch() { + scratch_data_ = data_; + scratch_elements_.resize(num_rows_); + + for (int i = 0; i < num_rows_; ++i) { + scratch_elements_[i] = &scratch_data_[i * num_columns_]; + } + } + + private: + int num_rows_; + int num_columns_; + std::vector<T> data_; + std::vector<T*> elements_; + + // Stores temporary copies of |data_| and |elements_| for in-place operations + // where referring to original data is necessary. + std::vector<T> scratch_data_; + std::vector<T*> scratch_elements_; + + // Helpers for Transpose and Multiply operations that unify in-place and + // out-of-place solutions. + Matrix& Transpose(const T* const* src) { + for (int i = 0; i < num_rows_; ++i) { + for (int j = 0; j < num_columns_; ++j) { + elements_[i][j] = src[j][i]; + } + } + + return *this; + } + + Matrix& Multiply(const T* const* lhs, int num_rows_rhs, const T* const* rhs) { + for (int row = 0; row < num_rows_; ++row) { + for (int col = 0; col < num_columns_; ++col) { + T cur_element = 0; + for (int i = 0; i < num_rows_rhs; ++i) { + cur_element += lhs[row][i] * rhs[i][col]; + } + + elements_[row][col] = cur_element; + } + } + + return *this; + } + + RTC_DISALLOW_COPY_AND_ASSIGN(Matrix); +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_ diff --git a/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h b/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h new file mode 100644 index 0000000..7c58670 --- /dev/null +++ b/webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_ + +#include "testing/gtest/include/gtest/gtest.h" +#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" +#include "webrtc/modules/audio_processing/beamformer/matrix.h" + +namespace { +const float kTolerance = 0.001f; +} + +namespace webrtc { + +using std::complex; + +// Functions used in both matrix_unittest and complex_matrix_unittest.
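The helper class declared next is the assertion layer of those unit tests: build the expected matrix by hand, run the operation, and compare element-wise (exactly for integral types, within kTolerance for floats). A hypothetical test in that style (the test name and values are illustrative, not taken from the patch):

```cpp
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"
#include "webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h"

// Transposing twice should restore the original 2 x 3 matrix. The in-place
// Transpose() goes through the scratch buffers, so the round trip also
// exercises CopyDataToScratch().
TEST(MatrixTest, TransposeTwiceRestoresOriginal) {
  const float kValues[] = {1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f};
  webrtc::Matrix<float> expected(kValues, 2, 3);
  webrtc::Matrix<float> actual(kValues, 2, 3);
  actual.Transpose();  // Now 3 x 2.
  actual.Transpose();  // Back to 2 x 3.
  webrtc::MatrixTestHelpers::ValidateMatrixEqualityFloat(expected, actual);
}
```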
+class MatrixTestHelpers { + public: + template <typename T> + static void ValidateMatrixEquality(const Matrix<T>& expected, + const Matrix<T>& actual) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const T* const* expected_elements = expected.elements(); + const T* const* actual_elements = actual.elements(); + for (int i = 0; i < expected.num_rows(); ++i) { + for (int j = 0; j < expected.num_columns(); ++j) { + EXPECT_EQ(expected_elements[i][j], actual_elements[i][j]); + } + } + } + + static void ValidateMatrixEqualityFloat(const Matrix<float>& expected, + const Matrix<float>& actual) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const float* const* expected_elements = expected.elements(); + const float* const* actual_elements = actual.elements(); + for (int i = 0; i < expected.num_rows(); ++i) { + for (int j = 0; j < expected.num_columns(); ++j) { + EXPECT_NEAR(expected_elements[i][j], actual_elements[i][j], kTolerance); + } + } + } + + static void ValidateMatrixEqualityComplexFloat( + const Matrix<complex<float> >& expected, + const Matrix<complex<float> >& actual) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const complex<float>* const* expected_elements = expected.elements(); + const complex<float>* const* actual_elements = actual.elements(); + for (int i = 0; i < expected.num_rows(); ++i) { + for (int j = 0; j < expected.num_columns(); ++j) { + EXPECT_NEAR(expected_elements[i][j].real(), + actual_elements[i][j].real(), + kTolerance); + EXPECT_NEAR(expected_elements[i][j].imag(), + actual_elements[i][j].imag(), + kTolerance); + } + } + } + + static void ValidateMatrixNearEqualityComplexFloat( + const Matrix<complex<float> >& expected, + const Matrix<complex<float> >& actual, + float tolerance) { + EXPECT_EQ(expected.num_rows(), actual.num_rows()); + EXPECT_EQ(expected.num_columns(), actual.num_columns()); + + const complex<float>* const* expected_elements = expected.elements(); + const complex<float>* const* actual_elements = actual.elements(); + for (int i = 0; i < expected.num_rows(); ++i) { + for (int j = 0; j < expected.num_columns(); ++j) { + EXPECT_NEAR(expected_elements[i][j].real(), + actual_elements[i][j].real(), + tolerance); + EXPECT_NEAR(expected_elements[i][j].imag(), + actual_elements[i][j].imag(), + tolerance); + } + } + } +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_ diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc new file mode 100644 index 0000000..da7ad0d --- /dev/null +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc @@ -0,0 +1,516 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
 */ + +#define _USE_MATH_DEFINES + +#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h" + +#include <algorithm> +#include <cmath> +#include <numeric> +#include <vector> + +#include "webrtc/base/arraysize.h" +#include "webrtc/common_audio/window_generator.h" +#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h" + +namespace webrtc { +namespace { + +// Alpha for the Kaiser Bessel Derived window. +const float kKbdAlpha = 1.5f; + +// The minimum value a post-processing mask can take. +const float kMaskMinimum = 0.01f; + +const float kSpeedOfSoundMeterSeconds = 343; + +// For both target and interference angles, PI / 2 is perpendicular to the +// microphone array, facing forwards. The positive direction goes +// counterclockwise. +// The angle at which we amplify sound. +const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f; + +// The angle at which we suppress sound. Suppression is symmetric around PI / 2 +// radians, so sound is suppressed at both +|kInterfAngleRadians| and +// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should +// suppress sound coming from close angles as well. +const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f; + +// When calculating the interference covariance matrix, this is the weight for +// the weighted average between the uniform covariance matrix and the angled +// covariance matrix. +// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance) +const float kBalance = 0.4f; + +const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f; + +// TODO(claguna): need comment here. +const float kBeamwidthConstant = 0.00002f; + +// Alpha coefficients for mask smoothing. +const float kMaskTimeSmoothAlpha = 0.2f; +const float kMaskFrequencySmoothAlpha = 0.6f; + +// The average mask is computed from masks in this mid-frequency range. If these +// ranges are changed |kMaskQuantile| might need to be adjusted. +const int kLowMeanStartHz = 200; +const int kLowMeanEndHz = 400; + +const int kHighMeanStartHz = 3000; +const int kHighMeanEndHz = 5000; + +// Quantile of mask values which is used to estimate target presence. +const float kMaskQuantile = 0.7f; +// Mask threshold over which the data is considered signal and not interference. +const float kMaskTargetThreshold = 0.3f; +// Time in seconds after which the data is considered interference if the mask +// does not pass |kMaskTargetThreshold|. +const float kHoldTargetSeconds = 0.25f; + +// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is +// used; to accomplish this, we compute both multiplications in the same loop. +// The returned norm is clamped to be non-negative. +float Norm(const ComplexMatrix<float>& mat, + const ComplexMatrix<float>& norm_mat) { + RTC_CHECK_EQ(norm_mat.num_rows(), 1); + RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows()); + RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns()); + + complex<float> first_product = complex<float>(0.f, 0.f); + complex<float> second_product = complex<float>(0.f, 0.f); + + const complex<float>* const* mat_els = mat.elements(); + const complex<float>* const* norm_mat_els = norm_mat.elements(); + + for (int i = 0; i < norm_mat.num_columns(); ++i) { + for (int j = 0; j < norm_mat.num_columns(); ++j) { + first_product += conj(norm_mat_els[0][j]) * mat_els[j][i]; + } + second_product += first_product * norm_mat_els[0][i]; + first_product = 0.f; + } + return std::max(second_product.real(), 0.f); +} + +// Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
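Norm() above evaluates the Hermitian quadratic form w·R·w* (a 1 x 1 result) without allocating temporaries; the single pass interleaves the row-vector-times-matrix product with the final dot product, and the conjugate dot product declared next is its vector-only special case. A naive reference version, written for clarity rather than speed (a hypothetical helper, not part of the patch), makes the math explicit:

```cpp
#include <algorithm>
#include <complex>

#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

// Reference for Norm(): sum over i, j of conj(w_i) * R[i][j] * w_j, where
// |w| is a 1 x N row vector and |R| an N x N covariance matrix. For a
// Hermitian positive semidefinite R the result is real and non-negative;
// taking the real part and clamping at zero absorbs rounding error, matching
// the clamp in Norm() above.
float NormReference(const webrtc::ComplexMatrix<float>& R,
                    const webrtc::ComplexMatrix<float>& w) {
  const std::complex<float>* const* r = R.elements();
  const std::complex<float>* const* wv = w.elements();
  std::complex<float> acc(0.f, 0.f);
  for (int i = 0; i < R.num_rows(); ++i) {
    for (int j = 0; j < R.num_columns(); ++j) {
      acc += std::conj(wv[0][i]) * r[i][j] * wv[0][j];
    }
  }
  return std::max(acc.real(), 0.f);
}
```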
+complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs, + const ComplexMatrix<float>& rhs) { + RTC_CHECK_EQ(lhs.num_rows(), 1); + RTC_CHECK_EQ(rhs.num_rows(), 1); + RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns()); + + const complex<float>* const* lhs_elements = lhs.elements(); + const complex<float>* const* rhs_elements = rhs.elements(); + + complex<float> result = complex<float>(0.f, 0.f); + for (int i = 0; i < lhs.num_columns(); ++i) { + result += conj(lhs_elements[0][i]) * rhs_elements[0][i]; + } + + return result; +} + +// Works for positive numbers only. +size_t Round(float x) { + return static_cast<size_t>(std::floor(x + 0.5f)); +} + +// Calculates the sum of absolute values of a complex matrix. +float SumAbs(const ComplexMatrix<float>& mat) { + float sum_abs = 0.f; + const complex<float>* const* mat_els = mat.elements(); + for (int i = 0; i < mat.num_rows(); ++i) { + for (int j = 0; j < mat.num_columns(); ++j) { + sum_abs += std::abs(mat_els[i][j]); + } + } + return sum_abs; +} + +// Calculates the sum of squares of a complex matrix. +float SumSquares(const ComplexMatrix<float>& mat) { + float sum_squares = 0.f; + const complex<float>* const* mat_els = mat.elements(); + for (int i = 0; i < mat.num_rows(); ++i) { + for (int j = 0; j < mat.num_columns(); ++j) { + float abs_value = std::abs(mat_els[i][j]); + sum_squares += abs_value * abs_value; + } + } + return sum_squares; +} + +// Does |out| = |in|.' * conj(|in|) for row vector |in|. +void TransposedConjugatedProduct(const ComplexMatrix<float>& in, + ComplexMatrix<float>* out) { + RTC_CHECK_EQ(in.num_rows(), 1); + RTC_CHECK_EQ(out->num_rows(), in.num_columns()); + RTC_CHECK_EQ(out->num_columns(), in.num_columns()); + const complex<float>* in_elements = in.elements()[0]; + complex<float>* const* out_elements = out->elements(); + for (int i = 0; i < out->num_rows(); ++i) { + for (int j = 0; j < out->num_columns(); ++j) { + out_elements[i][j] = in_elements[i] * conj(in_elements[j]); + } + } +} + +std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) { + for (int dim = 0; dim < 3; ++dim) { + float center = 0.f; + for (size_t i = 0; i < array_geometry.size(); ++i) { + center += array_geometry[i].c[dim]; + } + center /= array_geometry.size(); + for (size_t i = 0; i < array_geometry.size(); ++i) { + array_geometry[i].c[dim] -= center; + } + } + return array_geometry; +} + +} // namespace + +// static +const size_t NonlinearBeamformer::kNumFreqBins; + +NonlinearBeamformer::NonlinearBeamformer( + const std::vector<Point>& array_geometry) + : num_input_channels_(array_geometry.size()), + array_geometry_(GetCenteredArray(array_geometry)) { + WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_); +} + +void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) { + chunk_length_ = + static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms)); + sample_rate_hz_ = sample_rate_hz; + low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_); + low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_); + high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_); + high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_); + // These bin indexes determine the regions over which a mean is taken. This + // is applied as a constant value over the adjacent end "frequency correction" + // regions.
+ // + // low_mean_start_bin_ high_mean_start_bin_ + // v v constant + // |----------------|--------|----------------|-------|----------------| + // constant ^ ^ + // low_mean_end_bin_ high_mean_end_bin_ + // + RTC_DCHECK_GT(low_mean_start_bin_, 0U); + RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_); + RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_); + RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_); + RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1); + + high_pass_postfilter_mask_ = 1.f; + is_target_present_ = false; + hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize; + interference_blocks_count_ = hold_target_blocks_; + + + lapped_transform_.reset(new LappedTransform(num_input_channels_, + 1, + chunk_length_, + window_, + kFftSize, + kFftSize / 2, + this)); + for (size_t i = 0; i < kNumFreqBins; ++i) { + time_smooth_mask_[i] = 1.f; + final_mask_[i] = 1.f; + float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_; + wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds; + mask_thresholds_[i] = num_input_channels_ * num_input_channels_ * + kBeamwidthConstant * wave_numbers_[i] * + wave_numbers_[i]; + } + + // Initialize all nonadaptive values before looping through the frames. + InitDelaySumMasks(); + InitTargetCovMats(); + InitInterfCovMats(); + + for (size_t i = 0; i < kNumFreqBins; ++i) { + rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]); + rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]); + reflected_rpsiws_[i] = + Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]); + } +} + +void NonlinearBeamformer::InitDelaySumMasks() { + for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { + delay_sum_masks_[f_ix].Resize(1, num_input_channels_); + CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix, + kFftSize, + sample_rate_hz_, + kSpeedOfSoundMeterSeconds, + array_geometry_, + kTargetAngleRadians, + &delay_sum_masks_[f_ix]); + + complex_f norm_factor = sqrt( + ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix])); + delay_sum_masks_[f_ix].Scale(1.f / norm_factor); + normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]); + normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs( + normalized_delay_sum_masks_[f_ix])); + } +} + +void NonlinearBeamformer::InitTargetCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); + TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]); + complex_f normalization_factor = target_cov_mats_[i].Trace(); + target_cov_mats_[i].Scale(1.f / normalization_factor); + } +} + +void NonlinearBeamformer::InitInterfCovMats() { + for (size_t i = 0; i < kNumFreqBins; ++i) { + interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_); + ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_); + ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_); + + CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i], + array_geometry_, + &uniform_cov_mat); + + CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds, + kInterfAngleRadians, + i, + kFftSize, + kNumFreqBins, + sample_rate_hz_, + array_geometry_, + &angled_cov_mat); + // Normalize matrices before averaging them.
+ complex_f normalization_factor = uniform_cov_mat.Trace(); + uniform_cov_mat.Scale(1.f / normalization_factor); + normalization_factor = angled_cov_mat.Trace(); + angled_cov_mat.Scale(1.f / normalization_factor); + + // Average matrices. + uniform_cov_mat.Scale(1 - kBalance); + angled_cov_mat.Scale(kBalance); + interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat); + reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]); + } +} + +void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input, + ChannelBuffer<float>* output) { + RTC_DCHECK_EQ(input.num_channels(), num_input_channels_); + RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_); + + float old_high_pass_mask = high_pass_postfilter_mask_; + lapped_transform_->ProcessChunk(input.channels(0), output->channels(0)); + // Ramp up/down for smoothing. 1 mask per 10ms results in audible + // discontinuities. + const float ramp_increment = + (high_pass_postfilter_mask_ - old_high_pass_mask) / + input.num_frames_per_band(); + // Apply delay and sum and post-filter in the time domain. WARNING: only works + // because delay-and-sum is not frequency dependent. + for (size_t i = 1; i < input.num_bands(); ++i) { + float smoothed_mask = old_high_pass_mask; + for (size_t j = 0; j < input.num_frames_per_band(); ++j) { + smoothed_mask += ramp_increment; + + // Applying the delay and sum (at zero degrees, this is equivalent to + // averaging). + float sum = 0.f; + for (int k = 0; k < input.num_channels(); ++k) { + sum += input.channels(i)[k][j]; + } + output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask; + } + } +} + +bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) { + // If more than half-beamwidth degrees away from the beam's center, + // you are out of the beam. + return fabs(spherical_point.azimuth() - kTargetAngleRadians) < + kHalfBeamWidthRadians; +} + +void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input, + int num_input_channels, + size_t num_freq_bins, + int num_output_channels, + complex_f* const* output) { + RTC_CHECK_EQ(num_freq_bins, kNumFreqBins); + RTC_CHECK_EQ(num_input_channels, num_input_channels_); + RTC_CHECK_EQ(num_output_channels, 1); + + // Calculating the post-filter masks. Note that we need two for each + // frequency bin to account for the positive and negative interferer + // angle.
+ for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { + eig_m_.CopyFromColumn(input, i, num_input_channels_); + float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_)); + if (eig_m_norm_factor != 0.f) { + eig_m_.Scale(1.f / eig_m_norm_factor); + } + + float rxim = Norm(target_cov_mats_[i], eig_m_); + float ratio_rxiw_rxim = 0.f; + if (rxim > 0.f) { + ratio_rxiw_rxim = rxiws_[i] / rxim; + } + + complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_)); + rmw *= rmw; + float rmw_r = rmw.real(); + + new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i], + rpsiws_[i], + ratio_rxiw_rxim, + rmw_r, + mask_thresholds_[i]); + + new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i], + reflected_rpsiws_[i], + ratio_rxiw_rxim, + rmw_r, + mask_thresholds_[i]); + } + + ApplyMaskTimeSmoothing(); + EstimateTargetPresence(); + ApplyLowFrequencyCorrection(); + ApplyHighFrequencyCorrection(); + ApplyMaskFrequencySmoothing(); + ApplyMasks(input, output); +} + +float NonlinearBeamformer::CalculatePostfilterMask( + const ComplexMatrixF& interf_cov_mat, + float rpsiw, + float ratio_rxiw_rxim, + float rmw_r, + float mask_threshold) { + float rpsim = Norm(interf_cov_mat, eig_m_); + + // Find lambda. + float ratio = 0.f; + if (rpsim > 0.f) { + ratio = rpsiw / rpsim; + } + float numerator = rmw_r - ratio; + float denominator = ratio_rxiw_rxim - ratio; + + float mask = 1.f; + if (denominator > mask_threshold) { + float lambda = numerator / denominator; + mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum); + } + return mask; +} + +void NonlinearBeamformer::ApplyMasks(const complex_f* const* input, + complex_f* const* output) { + complex_f* output_channel = output[0]; + for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) { + output_channel[f_ix] = complex_f(0.f, 0.f); + + const complex_f* delay_sum_mask_els = + normalized_delay_sum_masks_[f_ix].elements()[0]; + for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) { + output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix]; + } + + output_channel[f_ix] *= final_mask_[f_ix]; + } +} + +// Smooth new_mask_ into time_smooth_mask_. +void NonlinearBeamformer::ApplyMaskTimeSmoothing() { + for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) { + time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] + + (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i]; + } +} + +// Copy time_smooth_mask_ to final_mask_ and smooth over frequency. +void NonlinearBeamformer::ApplyMaskFrequencySmoothing() { + // Smooth over frequency in both directions. The "frequency correction" + // regions have constant value, but we enter them to smooth over the jump + // that exists at the boundary. However, this does mean when smoothing "away" + // from the region that we only need to use the last element. 
+ // + // Upward smoothing: + // low_mean_start_bin_ + // v + // |------|------------|------| + // ^------------------>^ + // + // Downward smoothing: + // high_mean_end_bin_ + // v + // |------|------------|------| + // ^<------------------^ + std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_); + for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) { + final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] + + (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1]; + } + for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) { + final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] + + (1 - kMaskFrequencySmoothAlpha) * final_mask_[i]; + } +} + +// Apply low frequency correction to time_smooth_mask_. +void NonlinearBeamformer::ApplyLowFrequencyCorrection() { + const float low_frequency_mask = + MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1); + std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_, + low_frequency_mask); +} + +// Apply high frequency correction to time_smooth_mask_. Update +// high_pass_postfilter_mask_ to use for the high frequency time-domain bands. +void NonlinearBeamformer::ApplyHighFrequencyCorrection() { + high_pass_postfilter_mask_ = + MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1); + std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1, + time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_); +} + +// Compute mean over the given range of time_smooth_mask_, [first, last). +float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) { + RTC_DCHECK_GT(last, first); + const float sum = std::accumulate(time_smooth_mask_ + first, + time_smooth_mask_ + last, 0.f); + return sum / (last - first); +} + +void NonlinearBeamformer::EstimateTargetPresence() { + const size_t quantile = static_cast<size_t>( + (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile + + low_mean_start_bin_); + std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile, + new_mask_ + high_mean_end_bin_ + 1); + if (new_mask_[quantile] > kMaskTargetThreshold) { + is_target_present_ = true; + interference_blocks_count_ = 0; + } else { + is_target_present_ = interference_blocks_count_++ < hold_target_blocks_; + } +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h new file mode 100644 index 0000000..46c68bf --- /dev/null +++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
 */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ + +#include <vector> + +#include "webrtc/common_audio/lapped_transform.h" +#include "webrtc/common_audio/channel_buffer.h" +#include "webrtc/modules/audio_processing/beamformer/beamformer.h" +#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h" + +namespace webrtc { + +// Enhances sound sources coming directly in front of a uniform linear array +// and suppresses sound sources coming from all other directions. Operates on +// multichannel signals and produces single-channel output. +// +// The implemented nonlinear postfilter algorithm is taken from "A Robust +// Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn. +// +// TODO(aluebs): Target angle assumed to be 0. Parameterize target angle. +class NonlinearBeamformer + : public Beamformer<float>, + public LappedTransform::Callback { + public: + // At the moment it only accepts uniform linear microphone arrays. Using the + // first microphone as a reference position [0, 0, 0] is a natural choice. + explicit NonlinearBeamformer(const std::vector<Point>& array_geometry); + + // Sample rate corresponds to the lower band. + // Needs to be called before the NonlinearBeamformer can be used. + void Initialize(int chunk_size_ms, int sample_rate_hz) override; + + // Process one time-domain chunk of audio. The audio is expected to be split + // into frequency bands inside the ChannelBuffer. The number of frames and + // channels must correspond to the constructor parameters. The same + // ChannelBuffer can be passed in as |input| and |output|. + void ProcessChunk(const ChannelBuffer<float>& input, + ChannelBuffer<float>* output) override; + + bool IsInBeam(const SphericalPointf& spherical_point) override; + + // After processing each block |is_target_present_| is set to true if the + // target signal is present and to false otherwise. This method can be called + // to know if the data is target signal or interference and process it + // accordingly. + bool is_target_present() override { return is_target_present_; } + + protected: + // Process one frequency-domain block of audio. This is where the fun + // happens. Implements LappedTransform::Callback. + void ProcessAudioBlock(const complex<float>* const* input, + int num_input_channels, + size_t num_freq_bins, + int num_output_channels, + complex<float>* const* output) override; + + private: + typedef Matrix<float> MatrixF; + typedef ComplexMatrix<float> ComplexMatrixF; + typedef complex<float> complex_f; + + void InitDelaySumMasks(); + void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle. + void InitInterfCovMats(); + + // An implementation of equation 18, which calculates postfilter masks that, + // when applied, minimize the mean-square error of our estimation of the + // desired signal. A sub-task is to calculate lambda, which is solved via + // equation 13. + float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat, + float rpsiw, + float ratio_rxiw_rxim, + float rmxi_r, + float mask_threshold); + + // Prevents the postfilter masks from degenerating too quickly (a cause of + // musical noise). + void ApplyMaskTimeSmoothing(); + void ApplyMaskFrequencySmoothing(); + + // The postfilter masks are unreliable at low frequencies, so a better mask + // is calculated there by averaging mid-low frequency values. + void ApplyLowFrequencyCorrection(); + + // Postfilter masks are also unreliable at high frequencies.
Average mid-high + // frequency masks to calculate a single mask per block which can be applied + // in the time-domain. Further, we average these block-masks over a chunk, + // resulting in one postfilter mask per audio chunk. This allows us to skip + // both transforming and blocking the high-frequency signal. + void ApplyHighFrequencyCorrection(); + + // Compute the means needed for the above frequency correction. + float MaskRangeMean(size_t start_bin, size_t end_bin); + + // Applies both sets of masks to |input| and stores the result in |output|. + void ApplyMasks(const complex_f* const* input, complex_f* const* output); + + void EstimateTargetPresence(); + + static const size_t kFftSize = 256; + static const size_t kNumFreqBins = kFftSize / 2 + 1; + + // Deals with the FFT transform and blocking. + size_t chunk_length_; + rtc::scoped_ptr<LappedTransform> lapped_transform_; + float window_[kFftSize]; + + // Parameters exposed to the user. + const int num_input_channels_; + int sample_rate_hz_; + + const std::vector<Point> array_geometry_; + + // Calculated based on user-input and constants in the .cc file. + size_t low_mean_start_bin_; + size_t low_mean_end_bin_; + size_t high_mean_start_bin_; + size_t high_mean_end_bin_; + + // Quickly varying mask updated every block. + float new_mask_[kNumFreqBins]; + // Time smoothed mask. + float time_smooth_mask_[kNumFreqBins]; + // Time and frequency smoothed mask. + float final_mask_[kNumFreqBins]; + + // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|. + ComplexMatrixF delay_sum_masks_[kNumFreqBins]; + ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins]; + + // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x + // |num_input_channels_|. + ComplexMatrixF target_cov_mats_[kNumFreqBins]; + + // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x + // |num_input_channels_|. + ComplexMatrixF interf_cov_mats_[kNumFreqBins]; + ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins]; + + // Of length |kNumFreqBins|. + float mask_thresholds_[kNumFreqBins]; + float wave_numbers_[kNumFreqBins]; + + // Preallocated for ProcessAudioBlock(). + // Of length |kNumFreqBins|. + float rxiws_[kNumFreqBins]; + float rpsiws_[kNumFreqBins]; + float reflected_rpsiws_[kNumFreqBins]; + + // The microphone normalization factor. + ComplexMatrixF eig_m_; + + // For processing the high-frequency input signal. + float high_pass_postfilter_mask_; + + // True when the target signal is present. + bool is_target_present_; + // Number of blocks after which the data is considered interference if the + // mask does not pass |kMaskTargetThreshold|. + size_t hold_target_blocks_; + // Number of blocks since the last mask that passed |kMaskTargetThreshold|. + size_t interference_blocks_count_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_ diff --git a/webrtc/modules/audio_processing/common.h b/webrtc/modules/audio_processing/common.h new file mode 100644 index 0000000..ed8a054 --- /dev/null +++ b/webrtc/modules/audio_processing/common.h @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree.
 */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_ + +#include <assert.h> + +#include "webrtc/modules/audio_processing/include/audio_processing.h" + +namespace webrtc { + +static inline int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) { + switch (layout) { + case AudioProcessing::kMono: + case AudioProcessing::kMonoAndKeyboard: + return 1; + case AudioProcessing::kStereo: + case AudioProcessing::kStereoAndKeyboard: + return 2; + } + assert(false); + return -1; +} + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_ diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.cc b/webrtc/modules/audio_processing/echo_cancellation_impl.cc index 61940b1..567d9a4 100644 --- a/webrtc/modules/audio_processing/echo_cancellation_impl.cc +++ b/webrtc/modules/audio_processing/echo_cancellation_impl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,23 +8,24 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "echo_cancellation_impl.h" +#include "webrtc/modules/audio_processing/echo_cancellation_impl.h" -#include <cassert> +#include <assert.h> #include <string.h> -#include "critical_section_wrapper.h" -#include "echo_cancellation.h" - -#include "audio_processing_impl.h" -#include "audio_buffer.h" +extern "C" { +#include "webrtc/modules/audio_processing/aec/aec_core.h" +} +#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" namespace webrtc { typedef void Handle; namespace { -WebRtc_Word16 MapSetting(EchoCancellation::SuppressionLevel level) { +int16_t MapSetting(EchoCancellation::SuppressionLevel level) { switch (level) { case EchoCancellation::kLowSuppression: return kAecNlpConservative; @@ -32,22 +33,19 @@ WebRtc_Word16 MapSetting(EchoCancellation::SuppressionLevel level) { return kAecNlpModerate; case EchoCancellation::kHighSuppression: return kAecNlpAggressive; - default: - return -1; } + assert(false); + return -1; } -int MapError(int err) { +AudioProcessing::Error MapError(int err) { switch (err) { case AEC_UNSUPPORTED_FUNCTION_ERROR: return AudioProcessing::kUnsupportedFunctionError; - break; case AEC_BAD_PARAMETER_ERROR: return AudioProcessing::kBadParameterError; - break; case AEC_BAD_PARAMETER_WARNING: return AudioProcessing::kBadStreamParameterWarning; - break; default: // AEC_UNSPECIFIED_ERROR // AEC_UNINITIALIZED_ERROR @@ -57,17 +55,21 @@ int MapError(int err) { } } // namespace -EchoCancellationImpl::EchoCancellationImpl(const AudioProcessingImpl* apm) - : ProcessingComponent(apm), - apm_(apm), - drift_compensation_enabled_(false), - metrics_enabled_(false), - suppression_level_(kModerateSuppression), - device_sample_rate_hz_(48000), - stream_drift_samples_(0), - was_stream_drift_set_(false), - stream_has_echo_(false), - delay_logging_enabled_(false) {} +EchoCancellationImpl::EchoCancellationImpl(const AudioProcessing* apm, + CriticalSectionWrapper* crit) + : ProcessingComponent(), + apm_(apm), + crit_(crit), + drift_compensation_enabled_(false), + metrics_enabled_(false), + suppression_level_(kModerateSuppression), + stream_drift_samples_(0), + was_stream_drift_set_(false), + stream_has_echo_(false), +
delay_logging_enabled_(false), + extended_filter_enabled_(false), + delay_agnostic_enabled_(false) { +} EchoCancellationImpl::~EchoCancellationImpl() {} @@ -76,7 +78,7 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) { return apm_->kNoError; } - assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == apm_->num_reverse_channels()); int err = apm_->kNoError; @@ -88,8 +90,8 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) { Handle* my_handle = static_cast<Handle*>(handle(handle_index)); err = WebRtcAec_BufferFarend( my_handle, - audio->low_pass_split_data(j), - static_cast<WebRtc_Word16>(audio->samples_per_split_channel())); + audio->split_bands_const_f(j)[kBand0To8kHz], + audio->num_frames_per_band()); if (err != apm_->kNoError) { return GetHandleError(my_handle); // TODO(ajm): warning possible? @@ -115,7 +117,7 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) { return apm_->kStreamParameterNotSetError; } - assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == apm_->num_output_channels()); int err = apm_->kNoError; @@ -128,11 +130,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) { Handle* my_handle = handle(handle_index); err = WebRtcAec_Process( my_handle, - audio->low_pass_split_data(i), - audio->high_pass_split_data(i), - audio->low_pass_split_data(i), - audio->high_pass_split_data(i), - static_cast<WebRtc_Word16>(audio->samples_per_split_channel()), + audio->split_bands_const_f(i), + audio->num_bands(), + audio->split_bands_f(i), + audio->num_frames_per_band(), apm_->stream_delay_ms(), stream_drift_samples_); @@ -144,7 +145,7 @@ } } - WebRtc_Word16 status = 0; + int status = 0; err = WebRtcAec_get_echo_status(my_handle, &status); if (err != apm_->kNoError) { return GetHandleError(my_handle); @@ -163,7 +164,7 @@ } int EchoCancellationImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); // Ensure AEC and AECM are not both enabled.
if (enable && apm_->echo_control_mobile()->is_enabled()) { return apm_->kBadParameterError; @@ -177,7 +178,7 @@ bool EchoCancellationImpl::is_enabled() const { } int EchoCancellationImpl::set_suppression_level(SuppressionLevel level) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); if (MapSetting(level) == -1) { return apm_->kBadParameterError; } @@ -192,7 +193,7 @@ EchoCancellation::SuppressionLevel EchoCancellationImpl::suppression_level() } int EchoCancellationImpl::enable_drift_compensation(bool enable) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); drift_compensation_enabled_ = enable; return Configure(); } @@ -201,24 +202,9 @@ bool EchoCancellationImpl::is_drift_compensation_enabled() const { return drift_compensation_enabled_; } -int EchoCancellationImpl::set_device_sample_rate_hz(int rate) { - CriticalSectionScoped crit_scoped(*apm_->crit()); - if (rate < 8000 || rate > 96000) { - return apm_->kBadParameterError; - } - - device_sample_rate_hz_ = rate; - return Initialize(); -} - -int EchoCancellationImpl::device_sample_rate_hz() const { - return device_sample_rate_hz_; -} - -int EchoCancellationImpl::set_stream_drift_samples(int drift) { +void EchoCancellationImpl::set_stream_drift_samples(int drift) { was_stream_drift_set_ = true; stream_drift_samples_ = drift; - return apm_->kNoError; } int EchoCancellationImpl::stream_drift_samples() const { @@ -226,7 +212,7 @@ int EchoCancellationImpl::stream_drift_samples() const { } int EchoCancellationImpl::enable_metrics(bool enable) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); metrics_enabled_ = enable; return Configure(); } @@ -238,7 +224,7 @@ bool EchoCancellationImpl::are_metrics_enabled() const { // TODO(ajm): we currently just use the metrics from the first AEC. Think more // about the best way to extend this to multi-channel. int EchoCancellationImpl::GetMetrics(Metrics* metrics) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); if (metrics == NULL) { return apm_->kNullPointerError; } @@ -285,7 +271,7 @@ bool EchoCancellationImpl::stream_has_echo() const { } int EchoCancellationImpl::enable_delay_logging(bool enable) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); delay_logging_enabled_ = enable; return Configure(); } @@ -294,9 +280,23 @@ bool EchoCancellationImpl::is_delay_logging_enabled() const { return delay_logging_enabled_; } +bool EchoCancellationImpl::is_delay_agnostic_enabled() const { + return delay_agnostic_enabled_; +} + +bool EchoCancellationImpl::is_extended_filter_enabled() const { + return extended_filter_enabled_; +} + // TODO(bjornv): How should we handle the multi-channel case?
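The delay-metrics API grows a third output below, |fraction_poor_delays|, alongside the existing median and standard deviation. A hypothetical caller tying this together with the new extended-filter and delay-agnostic switches (the Config keys mirror the Get<>() calls in SetExtraOptions() below; the function itself and its flow are illustrative, not from the patch):

```cpp
#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Sketch: turn on delay-agnostic AEC and the extended filter, then poll the
// delay metrics once the stream has been processed for a while.
void EnableRobustAecAndInspect(webrtc::AudioProcessing* apm) {
  webrtc::Config config;
  config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
  config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));
  apm->SetExtraOptions(config);

  int median = 0;
  int std_dev = 0;
  float fraction_poor_delays = 0.f;
  if (apm->echo_cancellation()->GetDelayMetrics(
          &median, &std_dev, &fraction_poor_delays) ==
      webrtc::AudioProcessing::kNoError) {
    // A high |fraction_poor_delays| suggests the reported device delay is
    // unreliable -- exactly the case delay-agnostic mode targets.
  }
}
```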
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + float fraction_poor_delays = 0; + return GetDelayMetrics(median, std, &fraction_poor_delays); +} + +int EchoCancellationImpl::GetDelayMetrics(int* median, int* std, + float* fraction_poor_delays) { + CriticalSectionScoped crit_scoped(crit_); if (median == NULL) { return apm_->kNullPointerError; } @@ -309,7 +309,7 @@ int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) { } Handle* my_handle = static_cast<Handle*>(handle(0)); - if (WebRtcAec_GetDelayMetrics(my_handle, median, std) != + if (WebRtcAec_GetDelayMetrics(my_handle, median, std, fraction_poor_delays) != apm_->kNoError) { return GetHandleError(my_handle); } @@ -317,47 +317,47 @@ return apm_->kNoError; } +struct AecCore* EchoCancellationImpl::aec_core() const { + CriticalSectionScoped crit_scoped(crit_); + if (!is_component_enabled()) { + return NULL; + } + Handle* my_handle = static_cast<Handle*>(handle(0)); + return WebRtcAec_aec_core(my_handle); +} + int EchoCancellationImpl::Initialize() { int err = ProcessingComponent::Initialize(); if (err != apm_->kNoError || !is_component_enabled()) { return err; } - was_stream_drift_set_ = false; - return apm_->kNoError; } -int EchoCancellationImpl::get_version(char* version, - int version_len_bytes) const { - if (WebRtcAec_get_version(version, version_len_bytes) != 0) { - return apm_->kBadParameterError; - } - - return apm_->kNoError; +void EchoCancellationImpl::SetExtraOptions(const Config& config) { + extended_filter_enabled_ = config.Get<ExtendedFilter>().enabled; + delay_agnostic_enabled_ = config.Get<DelayAgnostic>().enabled; + Configure(); } void* EchoCancellationImpl::CreateHandle() const { - Handle* handle = NULL; - if (WebRtcAec_Create(&handle) != apm_->kNoError) { - handle = NULL; - } else { - assert(handle != NULL); - } - - return handle; + return WebRtcAec_Create(); } -int EchoCancellationImpl::DestroyHandle(void* handle) const { +void EchoCancellationImpl::DestroyHandle(void* handle) const { assert(handle != NULL); - return WebRtcAec_Free(static_cast<Handle*>(handle)); + WebRtcAec_Free(static_cast<Handle*>(handle)); } int EchoCancellationImpl::InitializeHandle(void* handle) const { assert(handle != NULL); + // TODO(ajm): Drift compensation is disabled in practice. If restored, it + // should be managed internally and not depend on the hardware sample rate. + // For now, just hardcode a 48 kHz value. return WebRtcAec_Init(static_cast<Handle*>(handle), - apm_->sample_rate_hz(), - device_sample_rate_hz_); + apm_->proc_sample_rate_hz(), + 48000); } int EchoCancellationImpl::ConfigureHandle(void* handle) const { @@ -368,6 +368,12 @@ int EchoCancellationImpl::ConfigureHandle(void* handle) const { config.skewMode = drift_compensation_enabled_; config.delay_logging = delay_logging_enabled_; + WebRtcAec_enable_extended_filter( + WebRtcAec_aec_core(static_cast<Handle*>(handle)), + extended_filter_enabled_ ? 1 : 0); + WebRtcAec_enable_delay_agnostic( + WebRtcAec_aec_core(static_cast<Handle*>(handle)), + delay_agnostic_enabled_ ? 1 : 0); return WebRtcAec_set_config(static_cast<Handle*>(handle), config); } diff --git a/webrtc/modules/audio_processing/echo_cancellation_impl.h b/webrtc/modules/audio_processing/echo_cancellation_impl.h index a483a3a..070dcab 100644 --- a/webrtc/modules/audio_processing/echo_cancellation_impl.h +++ b/webrtc/modules/audio_processing/echo_cancellation_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,69 +8,79 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_ -#include "audio_processing.h" -#include "processing_component.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { -class AudioProcessingImpl; + class AudioBuffer; +class CriticalSectionWrapper; class EchoCancellationImpl : public EchoCancellation, public ProcessingComponent { public: - explicit EchoCancellationImpl(const AudioProcessingImpl* apm); + EchoCancellationImpl(const AudioProcessing* apm, + CriticalSectionWrapper* crit); virtual ~EchoCancellationImpl(); int ProcessRenderAudio(const AudioBuffer* audio); int ProcessCaptureAudio(AudioBuffer* audio); // EchoCancellation implementation. - virtual bool is_enabled() const; - virtual int device_sample_rate_hz() const; - virtual int stream_drift_samples() const; + bool is_enabled() const override; + int stream_drift_samples() const override; + SuppressionLevel suppression_level() const override; + bool is_drift_compensation_enabled() const override; // ProcessingComponent implementation. - virtual int Initialize(); - virtual int get_version(char* version, int version_len_bytes) const; + int Initialize() override; + void SetExtraOptions(const Config& config) override; + + bool is_delay_agnostic_enabled() const; + bool is_extended_filter_enabled() const; private: // EchoCancellation implementation. - virtual int Enable(bool enable); - virtual int enable_drift_compensation(bool enable); - virtual bool is_drift_compensation_enabled() const; - virtual int set_device_sample_rate_hz(int rate); - virtual int set_stream_drift_samples(int drift); - virtual int set_suppression_level(SuppressionLevel level); - virtual SuppressionLevel suppression_level() const; - virtual int enable_metrics(bool enable); - virtual bool are_metrics_enabled() const; - virtual bool stream_has_echo() const; - virtual int GetMetrics(Metrics* metrics); - virtual int enable_delay_logging(bool enable); - virtual bool is_delay_logging_enabled() const; - virtual int GetDelayMetrics(int* median, int* std); + int Enable(bool enable) override; + int enable_drift_compensation(bool enable) override; + void set_stream_drift_samples(int drift) override; + int set_suppression_level(SuppressionLevel level) override; + int enable_metrics(bool enable) override; + bool are_metrics_enabled() const override; + bool stream_has_echo() const override; + int GetMetrics(Metrics* metrics) override; + int enable_delay_logging(bool enable) override; + bool is_delay_logging_enabled() const override; + int GetDelayMetrics(int* median, int* std) override; + int GetDelayMetrics(int* median, + int* std, + float* fraction_poor_delays) override; + struct AecCore* aec_core() const override; // ProcessingComponent implementation. 
- virtual void* CreateHandle() const; - virtual int InitializeHandle(void* handle) const; - virtual int ConfigureHandle(void* handle) const; - virtual int DestroyHandle(void* handle) const; - virtual int num_handles_required() const; - virtual int GetHandleError(void* handle) const; + void* CreateHandle() const override; + int InitializeHandle(void* handle) const override; + int ConfigureHandle(void* handle) const override; + void DestroyHandle(void* handle) const override; + int num_handles_required() const override; + int GetHandleError(void* handle) const override; - const AudioProcessingImpl* apm_; + const AudioProcessing* apm_; + CriticalSectionWrapper* crit_; bool drift_compensation_enabled_; bool metrics_enabled_; SuppressionLevel suppression_level_; - int device_sample_rate_hz_; int stream_drift_samples_; bool was_stream_drift_set_; bool stream_has_echo_; bool delay_logging_enabled_; + bool extended_filter_enabled_; + bool delay_agnostic_enabled_; }; + } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_ diff --git a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc index ff15255..8d5ec9c 100644 --- a/webrtc/modules/audio_processing/echo_control_mobile_impl.cc +++ b/webrtc/modules/audio_processing/echo_control_mobile_impl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,23 +8,22 @@ * be found in the AUTHORS file in the root of the source tree. 
 */ -#include "echo_control_mobile_impl.h" +#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h" -#include <cassert> -#include <cstring> +#include <assert.h> +#include <string.h> -#include "critical_section_wrapper.h" -#include "echo_control_mobile.h" - -#include "audio_processing_impl.h" -#include "audio_buffer.h" +#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" +#include "webrtc/system_wrappers/interface/logging.h" namespace webrtc { typedef void Handle; namespace { -WebRtc_Word16 MapSetting(EchoControlMobile::RoutingMode mode) { +int16_t MapSetting(EchoControlMobile::RoutingMode mode) { switch (mode) { case EchoControlMobile::kQuietEarpieceOrHeadset: return 0; @@ -36,12 +35,12 @@ WebRtc_Word16 MapSetting(EchoControlMobile::RoutingMode mode) { return 3; case EchoControlMobile::kLoudSpeakerphone: return 4; - default: - return -1; } + assert(false); + return -1; } -int MapError(int err) { +AudioProcessing::Error MapError(int err) { switch (err) { case AECM_UNSUPPORTED_FUNCTION_ERROR: return AudioProcessing::kUnsupportedFunctionError; @@ -63,9 +62,11 @@ size_t EchoControlMobile::echo_path_size_bytes() { return WebRtcAecm_echo_path_size_bytes(); } -EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessingImpl* apm) - : ProcessingComponent(apm), +EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessing* apm, + CriticalSectionWrapper* crit) + : ProcessingComponent(), apm_(apm), + crit_(crit), routing_mode_(kSpeakerphone), comfort_noise_enabled_(true), external_echo_path_(NULL) {} @@ -82,7 +83,7 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) { return apm_->kNoError; } - assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == apm_->num_reverse_channels()); int err = apm_->kNoError; @@ -94,8 +95,8 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) { Handle* my_handle = static_cast<Handle*>(handle(handle_index)); err = WebRtcAecm_BufferFarend( my_handle, - audio->low_pass_split_data(j), - static_cast<WebRtc_Word16>(audio->samples_per_split_channel())); + audio->split_bands_const(j)[kBand0To8kHz], + audio->num_frames_per_band()); if (err != apm_->kNoError) { return GetHandleError(my_handle); // TODO(ajm): warning possible? @@ -117,7 +118,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) { return apm_->kStreamParameterNotSetError; } - assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == apm_->num_output_channels()); int err = apm_->kNoError; @@ -127,8 +128,8 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) { for (int i = 0; i < audio->num_channels(); i++) { // TODO(ajm): improve how this works, possibly inside AECM. // This is kind of hacked up.
-    WebRtc_Word16* noisy = audio->low_pass_reference(i);
-    WebRtc_Word16* clean = audio->low_pass_split_data(i);
+    const int16_t* noisy = audio->low_pass_reference(i);
+    const int16_t* clean = audio->split_bands_const(i)[kBand0To8kHz];
     if (noisy == NULL) {
       noisy = clean;
       clean = NULL;
@@ -139,8 +140,8 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
         my_handle,
         noisy,
         clean,
-        audio->low_pass_split_data(i),
-        static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
+        audio->split_bands(i)[kBand0To8kHz],
+        audio->num_frames_per_band(),
         apm_->stream_delay_ms());

     if (err != apm_->kNoError) {
@@ -155,7 +156,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
 }

 int EchoControlMobileImpl::Enable(bool enable) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   // Ensure AEC and AECM are not both enabled.
   if (enable && apm_->echo_cancellation()->is_enabled()) {
     return apm_->kBadParameterError;
@@ -169,7 +170,7 @@ bool EchoControlMobileImpl::is_enabled() const {
 }

 int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (MapSetting(mode) == -1) {
     return apm_->kBadParameterError;
   }
@@ -184,7 +185,7 @@ EchoControlMobile::RoutingMode EchoControlMobileImpl::routing_mode()
 }

 int EchoControlMobileImpl::enable_comfort_noise(bool enable) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   comfort_noise_enabled_ = enable;
   return Configure();
 }
@@ -195,7 +196,7 @@ bool EchoControlMobileImpl::is_comfort_noise_enabled() const {

 int EchoControlMobileImpl::SetEchoPath(const void* echo_path,
                                        size_t size_bytes) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (echo_path == NULL) {
     return apm_->kNullPointerError;
   }
@@ -214,7 +215,7 @@ int EchoControlMobileImpl::SetEchoPath(const void* echo_path,

 int EchoControlMobileImpl::GetEchoPath(void* echo_path,
                                        size_t size_bytes) const {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (echo_path == NULL) {
     return apm_->kNullPointerError;
   }
@@ -240,42 +241,26 @@ int EchoControlMobileImpl::Initialize() {
     return apm_->kNoError;
   }

-  if (apm_->sample_rate_hz() == apm_->kSampleRate32kHz) {
-    // AECM doesn't support super-wideband.
+  if (apm_->proc_sample_rate_hz() > apm_->kSampleRate16kHz) {
+    LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
     return apm_->kBadSampleRateError;
   }

   return ProcessingComponent::Initialize();
 }

-int EchoControlMobileImpl::get_version(char* version,
-                                       int version_len_bytes) const {
-  if (WebRtcAecm_get_version(version, version_len_bytes) != 0) {
-    return apm_->kBadParameterError;
-  }
-
-  return apm_->kNoError;
-}
-
 void* EchoControlMobileImpl::CreateHandle() const {
-  Handle* handle = NULL;
-  if (WebRtcAecm_Create(&handle) != apm_->kNoError) {
-    handle = NULL;
-  } else {
-    assert(handle != NULL);
-  }
-
-  return handle;
+  return WebRtcAecm_Create();
 }

-int EchoControlMobileImpl::DestroyHandle(void* handle) const {
-  return WebRtcAecm_Free(static_cast<Handle*>(handle));
+void EchoControlMobileImpl::DestroyHandle(void* handle) const {
+  WebRtcAecm_Free(static_cast<Handle*>(handle));
 }

 int EchoControlMobileImpl::InitializeHandle(void* handle) const {
   assert(handle != NULL);
   Handle* my_handle = static_cast<Handle*>(handle);
-  if (WebRtcAecm_Init(my_handle, apm_->sample_rate_hz()) != 0) {
+  if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) {
     return GetHandleError(my_handle);
   }
   if (external_echo_path_ != NULL) {
diff --git a/webrtc/modules/audio_processing/echo_control_mobile_impl.h b/webrtc/modules/audio_processing/echo_control_mobile_impl.h
index 6314e66..da70225 100644
--- a/webrtc/modules/audio_processing/echo_control_mobile_impl.h
+++ b/webrtc/modules/audio_processing/echo_control_mobile_impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  * Use of this source code is governed by a BSD-style license
  * that can be found in the LICENSE file in the root of the source
@@ -8,55 +8,57 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_

-#include "audio_processing.h"
-#include "processing_component.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/audio_processing/processing_component.h"

 namespace webrtc {

-class AudioProcessingImpl;
+
 class AudioBuffer;
+class CriticalSectionWrapper;

 class EchoControlMobileImpl : public EchoControlMobile,
                               public ProcessingComponent {
  public:
-  explicit EchoControlMobileImpl(const AudioProcessingImpl* apm);
+  EchoControlMobileImpl(const AudioProcessing* apm,
+                        CriticalSectionWrapper* crit);
   virtual ~EchoControlMobileImpl();

   int ProcessRenderAudio(const AudioBuffer* audio);
   int ProcessCaptureAudio(AudioBuffer* audio);

   // EchoControlMobile implementation.
-  virtual bool is_enabled() const;
+  bool is_enabled() const override;
+  RoutingMode routing_mode() const override;
+  bool is_comfort_noise_enabled() const override;

   // ProcessingComponent implementation.
-  virtual int Initialize();
-  virtual int get_version(char* version, int version_len_bytes) const;
+  int Initialize() override;

  private:
   // EchoControlMobile implementation.
- virtual int Enable(bool enable); - virtual int set_routing_mode(RoutingMode mode); - virtual RoutingMode routing_mode() const; - virtual int enable_comfort_noise(bool enable); - virtual bool is_comfort_noise_enabled() const; - virtual int SetEchoPath(const void* echo_path, size_t size_bytes); - virtual int GetEchoPath(void* echo_path, size_t size_bytes) const; + int Enable(bool enable) override; + int set_routing_mode(RoutingMode mode) override; + int enable_comfort_noise(bool enable) override; + int SetEchoPath(const void* echo_path, size_t size_bytes) override; + int GetEchoPath(void* echo_path, size_t size_bytes) const override; // ProcessingComponent implementation. - virtual void* CreateHandle() const; - virtual int InitializeHandle(void* handle) const; - virtual int ConfigureHandle(void* handle) const; - virtual int DestroyHandle(void* handle) const; - virtual int num_handles_required() const; - virtual int GetHandleError(void* handle) const; + void* CreateHandle() const override; + int InitializeHandle(void* handle) const override; + int ConfigureHandle(void* handle) const override; + void DestroyHandle(void* handle) const override; + int num_handles_required() const override; + int GetHandleError(void* handle) const override; - const AudioProcessingImpl* apm_; + const AudioProcessing* apm_; + CriticalSectionWrapper* crit_; RoutingMode routing_mode_; bool comfort_noise_enabled_; unsigned char* external_echo_path_; }; } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_ diff --git a/webrtc/modules/audio_processing/gain_control_impl.cc b/webrtc/modules/audio_processing/gain_control_impl.cc index dc3e565..8a3612d 100644 --- a/webrtc/modules/audio_processing/gain_control_impl.cc +++ b/webrtc/modules/audio_processing/gain_control_impl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,54 +8,38 @@ * be found in the AUTHORS file in the root of the source tree. 
 */

-#include "gain_control_impl.h"
+#include "webrtc/modules/audio_processing/gain_control_impl.h"

-#include <cassert>
+#include <assert.h>

-#include "critical_section_wrapper.h"
-#include "gain_control.h"
-
-#include "audio_processing_impl.h"
-#include "audio_buffer.h"
+#include "webrtc/modules/audio_processing/audio_buffer.h"
+#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"

 namespace webrtc {

 typedef void Handle;

-/*template <class T>
-class GainControlHandle : public ComponentHandle<T> {
-  public:
-    GainControlHandle();
-    virtual ~GainControlHandle();
-
-    virtual int Create();
-    virtual T* ptr() const;
-
-  private:
-    T* handle;
-};*/
-
 namespace {
-WebRtc_Word16 MapSetting(GainControl::Mode mode) {
+int16_t MapSetting(GainControl::Mode mode) {
   switch (mode) {
     case GainControl::kAdaptiveAnalog:
       return kAgcModeAdaptiveAnalog;
-      break;
     case GainControl::kAdaptiveDigital:
       return kAgcModeAdaptiveDigital;
-      break;
     case GainControl::kFixedDigital:
       return kAgcModeFixedDigital;
-      break;
-    default:
-      return -1;
   }
+  assert(false);
+  return -1;
 }
 }  // namespace

-GainControlImpl::GainControlImpl(const AudioProcessingImpl* apm)
-  : ProcessingComponent(apm),
+GainControlImpl::GainControlImpl(const AudioProcessing* apm,
+                                 CriticalSectionWrapper* crit)
+  : ProcessingComponent(),
     apm_(apm),
+    crit_(crit),
     mode_(kAdaptiveAnalog),
     minimum_capture_level_(0),
     maximum_capture_level_(255),
@@ -73,20 +57,14 @@ int GainControlImpl::ProcessRenderAudio(AudioBuffer* audio) {
     return apm_->kNoError;
   }

-  assert(audio->samples_per_split_channel() <= 160);
-
-  WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
-  if (audio->num_channels() > 1) {
-    audio->CopyAndMixLowPass(1);
-    mixed_data = audio->mixed_low_pass_data(0);
-  }
+  assert(audio->num_frames_per_band() <= 160);

   for (int i = 0; i < num_handles(); i++) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
     int err = WebRtcAgc_AddFarend(
         my_handle,
-        mixed_data,
-        static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
+        audio->mixed_low_pass_data(),
+        audio->num_frames_per_band());

     if (err != apm_->kNoError) {
       return GetHandleError(my_handle);
@@ -101,19 +79,20 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
     return apm_->kNoError;
   }

-  assert(audio->samples_per_split_channel() <= 160);
+  assert(audio->num_frames_per_band() <= 160);
   assert(audio->num_channels() == num_handles());

   int err = apm_->kNoError;

   if (mode_ == kAdaptiveAnalog) {
+    capture_levels_.assign(num_handles(), analog_capture_level_);
     for (int i = 0; i < num_handles(); i++) {
       Handle* my_handle = static_cast<Handle*>(handle(i));
       err = WebRtcAgc_AddMic(
           my_handle,
-          audio->low_pass_split_data(i),
-          audio->high_pass_split_data(i),
-          static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
+          audio->split_bands(i),
+          audio->num_bands(),
+          audio->num_frames_per_band());

       if (err != apm_->kNoError) {
         return GetHandleError(my_handle);
@@ -123,14 +102,13 @@
     for (int i = 0; i < num_handles(); i++) {
       Handle* my_handle = static_cast<Handle*>(handle(i));
-      WebRtc_Word32 capture_level_out = 0;
+      int32_t capture_level_out = 0;

       err = WebRtcAgc_VirtualMic(
           my_handle,
-          audio->low_pass_split_data(i),
-          audio->high_pass_split_data(i),
-          static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
-          //capture_levels_[i],
+          audio->split_bands(i),
+          audio->num_bands(),
+          audio->num_frames_per_band(),
           analog_capture_level_,
           &capture_level_out);

@@ -155,22 +133,21 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
     return apm_->kStreamParameterNotSetError;
   }

-  assert(audio->samples_per_split_channel() <= 160);
+  assert(audio->num_frames_per_band() <= 160);
   assert(audio->num_channels() == num_handles());

   stream_is_saturated_ = false;
   for (int i = 0; i < num_handles(); i++) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
-    WebRtc_Word32 capture_level_out = 0;
-    WebRtc_UWord8 saturation_warning = 0;
+    int32_t capture_level_out = 0;
+    uint8_t saturation_warning = 0;

     int err = WebRtcAgc_Process(
         my_handle,
-        audio->low_pass_split_data(i),
-        audio->high_pass_split_data(i),
-        static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
-        audio->low_pass_split_data(i),
-        audio->high_pass_split_data(i),
+        audio->split_bands_const(i),
+        audio->num_bands(),
+        audio->num_frames_per_band(),
+        audio->split_bands(i),
         capture_levels_[i],
         &capture_level_out,
         apm_->echo_cancellation()->stream_has_echo(),
@@ -202,17 +179,11 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {

 // TODO(ajm): ensure this is called under kAdaptiveAnalog.
 int GainControlImpl::set_stream_analog_level(int level) {
+  CriticalSectionScoped crit_scoped(crit_);
   was_analog_level_set_ = true;
   if (level < minimum_capture_level_ || level > maximum_capture_level_) {
     return apm_->kBadParameterError;
   }
-
-  if (mode_ == kAdaptiveAnalog) {
-    if (level != analog_capture_level_) {
-      // The analog level has been changed; update our internal levels.
-      capture_levels_.assign(num_handles(), level);
-    }
-  }
   analog_capture_level_ = level;

   return apm_->kNoError;
@@ -226,7 +197,7 @@ int GainControlImpl::stream_analog_level() {
 }

 int GainControlImpl::Enable(bool enable) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   return EnableComponent(enable);
 }

@@ -235,7 +206,7 @@ bool GainControlImpl::is_enabled() const {
 }

 int GainControlImpl::set_mode(Mode mode) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (MapSetting(mode) == -1) {
     return apm_->kBadParameterError;
   }
@@ -250,7 +221,7 @@ GainControl::Mode GainControlImpl::mode() const {

 int GainControlImpl::set_analog_level_limits(int minimum,
                                              int maximum) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (minimum < 0) {
     return apm_->kBadParameterError;
   }
@@ -282,7 +253,7 @@ bool GainControlImpl::stream_is_saturated() const {
 }

 int GainControlImpl::set_target_level_dbfs(int level) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (level > 31 || level < 0) {
     return apm_->kBadParameterError;
   }
@@ -296,7 +267,7 @@ int GainControlImpl::target_level_dbfs() const {
 }

 int GainControlImpl::set_compression_gain_db(int gain) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (gain < 0 || gain > 90) {
     return apm_->kBadParameterError;
   }
@@ -310,7 +281,7 @@ int GainControlImpl::compression_gain_db() const {
 }

 int GainControlImpl::enable_limiter(bool enable) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   limiter_enabled_ = enable;
   return Configure();
 }
@@ -325,35 +296,16 @@ int GainControlImpl::Initialize() {
     return err;
   }

-  analog_capture_level_ =
-    (maximum_capture_level_ - minimum_capture_level_) >> 1;
   capture_levels_.assign(num_handles(), analog_capture_level_);
-  was_analog_level_set_ = false;
-
-  return apm_->kNoError;
-}
-
-int GainControlImpl::get_version(char* version, int version_len_bytes) const {
-  if (WebRtcAgc_Version(version, version_len_bytes) != 0) {
-    return apm_->kBadParameterError;
-  }
-
   return apm_->kNoError;
 }

 void* GainControlImpl::CreateHandle() const {
-  Handle* handle = NULL;
-  if (WebRtcAgc_Create(&handle) != apm_->kNoError) {
-    handle = NULL;
-  } else {
-    assert(handle != NULL);
-  }
-
-  return handle;
+  return WebRtcAgc_Create();
 }

-int GainControlImpl::DestroyHandle(void* handle) const {
-  return WebRtcAgc_Free(static_cast<Handle*>(handle));
+void GainControlImpl::DestroyHandle(void* handle) const {
+  WebRtcAgc_Free(static_cast<Handle*>(handle));
 }

 int GainControlImpl::InitializeHandle(void* handle) const {
@@ -361,18 +313,18 @@ int GainControlImpl::InitializeHandle(void* handle) const {
                        minimum_capture_level_,
                        maximum_capture_level_,
                        MapSetting(mode_),
-                       apm_->sample_rate_hz());
+                       apm_->proc_sample_rate_hz());
 }

 int GainControlImpl::ConfigureHandle(void* handle) const {
-  WebRtcAgc_config_t config;
+  WebRtcAgcConfig config;
   // TODO(ajm): Flip the sign here (since AGC expects a positive value) if we
   // change the interface.
   //assert(target_level_dbfs_ <= 0);
-  //config.targetLevelDbfs = static_cast<WebRtc_Word16>(-target_level_dbfs_);
-  config.targetLevelDbfs = static_cast<WebRtc_Word16>(target_level_dbfs_);
+  //config.targetLevelDbfs = static_cast<int16_t>(-target_level_dbfs_);
+  config.targetLevelDbfs = static_cast<int16_t>(target_level_dbfs_);
   config.compressionGaindB =
-      static_cast<WebRtc_Word16>(compression_gain_db_);
+      static_cast<int16_t>(compression_gain_db_);
   config.limiterEnable = limiter_enabled_;

   return WebRtcAgc_set_config(static_cast<Handle*>(handle), config);
diff --git a/webrtc/modules/audio_processing/gain_control_impl.h b/webrtc/modules/audio_processing/gain_control_impl.h
index 7b6987e..f24d200 100644
--- a/webrtc/modules/audio_processing/gain_control_impl.h
+++ b/webrtc/modules/audio_processing/gain_control_impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  * Use of this source code is governed by a BSD-style license
  * that can be found in the LICENSE file in the root of the source
@@ -8,22 +8,24 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_

 #include <vector>

-#include "audio_processing.h"
-#include "processing_component.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/audio_processing/processing_component.h"

 namespace webrtc {

-class AudioProcessingImpl;
+
 class AudioBuffer;
+class CriticalSectionWrapper;

 class GainControlImpl : public GainControl,
                         public ProcessingComponent {
  public:
-  explicit GainControlImpl(const AudioProcessingImpl* apm);
+  GainControlImpl(const AudioProcessing* apm,
+                  CriticalSectionWrapper* crit);
   virtual ~GainControlImpl();

   int ProcessRenderAudio(AudioBuffer* audio);
@@ -31,39 +33,39 @@
   int ProcessCaptureAudio(AudioBuffer* audio);

   // ProcessingComponent implementation.
-  virtual int Initialize();
-  virtual int get_version(char* version, int version_len_bytes) const;
+  int Initialize() override;

   // GainControl implementation.
- virtual bool is_enabled() const; - virtual int stream_analog_level(); + bool is_enabled() const override; + int stream_analog_level() override; + bool is_limiter_enabled() const override; + Mode mode() const override; private: // GainControl implementation. - virtual int Enable(bool enable); - virtual int set_stream_analog_level(int level); - virtual int set_mode(Mode mode); - virtual Mode mode() const; - virtual int set_target_level_dbfs(int level); - virtual int target_level_dbfs() const; - virtual int set_compression_gain_db(int gain); - virtual int compression_gain_db() const; - virtual int enable_limiter(bool enable); - virtual bool is_limiter_enabled() const; - virtual int set_analog_level_limits(int minimum, int maximum); - virtual int analog_level_minimum() const; - virtual int analog_level_maximum() const; - virtual bool stream_is_saturated() const; + int Enable(bool enable) override; + int set_stream_analog_level(int level) override; + int set_mode(Mode mode) override; + int set_target_level_dbfs(int level) override; + int target_level_dbfs() const override; + int set_compression_gain_db(int gain) override; + int compression_gain_db() const override; + int enable_limiter(bool enable) override; + int set_analog_level_limits(int minimum, int maximum) override; + int analog_level_minimum() const override; + int analog_level_maximum() const override; + bool stream_is_saturated() const override; // ProcessingComponent implementation. - virtual void* CreateHandle() const; - virtual int InitializeHandle(void* handle) const; - virtual int ConfigureHandle(void* handle) const; - virtual int DestroyHandle(void* handle) const; - virtual int num_handles_required() const; - virtual int GetHandleError(void* handle) const; + void* CreateHandle() const override; + int InitializeHandle(void* handle) const override; + int ConfigureHandle(void* handle) const override; + void DestroyHandle(void* handle) const override; + int num_handles_required() const override; + int GetHandleError(void* handle) const override; - const AudioProcessingImpl* apm_; + const AudioProcessing* apm_; + CriticalSectionWrapper* crit_; Mode mode_; int minimum_capture_level_; int maximum_capture_level_; @@ -77,4 +79,4 @@ class GainControlImpl : public GainControl, }; } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_ diff --git a/webrtc/modules/audio_processing/high_pass_filter_impl.cc b/webrtc/modules/audio_processing/high_pass_filter_impl.cc index fa6d5d5..6302f13 100644 --- a/webrtc/modules/audio_processing/high_pass_filter_impl.cc +++ b/webrtc/modules/audio_processing/high_pass_filter_impl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,35 +8,34 @@ * be found in the AUTHORS file in the root of the source tree. 
 */

-#include "high_pass_filter_impl.h"
+#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"

-#include <cassert>
+#include <assert.h>

-#include "critical_section_wrapper.h"
-#include "typedefs.h"
-#include "signal_processing_library.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/audio_buffer.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#include "webrtc/typedefs.h"

-#include "audio_processing_impl.h"
-#include "audio_buffer.h"

 namespace webrtc {
 namespace {
-const WebRtc_Word16 kFilterCoefficients8kHz[5] =
+const int16_t kFilterCoefficients8kHz[5] =
     {3798, -7596, 3798, 7807, -3733};

-const WebRtc_Word16 kFilterCoefficients[5] =
+const int16_t kFilterCoefficients[5] =
     {4012, -8024, 4012, 8002, -3913};

 struct FilterState {
-  WebRtc_Word16 y[4];
-  WebRtc_Word16 x[2];
-  const WebRtc_Word16* ba;
+  int16_t y[4];
+  int16_t x[2];
+  const int16_t* ba;
 };

 int InitializeFilter(FilterState* hpf, int sample_rate_hz) {
   assert(hpf != NULL);

-  if (sample_rate_hz == AudioProcessingImpl::kSampleRate8kHz) {
+  if (sample_rate_hz == AudioProcessing::kSampleRate8kHz) {
     hpf->ba = kFilterCoefficients8kHz;
   } else {
     hpf->ba = kFilterCoefficients;
@@ -48,32 +47,28 @@ int InitializeFilter(FilterState* hpf, int sample_rate_hz) {
   return AudioProcessing::kNoError;
 }

-int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
+int Filter(FilterState* hpf, int16_t* data, size_t length) {
   assert(hpf != NULL);

-  WebRtc_Word32 tmp_int32 = 0;
-  WebRtc_Word16* y = hpf->y;
-  WebRtc_Word16* x = hpf->x;
-  const WebRtc_Word16* ba = hpf->ba;
+  int32_t tmp_int32 = 0;
+  int16_t* y = hpf->y;
+  int16_t* x = hpf->x;
+  const int16_t* ba = hpf->ba;

-  for (int i = 0; i < length; i++) {
+  for (size_t i = 0; i < length; i++) {
     //  y[i] = b[0] * x[i] + b[1] * x[i-1] + b[2] * x[i-2]
     //         + -a[1] * y[i-1] + -a[2] * y[i-2];

-    tmp_int32 =
-        WEBRTC_SPL_MUL_16_16(y[1], ba[3]);  // -a[1] * y[i-1] (low part)
-    tmp_int32 +=
-        WEBRTC_SPL_MUL_16_16(y[3], ba[4]);  // -a[2] * y[i-2] (low part)
+    tmp_int32 = y[1] * ba[3];   // -a[1] * y[i-1] (low part)
+    tmp_int32 += y[3] * ba[4];  // -a[2] * y[i-2] (low part)
     tmp_int32 = (tmp_int32 >> 15);
-    tmp_int32 +=
-        WEBRTC_SPL_MUL_16_16(y[0], ba[3]);  // -a[1] * y[i-1] (high part)
-    tmp_int32 +=
-        WEBRTC_SPL_MUL_16_16(y[2], ba[4]);  // -a[2] * y[i-2] (high part)
+    tmp_int32 += y[0] * ba[3];  // -a[1] * y[i-1] (high part)
+    tmp_int32 += y[2] * ba[4];  // -a[2] * y[i-2] (high part)
     tmp_int32 = (tmp_int32 << 1);

-    tmp_int32 += WEBRTC_SPL_MUL_16_16(data[i], ba[0]);  // b[0]*x[0]
-    tmp_int32 += WEBRTC_SPL_MUL_16_16(x[0], ba[1]);  // b[1]*x[i-1]
-    tmp_int32 += WEBRTC_SPL_MUL_16_16(x[1], ba[2]);  // b[2]*x[i-2]
+    tmp_int32 += data[i] * ba[0];  // b[0]*x[0]
+    tmp_int32 += x[0] * ba[1];     // b[1]*x[i-1]
+    tmp_int32 += x[1] * ba[2];     // b[2]*x[i-2]

     // Update state (input part)
     x[1] = x[0];
@@ -82,21 +77,20 @@ int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
     // Update state (filtered part)
     y[2] = y[0];
     y[3] = y[1];
-    y[0] = static_cast<WebRtc_Word16>(tmp_int32 >> 13);
-    y[1] = static_cast<WebRtc_Word16>((tmp_int32 -
-        WEBRTC_SPL_LSHIFT_W32(static_cast<WebRtc_Word32>(y[0]), 13)) << 2);
+    y[0] = static_cast<int16_t>(tmp_int32 >> 13);
+    y[1] = static_cast<int16_t>(
+        (tmp_int32 - (static_cast<int32_t>(y[0]) << 13)) << 2);

     // Rounding in Q12, i.e. add 2^11
     tmp_int32 += 2048;

     // Saturate (to 2^27) so that the HP filtered signal does not overflow
-    tmp_int32 = WEBRTC_SPL_SAT(static_cast<WebRtc_Word32>(134217727),
+    tmp_int32 = WEBRTC_SPL_SAT(static_cast<int32_t>(134217727),
                                tmp_int32,
-                               static_cast<WebRtc_Word32>(-134217728));
-
-    // Convert back to Q0 and use rounding
-    data[i] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp_int32, 12);
+                               static_cast<int32_t>(-134217728));

+    // Convert back to Q0 and use rounding.
+    data[i] = (int16_t)(tmp_int32 >> 12);
   }

   return AudioProcessing::kNoError;
@@ -105,9 +99,11 @@ int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {

 typedef FilterState Handle;

-HighPassFilterImpl::HighPassFilterImpl(const AudioProcessingImpl* apm)
-  : ProcessingComponent(apm),
-    apm_(apm) {}
+HighPassFilterImpl::HighPassFilterImpl(const AudioProcessing* apm,
+                                       CriticalSectionWrapper* crit)
+  : ProcessingComponent(),
+    apm_(apm),
+    crit_(crit) {}

 HighPassFilterImpl::~HighPassFilterImpl() {}

@@ -118,13 +114,13 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
     return apm_->kNoError;
   }

-  assert(audio->samples_per_split_channel() <= 160);
+  assert(audio->num_frames_per_band() <= 160);

   for (int i = 0; i < num_handles(); i++) {
     Handle* my_handle = static_cast<Handle*>(handle(i));
     err = Filter(my_handle,
-                 audio->low_pass_split_data(i),
-                 audio->samples_per_split_channel());
+                 audio->split_bands(i)[kBand0To8kHz],
+                 audio->num_frames_per_band());

     if (err != apm_->kNoError) {
       return GetHandleError(my_handle);
@@ -135,7 +131,7 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
 }

 int HighPassFilterImpl::Enable(bool enable) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   return EnableComponent(enable);
 }

@@ -143,25 +139,17 @@ bool HighPassFilterImpl::is_enabled() const {
   return is_component_enabled();
 }

-int HighPassFilterImpl::get_version(char* version,
-                                    int version_len_bytes) const {
-  // An empty string is used to indicate no version information.
-  memset(version, 0, version_len_bytes);
-  return apm_->kNoError;
-}
-
 void* HighPassFilterImpl::CreateHandle() const {
   return new FilterState;
 }

-int HighPassFilterImpl::DestroyHandle(void* handle) const {
+void HighPassFilterImpl::DestroyHandle(void* handle) const {
   delete static_cast<Handle*>(handle);
-  return apm_->kNoError;
 }

 int HighPassFilterImpl::InitializeHandle(void* handle) const {
   return InitializeFilter(static_cast<Handle*>(handle),
-                          apm_->sample_rate_hz());
+                          apm_->proc_sample_rate_hz());
 }

 int HighPassFilterImpl::ConfigureHandle(void* /*handle*/) const {
diff --git a/webrtc/modules/audio_processing/high_pass_filter_impl.h b/webrtc/modules/audio_processing/high_pass_filter_impl.h
index 4c23754..90b393e 100644
--- a/webrtc/modules/audio_processing/high_pass_filter_impl.h
+++ b/webrtc/modules/audio_processing/high_pass_filter_impl.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  * Use of this source code is governed by a BSD-style license
  * that can be found in the LICENSE file in the root of the source
@@ -8,44 +8,43 @@
  * be found in the AUTHORS file in the root of the source tree.
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_ -#include "audio_processing.h" -#include "processing_component.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/processing_component.h" namespace webrtc { -class AudioProcessingImpl; + class AudioBuffer; +class CriticalSectionWrapper; class HighPassFilterImpl : public HighPassFilter, public ProcessingComponent { public: - explicit HighPassFilterImpl(const AudioProcessingImpl* apm); + HighPassFilterImpl(const AudioProcessing* apm, CriticalSectionWrapper* crit); virtual ~HighPassFilterImpl(); int ProcessCaptureAudio(AudioBuffer* audio); // HighPassFilter implementation. - virtual bool is_enabled() const; - - // ProcessingComponent implementation. - virtual int get_version(char* version, int version_len_bytes) const; + bool is_enabled() const override; private: // HighPassFilter implementation. - virtual int Enable(bool enable); + int Enable(bool enable) override; // ProcessingComponent implementation. - virtual void* CreateHandle() const; - virtual int InitializeHandle(void* handle) const; - virtual int ConfigureHandle(void* handle) const; - virtual int DestroyHandle(void* handle) const; - virtual int num_handles_required() const; - virtual int GetHandleError(void* handle) const; + void* CreateHandle() const override; + int InitializeHandle(void* handle) const override; + int ConfigureHandle(void* handle) const override; + void DestroyHandle(void* handle) const override; + int num_handles_required() const override; + int GetHandleError(void* handle) const override; - const AudioProcessingImpl* apm_; + const AudioProcessing* apm_; + CriticalSectionWrapper* crit_; }; } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_ diff --git a/webrtc/modules/audio_processing/interface/audio_processing.h b/webrtc/modules/audio_processing/include/audio_processing.h similarity index 52% rename from webrtc/modules/audio_processing/interface/audio_processing.h rename to webrtc/modules/audio_processing/include/audio_processing.h index 3dc698b..318b2f8 100644 --- a/webrtc/modules/audio_processing/interface/audio_processing.h +++ b/webrtc/modules/audio_processing/include/audio_processing.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,17 +8,31 @@ * be found in the AUTHORS file in the root of the source tree. 
 */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_

-#include <stddef.h>  // size_t
+#include <stddef.h>  // size_t
+#include <stdio.h>   // FILE
+#include <vector>

-#include "typedefs.h"
-#include "module.h"
+#include "webrtc/base/arraysize.h"
+#include "webrtc/base/platform_file.h"
+#include "webrtc/common.h"
+#include "webrtc/modules/audio_processing/beamformer/array_util.h"
+#include "webrtc/typedefs.h"
+
+struct AecCore;

 namespace webrtc {

 class AudioFrame;
+
+template<typename T>
+class Beamformer;
+
+class StreamConfig;
+class ProcessingConfig;
+
 class EchoCancellation;
 class EchoControlMobile;
 class GainControl;
@@ -27,6 +41,94 @@ class LevelEstimator;
 class NoiseSuppression;
 class VoiceDetection;

+// Use to enable the extended filter mode in the AEC, along with robustness
+// measures around the reported system delays. It comes with a significant
+// increase in AEC complexity, but is much more robust to unreliable reported
+// delays.
+//
+// Detailed changes to the algorithm:
+// - The filter length is changed from 48 to 128 ms. This comes with tuning of
+//   several parameters: i) filter adaptation stepsize and error threshold;
+//   ii) non-linear processing smoothing and overdrive.
+// - Option to ignore the reported delays on platforms which we deem
+//   sufficiently unreliable. See WEBRTC_UNTRUSTED_DELAY in echo_cancellation.c.
+// - Faster startup times by removing the excessive "startup phase" processing
+//   of reported delays.
+// - Much more conservative adjustments to the far-end read pointer. We smooth
+//   the delay difference more heavily, and back off from the difference more.
+//   Adjustments force a readaptation of the filter, so they should be avoided
+//   except when really necessary.
+struct ExtendedFilter {
+  ExtendedFilter() : enabled(false) {}
+  explicit ExtendedFilter(bool enabled) : enabled(enabled) {}
+  bool enabled;
+};
+
+// Enables delay-agnostic echo cancellation. This feature relies on internally
+// estimated delays between the process and reverse streams, thus not relying
+// on reported system delays. This configuration only applies to
+// EchoCancellation and not EchoControlMobile. It can be set in the constructor
+// or using AudioProcessing::SetExtraOptions().
+struct DelayAgnostic {
+  DelayAgnostic() : enabled(false) {}
+  explicit DelayAgnostic(bool enabled) : enabled(enabled) {}
+  bool enabled;
+};
+
+// Use to enable experimental gain control (AGC). At startup the experimental
+// AGC moves the microphone volume up to |startup_min_volume| if the current
+// microphone volume is set too low. The value is clamped to its operating
+// range [12, 255]. Here, 255 maps to 100%.
+//
+// Must be provided through AudioProcessing::Create(Config&).
+#if defined(WEBRTC_CHROMIUM_BUILD)
+static const int kAgcStartupMinVolume = 85;
+#else
+static const int kAgcStartupMinVolume = 0;
+#endif  // defined(WEBRTC_CHROMIUM_BUILD)
+struct ExperimentalAgc {
+  ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
+  explicit ExperimentalAgc(bool enabled)
+      : enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
+  ExperimentalAgc(bool enabled, int startup_min_volume)
+      : enabled(enabled), startup_min_volume(startup_min_volume) {}
+  bool enabled;
+  int startup_min_volume;
+};
+
+// Use to enable experimental noise suppression. It can be set in the
+// constructor or using AudioProcessing::SetExtraOptions().
+struct ExperimentalNs {
+  ExperimentalNs() : enabled(false) {}
+  explicit ExperimentalNs(bool enabled) : enabled(enabled) {}
+  bool enabled;
+};
+
+// Use to enable beamforming. Must be provided through the constructor. It will
+// have no impact if used with AudioProcessing::SetExtraOptions().
+struct Beamforming {
+  Beamforming()
+      : enabled(false),
+        array_geometry() {}
+  Beamforming(bool enabled, const std::vector<Point>& array_geometry)
+      : enabled(enabled),
+        array_geometry(array_geometry) {}
+  const bool enabled;
+  const std::vector<Point> array_geometry;
+};
+
+// Use to enable intelligibility enhancer in audio processing. Must be provided
+// through the constructor. It will have no impact if used with
+// AudioProcessing::SetExtraOptions().
+//
+// Note: If enabled and the reverse stream has more than one output channel,
+// the reverse stream will become an upmixed mono signal.
+struct Intelligibility {
+  Intelligibility() : enabled(false) {}
+  explicit Intelligibility(bool enabled) : enabled(enabled) {}
+  bool enabled;
+};
+
 // The Audio Processing Module (APM) provides a collection of voice processing
 // components designed for real-time communications software.
 //
@@ -56,16 +158,12 @@ class VoiceDetection;
 // 2. Parameter getters are never called concurrently with the corresponding
 //    setter.
 //
-// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple
-// channels should be interleaved.
+// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
+// interfaces use interleaved data, while the float interfaces use deinterleaved
+// data.
 //
 // Usage example, omitting error checking:
 // AudioProcessing* apm = AudioProcessing::Create(0);
-// apm->set_sample_rate_hz(32000); // Super-wideband processing.
-//
-// // Mono capture and stereo render.
-// apm->set_num_channels(1, 1);
-// apm->set_num_reverse_channels(2);
 //
 // apm->high_pass_filter()->Enable(true);
 //
@@ -102,44 +200,84 @@ class VoiceDetection;
 // apm->Initialize();
 //
 // // Close the application...
-// AudioProcessing::Destroy(apm);
-// apm = NULL;
+// delete apm;
 //
-class AudioProcessing : public Module {
+class AudioProcessing {
  public:
-  // Creates a APM instance, with identifier |id|. Use one instance for every
-  // primary audio stream requiring processing. On the client-side, this would
-  // typically be one instance for the near-end stream, and additional instances
-  // for each far-end stream which requires processing. On the server-side,
-  // this would typically be one instance for every incoming stream.
-  static AudioProcessing* Create(int id);
+  // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
+  enum ChannelLayout {
+    kMono,
+    // Left, right.
+    kStereo,
+    // Mono, keyboard mic.
+    kMonoAndKeyboard,
+    // Left, right, keyboard mic.
+    kStereoAndKeyboard
+  };

-  // Destroys a |apm| instance.
-  static void Destroy(AudioProcessing* apm);
+  // Creates an APM instance. Use one instance for every primary audio stream
+  // requiring processing. On the client-side, this would typically be one
+  // instance for the near-end stream, and additional instances for each
+  // far-end stream which requires processing. On the server-side, this would
+  // typically be one instance for every incoming stream.
+  static AudioProcessing* Create();
+  // Allows passing in an optional configuration at create-time.
+  static AudioProcessing* Create(const Config& config);
+  // Only for testing.
+  static AudioProcessing* Create(const Config& config,
+                                 Beamformer<float>* beamformer);
+  virtual ~AudioProcessing() {}

   // Initializes internal states, while retaining all user settings. This
   // should be called before beginning to process a new audio stream. However,
   // it is not necessary to call before processing the first stream after
   // creation.
+  //
+  // It is also not necessary to call if the audio parameters (sample
+  // rate and number of channels) have changed. Passing updated parameters
+  // directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
+  // If the parameters are known at init-time though, they may be provided.
   virtual int Initialize() = 0;

-  // Sets the sample |rate| in Hz for both the primary and reverse audio
-  // streams. 8000, 16000 or 32000 Hz are permitted.
-  virtual int set_sample_rate_hz(int rate) = 0;
-  virtual int sample_rate_hz() const = 0;
+  // The int16 interfaces require:
+  //   - only |NativeRate|s be used
+  //   - that the input, output and reverse rates must match
+  //   - that |processing_config.output_stream()| matches
+  //     |processing_config.input_stream()|.
+  //
+  // The float interfaces accept arbitrary rates and support differing input and
+  // output layouts, but the output must have either one channel or the same
+  // number of channels as the input.
+  virtual int Initialize(const ProcessingConfig& processing_config) = 0;

-  // Sets the number of channels for the primary audio stream. Input frames must
-  // contain a number of channels given by |input_channels|, while output frames
-  // will be returned with number of channels given by |output_channels|.
-  virtual int set_num_channels(int input_channels, int output_channels) = 0;
+  // Initialize with unpacked parameters. See Initialize() above for details.
+  //
+  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
+  virtual int Initialize(int input_sample_rate_hz,
+                         int output_sample_rate_hz,
+                         int reverse_sample_rate_hz,
+                         ChannelLayout input_layout,
+                         ChannelLayout output_layout,
+                         ChannelLayout reverse_layout) = 0;
+
+  // Pass down additional options which don't have explicit setters. This
+  // ensures the options are applied immediately.
+  virtual void SetExtraOptions(const Config& config) = 0;
+
+  // TODO(ajm): Only intended for internal use. Make private and friend the
+  // necessary classes?
+  virtual int proc_sample_rate_hz() const = 0;
+  virtual int proc_split_sample_rate_hz() const = 0;
   virtual int num_input_channels() const = 0;
   virtual int num_output_channels() const = 0;
-
-  // Sets the number of channels for the reverse audio stream. Input frames must
-  // contain a number of channels given by |channels|.
-  virtual int set_num_reverse_channels(int channels) = 0;
   virtual int num_reverse_channels() const = 0;

+  // Set to true when the output of AudioProcessing will be muted or in some
+  // other way not used. Ideally, the captured audio would still be processed,
+  // but some components may change behavior based on this information.
+  // Default false.
+  virtual void set_output_will_be_muted(bool muted) = 0;
+
   // Processes a 10 ms |frame| of the primary audio stream. On the client-side,
   // this is the near-end (or captured) audio.
   //
@@ -147,11 +285,40 @@ class AudioProcessing : public Module {
   // must be called prior to processing the current frame. Any getter function
   // with the stream_ tag which is needed should be called after processing.
// - // The |_frequencyInHz|, |_audioChannel|, and |_payloadDataLengthInSamples| - // members of |frame| must be valid, and correspond to settings supplied - // to APM. + // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| + // members of |frame| must be valid. If changed from the previous call to this + // method, it will trigger an initialization. virtual int ProcessStream(AudioFrame* frame) = 0; + // Accepts deinterleaved float audio with the range [-1, 1]. Each element + // of |src| points to a channel buffer, arranged according to + // |input_layout|. At output, the channels will be arranged according to + // |output_layout| at |output_sample_rate_hz| in |dest|. + // + // The output layout must have one channel or as many channels as the input. + // |src| and |dest| may use the same memory, if desired. + // + // TODO(mgraczyk): Remove once clients are updated to use the new interface. + virtual int ProcessStream(const float* const* src, + size_t samples_per_channel, + int input_sample_rate_hz, + ChannelLayout input_layout, + int output_sample_rate_hz, + ChannelLayout output_layout, + float* const* dest) = 0; + + // Accepts deinterleaved float audio with the range [-1, 1]. Each element of + // |src| points to a channel buffer, arranged according to |input_stream|. At + // output, the channels will be arranged according to |output_stream| in + // |dest|. + // + // The output must have one channel or as many channels as the input. |src| + // and |dest| may use the same memory, if desired. + virtual int ProcessStream(const float* const* src, + const StreamConfig& input_config, + const StreamConfig& output_config, + float* const* dest) = 0; + // Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame // will not be modified. On the client-side, this is the far-end (or to be // rendered) audio. @@ -162,12 +329,34 @@ class AudioProcessing : public Module { // typically will not be used. If you're not sure what to pass in here, // chances are you don't need to use it. // - // The |_frequencyInHz|, |_audioChannel|, and |_payloadDataLengthInSamples| - // members of |frame| must be valid. + // The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| + // members of |frame| must be valid. |sample_rate_hz_| must correspond to + // |input_sample_rate_hz()| // // TODO(ajm): add const to input; requires an implementation fix. + // DEPRECATED: Use |ProcessReverseStream| instead. + // TODO(ekm): Remove once all users have updated to |ProcessReverseStream|. virtual int AnalyzeReverseStream(AudioFrame* frame) = 0; + // Same as |AnalyzeReverseStream|, but may modify |frame| if intelligibility + // is enabled. + virtual int ProcessReverseStream(AudioFrame* frame) = 0; + + // Accepts deinterleaved float audio with the range [-1, 1]. Each element + // of |data| points to a channel buffer, arranged according to |layout|. + // TODO(mgraczyk): Remove once clients are updated to use the new interface. + virtual int AnalyzeReverseStream(const float* const* data, + size_t samples_per_channel, + int rev_sample_rate_hz, + ChannelLayout layout) = 0; + + // Accepts deinterleaved float audio with the range [-1, 1]. Each element of + // |data| points to a channel buffer, arranged according to |reverse_config|. + virtual int ProcessReverseStream(const float* const* src, + const StreamConfig& reverse_input_config, + const StreamConfig& reverse_output_config, + float* const* dest) = 0; + // This must be called if and only if echo processing is enabled. 
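[Editor's note: pulling the pieces of this header together, a client of the new
interface constructs the module once and then drives it with 10 ms chunks. A
sketch under stated assumptions (webrtc::Config::Set<T>() comes from the
included webrtc/common.h; render_frame/capture_frame are caller-filled
AudioFrames and delay_ms is the caller's delay estimate; error checks omitted):

    webrtc::Config config;
    config.Set<webrtc::ExtendedFilter>(new webrtc::ExtendedFilter(true));
    config.Set<webrtc::DelayAgnostic>(new webrtc::DelayAgnostic(true));
    webrtc::AudioProcessing* apm = webrtc::AudioProcessing::Create(config);

    // Per 10 ms chunk:
    apm->ProcessReverseStream(&render_frame);  // far-end (render) audio
    apm->set_stream_delay_ms(delay_ms);        // render-to-capture delay
    apm->ProcessStream(&capture_frame);        // near-end (capture) audio

    delete apm;  // matches the "Close the application..." step above
]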
// // Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end @@ -183,18 +372,46 @@ class AudioProcessing : public Module { // ProcessStream(). virtual int set_stream_delay_ms(int delay) = 0; virtual int stream_delay_ms() const = 0; + virtual bool was_stream_delay_set() const = 0; + + // Call to signal that a key press occurred (true) or did not occur (false) + // with this chunk of audio. + virtual void set_stream_key_pressed(bool key_pressed) = 0; + + // Sets a delay |offset| in ms to add to the values passed in through + // set_stream_delay_ms(). May be positive or negative. + // + // Note that this could cause an otherwise valid value passed to + // set_stream_delay_ms() to return an error. + virtual void set_delay_offset_ms(int offset) = 0; + virtual int delay_offset_ms() const = 0; // Starts recording debugging information to a file specified by |filename|, // a NULL-terminated string. If there is an ongoing recording, the old file // will be closed, and recording will continue in the newly specified file. // An already existing file will be overwritten without warning. - static const int kMaxFilenameSize = 1024; + static const size_t kMaxFilenameSize = 1024; virtual int StartDebugRecording(const char filename[kMaxFilenameSize]) = 0; + // Same as above but uses an existing file handle. Takes ownership + // of |handle| and closes it at StopDebugRecording(). + virtual int StartDebugRecording(FILE* handle) = 0; + + // Same as above but uses an existing PlatformFile handle. Takes ownership + // of |handle| and closes it at StopDebugRecording(). + // TODO(xians): Make this interface pure virtual. + virtual int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) { + return -1; + } + // Stops recording debugging information, and closes the file. Recording // cannot be resumed in the same file (without overwriting it). virtual int StopDebugRecording() = 0; + // Use to send UMA histograms at end of a call. Note that all histogram + // specific member variables are reset. + virtual void UpdateHistogramsOnCallEnd() = 0; + // These provide access to the component interfaces and should never return // NULL. The pointers will be valid for the lifetime of the APM instance. // The memory for these objects is entirely managed internally. @@ -213,8 +430,8 @@ class AudioProcessing : public Module { int minimum; // Long-term minimum. }; - // Fatal errors. - enum Errors { + enum Error { + // Fatal errors. kNoError = 0, kUnspecifiedError = -1, kCreationFailedError = -2, @@ -227,22 +444,134 @@ class AudioProcessing : public Module { kBadNumberChannelsError = -9, kFileError = -10, kStreamParameterNotSetError = -11, - kNotEnabledError = -12 - }; + kNotEnabledError = -12, - // Warnings are non-fatal. - enum Warnings { + // Warnings are non-fatal. // This results when a set_stream_ parameter is out of range. Processing // will continue, but the parameter may have been truncated. - kBadStreamParameterWarning = -13, + kBadStreamParameterWarning = -13 }; - // Inherited from Module. 
-  // Inherited from Module.
-  virtual WebRtc_Word32 TimeUntilNextProcess() { return -1; };
-  virtual WebRtc_Word32 Process() { return -1; };
+  enum NativeRate {
+    kSampleRate8kHz = 8000,
+    kSampleRate16kHz = 16000,
+    kSampleRate32kHz = 32000,
+    kSampleRate48kHz = 48000
+  };

- protected:
-  virtual ~AudioProcessing() {};
+  static const int kNativeSampleRatesHz[];
+  static const size_t kNumNativeSampleRates;
+  static const int kMaxNativeSampleRateHz;
+  static const int kMaxAECMSampleRateHz;
+
+  static const int kChunkSizeMs = 10;
+};
+
+class StreamConfig {
+ public:
+  // sample_rate_hz: The sampling rate of the stream.
+  //
+  // num_channels: The number of audio channels in the stream, excluding the
+  //               keyboard channel if it is present. When passing a
+  //               StreamConfig with an array of arrays T*[N],
+  //
+  //                N == {num_channels + 1  if  has_keyboard
+  //                     {num_channels      if  !has_keyboard
+  //
+  // has_keyboard: True if the stream has a keyboard channel. When has_keyboard
+  //               is true, the last channel in any corresponding list of
+  //               channels is the keyboard channel.
+  StreamConfig(int sample_rate_hz = 0,
+               int num_channels = 0,
+               bool has_keyboard = false)
+      : sample_rate_hz_(sample_rate_hz),
+        num_channels_(num_channels),
+        has_keyboard_(has_keyboard),
+        num_frames_(calculate_frames(sample_rate_hz)) {}
+
+  void set_sample_rate_hz(int value) {
+    sample_rate_hz_ = value;
+    num_frames_ = calculate_frames(value);
+  }
+  void set_num_channels(int value) { num_channels_ = value; }
+  void set_has_keyboard(bool value) { has_keyboard_ = value; }
+
+  int sample_rate_hz() const { return sample_rate_hz_; }
+
+  // The number of channels in the stream, not including the keyboard channel
+  // if present.
+  int num_channels() const { return num_channels_; }
+
+  bool has_keyboard() const { return has_keyboard_; }
+  size_t num_frames() const { return num_frames_; }
+  size_t num_samples() const { return num_channels_ * num_frames_; }
+
+  bool operator==(const StreamConfig& other) const {
+    return sample_rate_hz_ == other.sample_rate_hz_ &&
+           num_channels_ == other.num_channels_ &&
+           has_keyboard_ == other.has_keyboard_;
+  }
+
+  bool operator!=(const StreamConfig& other) const { return !(*this == other); }
+
+ private:
+  static size_t calculate_frames(int sample_rate_hz) {
+    return static_cast<size_t>(
+        AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000);
+  }
+
+  int sample_rate_hz_;
+  int num_channels_;
+  bool has_keyboard_;
+  size_t num_frames_;
+};
+
+class ProcessingConfig {
+ public:
+  enum StreamName {
+    kInputStream,
+    kOutputStream,
+    kReverseInputStream,
+    kReverseOutputStream,
+    kNumStreamNames,
+  };
+
+  const StreamConfig& input_stream() const {
+    return streams[StreamName::kInputStream];
+  }
+  const StreamConfig& output_stream() const {
+    return streams[StreamName::kOutputStream];
+  }
+  const StreamConfig& reverse_input_stream() const {
+    return streams[StreamName::kReverseInputStream];
+  }
+  const StreamConfig& reverse_output_stream() const {
+    return streams[StreamName::kReverseOutputStream];
+  }
+
+  StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
+  StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
+  StreamConfig& reverse_input_stream() {
+    return streams[StreamName::kReverseInputStream];
+  }
+  StreamConfig& reverse_output_stream() {
+    return streams[StreamName::kReverseOutputStream];
+  }
+
+  bool operator==(const ProcessingConfig& other) const {
+    for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
+      if (this->streams[i] != other.streams[i]) {
+        return false;
+      }
+    }
+
return true; + } + + bool operator!=(const ProcessingConfig& other) const { + return !(*this == other); + } + + StreamConfig streams[StreamName::kNumStreamNames]; }; // The acoustic echo cancellation (AEC) component provides better performance @@ -263,20 +592,14 @@ class EchoCancellation { // render and capture devices are used, particularly with webcams. // // This enables a compensation mechanism, and requires that - // |set_device_sample_rate_hz()| and |set_stream_drift_samples()| be called. + // set_stream_drift_samples() be called. virtual int enable_drift_compensation(bool enable) = 0; virtual bool is_drift_compensation_enabled() const = 0; - // Provides the sampling rate of the audio devices. It is assumed the render - // and capture devices use the same nominal sample rate. Required if and only - // if drift compensation is enabled. - virtual int set_device_sample_rate_hz(int rate) = 0; - virtual int device_sample_rate_hz() const = 0; - // Sets the difference between the number of samples rendered and captured by // the audio devices since the last call to |ProcessStream()|. Must be called - // if and only if drift compensation is enabled, prior to |ProcessStream()|. - virtual int set_stream_drift_samples(int drift) = 0; + // if drift compensation is enabled, prior to |ProcessStream()|. + virtual void set_stream_drift_samples(int drift) = 0; virtual int stream_drift_samples() const = 0; enum SuppressionLevel { @@ -328,12 +651,26 @@ class EchoCancellation { virtual bool is_delay_logging_enabled() const = 0; // The delay metrics consists of the delay |median| and the delay standard - // deviation |std|. The values are averaged over the time period since the - // last call to |GetDelayMetrics()|. + // deviation |std|. It also consists of the fraction of delay estimates + // |fraction_poor_delays| that can make the echo cancellation perform poorly. + // The values are aggregated until the first call to |GetDelayMetrics()| and + // afterwards aggregated and updated every second. + // Note that if there are several clients pulling metrics from + // |GetDelayMetrics()| during a session the first call from any of them will + // change to one second aggregation window for all. + // TODO(bjornv): Deprecated, remove. virtual int GetDelayMetrics(int* median, int* std) = 0; + virtual int GetDelayMetrics(int* median, int* std, + float* fraction_poor_delays) = 0; + + // Returns a pointer to the low level AEC component. In case of multiple + // channels, the pointer to the first one is returned. A NULL pointer is + // returned when the AEC component is disabled or has not been initialized + // successfully. + virtual struct AecCore* aec_core() const = 0; protected: - virtual ~EchoCancellation() {}; + virtual ~EchoCancellation() {} }; // The acoustic echo control for mobile (AECM) component is a low complexity @@ -389,7 +726,7 @@ class EchoControlMobile { static size_t echo_path_size_bytes(); protected: - virtual ~EchoControlMobile() {}; + virtual ~EchoControlMobile() {} }; // The automatic gain control (AGC) component brings the signal to an @@ -481,7 +818,7 @@ class GainControl { virtual bool stream_is_saturated() const = 0; protected: - virtual ~GainControl() {}; + virtual ~GainControl() {} }; // A filtering component which removes DC offset and low-frequency noise. @@ -492,34 +829,29 @@ class HighPassFilter { virtual bool is_enabled() const = 0; protected: - virtual ~HighPassFilter() {}; + virtual ~HighPassFilter() {} }; // An estimation component used to retrieve level metrics. 
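[Editor's note: the StreamConfig/ProcessingConfig classes defined above replace
the old set_sample_rate_hz()/set_num_channels() setters. A sketch of init-time
use with the new Initialize(const ProcessingConfig&) overload; the 48 kHz
stereo-capture/mono-output/stereo-render numbers are illustrative only:

    webrtc::ProcessingConfig processing_config;
    processing_config.input_stream() = webrtc::StreamConfig(48000, 2);
    processing_config.output_stream() = webrtc::StreamConfig(48000, 1);
    processing_config.reverse_input_stream() = webrtc::StreamConfig(48000, 2);
    processing_config.reverse_output_stream() = webrtc::StreamConfig(48000, 2);
    int err = apm->Initialize(processing_config);
]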
-// NOTE: currently unavailable. All methods return errors. class LevelEstimator { public: virtual int Enable(bool enable) = 0; virtual bool is_enabled() const = 0; - // The metrics are reported in dBFs calculated as: - // Level = 10log_10(P_s / P_max) [dBFs], where - // P_s is the signal power and P_max is the maximum possible (or peak) - // power. With 16-bit signals, P_max = (2^15)^2. - struct Metrics { - AudioProcessing::Statistic signal; // Overall signal level. - AudioProcessing::Statistic speech; // Speech level. - AudioProcessing::Statistic noise; // Noise level. - }; - - virtual int GetMetrics(Metrics* metrics, Metrics* reverse_metrics) = 0; - - //virtual int enable_noise_warning(bool enable) = 0; - //bool is_noise_warning_enabled() const = 0; - //virtual bool stream_has_high_noise() const = 0; + // Returns the root mean square (RMS) level in dBFs (decibels from digital + // full-scale), or alternately dBov. It is computed over all primary stream + // frames since the last call to RMS(). The returned value is positive but + // should be interpreted as negative. It is constrained to [0, 127]. + // + // The computation follows: https://tools.ietf.org/html/rfc6465 + // with the intent that it can provide the RTP audio level indication. + // + // Frames passed to ProcessStream() with an |_energy| of zero are considered + // to have been muted. The RMS of the frame will be interpreted as -127. + virtual int RMS() = 0; protected: - virtual ~LevelEstimator() {}; + virtual ~LevelEstimator() {} }; // The noise suppression (NS) component attempts to remove noise while @@ -543,8 +875,13 @@ class NoiseSuppression { virtual int set_level(Level level) = 0; virtual Level level() const = 0; + // Returns the internally computed prior speech probability of current frame + // averaged over output channels. This is not supported in fixed point, for + // which |kUnsupportedFunctionError| is returned. + virtual float speech_probability() const = 0; + protected: - virtual ~NoiseSuppression() {}; + virtual ~NoiseSuppression() {} }; // The voice activity detection (VAD) component analyzes the stream to @@ -552,7 +889,7 @@ class NoiseSuppression { // external VAD decision. // // In addition to |stream_has_voice()| the VAD decision is provided through the -// |AudioFrame| passed to |ProcessStream()|. The |_vadActivity| member will be +// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will be // modified to reflect the current decision. class VoiceDetection { public: @@ -594,8 +931,8 @@ class VoiceDetection { virtual int frame_size_ms() const = 0; protected: - virtual ~VoiceDetection() {}; + virtual ~VoiceDetection() {} }; } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_ diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc new file mode 100644 index 0000000..d014ce0 --- /dev/null +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.cc @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+//
+//  Implements core class for intelligibility enhancer.
+//
+//  Details of the model and algorithm can be found in the original paper:
+//  http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
+//
+
+#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
+
+#include
+#include
+#include
+#include
+
+#include "webrtc/base/checks.h"
+#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/common_audio/window_generator.h"
+
+namespace webrtc {
+
+namespace {
+
+const size_t kErbResolution = 2;
+const int kWindowSizeMs = 2;
+const int kChunkSizeMs = 10;  // Size provided by APM.
+const float kClipFreq = 200.0f;
+const float kConfigRho = 0.02f;  // Default production and interpretation SNR.
+const float kKbdAlpha = 1.5f;
+const float kLambdaBot = -1.0f;     // Extreme values in bisection
+const float kLambdaTop = -10e-18f;  // search for lambda.
+
+}  // namespace
+
+using std::complex;
+using std::max;
+using std::min;
+using VarianceType = intelligibility::VarianceArray::StepType;
+
+IntelligibilityEnhancer::TransformCallback::TransformCallback(
+    IntelligibilityEnhancer* parent,
+    IntelligibilityEnhancer::AudioSource source)
+    : parent_(parent), source_(source) {
+}
+
+void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
+    const complex<float>* const* in_block,
+    int in_channels,
+    size_t frames,
+    int /* out_channels */,
+    complex<float>* const* out_block) {
+  RTC_DCHECK_EQ(parent_->freqs_, frames);
+  for (int i = 0; i < in_channels; ++i) {
+    parent_->DispatchAudio(source_, in_block[i], out_block[i]);
+  }
+}
+
+IntelligibilityEnhancer::IntelligibilityEnhancer()
+    : IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
+}
+
+IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
+    : freqs_(RealFourier::ComplexLength(
+          RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
+      window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
+      chunk_length_(
+          static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),
+      bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
+      sample_rate_hz_(config.sample_rate_hz),
+      erb_resolution_(kErbResolution),
+      num_capture_channels_(config.num_capture_channels),
+      num_render_channels_(config.num_render_channels),
+      analysis_rate_(config.analysis_rate),
+      active_(true),
+      clear_variance_(freqs_,
+                      config.var_type,
+                      config.var_window_size,
+                      config.var_decay_rate),
+      noise_variance_(freqs_,
+                      config.var_type,
+                      config.var_window_size,
+                      config.var_decay_rate),
+      filtered_clear_var_(new float[bank_size_]),
+      filtered_noise_var_(new float[bank_size_]),
+      filter_bank_(bank_size_),
+      center_freqs_(new float[bank_size_]),
+      rho_(new float[bank_size_]),
+      gains_eq_(new float[bank_size_]),
+      gain_applier_(freqs_, config.gain_change_limit),
+      temp_render_out_buffer_(chunk_length_, num_render_channels_),
+      temp_capture_out_buffer_(chunk_length_, num_capture_channels_),
+      kbd_window_(new float[window_size_]),
+      render_callback_(this, AudioSource::kRenderStream),
+      capture_callback_(this, AudioSource::kCaptureStream),
+      block_count_(0),
+      analysis_step_(0) {
+  RTC_DCHECK_LE(config.rho, 1.0f);
+
+  CreateErbBank();
+
+  // Assumes all rho equal.
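+  // (Every band therefore receives the same config.rho, squared; per-band
+  // values would require extending Config.)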
+ for (size_t i = 0; i < bank_size_; ++i) { + rho_[i] = config.rho * config.rho; + } + + float freqs_khz = kClipFreq / 1000.0f; + size_t erb_index = static_cast(ceilf( + 11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f)); + start_freq_ = std::max(static_cast(1), erb_index * erb_resolution_); + + WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_, + kbd_window_.get()); + render_mangler_.reset(new LappedTransform( + num_render_channels_, num_render_channels_, chunk_length_, + kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_)); + capture_mangler_.reset(new LappedTransform( + num_capture_channels_, num_capture_channels_, chunk_length_, + kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_)); +} + +void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio, + int sample_rate_hz, + int num_channels) { + RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); + RTC_CHECK_EQ(num_render_channels_, num_channels); + + if (active_) { + render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels()); + } + + if (active_) { + for (int i = 0; i < num_render_channels_; ++i) { + memcpy(audio[i], temp_render_out_buffer_.channels()[i], + chunk_length_ * sizeof(**audio)); + } + } +} + +void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio, + int sample_rate_hz, + int num_channels) { + RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz); + RTC_CHECK_EQ(num_capture_channels_, num_channels); + + capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels()); +} + +void IntelligibilityEnhancer::DispatchAudio( + IntelligibilityEnhancer::AudioSource source, + const complex* in_block, + complex* out_block) { + switch (source) { + case kRenderStream: + ProcessClearBlock(in_block, out_block); + break; + case kCaptureStream: + ProcessNoiseBlock(in_block, out_block); + break; + } +} + +void IntelligibilityEnhancer::ProcessClearBlock(const complex* in_block, + complex* out_block) { + if (block_count_ < 2) { + memset(out_block, 0, freqs_ * sizeof(*out_block)); + ++block_count_; + return; + } + + // TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary. + if (true) { + clear_variance_.Step(in_block, false); + if (block_count_ % analysis_rate_ == analysis_rate_ - 1) { + const float power_target = std::accumulate( + clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f); + AnalyzeClearBlock(power_target); + ++analysis_step_; + } + ++block_count_; + } + + if (active_) { + gain_applier_.Apply(in_block, out_block); + } +} + +void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) { + FilterVariance(clear_variance_.variance(), filtered_clear_var_.get()); + FilterVariance(noise_variance_.variance(), filtered_noise_var_.get()); + + SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get()); + const float power_top = + DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); + SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get()); + const float power_bot = + DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); + if (power_target >= power_bot && power_target <= power_top) { + SolveForLambda(power_target, power_bot, power_top); + UpdateErbGains(); + } // Else experiencing variance underflow, so do nothing. 
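+  // Note: |power_top| and |power_bot| are the powers achieved at the two ends
+  // of the lambda search interval, so a target inside [power_bot, power_top]
+  // gives SolveForLambda() a valid bisection bracket.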
+} + +void IntelligibilityEnhancer::SolveForLambda(float power_target, + float power_bot, + float power_top) { + const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values + const int kMaxIters = 100; // for these, based on experiments. + + const float reciprocal_power_target = 1.f / power_target; + float lambda_bot = kLambdaBot; + float lambda_top = kLambdaTop; + float power_ratio = 2.0f; // Ratio of achieved power to target power. + int iters = 0; + while (std::fabs(power_ratio - 1.0f) > kConvergeThresh && + iters <= kMaxIters) { + const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f; + SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get()); + const float power = + DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_); + if (power < power_target) { + lambda_bot = lambda; + } else { + lambda_top = lambda; + } + power_ratio = std::fabs(power * reciprocal_power_target); + ++iters; + } +} + +void IntelligibilityEnhancer::UpdateErbGains() { + // (ERB gain) = filterbank' * (freq gain) + float* gains = gain_applier_.target(); + for (size_t i = 0; i < freqs_; ++i) { + gains[i] = 0.0f; + for (size_t j = 0; j < bank_size_; ++j) { + gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]); + } + } +} + +void IntelligibilityEnhancer::ProcessNoiseBlock(const complex* in_block, + complex* /*out_block*/) { + noise_variance_.Step(in_block); +} + +size_t IntelligibilityEnhancer::GetBankSize(int sample_rate, + size_t erb_resolution) { + float freq_limit = sample_rate / 2000.0f; + size_t erb_scale = static_cast(ceilf( + 11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f)); + return erb_scale * erb_resolution; +} + +void IntelligibilityEnhancer::CreateErbBank() { + size_t lf = 1, rf = 4; + + for (size_t i = 0; i < bank_size_; ++i) { + float abs_temp = fabsf((i + 1.0f) / static_cast(erb_resolution_)); + center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp)); + center_freqs_[i] -= 14678.49f; + } + float last_center_freq = center_freqs_[bank_size_ - 1]; + for (size_t i = 0; i < bank_size_; ++i) { + center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq; + } + + for (size_t i = 0; i < bank_size_; ++i) { + filter_bank_[i].resize(freqs_); + } + + for (size_t i = 1; i <= bank_size_; ++i) { + size_t lll, ll, rr, rrr; + static const size_t kOne = 1; // Avoids repeated static_cast<>s below. 
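+    // |lll|..|ll| span the rising edge of triangular filter |i| and
+    // |rr|..|rrr| its falling edge; bins |ll|..|rr| form the flat top.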
+    lll = static_cast<size_t>(round(
+        center_freqs_[max(kOne, i - lf) - 1] * freqs_ /
+            (0.5f * sample_rate_hz_)));
+    ll = static_cast<size_t>(round(
+        center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));
+    lll = min(freqs_, max(lll, kOne)) - 1;
+    ll = min(freqs_, max(ll, kOne)) - 1;
+
+    rrr = static_cast<size_t>(round(
+        center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
+            (0.5f * sample_rate_hz_)));
+    rr = static_cast<size_t>(round(
+        center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
+            (0.5f * sample_rate_hz_)));
+    rrr = min(freqs_, max(rrr, kOne)) - 1;
+    rr = min(freqs_, max(rr, kOne)) - 1;
+
+    float step, element;
+
+    step = 1.0f / (ll - lll);
+    element = 0.0f;
+    for (size_t j = lll; j <= ll; ++j) {
+      filter_bank_[i - 1][j] = element;
+      element += step;
+    }
+    step = 1.0f / (rrr - rr);
+    element = 1.0f;
+    for (size_t j = rr; j <= rrr; ++j) {
+      filter_bank_[i - 1][j] = element;
+      element -= step;
+    }
+    for (size_t j = ll; j <= rr; ++j) {
+      filter_bank_[i - 1][j] = 1.0f;
+    }
+  }
+
+  float sum;
+  for (size_t i = 0; i < freqs_; ++i) {
+    sum = 0.0f;
+    for (size_t j = 0; j < bank_size_; ++j) {
+      sum += filter_bank_[j][i];
+    }
+    for (size_t j = 0; j < bank_size_; ++j) {
+      filter_bank_[j][i] /= sum;
+    }
+  }
+}
+
+void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
+                                                       size_t start_freq,
+                                                       float* sols) {
+  bool quadratic = (kConfigRho < 1.0f);
+  const float* var_x0 = filtered_clear_var_.get();
+  const float* var_n0 = filtered_noise_var_.get();
+
+  for (size_t n = 0; n < start_freq; ++n) {
+    sols[n] = 1.0f;
+  }
+
+  // Analytic solution for optimal gains. See paper for derivation.
+  for (size_t n = start_freq - 1; n < bank_size_; ++n) {
+    float alpha0, beta0, gamma0;
+    gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +
+             lambda * var_x0[n] * var_n0[n] * var_n0[n];
+    beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n];
+    if (quadratic) {
+      alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n];
+      sols[n] =
+          (-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0);
+    } else {
+      sols[n] = -gamma0 / beta0;
+    }
+    sols[n] = fmax(0, sols[n]);
+  }
+}
+
+void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
+  RTC_DCHECK_GT(freqs_, 0u);
+  for (size_t i = 0; i < bank_size_; ++i) {
+    result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);
+  }
+}
+
+float IntelligibilityEnhancer::DotProduct(const float* a,
+                                          const float* b,
+                                          size_t length) {
+  float ret = 0.0f;
+
+  for (size_t i = 0; i < length; ++i) {
+    ret = fmaf(a[i], b[i], ret);
+  }
+  return ret;
+}
+
+bool IntelligibilityEnhancer::active() const {
+  return active_;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
new file mode 100644
index 0000000..1e9e35a
--- /dev/null
+++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h
@@ -0,0 +1,182 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+//
+//  Specifies core class for intelligibility enhancement.
+// + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ + +#include +#include + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/common_audio/lapped_transform.h" +#include "webrtc/common_audio/channel_buffer.h" +#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h" + +namespace webrtc { + +// Speech intelligibility enhancement module. Reads render and capture +// audio streams and modifies the render stream with a set of gains per +// frequency bin to enhance speech against the noise background. +// Note: assumes speech and noise streams are already separated. +class IntelligibilityEnhancer { + public: + struct Config { + // |var_*| are parameters for the VarianceArray constructor for the + // clear speech stream. + // TODO(bercic): the |var_*|, |*_rate| and |gain_limit| parameters should + // probably go away once fine tuning is done. + Config() + : sample_rate_hz(16000), + num_capture_channels(1), + num_render_channels(1), + var_type(intelligibility::VarianceArray::kStepDecaying), + var_decay_rate(0.9f), + var_window_size(10), + analysis_rate(800), + gain_change_limit(0.1f), + rho(0.02f) {} + int sample_rate_hz; + int num_capture_channels; + int num_render_channels; + intelligibility::VarianceArray::StepType var_type; + float var_decay_rate; + size_t var_window_size; + int analysis_rate; + float gain_change_limit; + float rho; + }; + + explicit IntelligibilityEnhancer(const Config& config); + IntelligibilityEnhancer(); // Initialize with default config. + + // Reads and processes chunk of noise stream in time domain. + void AnalyzeCaptureAudio(float* const* audio, + int sample_rate_hz, + int num_channels); + + // Reads chunk of speech in time domain and updates with modified signal. + void ProcessRenderAudio(float* const* audio, + int sample_rate_hz, + int num_channels); + bool active() const; + + private: + enum AudioSource { + kRenderStream = 0, // Clear speech stream. + kCaptureStream, // Noise stream. + }; + + // Provides access point to the frequency domain. + class TransformCallback : public LappedTransform::Callback { + public: + TransformCallback(IntelligibilityEnhancer* parent, AudioSource source); + + // All in frequency domain, receives input |in_block|, applies + // intelligibility enhancement, and writes result to |out_block|. + void ProcessAudioBlock(const std::complex* const* in_block, + int in_channels, + size_t frames, + int out_channels, + std::complex* const* out_block) override; + + private: + IntelligibilityEnhancer* parent_; + AudioSource source_; + }; + friend class TransformCallback; + FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation); + FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains); + + // Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source. + void DispatchAudio(AudioSource source, + const std::complex* in_block, + std::complex* out_block); + + // Updates variance computation and analysis with |in_block_|, + // and writes modified speech to |out_block|. + void ProcessClearBlock(const std::complex* in_block, + std::complex* out_block); + + // Computes and sets modified gains. + void AnalyzeClearBlock(float power_target); + + // Bisection search for optimal |lambda|. + void SolveForLambda(float power_target, float power_bot, float power_top); + + // Transforms freq gains to ERB gains. 
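+  // (Implementation-wise this is the synthesis direction: per-FFT-bin gains
+  // are computed from the ERB-band gains |gains_eq_| via |filter_bank_|.)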
+ void UpdateErbGains(); + + // Updates variance calculation for noise input with |in_block|. + void ProcessNoiseBlock(const std::complex* in_block, + std::complex* out_block); + + // Returns number of ERB filters. + static size_t GetBankSize(int sample_rate, size_t erb_resolution); + + // Initializes ERB filterbank. + void CreateErbBank(); + + // Analytically solves quadratic for optimal gains given |lambda|. + // Negative gains are set to 0. Stores the results in |sols|. + void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols); + + // Computes variance across ERB filters from freq variance |var|. + // Stores in |result|. + void FilterVariance(const float* var, float* result); + + // Returns dot product of vectors specified by size |length| arrays |a|,|b|. + static float DotProduct(const float* a, const float* b, size_t length); + + const size_t freqs_; // Num frequencies in frequency domain. + const size_t window_size_; // Window size in samples; also the block size. + const size_t chunk_length_; // Chunk size in samples. + const size_t bank_size_; // Num ERB filters. + const int sample_rate_hz_; + const int erb_resolution_; + const int num_capture_channels_; + const int num_render_channels_; + const int analysis_rate_; // Num blocks before gains recalculated. + + const bool active_; // Whether render gains are being updated. + // TODO(ekm): Add logic for updating |active_|. + + intelligibility::VarianceArray clear_variance_; + intelligibility::VarianceArray noise_variance_; + rtc::scoped_ptr filtered_clear_var_; + rtc::scoped_ptr filtered_noise_var_; + std::vector> filter_bank_; + rtc::scoped_ptr center_freqs_; + size_t start_freq_; + rtc::scoped_ptr rho_; // Production and interpretation SNR. + // for each ERB band. + rtc::scoped_ptr gains_eq_; // Pre-filter modified gains. + intelligibility::GainApplier gain_applier_; + + // Destination buffers used to reassemble blocked chunks before overwriting + // the original input array with modifications. + ChannelBuffer temp_render_out_buffer_; + ChannelBuffer temp_capture_out_buffer_; + + rtc::scoped_ptr kbd_window_; + TransformCallback render_callback_; + TransformCallback capture_callback_; + rtc::scoped_ptr render_mangler_; + rtc::scoped_ptr capture_mangler_; + int block_count_; + int analysis_step_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_ diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc new file mode 100644 index 0000000..7da9b95 --- /dev/null +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.cc @@ -0,0 +1,314 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// +// Implements helper functions and classes for intelligibility enhancement. 
+// + +#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h" + +#include +#include +#include +#include + +using std::complex; +using std::min; + +namespace webrtc { + +namespace intelligibility { + +float UpdateFactor(float target, float current, float limit) { + float delta = fabsf(target - current); + float sign = copysign(1.0f, target - current); + return current + sign * fminf(delta, limit); +} + +float AddDitherIfZero(float value) { + return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value; +} + +complex zerofudge(complex c) { + return complex(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag())); +} + +complex NewMean(complex mean, complex data, size_t count) { + return mean + (data - mean) / static_cast(count); +} + +void AddToMean(complex data, size_t count, complex* mean) { + (*mean) = NewMean(*mean, data, count); +} + + +static const size_t kWindowBlockSize = 10; + +VarianceArray::VarianceArray(size_t num_freqs, + StepType type, + size_t window_size, + float decay) + : running_mean_(new complex[num_freqs]()), + running_mean_sq_(new complex[num_freqs]()), + sub_running_mean_(new complex[num_freqs]()), + sub_running_mean_sq_(new complex[num_freqs]()), + variance_(new float[num_freqs]()), + conj_sum_(new float[num_freqs]()), + num_freqs_(num_freqs), + window_size_(window_size), + decay_(decay), + history_cursor_(0), + count_(0), + array_mean_(0.0f), + buffer_full_(false) { + history_.reset(new rtc::scoped_ptr[]>[num_freqs_]()); + for (size_t i = 0; i < num_freqs_; ++i) { + history_[i].reset(new complex[window_size_]()); + } + subhistory_.reset(new rtc::scoped_ptr[]>[num_freqs_]()); + for (size_t i = 0; i < num_freqs_; ++i) { + subhistory_[i].reset(new complex[window_size_]()); + } + subhistory_sq_.reset(new rtc::scoped_ptr[]>[num_freqs_]()); + for (size_t i = 0; i < num_freqs_; ++i) { + subhistory_sq_[i].reset(new complex[window_size_]()); + } + switch (type) { + case kStepInfinite: + step_func_ = &VarianceArray::InfiniteStep; + break; + case kStepDecaying: + step_func_ = &VarianceArray::DecayStep; + break; + case kStepWindowed: + step_func_ = &VarianceArray::WindowedStep; + break; + case kStepBlocked: + step_func_ = &VarianceArray::BlockedStep; + break; + case kStepBlockBasedMovingAverage: + step_func_ = &VarianceArray::BlockBasedMovingAverage; + break; + } +} + +// Compute the variance with Welford's algorithm, adding some fudge to +// the input in case of all-zeroes. +void VarianceArray::InfiniteStep(const complex* data, bool skip_fudge) { + array_mean_ = 0.0f; + ++count_; + for (size_t i = 0; i < num_freqs_; ++i) { + complex sample = data[i]; + if (!skip_fudge) { + sample = zerofudge(sample); + } + if (count_ == 1) { + running_mean_[i] = sample; + variance_[i] = 0.0f; + } else { + float old_sum = conj_sum_[i]; + complex old_mean = running_mean_[i]; + running_mean_[i] = + old_mean + (sample - old_mean) / static_cast(count_); + conj_sum_[i] = + (old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i])) + .real(); + variance_[i] = + conj_sum_[i] / (count_ - 1); + } + array_mean_ += (variance_[i] - array_mean_) / (i + 1); + } +} + +// Compute the variance from the beginning, with exponential decaying of the +// series data. 
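+// With forgetting factor |decay_| = d, the recursion per bin is
+//   mean    <- d * mean    + (1 - d) * x
+//   mean_sq <- d * mean_sq + (1 - d) * x * conj(x)
+//   variance = Re(mean_sq - mean * conj(mean)).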
+void VarianceArray::DecayStep(const complex<float>* data, bool /*dummy*/) {
+  array_mean_ = 0.0f;
+  ++count_;
+  for (size_t i = 0; i < num_freqs_; ++i) {
+    complex<float> sample = data[i];
+    sample = zerofudge(sample);
+
+    if (count_ == 1) {
+      running_mean_[i] = sample;
+      running_mean_sq_[i] = sample * std::conj(sample);
+      variance_[i] = 0.0f;
+    } else {
+      complex<float> prev = running_mean_[i];
+      complex<float> prev2 = running_mean_sq_[i];
+      running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample;
+      running_mean_sq_[i] =
+          decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample);
+      variance_[i] = (running_mean_sq_[i] -
+                      running_mean_[i] * std::conj(running_mean_[i])).real();
+    }
+
+    array_mean_ += (variance_[i] - array_mean_) / (i + 1);
+  }
+}
+
+// Windowed variance computation. On each step, the variances for the
+// window are recomputed from scratch, using Welford's algorithm.
+void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) {
+  size_t num = min(count_ + 1, window_size_);
+  array_mean_ = 0.0f;
+  for (size_t i = 0; i < num_freqs_; ++i) {
+    complex<float> mean;
+    float conj_sum = 0.0f;
+
+    history_[i][history_cursor_] = data[i];
+
+    mean = history_[i][history_cursor_];
+    variance_[i] = 0.0f;
+    for (size_t j = 1; j < num; ++j) {
+      complex<float> sample =
+          zerofudge(history_[i][(history_cursor_ + j) % window_size_]);
+      sample = history_[i][(history_cursor_ + j) % window_size_];
+      float old_sum = conj_sum;
+      complex<float> old_mean = mean;
+
+      mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1);
+      conj_sum =
+          (old_sum + std::conj(sample - old_mean) * (sample - mean)).real();
+      variance_[i] = conj_sum / (j);
+    }
+    array_mean_ += (variance_[i] - array_mean_) / (i + 1);
+  }
+  history_cursor_ = (history_cursor_ + 1) % window_size_;
+  ++count_;
+}
+
+// Variance with a window of blocks. Within each block, the variances are
+// recomputed from scratch at every step, using |Var(X) = E(X^2) - E^2(X)|.
+// Once a block is filled with kWindowBlockSize samples, it is added to the
+// history window and a new block is started. The variances for the window
+// are recomputed from scratch at each of these transitions.
+void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
+  size_t blocks = min(window_size_, history_cursor_ + 1);
+  for (size_t i = 0; i < num_freqs_; ++i) {
+    AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);
+    AddToMean(data[i] * std::conj(data[i]), count_ + 1,
+              &sub_running_mean_sq_[i]);
+    subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];
+    subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];
+
+    variance_[i] =
+        (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -
+         NewMean(running_mean_[i], sub_running_mean_[i], blocks) *
+             std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))
+            .real();
+    if (count_ == kWindowBlockSize - 1) {
+      sub_running_mean_[i] = complex<float>(0.0f, 0.0f);
+      sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
+      running_mean_[i] = complex<float>(0.0f, 0.0f);
+      running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
+      for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) {
+        AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);
+        AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);
+      }
+      ++history_cursor_;
+    }
+  }
+  ++count_;
+  if (count_ == kWindowBlockSize) {
+    count_ = 0;
+  }
+}
+
+// Maintains a moving average over whole blocks: the oldest block's means are
+// subtracted from the running sums, the newest block's means are added, and
+// the windowed variance is then derived from those sums.
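+// Each stored block summarizes kWindowBlockSize inputs and the window keeps
+// |window_size_| blocks, so the effective averaging span per frequency bin is
+// kWindowBlockSize * window_size_ input frames.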
+void VarianceArray::BlockBasedMovingAverage(const std::complex* data, + bool /*dummy*/) { + // TODO(ekmeyerson) To mitigate potential divergence, add counter so that + // after every so often sums are computed scratch by summing over all + // elements instead of subtracting oldest and adding newest. + for (size_t i = 0; i < num_freqs_; ++i) { + sub_running_mean_[i] += data[i]; + sub_running_mean_sq_[i] += data[i] * std::conj(data[i]); + } + ++count_; + + // TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow + // experimentation with different block size,window size pairs. + if (count_ >= kWindowBlockSize) { + count_ = 0; + + for (size_t i = 0; i < num_freqs_; ++i) { + running_mean_[i] -= subhistory_[i][history_cursor_]; + running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_]; + + float scale = 1.f / kWindowBlockSize; + subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale; + subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale; + + sub_running_mean_[i] = std::complex(0.0f, 0.0f); + sub_running_mean_sq_[i] = std::complex(0.0f, 0.0f); + + running_mean_[i] += subhistory_[i][history_cursor_]; + running_mean_sq_[i] += subhistory_sq_[i][history_cursor_]; + + scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1); + variance_[i] = std::real(running_mean_sq_[i] * scale - + running_mean_[i] * scale * + std::conj(running_mean_[i]) * scale); + } + + ++history_cursor_; + if (history_cursor_ >= window_size_) { + buffer_full_ = true; + history_cursor_ = 0; + } + } +} + +void VarianceArray::Clear() { + memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * num_freqs_); + memset(running_mean_sq_.get(), 0, + sizeof(*running_mean_sq_.get()) * num_freqs_); + memset(variance_.get(), 0, sizeof(*variance_.get()) * num_freqs_); + memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * num_freqs_); + history_cursor_ = 0; + count_ = 0; + array_mean_ = 0.0f; +} + +void VarianceArray::ApplyScale(float scale) { + array_mean_ = 0.0f; + for (size_t i = 0; i < num_freqs_; ++i) { + variance_[i] *= scale * scale; + array_mean_ += (variance_[i] - array_mean_) / (i + 1); + } +} + +GainApplier::GainApplier(size_t freqs, float change_limit) + : num_freqs_(freqs), + change_limit_(change_limit), + target_(new float[freqs]()), + current_(new float[freqs]()) { + for (size_t i = 0; i < freqs; ++i) { + target_[i] = 1.0f; + current_[i] = 1.0f; + } +} + +void GainApplier::Apply(const complex* in_block, + complex* out_block) { + for (size_t i = 0; i < num_freqs_; ++i) { + float factor = sqrtf(fabsf(current_[i])); + if (!std::isnormal(factor)) { + factor = 1.0f; + } + out_block[i] = factor * in_block[i]; + current_[i] = UpdateFactor(target_[i], current_[i], change_limit_); + } +} + +} // namespace intelligibility + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h new file mode 100644 index 0000000..4ac1167 --- /dev/null +++ b/webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h @@ -0,0 +1,160 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +// +// Specifies helper classes for intelligibility enhancement. +// + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_ + +#include + +#include "webrtc/base/scoped_ptr.h" + +namespace webrtc { + +namespace intelligibility { + +// Return |current| changed towards |target|, with the change being at most +// |limit|. +float UpdateFactor(float target, float current, float limit); + +// Apply a small fudge to degenerate complex values. The numbers in the array +// were chosen randomly, so that even a series of all zeroes has some small +// variability. +std::complex zerofudge(std::complex c); + +// Incremental mean computation. Return the mean of the series with the +// mean |mean| with added |data|. +std::complex NewMean(std::complex mean, + std::complex data, + size_t count); + +// Updates |mean| with added |data|; +void AddToMean(std::complex data, + size_t count, + std::complex* mean); + +// Internal helper for computing the variances of a stream of arrays. +// The result is an array of variances per position: the i-th variance +// is the variance of the stream of data on the i-th positions in the +// input arrays. +// There are four methods of computation: +// * kStepInfinite computes variances from the beginning onwards +// * kStepDecaying uses a recursive exponential decay formula with a +// settable forgetting factor +// * kStepWindowed computes variances within a moving window +// * kStepBlocked is similar to kStepWindowed, but history is kept +// as a rolling window of blocks: multiple input elements are used for +// one block and the history then consists of the variances of these blocks +// with the same effect as kStepWindowed, but less storage, so the window +// can be longer +class VarianceArray { + public: + enum StepType { + kStepInfinite = 0, + kStepDecaying, + kStepWindowed, + kStepBlocked, + kStepBlockBasedMovingAverage + }; + + // Construct an instance for the given input array length (|freqs|) and + // computation algorithm (|type|), with the appropriate parameters. + // |window_size| is the number of samples for kStepWindowed and + // the number of blocks for kStepBlocked. |decay| is the forgetting factor + // for kStepDecaying. + VarianceArray(size_t freqs, StepType type, size_t window_size, float decay); + + // Add a new data point to the series and compute the new variances. + // TODO(bercic) |skip_fudge| is a flag for kStepWindowed and kStepDecaying, + // whether they should skip adding some small dummy values to the input + // to prevent problems with all-zero inputs. Can probably be removed. + void Step(const std::complex* data, bool skip_fudge = false) { + (this->*step_func_)(data, skip_fudge); + } + // Reset variances to zero and forget all history. + void Clear(); + // Scale the input data by |scale|. Effectively multiply variances + // by |scale^2|. + void ApplyScale(float scale); + + // The current set of variances. + const float* variance() const { return variance_.get(); } + + // The mean value of the current set of variances. 
+  float array_mean() const { return array_mean_; }
+
+ private:
+  void InfiniteStep(const std::complex<float>* data, bool dummy);
+  void DecayStep(const std::complex<float>* data, bool dummy);
+  void WindowedStep(const std::complex<float>* data, bool dummy);
+  void BlockedStep(const std::complex<float>* data, bool dummy);
+  void BlockBasedMovingAverage(const std::complex<float>* data, bool dummy);
+
+  // TODO(ekmeyerson): Switch the following running means
+  // and histories from rtc::scoped_ptr to std::vector.
+
+  // The current average X and X^2.
+  rtc::scoped_ptr<std::complex<float>[]> running_mean_;
+  rtc::scoped_ptr<std::complex<float>[]> running_mean_sq_;
+
+  // Average X and X^2 for the current block in kStepBlocked.
+  rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_;
+  rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_sq_;
+
+  // Sample history for the rolling window in kStepWindowed and block-wise
+  // histories for kStepBlocked.
+  rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> history_;
+  rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_;
+  rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_sq_;
+
+  // The current set of variances and sums for Welford's algorithm.
+  rtc::scoped_ptr<float[]> variance_;
+  rtc::scoped_ptr<float[]> conj_sum_;
+
+  const size_t num_freqs_;
+  const size_t window_size_;
+  const float decay_;
+  size_t history_cursor_;
+  size_t count_;
+  float array_mean_;
+  bool buffer_full_;
+  void (VarianceArray::*step_func_)(const std::complex<float>*, bool);
+};
+
+// Helper class for smoothing gain changes. On each application step, the
+// currently used gains are changed towards a set of settable target gains,
+// constrained by a limit on the magnitude of the changes.
+class GainApplier {
+ public:
+  GainApplier(size_t freqs, float change_limit);
+
+  // Copy |in_block| to |out_block|, multiplied by the current set of gains,
+  // and step the current set of gains towards the target set.
+  void Apply(const std::complex<float>* in_block,
+             std::complex<float>* out_block);
+
+  // Return the current target gain set. Modify this array to set the targets.
+  float* target() const { return target_.get(); }
+
+ private:
+  const size_t num_freqs_;
+  const float change_limit_;
+  rtc::scoped_ptr<float[]> target_;
+  rtc::scoped_ptr<float[]> current_;
+};
+
+}  // namespace intelligibility
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
diff --git a/webrtc/modules/audio_processing/level_estimator_impl.cc b/webrtc/modules/audio_processing/level_estimator_impl.cc
index 799a962..26a61dc 100644
--- a/webrtc/modules/audio_processing/level_estimator_impl.cc
+++ b/webrtc/modules/audio_processing/level_estimator_impl.cc
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@@ -8,175 +8,79 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "level_estimator_impl.h"
+#include "webrtc/modules/audio_processing/level_estimator_impl.h"

-#include
-#include
-
-#include "critical_section_wrapper.h"
-
-#include "audio_processing_impl.h"
-#include "audio_buffer.h"
-
-// TODO(ajm): implement the underlying level estimator component.
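The rewritten level_estimator_impl.cc below simply forwards to RMSLevel, which implements the RFC 6465 measurement documented in the header: a positive magnitude in [0, 127] of the RMS level in dBFS, with muted frames pinned to 127 (i.e. -127 dBFS). A rough standalone sketch of that computation (RmsDbfs is a hypothetical helper for illustration, not the real RMSLevel API):

    #include <cmath>
    #include <cstddef>
    #include <cstdint>

    // Returns the magnitude of the RMS level in dB relative to 16-bit
    // full scale, clamped to [0, 127].
    int RmsDbfs(const int16_t* samples, size_t num_samples) {
      double sum_sq = 0.0;
      for (size_t i = 0; i < num_samples; ++i) {
        sum_sq += static_cast<double>(samples[i]) * samples[i];
      }
      if (sum_sq == 0.0) {
        return 127;  // Silent/muted input maps to -127 dBFS.
      }
      // RMS dBFS = 10 * log10(P_s / P_max) with P_max = 32768^2;
      // the positive magnitude of that value is returned.
      const double db =
          10.0 * std::log10(32768.0 * 32768.0 * num_samples / sum_sq);
      const double clamped = std::fmin(127.0, std::fmax(0.0, db));
      return static_cast<int>(clamped + 0.5);
    }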
+#include "webrtc/modules/audio_processing/audio_buffer.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/rms_level.h" +#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" namespace webrtc { -typedef void Handle; - -namespace { -/*int EstimateLevel(AudioBuffer* audio, Handle* my_handle) { - assert(audio->samples_per_split_channel() <= 160); - - WebRtc_Word16* mixed_data = audio->low_pass_split_data(0); - if (audio->num_channels() > 1) { - audio->CopyAndMixLowPass(1); - mixed_data = audio->mixed_low_pass_data(0); - } - - int err = UpdateLvlEst(my_handle, - mixed_data, - audio->samples_per_split_channel()); - if (err != AudioProcessing::kNoError) { - return GetHandleError(my_handle); - } - - return AudioProcessing::kNoError; -} - -int GetMetricsLocal(Handle* my_handle, LevelEstimator::Metrics* metrics) { - level_t levels; - memset(&levels, 0, sizeof(levels)); - - int err = ExportLevels(my_handle, &levels, 2); - if (err != AudioProcessing::kNoError) { - return err; - } - metrics->signal.instant = levels.instant; - metrics->signal.average = levels.average; - metrics->signal.maximum = levels.max; - metrics->signal.minimum = levels.min; - - err = ExportLevels(my_handle, &levels, 1); - if (err != AudioProcessing::kNoError) { - return err; - } - metrics->speech.instant = levels.instant; - metrics->speech.average = levels.average; - metrics->speech.maximum = levels.max; - metrics->speech.minimum = levels.min; - - err = ExportLevels(my_handle, &levels, 0); - if (err != AudioProcessing::kNoError) { - return err; - } - metrics->noise.instant = levels.instant; - metrics->noise.average = levels.average; - metrics->noise.maximum = levels.max; - metrics->noise.minimum = levels.min; - - return AudioProcessing::kNoError; -}*/ -} // namespace - -LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessingImpl* apm) - : ProcessingComponent(apm), - apm_(apm) {} +LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessing* apm, + CriticalSectionWrapper* crit) + : ProcessingComponent(), + crit_(crit) {} LevelEstimatorImpl::~LevelEstimatorImpl() {} -int LevelEstimatorImpl::AnalyzeReverseStream(AudioBuffer* /*audio*/) { - return apm_->kUnsupportedComponentError; - /*if (!is_component_enabled()) { - return apm_->kNoError; +int LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) { + if (!is_component_enabled()) { + return AudioProcessing::kNoError; } - return EstimateLevel(audio, static_cast(handle(1)));*/ -} - -int LevelEstimatorImpl::ProcessCaptureAudio(AudioBuffer* /*audio*/) { - return apm_->kUnsupportedComponentError; - /*if (!is_component_enabled()) { - return apm_->kNoError; + RMSLevel* rms_level = static_cast(handle(0)); + for (int i = 0; i < audio->num_channels(); ++i) { + rms_level->Process(audio->channels_const()[i], + audio->num_frames()); } - return EstimateLevel(audio, static_cast(handle(0)));*/ + return AudioProcessing::kNoError; } -int LevelEstimatorImpl::Enable(bool /*enable*/) { - CriticalSectionScoped crit_scoped(*apm_->crit()); - return apm_->kUnsupportedComponentError; - //return EnableComponent(enable); +int LevelEstimatorImpl::Enable(bool enable) { + CriticalSectionScoped crit_scoped(crit_); + return EnableComponent(enable); } bool LevelEstimatorImpl::is_enabled() const { return is_component_enabled(); } -int LevelEstimatorImpl::GetMetrics(LevelEstimator::Metrics* /*metrics*/, - LevelEstimator::Metrics* /*reverse_metrics*/) { - return apm_->kUnsupportedComponentError; - /*if (!is_component_enabled()) { - 
return apm_->kNotEnabledError; +int LevelEstimatorImpl::RMS() { + if (!is_component_enabled()) { + return AudioProcessing::kNotEnabledError; } - int err = GetMetricsLocal(static_cast(handle(0)), metrics); - if (err != apm_->kNoError) { - return err; - } - - err = GetMetricsLocal(static_cast(handle(1)), reverse_metrics); - if (err != apm_->kNoError) { - return err; - } - - return apm_->kNoError;*/ -} - -int LevelEstimatorImpl::get_version(char* version, - int version_len_bytes) const { - // An empty string is used to indicate no version information. - memset(version, 0, version_len_bytes); - return apm_->kNoError; + RMSLevel* rms_level = static_cast(handle(0)); + return rms_level->RMS(); } +// The ProcessingComponent implementation is pretty weird in this class since +// we have only a single instance of the trivial underlying component. void* LevelEstimatorImpl::CreateHandle() const { - Handle* handle = NULL; - /*if (CreateLvlEst(&handle) != apm_->kNoError) { - handle = NULL; - } else { - assert(handle != NULL); - }*/ - - return handle; + return new RMSLevel; } -int LevelEstimatorImpl::DestroyHandle(void* /*handle*/) const { - return apm_->kUnsupportedComponentError; - //return FreeLvlEst(static_cast(handle)); +void LevelEstimatorImpl::DestroyHandle(void* handle) const { + delete static_cast(handle); } -int LevelEstimatorImpl::InitializeHandle(void* /*handle*/) const { - return apm_->kUnsupportedComponentError; - /*const double kIntervalSeconds = 1.5; - return InitLvlEst(static_cast(handle), - apm_->sample_rate_hz(), - kIntervalSeconds);*/ +int LevelEstimatorImpl::InitializeHandle(void* handle) const { + static_cast(handle)->Reset(); + return AudioProcessing::kNoError; } int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const { - return apm_->kUnsupportedComponentError; - //return apm_->kNoError; + return AudioProcessing::kNoError; } int LevelEstimatorImpl::num_handles_required() const { - return apm_->kUnsupportedComponentError; - //return 2; + return 1; } -int LevelEstimatorImpl::GetHandleError(void* handle) const { - // The component has no detailed errors. - assert(handle != NULL); - return apm_->kUnspecifiedError; +int LevelEstimatorImpl::GetHandleError(void* /*handle*/) const { + return AudioProcessing::kUnspecifiedError; } + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/level_estimator_impl.h b/webrtc/modules/audio_processing/level_estimator_impl.h index 1515722..0d0050c 100644 --- a/webrtc/modules/audio_processing/level_estimator_impl.h +++ b/webrtc/modules/audio_processing/level_estimator_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,46 +8,46 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_LEVEL_ESTIMATOR_IMPL_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_LEVEL_ESTIMATOR_IMPL_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ -#include "audio_processing.h" -#include "processing_component.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" +#include "webrtc/modules/audio_processing/processing_component.h" +#include "webrtc/modules/audio_processing/rms_level.h" namespace webrtc { -class AudioProcessingImpl; + class AudioBuffer; +class CriticalSectionWrapper; class LevelEstimatorImpl : public LevelEstimator, public ProcessingComponent { public: - explicit LevelEstimatorImpl(const AudioProcessingImpl* apm); + LevelEstimatorImpl(const AudioProcessing* apm, + CriticalSectionWrapper* crit); virtual ~LevelEstimatorImpl(); - int AnalyzeReverseStream(AudioBuffer* audio); - int ProcessCaptureAudio(AudioBuffer* audio); + int ProcessStream(AudioBuffer* audio); // LevelEstimator implementation. - virtual bool is_enabled() const; - - // ProcessingComponent implementation. - virtual int get_version(char* version, int version_len_bytes) const; + bool is_enabled() const override; private: // LevelEstimator implementation. - virtual int Enable(bool enable); - virtual int GetMetrics(Metrics* metrics, Metrics* reverse_metrics); + int Enable(bool enable) override; + int RMS() override; // ProcessingComponent implementation. - virtual void* CreateHandle() const; - virtual int InitializeHandle(void* handle) const; - virtual int ConfigureHandle(void* handle) const; - virtual int DestroyHandle(void* handle) const; - virtual int num_handles_required() const; - virtual int GetHandleError(void* handle) const; + void* CreateHandle() const override; + int InitializeHandle(void* handle) const override; + int ConfigureHandle(void* handle) const override; + void DestroyHandle(void* handle) const override; + int num_handles_required() const override; + int GetHandleError(void* handle) const override; - const AudioProcessingImpl* apm_; + CriticalSectionWrapper* crit_; }; + } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_LEVEL_ESTIMATOR_IMPL_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_ diff --git a/webrtc/modules/audio_processing/logging/aec_logging.h b/webrtc/modules/audio_processing/logging/aec_logging.h new file mode 100644 index 0000000..3cf9ff8 --- /dev/null +++ b/webrtc/modules/audio_processing/logging/aec_logging.h @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_ + +#include + +#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h" + +// To enable AEC logging, invoke GYP with -Daec_debug_dump=1. +#ifdef WEBRTC_AEC_DEBUG_DUMP +// Dumps a wav data to file. +#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \ + do { \ + rtc_WavWriteSamples(file, data, num_samples); \ + } while (0) + +// (Re)opens a wav file for writing using the specified sample rate. 
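+// (The do { ... } while (0) wrappers below make each macro expand to a single
+// statement, so the calls compose safely inside unbraced if/else bodies.)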
+#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \ + sample_rate, wav_file) \ + do { \ + WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \ + wav_file); \ + } while (0) + +// Closes a wav file. +#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \ + do { \ + rtc_WavClose(wav_file); \ + } while (0) + +// Dumps a raw data to file. +#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \ + do { \ + (void) fwrite(data, data_size, 1, file); \ + } while (0) + +// Opens a raw data file for writing using the specified sample rate. +#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \ + do { \ + WebRtcAec_RawFileOpen(name, instance_counter, file); \ + } while (0) + +// Closes a raw data file. +#define RTC_AEC_DEBUG_RAW_CLOSE(file) \ + do { \ + fclose(file); \ + } while (0) + +#else // RTC_AEC_DEBUG_DUMP +#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_WAV_REOPEN(wav_file, name, instance_index, process_rate, \ + sample_rate) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_RAW_OPEN(file, name, instance_counter) \ + do { \ + } while (0) + +#define RTC_AEC_DEBUG_RAW_CLOSE(file) \ + do { \ + } while (0) + +#endif // WEBRTC_AEC_DEBUG_DUMP + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_ diff --git a/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc b/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc new file mode 100644 index 0000000..3a43471 --- /dev/null +++ b/webrtc/modules/audio_processing/logging/aec_logging_file_handling.cc @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h" + +#include +#include + +#include "webrtc/base/checks.h" +#include "webrtc/base/stringutils.h" +#include "webrtc/common_audio/wav_file.h" +#include "webrtc/typedefs.h" + +#ifdef WEBRTC_AEC_DEBUG_DUMP +void WebRtcAec_ReopenWav(const char* name, + int instance_index, + int process_rate, + int sample_rate, + rtc_WavWriter** wav_file) { + if (*wav_file) { + if (rtc_WavSampleRate(*wav_file) == sample_rate) + return; + rtc_WavClose(*wav_file); + } + char filename[64]; + int written = rtc::sprintfn(filename, sizeof(filename), "%s%d-%d.wav", name, + instance_index, process_rate); + + // Ensure there was no buffer output error. + RTC_DCHECK_GE(written, 0); + // Ensure that the buffer size was sufficient. + RTC_DCHECK_LT(static_cast(written), sizeof(filename)); + + *wav_file = rtc_WavOpen(filename, sample_rate, 1); +} + +void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) { + char filename[64]; + int written = rtc::sprintfn(filename, sizeof(filename), "%s_%d.dat", name, + instance_index); + + // Ensure there was no buffer output error. + RTC_DCHECK_GE(written, 0); + // Ensure that the buffer size was sufficient. 
+ RTC_DCHECK_LT(static_cast(written), sizeof(filename)); + + *file = fopen(filename, "wb"); +} + +#endif // WEBRTC_AEC_DEBUG_DUMP diff --git a/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h b/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h new file mode 100644 index 0000000..5ec8394 --- /dev/null +++ b/webrtc/modules/audio_processing/logging/aec_logging_file_handling.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_ + +#include + +#include "webrtc/common_audio/wav_file.h" +#include "webrtc/typedefs.h" + +#ifdef __cplusplus +extern "C" { +#endif + +#ifdef WEBRTC_AEC_DEBUG_DUMP +// Opens a new Wav file for writing. If it was already open with a different +// sample frequency, it closes it first. +void WebRtcAec_ReopenWav(const char* name, + int instance_index, + int process_rate, + int sample_rate, + rtc_WavWriter** wav_file); + +// Opens dumpfile with instance-specific filename. +void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file); + +#endif // WEBRTC_AEC_DEBUG_DUMP + +#ifdef __cplusplus +} +#endif + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_ diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.cc b/webrtc/modules/audio_processing/noise_suppression_impl.cc index f899f35..1501911 100644 --- a/webrtc/modules/audio_processing/noise_suppression_impl.cc +++ b/webrtc/modules/audio_processing/noise_suppression_impl.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,19 +8,18 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "noise_suppression_impl.h" +#include "webrtc/modules/audio_processing/noise_suppression_impl.h" -#include +#include -#include "critical_section_wrapper.h" +#include "webrtc/modules/audio_processing/audio_buffer.h" #if defined(WEBRTC_NS_FLOAT) -#include "noise_suppression.h" +#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h" #elif defined(WEBRTC_NS_FIXED) -#include "noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" #endif +#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" -#include "audio_processing_impl.h" -#include "audio_buffer.h" namespace webrtc { @@ -41,54 +40,64 @@ int MapSetting(NoiseSuppression::Level level) { return 2; case NoiseSuppression::kVeryHigh: return 3; - default: - return -1; } + assert(false); + return -1; } } // namespace -NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessingImpl* apm) - : ProcessingComponent(apm), +NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessing* apm, + CriticalSectionWrapper* crit) + : ProcessingComponent(), apm_(apm), + crit_(crit), level_(kModerate) {} NoiseSuppressionImpl::~NoiseSuppressionImpl() {} -int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { - int err = apm_->kNoError; - +int NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) { +#if defined(WEBRTC_NS_FLOAT) if (!is_component_enabled()) { return apm_->kNoError; } - assert(audio->samples_per_split_channel() <= 160); + assert(audio->num_frames_per_band() <= 160); assert(audio->num_channels() == num_handles()); - for (int i = 0; i < num_handles(); i++) { + for (int i = 0; i < num_handles(); ++i) { + Handle* my_handle = static_cast(handle(i)); + + WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]); + } +#endif + return apm_->kNoError; +} + +int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) { + if (!is_component_enabled()) { + return apm_->kNoError; + } + assert(audio->num_frames_per_band() <= 160); + assert(audio->num_channels() == num_handles()); + + for (int i = 0; i < num_handles(); ++i) { Handle* my_handle = static_cast(handle(i)); #if defined(WEBRTC_NS_FLOAT) - err = WebRtcNs_Process(static_cast(handle(i)), - audio->low_pass_split_data(i), - audio->high_pass_split_data(i), - audio->low_pass_split_data(i), - audio->high_pass_split_data(i)); + WebRtcNs_Process(my_handle, + audio->split_bands_const_f(i), + audio->num_bands(), + audio->split_bands_f(i)); #elif defined(WEBRTC_NS_FIXED) - err = WebRtcNsx_Process(static_cast(handle(i)), - audio->low_pass_split_data(i), - audio->high_pass_split_data(i), - audio->low_pass_split_data(i), - audio->high_pass_split_data(i)); + WebRtcNsx_Process(my_handle, + audio->split_bands_const(i), + audio->num_bands(), + audio->split_bands(i)); #endif - - if (err != apm_->kNoError) { - return GetHandleError(my_handle); - } } - return apm_->kNoError; } int NoiseSuppressionImpl::Enable(bool enable) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); return EnableComponent(enable); } @@ -97,7 +106,7 @@ bool NoiseSuppressionImpl::is_enabled() const { } int NoiseSuppressionImpl::set_level(Level level) { - CriticalSectionScoped crit_scoped(*apm_->crit()); + CriticalSectionScoped crit_scoped(crit_); if (MapSetting(level) == -1) { return apm_->kBadParameterError; } @@ -110,49 +119,43 @@ NoiseSuppression::Level NoiseSuppressionImpl::level() const { return level_; } -int NoiseSuppressionImpl::get_version(char* version, - int 
version_len_bytes) const { +float NoiseSuppressionImpl::speech_probability() const { #if defined(WEBRTC_NS_FLOAT) - if (WebRtcNs_get_version(version, version_len_bytes) != 0) -#elif defined(WEBRTC_NS_FIXED) - if (WebRtcNsx_get_version(version, version_len_bytes) != 0) -#endif - { - return apm_->kBadParameterError; + float probability_average = 0.0f; + for (int i = 0; i < num_handles(); i++) { + Handle* my_handle = static_cast(handle(i)); + probability_average += WebRtcNs_prior_speech_probability(my_handle); } - - return apm_->kNoError; + return probability_average / num_handles(); +#elif defined(WEBRTC_NS_FIXED) + // Currently not available for the fixed point implementation. + return apm_->kUnsupportedFunctionError; +#endif } void* NoiseSuppressionImpl::CreateHandle() const { - Handle* handle = NULL; #if defined(WEBRTC_NS_FLOAT) - if (WebRtcNs_Create(&handle) != apm_->kNoError) + return WebRtcNs_Create(); #elif defined(WEBRTC_NS_FIXED) - if (WebRtcNsx_Create(&handle) != apm_->kNoError) + return WebRtcNsx_Create(); #endif - { - handle = NULL; - } else { - assert(handle != NULL); - } - - return handle; } -int NoiseSuppressionImpl::DestroyHandle(void* handle) const { +void NoiseSuppressionImpl::DestroyHandle(void* handle) const { #if defined(WEBRTC_NS_FLOAT) - return WebRtcNs_Free(static_cast(handle)); + WebRtcNs_Free(static_cast(handle)); #elif defined(WEBRTC_NS_FIXED) - return WebRtcNsx_Free(static_cast(handle)); + WebRtcNsx_Free(static_cast(handle)); #endif } int NoiseSuppressionImpl::InitializeHandle(void* handle) const { #if defined(WEBRTC_NS_FLOAT) - return WebRtcNs_Init(static_cast(handle), apm_->sample_rate_hz()); + return WebRtcNs_Init(static_cast(handle), + apm_->proc_sample_rate_hz()); #elif defined(WEBRTC_NS_FIXED) - return WebRtcNsx_Init(static_cast(handle), apm_->sample_rate_hz()); + return WebRtcNsx_Init(static_cast(handle), + apm_->proc_sample_rate_hz()); #endif } @@ -176,4 +179,3 @@ int NoiseSuppressionImpl::GetHandleError(void* handle) const { return apm_->kUnspecifiedError; } } // namespace webrtc - diff --git a/webrtc/modules/audio_processing/noise_suppression_impl.h b/webrtc/modules/audio_processing/noise_suppression_impl.h index c9ff9b3..76a39b8 100644 --- a/webrtc/modules/audio_processing/noise_suppression_impl.h +++ b/webrtc/modules/audio_processing/noise_suppression_impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,47 +8,50 @@ * be found in the AUTHORS file in the root of the source tree. 
 */
 
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_
 
-#include "audio_processing.h"
-#include "processing_component.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/audio_processing/processing_component.h"
 
 namespace webrtc {
 
-class AudioProcessingImpl;
+
 class AudioBuffer;
+class CriticalSectionWrapper;
 
 class NoiseSuppressionImpl : public NoiseSuppression,
                              public ProcessingComponent {
  public:
-  explicit NoiseSuppressionImpl(const AudioProcessingImpl* apm);
+  NoiseSuppressionImpl(const AudioProcessing* apm,
+                       CriticalSectionWrapper* crit);
   virtual ~NoiseSuppressionImpl();
 
+  int AnalyzeCaptureAudio(AudioBuffer* audio);
   int ProcessCaptureAudio(AudioBuffer* audio);
 
   // NoiseSuppression implementation.
-  virtual bool is_enabled() const;
-
-  // ProcessingComponent implementation.
-  virtual int get_version(char* version, int version_len_bytes) const;
+  bool is_enabled() const override;
+  float speech_probability() const override;
+  Level level() const override;
 
  private:
   // NoiseSuppression implementation.
-  virtual int Enable(bool enable);
-  virtual int set_level(Level level);
-  virtual Level level() const;
+  int Enable(bool enable) override;
+  int set_level(Level level) override;
 
   // ProcessingComponent implementation.
-  virtual void* CreateHandle() const;
-  virtual int InitializeHandle(void* handle) const;
-  virtual int ConfigureHandle(void* handle) const;
-  virtual int DestroyHandle(void* handle) const;
-  virtual int num_handles_required() const;
-  virtual int GetHandleError(void* handle) const;
+  void* CreateHandle() const override;
+  int InitializeHandle(void* handle) const override;
+  int ConfigureHandle(void* handle) const override;
+  void DestroyHandle(void* handle) const override;
+  int num_handles_required() const override;
+  int GetHandleError(void* handle) const override;
 
-  const AudioProcessingImpl* apm_;
+  const AudioProcessing* apm_;
+  CriticalSectionWrapper* crit_;
   Level level_;
 };
+
 }  // namespace webrtc
 
-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_
diff --git a/webrtc/modules/audio_processing/ns/Makefile.am b/webrtc/modules/audio_processing/ns/Makefile.am
deleted file mode 100644
index f879cc1..0000000
--- a/webrtc/modules/audio_processing/ns/Makefile.am
+++ /dev/null
@@ -1,20 +0,0 @@
-noinst_LTLIBRARIES = libns.la libns_fix.la
-
-libns_la_SOURCES = interface/noise_suppression.h \
-		noise_suppression.c \
-		windows_private.h \
-		defines.h \
-		ns_core.c \
-		ns_core.h
-libns_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-		-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-		-I$(top_srcdir)/src/modules/audio_processing/utility
-
-libns_fix_la_SOURCES = interface/noise_suppression_x.h \
-		noise_suppression_x.c \
-		nsx_defines.h \
-		nsx_core.c \
-		nsx_core.h
-libns_fix_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-		-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-		-I$(top_srcdir)/src/modules/audio_processing/utility
diff --git a/webrtc/modules/audio_processing/ns/defines.h b/webrtc/modules/audio_processing/ns/defines.h
index d253967..8271332 100644
--- a/webrtc/modules/audio_processing/ns/defines.h
+++ b/webrtc/modules/audio_processing/ns/defines.h
@@ -11,13 +11,10 @@
 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
 #define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
 
-//#define PROCESS_FLOW_0    // Use the traditional method.
-//#define PROCESS_FLOW_1    // Use traditional with DD estimate of prior SNR.
-#define PROCESS_FLOW_2      // Use the new method of speech/noise classification.
-
 #define BLOCKL_MAX          160 // max processing block length: 160
 #define ANAL_BLOCKL_MAX     256 // max analysis block length: 256
 #define HALF_ANAL_BLOCKL    129 // half max analysis block length + 1
+#define NUM_HIGH_BANDS_MAX  2   // max number of high bands: 2
 
 #define QUANTILE            (float)0.25
 
@@ -27,7 +24,6 @@
 #define FACTOR              (float)40.0
 #define WIDTH               (float)0.01
 
-#define SMOOTH              (float)0.75 // filter smoothing
 // Length of fft work arrays.
 #define IP_LENGTH           (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2))
 #define W_LENGTH            (ANAL_BLOCKL_MAX >> 1)
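The new NUM_HIGH_BANDS_MAX constant sizes the high-band buffers for up to two high bands, which is what lets WebRtcNs_InitCore() later in this patch accept fs == 48000: a 10 ms frame is then three 160-sample bands (one 0-8 kHz band plus two high bands) rather than the previous maximum of two. A small sanity sketch of that band arithmetic, under the assumed layout of 160-sample bands above 16 kHz input rates (not patch code):

    /* Assumed band layout; NumHighBands(48000) == 2 == NUM_HIGH_BANDS_MAX. */
    static int NumHighBands(int fs) {
      int bands = fs / 16000;           /* total 160-sample bands per 10 ms */
      return bands > 1 ? bands - 1 : 0; /* everything above band 0 is "high" */
    }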
diff --git a/webrtc/modules/audio_processing/ns/include/noise_suppression.h b/webrtc/modules/audio_processing/ns/include/noise_suppression.h
new file mode 100644
index 0000000..9dac56b
--- /dev/null
+++ b/webrtc/modules/audio_processing/ns/include/noise_suppression.h
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.  All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+typedef struct NsHandleT NsHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This function creates an instance of the floating point Noise Suppression.
+ */
+NsHandle* WebRtcNs_Create();
+
+/*
+ * This function frees the dynamic memory of a specified noise suppression
+ * instance.
+ *
+ * Input:
+ *      - NS_inst       : Pointer to NS instance that should be freed
+ */
+void WebRtcNs_Free(NsHandle* NS_inst);
+
+/*
+ * This function initializes a NS instance and has to be called before any
+ * other processing is made.
+ *
+ * Input:
+ *      - NS_inst       : Instance that should be initialized
+ *      - fs            : sampling frequency
+ *
+ * Output:
+ *      - NS_inst       : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs);
+
+/*
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ *      - NS_inst       : Noise suppression instance.
+ *      - mode          : 0: Mild, 1: Medium , 2: Aggressive
+ *
+ * Output:
+ *      - NS_inst       : Updated instance.
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
+
+/*
+ * This functions estimates the background noise for the inserted speech frame.
+ * The input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ *      - NS_inst       : Noise suppression instance.
+ *      - spframe       : Pointer to speech frame buffer for L band
+ *
+ * Output:
+ *      - NS_inst       : Updated NS instance
+ */
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
+
+/*
+ * This functions does Noise Suppression for the inserted speech frame. The
+ * input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ *      - NS_inst       : Noise suppression instance.
+ *      - spframe       : Pointer to speech frame buffer for each band
+ *      - num_bands     : Number of bands
+ *
+ * Output:
+ *      - NS_inst       : Updated NS instance
+ *      - outframe      : Pointer to output frame for each band
+ */
+void WebRtcNs_Process(NsHandle* NS_inst,
+                      const float* const* spframe,
+                      size_t num_bands,
+                      float* const* outframe);
+
+/* Returns the internally used prior speech probability of the current frame.
+ * There is a frequency bin based one as well, with which this should not be
+ * confused.
+ *
+ * Input
+ *      - handle        : Noise suppression instance.
+ *
+ * Return value         : Prior speech probability in interval [0.0, 1.0].
+ *                        -1 - NULL pointer or uninitialized instance.
+ */
+float WebRtcNs_prior_speech_probability(NsHandle* handle);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
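The new float API drops every error return except Init and set_policy, and replaces the fixed L/H-band signature with band arrays. A minimal usage sketch under stated assumptions (32 kHz input already split into two 160-sample bands, e.g. by the APM splitting filter; error handling trimmed):

    #include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"

    /* Setup, once:
     *   NsHandle* ns = WebRtcNs_Create();
     *   WebRtcNs_Init(ns, 32000);
     *   WebRtcNs_set_policy(ns, 2);   0: Mild, 1: Medium, 2: Aggressive    */

    /* Process one 10 ms frame in place. */
    static void DenoiseFrame(NsHandle* ns, float low[160], float high[160]) {
      const float* in[2] = { low, high };
      float* out[2] = { low, high };
      WebRtcNs_Analyze(ns, in[0]);       /* Noise estimation, L band only.  */
      WebRtcNs_Process(ns, in, 2, out);  /* Suppression on both bands.      */
    }

WebRtcNs_prior_speech_probability(ns) can then be polled per frame; it returns -1 until Init() has run.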
diff --git a/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h b/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
new file mode 100644
index 0000000..88fe4cd
--- /dev/null
+++ b/webrtc/modules/audio_processing/ns/include/noise_suppression_x.h
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS.  All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
+
+#include "webrtc/typedefs.h"
+
+typedef struct NsxHandleT NsxHandle;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/*
+ * This function creates an instance of the fixed point Noise Suppression.
+ */
+NsxHandle* WebRtcNsx_Create();
+
+/*
+ * This function frees the dynamic memory of a specified Noise Suppression
+ * instance.
+ *
+ * Input:
+ *      - nsxInst       : Pointer to NS instance that should be freed
+ */
+void WebRtcNsx_Free(NsxHandle* nsxInst);
+
+/*
+ * This function initializes a NS instance
+ *
+ * Input:
+ *      - nsxInst       : Instance that should be initialized
+ *      - fs            : sampling frequency
+ *
+ * Output:
+ *      - nsxInst       : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs);
+
+/*
+ * This changes the aggressiveness of the noise suppression method.
+ *
+ * Input:
+ *      - nsxInst       : Instance that should be initialized
+ *      - mode          : 0: Mild, 1: Medium , 2: Aggressive
+ *
+ * Output:
+ *      - nsxInst       : Initialized instance
+ *
+ * Return value         :  0 - Ok
+ *                        -1 - Error
+ */
+int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
+
+/*
+ * This functions does noise suppression for the inserted speech frame. The
+ * input and output signals should always be 10ms (80 or 160 samples).
+ *
+ * Input
+ *      - nsxInst       : NSx instance. Needs to be initiated before call.
+ *      - speechFrame   : Pointer to speech frame buffer for each band
+ *      - num_bands     : Number of bands
+ *
+ * Output:
+ *      - nsxInst       : Updated NSx instance
+ *      - outFrame      : Pointer to output frame for each band
+ */
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
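The fixed-point header is the same shape minus analysis: 16-bit samples, int num_bands instead of size_t, and no Analyze or speech-probability entry points, since the noise estimate is updated inside WebRtcNsx_Process() itself. The equivalent sketch under the same assumptions as the float example above:

    #include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"

    /* One 10 ms frame at 32 kHz, two 160-sample bands, processed in place. */
    static void DenoiseFrameFixed(NsxHandle* nsx,
                                  short low[160], short high[160]) {
      const short* in[2] = { low, high };
      short* out[2] = { low, high };
      WebRtcNsx_Process(nsx, in, 2, out);
    }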
diff --git a/webrtc/modules/audio_processing/ns/interface/noise_suppression.h b/webrtc/modules/audio_processing/ns/interface/noise_suppression.h
deleted file mode 100644
index 907faf4..0000000
--- a/webrtc/modules/audio_processing/ns/interface/noise_suppression.h
+++ /dev/null
@@ -1,124 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS.  All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_
-
-#include "typedefs.h"
-
-typedef struct NsHandleT NsHandle;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * This function returns the version number of the code.
- *
- * Input:
- *      - version       : Pointer to a character array where the version
- *                        info is stored.
- *      - length        : Length of version.
- *
- * Return value         :  0 - Ok
- *                        -1 - Error (probably length is not sufficient)
- */
-int WebRtcNs_get_version(char* version, short length);
-
-
-/*
- * This function creates an instance to the noise reduction structure
- *
- * Input:
- *      - NS_inst       : Pointer to noise reduction instance that should be
- *                        created
- *
- * Output:
- *      - NS_inst       : Pointer to created noise reduction instance
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNs_Create(NsHandle** NS_inst);
-
-
-/*
- * This function frees the dynamic memory of a specified Noise Reduction
- * instance.
- *
- * Input:
- *      - NS_inst       : Pointer to NS instance that should be freed
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNs_Free(NsHandle* NS_inst);
-
-
-/*
- * This function initializes a NS instance
- *
- * Input:
- *      - NS_inst       : Instance that should be initialized
- *      - fs            : sampling frequency
- *
- * Output:
- *      - NS_inst       : Initialized instance
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
-
-/*
- * This changes the aggressiveness of the noise suppression method.
- *
- * Input:
- *      - NS_inst       : Instance that should be initialized
- *      - mode          : 0: Mild, 1: Medium , 2: Aggressive
- *
- * Output:
- *      - NS_inst       : Initialized instance
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
-
-
-/*
- * This functions does Noise Suppression for the inserted speech frame. The
- * input and output signals should always be 10ms (80 or 160 samples).
- *
- * Input
- *      - NS_inst       : NS Instance. Needs to be initiated before call.
- *      - spframe       : Pointer to speech frame buffer for L band
- *      - spframe_H     : Pointer to speech frame buffer for H band
- *      - fs            : sampling frequency
- *
- * Output:
- *      - NS_inst       : Updated NS instance
- *      - outframe      : Pointer to output frame for L band
- *      - outframe_H    : Pointer to output frame for H band
- *
- * Return value         :  0 - OK
- *                        -1 - Error
- */
-int WebRtcNs_Process(NsHandle* NS_inst,
-                     short* spframe,
-                     short* spframe_H,
-                     short* outframe,
-                     short* outframe_H);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_
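With the old float interface deleted, the API break is visible in one place: the removed WebRtcNs_Process() took four explicit 16-bit L/H-band pointers and returned an error code, while its replacement takes float band arrays and returns void. Roughly, for a 32 kHz caller (hypothetical buffer names):

    /* Old (removed): short samples, fixed two-band layout, int return. */
    int err = WebRtcNs_Process(ns, sp_l, sp_h, out_l, out_h);

    /* New: float band arrays, Analyze() first, no return value. */
    const float* in[2] = { sp_l_f, sp_h_f };
    float* out[2] = { out_l_f, out_h_f };
    WebRtcNs_Analyze(ns, in[0]);
    WebRtcNs_Process(ns, in, 2, out);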
diff --git a/webrtc/modules/audio_processing/ns/interface/noise_suppression_x.h b/webrtc/modules/audio_processing/ns/interface/noise_suppression_x.h
deleted file mode 100644
index 14443fa..0000000
--- a/webrtc/modules/audio_processing/ns/interface/noise_suppression_x.h
+++ /dev/null
@@ -1,123 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS.  All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
-
-#include "typedefs.h"
-
-typedef struct NsxHandleT NsxHandle;
-
-#ifdef __cplusplus
-extern "C" {
-#endif
-
-/*
- * This function returns the version number of the code.
- *
- * Input:
- *      - version       : Pointer to a character array where the version
- *                        info is stored.
- *      - length        : Length of version.
- *
- * Return value         :  0 - Ok
- *                        -1 - Error (probably length is not sufficient)
- */
-int WebRtcNsx_get_version(char* version, short length);
-
-
-/*
- * This function creates an instance to the noise reduction structure
- *
- * Input:
- *      - nsxInst       : Pointer to noise reduction instance that should be
- *                        created
- *
- * Output:
- *      - nsxInst       : Pointer to created noise reduction instance
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNsx_Create(NsxHandle** nsxInst);
-
-
-/*
- * This function frees the dynamic memory of a specified Noise Suppression
- * instance.
- *
- * Input:
- *      - nsxInst       : Pointer to NS instance that should be freed
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNsx_Free(NsxHandle* nsxInst);
-
-
-/*
- * This function initializes a NS instance
- *
- * Input:
- *      - nsxInst       : Instance that should be initialized
- *      - fs            : sampling frequency
- *
- * Output:
- *      - nsxInst       : Initialized instance
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs);
-
-/*
- * This changes the aggressiveness of the noise suppression method.
- *
- * Input:
- *      - nsxInst       : Instance that should be initialized
- *      - mode          : 0: Mild, 1: Medium , 2: Aggressive
- *
- * Output:
- *      - nsxInst       : Initialized instance
- *
- * Return value         :  0 - Ok
- *                        -1 - Error
- */
-int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
-
-/*
- * This functions does noise suppression for the inserted speech frame. The
- * input and output signals should always be 10ms (80 or 160 samples).
- *
- * Input
- *      - nsxInst       : NSx instance. Needs to be initiated before call.
- *      - speechFrame   : Pointer to speech frame buffer for L band
- *      - speechFrameHB : Pointer to speech frame buffer for H band
- *      - fs            : sampling frequency
- *
- * Output:
- *      - nsxInst       : Updated NSx instance
- *      - outFrame      : Pointer to output frame for L band
- *      - outFrameHB    : Pointer to output frame for H band
- *
- * Return value         :  0 - OK
- *                        -1 - Error
- */
-int WebRtcNsx_Process(NsxHandle* nsxInst,
-                      short* speechFrame,
-                      short* speechFrameHB,
-                      short* outFrame,
-                      short* outFrameHB);
-
-#ifdef __cplusplus
-}
-#endif
-
-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
diff --git a/webrtc/modules/audio_processing/ns/noise_suppression.c b/webrtc/modules/audio_processing/ns/noise_suppression.c
index d33caa9..13f1b2d 100644
--- a/webrtc/modules/audio_processing/ns/noise_suppression.c
+++ b/webrtc/modules/audio_processing/ns/noise_suppression.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -8,58 +8,52 @@
 * be found in the AUTHORS file in the root of the source tree.
 */
 
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
+
 #include <stdlib.h>
 #include <string.h>
 
-#include "noise_suppression.h"
-#include "ns_core.h"
-#include "defines.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/defines.h"
+#include "webrtc/modules/audio_processing/ns/ns_core.h"
 
-int WebRtcNs_get_version(char* versionStr, short length) {
-  const char version[] = "NS 2.2.0";
-  const short versionLen = (short)strlen(version) + 1; // +1: null-termination
-
-  if (versionStr == NULL) {
-    return -1;
-  }
-
-  if (versionLen > length) {
-    return -1;
-  }
-
-  strncpy(versionStr, version, versionLen);
-
-  return 0;
+NsHandle* WebRtcNs_Create() {
+  NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC));
+  self->initFlag = 0;
+  return (NsHandle*)self;
 }
 
-int WebRtcNs_Create(NsHandle** NS_inst) {
-  *NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
-  if (*NS_inst != NULL) {
-    (*(NSinst_t**)NS_inst)->initFlag = 0;
-    return 0;
-  } else {
-    return -1;
-  }
-
-}
-
-int WebRtcNs_Free(NsHandle* NS_inst) {
+void WebRtcNs_Free(NsHandle* NS_inst) {
   free(NS_inst);
-  return 0;
 }
 
-
-int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
-  return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
+int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) {
+  return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs);
 }
 
 int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
-  return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
+  return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode);
 }
 
-
-int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
-                     short* outframe, short* outframe_H) {
-  return WebRtcNs_ProcessCore(
-      (NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
+void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
+  WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe);
+}
+
+void WebRtcNs_Process(NsHandle* NS_inst,
+                      const float* const* spframe,
+                      size_t num_bands,
+                      float* const* outframe) {
+  WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands,
+                       outframe);
+}
+
+float WebRtcNs_prior_speech_probability(NsHandle* handle) {
+  NoiseSuppressionC* self = (NoiseSuppressionC*)handle;
+  if (handle == NULL) {
+    return -1;
+  }
+  if (self->initFlag == 0) {
+    return -1;
+  }
+  return self->priorSpeechProb;
 }
diff --git a/webrtc/modules/audio_processing/ns/noise_suppression_x.c b/webrtc/modules/audio_processing/ns/noise_suppression_x.c
index afdea7b..150fe60 100644
--- a/webrtc/modules/audio_processing/ns/noise_suppression_x.c
+++ b/webrtc/modules/audio_processing/ns/noise_suppression_x.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -8,58 +8,39 @@
 * be found in the AUTHORS file in the root of the source tree.
 */
 
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+
 #include <stdlib.h>
-#include <string.h>
 
-#include "noise_suppression_x.h"
-#include "nsx_core.h"
-#include "nsx_defines.h"
+#include "webrtc/common_audio/signal_processing/include/real_fft.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
 
-int WebRtcNsx_get_version(char* versionStr, short length) {
-  const char version[] = "NS\t3.1.0";
-  const short versionLen = (short)strlen(version) + 1; // +1: null-termination
-
-  if (versionStr == NULL) {
-    return -1;
-  }
-
-  if (versionLen > length) {
-    return -1;
-  }
-
-  strncpy(versionStr, version, versionLen);
-
-  return 0;
+NsxHandle* WebRtcNsx_Create() {
+  NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC));
+  WebRtcSpl_Init();
+  self->real_fft = NULL;
+  self->initFlag = 0;
+  return (NsxHandle*)self;
 }
 
-int WebRtcNsx_Create(NsxHandle** nsxInst) {
-  *nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t));
-  if (*nsxInst != NULL) {
-    (*(NsxInst_t**)nsxInst)->initFlag = 0;
-    return 0;
-  } else {
-    return -1;
-  }
-
-}
-
-int WebRtcNsx_Free(NsxHandle* nsxInst) {
+void WebRtcNsx_Free(NsxHandle* nsxInst) {
+  WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft);
   free(nsxInst);
-  return 0;
 }
 
-int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs) {
-  return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs);
+int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) {
+  return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs);
 }
 
 int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
-  return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
+  return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode);
 }
 
-int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
-                      short* speechFrameHB, short* outFrame,
-                      short* outFrameHB) {
-  return WebRtcNsx_ProcessCore(
-      (NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
+void WebRtcNsx_Process(NsxHandle* nsxInst,
+                       const short* const* speechFrame,
+                       int num_bands,
+                       short* const* outFrame) {
+  WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame,
+                        num_bands, outFrame);
 }
-
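Note the error-model change in these wrappers: Create() now returns the handle directly (the malloc result is not checked), Free() is void, and the only observable failure on the float side is the -1 sentinel from WebRtcNs_prior_speech_probability(). A caller reading the probability should treat negative values as "not ready" rather than as data; a small sketch, assuming an ns handle from the examples above:

    float prior = WebRtcNs_prior_speech_probability(ns);
    if (prior < 0.f) {
      /* NULL handle or WebRtcNs_Init() not yet called. */
    } else {
      /* prior is a probability in [0.0, 1.0]. */
    }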
diff --git a/webrtc/modules/audio_processing/ns/ns.gypi b/webrtc/modules/audio_processing/ns/ns.gypi
deleted file mode 100644
index b536b0e..0000000
--- a/webrtc/modules/audio_processing/ns/ns.gypi
+++ /dev/null
@@ -1,58 +0,0 @@
-# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
-#
-# Use of this source code is governed by a BSD-style license
-# that can be found in the LICENSE file in the root of the source
-# tree. An additional intellectual property rights grant can be found
-# in the file PATENTS.  All contributing project authors may
-# be found in the AUTHORS file in the root of the source tree.
-
-{
-  'targets': [
-    {
-      'target_name': 'ns',
-      'type': '<(library)',
-      'dependencies': [
-        '<(webrtc_root)/common_audio/common_audio.gyp:spl',
-        'apm_util'
-      ],
-      'include_dirs': [
-        'interface',
-      ],
-      'direct_dependent_settings': {
-        'include_dirs': [
-          'interface',
-        ],
-      },
-      'sources': [
-        'interface/noise_suppression.h',
-        'noise_suppression.c',
-        'windows_private.h',
-        'defines.h',
-        'ns_core.c',
-        'ns_core.h',
-      ],
-    },
-    {
-      'target_name': 'ns_fix',
-      'type': '<(library)',
-      'dependencies': [
-        '<(webrtc_root)/common_audio/common_audio.gyp:spl',
-      ],
-      'include_dirs': [
-        'interface',
-      ],
-      'direct_dependent_settings': {
-        'include_dirs': [
-          'interface',
-        ],
-      },
-      'sources': [
-        'interface/noise_suppression_x.h',
-        'noise_suppression_x.c',
-        'nsx_defines.h',
-        'nsx_core.c',
-        'nsx_core.h',
-      ],
-    },
-  ],
-}
diff --git a/webrtc/modules/audio_processing/ns/ns_core.c b/webrtc/modules/audio_processing/ns/ns_core.c
index 791d419..1d60914 100644
--- a/webrtc/modules/audio_processing/ns/ns_core.c
+++ b/webrtc/modules/audio_processing/ns/ns_core.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
@@ -8,385 +8,373 @@
 * be found in the AUTHORS file in the root of the source tree.
 */
 
-#include <string.h>
+#include <assert.h>
 #include <math.h>
-//#include <stdio.h>
+#include <string.h>
 #include <stdlib.h>
 
-#include "noise_suppression.h"
-#include "ns_core.h"
-#include "windows_private.h"
-#include "fft4g.h"
-#include "signal_processing_library.h"
-
-// Set Feature Extraction Parameters
-void WebRtcNs_set_feature_extraction_parameters(NSinst_t* inst) {
-  //bin size of histogram
-  inst->featureExtractionParams.binSizeLrt = (float)0.1;
-  inst->featureExtractionParams.binSizeSpecFlat = (float)0.05;
-  inst->featureExtractionParams.binSizeSpecDiff = (float)0.1;
+#include "webrtc/common_audio/fft4g.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
+#include "webrtc/modules/audio_processing/ns/ns_core.h"
+#include "webrtc/modules/audio_processing/ns/windows_private.h"
 
-  //range of histogram over which lrt threshold is computed
-  inst->featureExtractionParams.rangeAvgHistLrt = (float)1.0;
+// Set Feature Extraction Parameters.
+static void set_feature_extraction_parameters(NoiseSuppressionC* self) {
+  // Bin size of histogram.
+  self->featureExtractionParams.binSizeLrt = 0.1f;
+  self->featureExtractionParams.binSizeSpecFlat = 0.05f;
+  self->featureExtractionParams.binSizeSpecDiff = 0.1f;
 
-  //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
-  // thresholds for prior model
-  inst->featureExtractionParams.factor1ModelPars = (float)1.20; //for lrt and spectral diff
-  inst->featureExtractionParams.factor2ModelPars = (float)0.9; //for spectral_flatness:
-  // used when noise is flatter than speech
+  // Range of histogram over which LRT threshold is computed.
+  self->featureExtractionParams.rangeAvgHistLrt = 1.f;
 
-  //peak limit for spectral flatness (varies between 0 and 1)
-  inst->featureExtractionParams.thresPosSpecFlat = (float)0.6;
+  // Scale parameters: multiply dominant peaks of the histograms by scale factor
+  // to obtain thresholds for prior model.
+  // For LRT and spectral difference.
+  self->featureExtractionParams.factor1ModelPars = 1.2f;
+  // For spectral_flatness: used when noise is flatter than speech.
+  self->featureExtractionParams.factor2ModelPars = 0.9f;
 
-  //limit on spacing of two highest peaks in histogram: spacing determined by bin size
-  inst->featureExtractionParams.limitPeakSpacingSpecFlat =
-      2 * inst->featureExtractionParams.binSizeSpecFlat;
-  inst->featureExtractionParams.limitPeakSpacingSpecDiff =
-      2 * inst->featureExtractionParams.binSizeSpecDiff;
+  // Peak limit for spectral flatness (varies between 0 and 1).
+  self->featureExtractionParams.thresPosSpecFlat = 0.6f;
 
-  //limit on relevance of second peak:
-  inst->featureExtractionParams.limitPeakWeightsSpecFlat = (float)0.5;
-  inst->featureExtractionParams.limitPeakWeightsSpecDiff = (float)0.5;
+  // Limit on spacing of two highest peaks in histogram: spacing determined by
+  // bin size.
+  self->featureExtractionParams.limitPeakSpacingSpecFlat =
+      2 * self->featureExtractionParams.binSizeSpecFlat;
+  self->featureExtractionParams.limitPeakSpacingSpecDiff =
+      2 * self->featureExtractionParams.binSizeSpecDiff;
 
-  // fluctuation limit of lrt feature
-  inst->featureExtractionParams.thresFluctLrt = (float)0.05;
+  // Limit on relevance of second peak.
+  self->featureExtractionParams.limitPeakWeightsSpecFlat = 0.5f;
+  self->featureExtractionParams.limitPeakWeightsSpecDiff = 0.5f;
 
-  //limit on the max and min values for the feature thresholds
-  inst->featureExtractionParams.maxLrt = (float)1.0;
-  inst->featureExtractionParams.minLrt = (float)0.20;
+  // Fluctuation limit of LRT feature.
+  self->featureExtractionParams.thresFluctLrt = 0.05f;
 
-  inst->featureExtractionParams.maxSpecFlat = (float)0.95;
-  inst->featureExtractionParams.minSpecFlat = (float)0.10;
+  // Limit on the max and min values for the feature thresholds.
+  self->featureExtractionParams.maxLrt = 1.f;
+  self->featureExtractionParams.minLrt = 0.2f;
 
-  inst->featureExtractionParams.maxSpecDiff = (float)1.0;
-  inst->featureExtractionParams.minSpecDiff = (float)0.16;
+  self->featureExtractionParams.maxSpecFlat = 0.95f;
+  self->featureExtractionParams.minSpecFlat = 0.1f;
 
-  //criteria of weight of histogram peak to accept/reject feature
-  inst->featureExtractionParams.thresWeightSpecFlat = (int)(0.3
-      * (inst->modelUpdatePars[1])); //for spectral flatness
-  inst->featureExtractionParams.thresWeightSpecDiff = (int)(0.3
-      * (inst->modelUpdatePars[1])); //for spectral difference
+  self->featureExtractionParams.maxSpecDiff = 1.f;
+  self->featureExtractionParams.minSpecDiff = 0.16f;
+
+  // Criteria of weight of histogram peak to accept/reject feature.
+  self->featureExtractionParams.thresWeightSpecFlat =
+      (int)(0.3 * (self->modelUpdatePars[1]));  // For spectral flatness.
+  self->featureExtractionParams.thresWeightSpecDiff =
+      (int)(0.3 * (self->modelUpdatePars[1]));  // For spectral difference.
 }
 
-// Initialize state
-int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs) {
+// Initialize state.
+int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs) {
   int i;
-  //We only support 10ms frames
-
-  //check for valid pointer
-  if (inst == NULL) {
+  // Check for valid pointer.
+  if (self == NULL) {
     return -1;
   }
 
-  // Initialization of struct
-  if (fs == 8000 || fs == 16000 || fs == 32000) {
-    inst->fs = fs;
+  // Initialization of struct.
+  if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) {
+    self->fs = fs;
   } else {
     return -1;
   }
-  inst->windShift = 0;
+  self->windShift = 0;
+  // We only support 10ms frames.
   if (fs == 8000) {
-    // We only support 10ms frames
-    inst->blockLen = 80;
-    inst->blockLen10ms = 80;
-    inst->anaLen = 128;
-    inst->window = kBlocks80w128;
-    inst->outLen = 0;
-  } else if (fs == 16000) {
-    // We only support 10ms frames
-    inst->blockLen = 160;
-    inst->blockLen10ms = 160;
-    inst->anaLen = 256;
-    inst->window = kBlocks160w256;
-    inst->outLen = 0;
-  } else if (fs == 32000) {
-    // We only support 10ms frames
-    inst->blockLen = 160;
-    inst->blockLen10ms = 160;
-    inst->anaLen = 256;
-    inst->window = kBlocks160w256;
-    inst->outLen = 0;
+    self->blockLen = 80;
+    self->anaLen = 128;
+    self->window = kBlocks80w128;
+  } else {
+    self->blockLen = 160;
+    self->anaLen = 256;
+    self->window = kBlocks160w256;
   }
-  inst->magnLen = inst->anaLen / 2 + 1; // Number of frequency bins
+  self->magnLen = self->anaLen / 2 + 1;  // Number of frequency bins.
 
-  // Initialize fft work arrays.
-  inst->ip[0] = 0; // Setting this triggers initialization.
-  memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
-  rdft(inst->anaLen, 1, inst->dataBuf, inst->ip, inst->wfft);
+  // Initialize FFT work arrays.
+  self->ip[0] = 0;  // Setting this triggers initialization.
+  memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  WebRtc_rdft(self->anaLen, 1, self->dataBuf, self->ip, self->wfft);
 
-  memset(inst->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
-  memset(inst->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->analyzeBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->dataBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  memset(self->syntBuf, 0, sizeof(float) * ANAL_BLOCKL_MAX);
 
-  //for HB processing
-  memset(inst->dataBufHB, 0, sizeof(float) * ANAL_BLOCKL_MAX);
+  // For HB processing.
+  memset(self->dataBufHB,
+         0,
+         sizeof(float) * NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX);
 
-  //for quantile noise estimation
-  memset(inst->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // For quantile noise estimation.
+  memset(self->quantile, 0, sizeof(float) * HALF_ANAL_BLOCKL);
   for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) {
-    inst->lquantile[i] = (float)8.0;
-    inst->density[i] = (float)0.3;
+    self->lquantile[i] = 8.f;
+    self->density[i] = 0.3f;
   }
 
   for (i = 0; i < SIMULT; i++) {
-    inst->counter[i] = (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
+    self->counter[i] =
+        (int)floor((float)(END_STARTUP_LONG * (i + 1)) / (float)SIMULT);
   }
 
-  inst->updates = 0;
+  self->updates = 0;
 
-  // Wiener filter initialization
+  // Wiener filter initialization.
   for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
-    inst->smooth[i] = (float)1.0;
+    self->smooth[i] = 1.f;
   }
 
-  // Set the aggressiveness: default
-  inst->aggrMode = 0;
+  // Set the aggressiveness: default.
+  self->aggrMode = 0;
 
-  //initialize variables for new method
-  inst->priorSpeechProb = (float)0.5; //prior prob for speech/noise
+  // Initialize variables for new method.
+  self->priorSpeechProb = 0.5f;  // Prior prob for speech/noise.
+  // Previous analyze mag spectrum.
+  memset(self->magnPrevAnalyze, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Previous process mag spectrum.
+  memset(self->magnPrevProcess, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Current noise-spectrum.
+  memset(self->noise, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Previous noise-spectrum.
+  memset(self->noisePrev, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Conservative noise spectrum estimate.
+  memset(self->magnAvgPause, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // For estimation of HB in second pass.
+  memset(self->speechProb, 0, sizeof(float) * HALF_ANAL_BLOCKL);
+  // Initial average magnitude spectrum.
+  memset(self->initMagnEst, 0, sizeof(float) * HALF_ANAL_BLOCKL);
   for (i = 0; i < HALF_ANAL_BLOCKL; i++) {
-    inst->magnPrev[i] = (float)0.0; //previous mag spectrum
-    inst->noisePrev[i] = (float)0.0; //previous noise-spectrum
-    inst->logLrtTimeAvg[i] = LRT_FEATURE_THR; //smooth LR ratio (same as threshold)
-    inst->magnAvgPause[i] = (float)0.0; //conservative noise spectrum estimate
-    inst->speechProbHB[i] = (float)0.0; //for estimation of HB in second pass
-    inst->initMagnEst[i] = (float)0.0; //initial average mag spectrum
+    // Smooth LR (same as threshold).
+    self->logLrtTimeAvg[i] = LRT_FEATURE_THR;
   }
 
-  //feature quantities
-  inst->featureData[0] = SF_FEATURE_THR; //spectral flatness (start on threshold)
-  inst->featureData[1] = (float)0.0; //spectral entropy: not used in this version
-  inst->featureData[2] = (float)0.0; //spectral variance: not used in this version
-  inst->featureData[3] = LRT_FEATURE_THR; //average lrt factor (start on threshold)
-  inst->featureData[4] = SF_FEATURE_THR; //spectral template diff (start on threshold)
-  inst->featureData[5] = (float)0.0; //normalization for spectral-diff
-  inst->featureData[6] = (float)0.0; //window time-average of input magnitude spectrum
+  // Feature quantities.
+  // Spectral flatness (start on threshold).
+  self->featureData[0] = SF_FEATURE_THR;
+  self->featureData[1] = 0.f;  // Spectral entropy: not used in this version.
+  self->featureData[2] = 0.f;  // Spectral variance: not used in this version.
+  // Average LRT factor (start on threshold).
+  self->featureData[3] = LRT_FEATURE_THR;
+  // Spectral template diff (start on threshold).
+  self->featureData[4] = SF_FEATURE_THR;
+  self->featureData[5] = 0.f;  // Normalization for spectral difference.
+  // Window time-average of input magnitude spectrum.
+  self->featureData[6] = 0.f;
 
-  //histogram quantities: used to estimate/update thresholds for features
-  for (i = 0; i < HIST_PAR_EST; i++) {
-    inst->histLrt[i] = 0;
-    inst->histSpecFlat[i] = 0;
-    inst->histSpecDiff[i] = 0;
-  }
-
-  inst->blockInd = -1; //frame counter
-  inst->priorModelPars[0] = LRT_FEATURE_THR; //default threshold for lrt feature
-  inst->priorModelPars[1] = (float)0.5; //threshold for spectral flatness:
-  // determined on-line
-  inst->priorModelPars[2] = (float)1.0; //sgn_map par for spectral measure:
-  // 1 for flatness measure
-  inst->priorModelPars[3] = (float)0.5; //threshold for template-difference feature:
-  // determined on-line
-  inst->priorModelPars[4] = (float)1.0; //default weighting parameter for lrt feature
-  inst->priorModelPars[5] = (float)0.0; //default weighting parameter for
-  // spectral flatness feature
-  inst->priorModelPars[6] = (float)0.0; //default weighting parameter for
-  // spectral difference feature
-
-  inst->modelUpdatePars[0] = 2; //update flag for parameters:
-  // 0 no update, 1=update once, 2=update every window
-  inst->modelUpdatePars[1] = 500; //window for update
-  inst->modelUpdatePars[2] = 0; //counter for update of conservative noise spectrum
-  //counter if the feature thresholds are updated during the sequence
-  inst->modelUpdatePars[3] = inst->modelUpdatePars[1];
-
-  inst->signalEnergy = 0.0;
-  inst->sumMagn = 0.0;
-  inst->whiteNoiseLevel = 0.0;
-  inst->pinkNoiseNumerator = 0.0;
-  inst->pinkNoiseExp = 0.0;
-
-  WebRtcNs_set_feature_extraction_parameters(inst); // Set feature configuration
-
-  //default mode
-  WebRtcNs_set_policy_core(inst, 0);
+  // Histogram quantities: used to estimate/update thresholds for features.
+  memset(self->histLrt, 0, sizeof(int) * HIST_PAR_EST);
+  memset(self->histSpecFlat, 0, sizeof(int) * HIST_PAR_EST);
+  memset(self->histSpecDiff, 0, sizeof(int) * HIST_PAR_EST);
 
-  memset(inst->outBuf, 0, sizeof(float) * 3 * BLOCKL_MAX);
+  self->blockInd = -1;  // Frame counter.
+  // Default threshold for LRT feature.
+  self->priorModelPars[0] = LRT_FEATURE_THR;
+  // Threshold for spectral flatness: determined on-line.
+  self->priorModelPars[1] = 0.5f;
+  // sgn_map par for spectral measure: 1 for flatness measure.
+  self->priorModelPars[2] = 1.f;
+  // Threshold for template-difference feature: determined on-line.
+  self->priorModelPars[3] = 0.5f;
+  // Default weighting parameter for LRT feature.
+  self->priorModelPars[4] = 1.f;
+  // Default weighting parameter for spectral flatness feature.
+  self->priorModelPars[5] = 0.f;
+  // Default weighting parameter for spectral difference feature.
+  self->priorModelPars[6] = 0.f;
 
-  inst->initFlag = 1;
+  // Update flag for parameters:
+  // 0 no update, 1 = update once, 2 = update every window.
+  self->modelUpdatePars[0] = 2;
+  self->modelUpdatePars[1] = 500;  // Window for update.
+  // Counter for update of conservative noise spectrum.
+  self->modelUpdatePars[2] = 0;
+  // Counter if the feature thresholds are updated during the sequence.
+  self->modelUpdatePars[3] = self->modelUpdatePars[1];
+
+  self->signalEnergy = 0.0;
+  self->sumMagn = 0.0;
+  self->whiteNoiseLevel = 0.0;
+  self->pinkNoiseNumerator = 0.0;
+  self->pinkNoiseExp = 0.0;
+
+  set_feature_extraction_parameters(self);
+
+  // Default mode.
+  WebRtcNs_set_policy_core(self, 0);
+
+  self->initFlag = 1;
   return 0;
 }
 
-int WebRtcNs_set_policy_core(NSinst_t* inst, int mode) {
-  // allow for modes:0,1,2,3
-  if (mode < 0 || mode > 3) {
-    return (-1);
-  }
-
-  inst->aggrMode = mode;
-  if (mode == 0) {
-    inst->overdrive = (float)1.0;
-    inst->denoiseBound = (float)0.5;
-    inst->gainmap = 0;
-  } else if (mode == 1) {
-    //inst->overdrive = (float)1.25;
-    inst->overdrive = (float)1.0;
-    inst->denoiseBound = (float)0.25;
-    inst->gainmap = 1;
-  } else if (mode == 2) {
-    //inst->overdrive = (float)1.25;
-    inst->overdrive = (float)1.1;
-    inst->denoiseBound = (float)0.125;
-    inst->gainmap = 1;
-  } else if (mode == 3) {
-    //inst->overdrive = (float)1.30;
-    inst->overdrive = (float)1.25;
-    inst->denoiseBound = (float)0.09;
-    inst->gainmap = 1;
-  }
-  return 0;
-}
-
-// Estimate noise
-void WebRtcNs_NoiseEstimation(NSinst_t* inst, float* magn, float* noise) {
-  int i, s, offset;
+// Estimate noise.
+static void NoiseEstimation(NoiseSuppressionC* self,
+                            float* magn,
+                            float* noise) {
+  size_t i, s, offset;
   float lmagn[HALF_ANAL_BLOCKL], delta;
 
-  if (inst->updates < END_STARTUP_LONG) {
-    inst->updates++;
+  if (self->updates < END_STARTUP_LONG) {
+    self->updates++;
   }
 
-  for (i = 0; i < inst->magnLen; i++) {
+  for (i = 0; i < self->magnLen; i++) {
     lmagn[i] = (float)log(magn[i]);
   }
 
-  // loop over simultaneous estimates
+  // Loop over simultaneous estimates.
   for (s = 0; s < SIMULT; s++) {
-    offset = s * inst->magnLen;
+    offset = s * self->magnLen;
 
     // newquantest(...)
-    for (i = 0; i < inst->magnLen; i++) {
-      // compute delta
-      if (inst->density[offset + i] > 1.0) {
-        delta = FACTOR * (float)1.0 / inst->density[offset + i];
+    for (i = 0; i < self->magnLen; i++) {
+      // Compute delta.
+      if (self->density[offset + i] > 1.0) {
+        delta = FACTOR * 1.f / self->density[offset + i];
       } else {
         delta = FACTOR;
       }
 
-      // update log quantile estimate
-      if (lmagn[i] > inst->lquantile[offset + i]) {
-        inst->lquantile[offset + i] += QUANTILE * delta
-                                       / (float)(inst->counter[s] + 1);
+      // Update log quantile estimate.
+      if (lmagn[i] > self->lquantile[offset + i]) {
+        self->lquantile[offset + i] +=
+            QUANTILE * delta / (float)(self->counter[s] + 1);
       } else {
-        inst->lquantile[offset + i] -= ((float)1.0 - QUANTILE) * delta
-                                       / (float)(inst->counter[s] + 1);
+        self->lquantile[offset + i] -=
+            (1.f - QUANTILE) * delta / (float)(self->counter[s] + 1);
       }
 
-      // update density estimate
-      if (fabs(lmagn[i] - inst->lquantile[offset + i]) < WIDTH) {
-        inst->density[offset + i] = ((float)inst->counter[s] * inst->density[offset
-            + i] + (float)1.0 / ((float)2.0 * WIDTH)) / (float)(inst->counter[s] + 1);
+      // Update density estimate.
+      if (fabs(lmagn[i] - self->lquantile[offset + i]) < WIDTH) {
+        self->density[offset + i] =
+            ((float)self->counter[s] * self->density[offset + i] +
+             1.f / (2.f * WIDTH)) /
+            (float)(self->counter[s] + 1);
       }
-    }  // end loop over magnitude spectrum
+    }  // End loop over magnitude spectrum.
 
-    if (inst->counter[s] >= END_STARTUP_LONG) {
-      inst->counter[s] = 0;
-      if (inst->updates >= END_STARTUP_LONG) {
-        for (i = 0; i < inst->magnLen; i++) {
-          inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
+    if (self->counter[s] >= END_STARTUP_LONG) {
+      self->counter[s] = 0;
+      if (self->updates >= END_STARTUP_LONG) {
+        for (i = 0; i < self->magnLen; i++) {
+          self->quantile[i] = (float)exp(self->lquantile[offset + i]);
         }
       }
     }
 
-    inst->counter[s]++;
-  }  // end loop over simultaneous estimates
+    self->counter[s]++;
+  }  // End loop over simultaneous estimates.
 
-  // Sequentially update the noise during startup
-  if (inst->updates < END_STARTUP_LONG) {
+  // Sequentially update the noise during startup.
+  if (self->updates < END_STARTUP_LONG) {
     // Use the last "s" to get noise during startup that differ from zero.
-    for (i = 0; i < inst->magnLen; i++) {
-      inst->quantile[i] = (float)exp(inst->lquantile[offset + i]);
+    for (i = 0; i < self->magnLen; i++) {
+      self->quantile[i] = (float)exp(self->lquantile[offset + i]);
     }
   }
 
-  for (i = 0; i < inst->magnLen; i++) {
-    noise[i] = inst->quantile[i];
+  for (i = 0; i < self->magnLen; i++) {
+    noise[i] = self->quantile[i];
   }
 }
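NoiseEstimation() is a log-domain quantile tracker: for each frequency bin it nudges a running estimate of the 25th percentile (QUANTILE) of log|X| up by QUANTILE*delta/(counter+1) when the new frame is louder, and down by (1-QUANTILE)*delta/(counter+1) otherwise, so quiet noise-dominated frames shape the estimate while speech bursts are largely ignored; SIMULT staggered estimates with offset counter resets keep it adaptive after startup. The core step, isolated as a minimal standalone sketch (not the patch's exact code):

    /* One stochastic-approximation update of a per-bin log-quantile. */
    static float QuantileStep(float lq, float lmagn, int counter,
                              float quantile, float delta) {
      if (lmagn > lq) {
        lq += quantile * delta / (float)(counter + 1);
      } else {
        lq -= (1.f - quantile) * delta / (float)(counter + 1);
      }
      return lq;  /* exp(lq) is the per-bin noise magnitude estimate. */
    }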
+ if ((self->featureData[4] < + HIST_PAR_EST * self->featureExtractionParams.binSizeSpecDiff) && + (self->featureData[4] >= 0.0)) { + i = (int)(self->featureData[4] / + self->featureExtractionParams.binSizeSpecDiff); + self->histSpecDiff[i]++; } } - // extract parameters for speech/noise probability + // Extract parameters for speech/noise probability. if (flag == 1) { - //lrt feature: compute the average over inst->featureExtractionParams.rangeAvgHistLrt + // LRT feature: compute the average over + // self->featureExtractionParams.rangeAvgHistLrt. avgHistLrt = 0.0; avgHistLrtCompl = 0.0; avgSquareHistLrt = 0.0; numHistLrt = 0; for (i = 0; i < HIST_PAR_EST; i++) { - binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeLrt; - if (binMid <= inst->featureExtractionParams.rangeAvgHistLrt) { - avgHistLrt += inst->histLrt[i] * binMid; - numHistLrt += inst->histLrt[i]; + binMid = ((float)i + 0.5f) * self->featureExtractionParams.binSizeLrt; + if (binMid <= self->featureExtractionParams.rangeAvgHistLrt) { + avgHistLrt += self->histLrt[i] * binMid; + numHistLrt += self->histLrt[i]; } - avgSquareHistLrt += inst->histLrt[i] * binMid * binMid; - avgHistLrtCompl += inst->histLrt[i] * binMid; + avgSquareHistLrt += self->histLrt[i] * binMid * binMid; + avgHistLrtCompl += self->histLrt[i] * binMid; } if (numHistLrt > 0) { avgHistLrt = avgHistLrt / ((float)numHistLrt); } - avgHistLrtCompl = avgHistLrtCompl / ((float)inst->modelUpdatePars[1]); - avgSquareHistLrt = avgSquareHistLrt / ((float)inst->modelUpdatePars[1]); + avgHistLrtCompl = avgHistLrtCompl / ((float)self->modelUpdatePars[1]); + avgSquareHistLrt = avgSquareHistLrt / ((float)self->modelUpdatePars[1]); fluctLrt = avgSquareHistLrt - avgHistLrt * avgHistLrtCompl; - // get threshold for lrt feature: - if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { - //very low fluct, so likely noise - inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; + // Get threshold for LRT feature. + if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { + // Very low fluctuation, so likely noise. + self->priorModelPars[0] = self->featureExtractionParams.maxLrt; } else { - inst->priorModelPars[0] = inst->featureExtractionParams.factor1ModelPars - * avgHistLrt; - // check if value is within min/max range - if (inst->priorModelPars[0] < inst->featureExtractionParams.minLrt) { - inst->priorModelPars[0] = inst->featureExtractionParams.minLrt; + self->priorModelPars[0] = + self->featureExtractionParams.factor1ModelPars * avgHistLrt; + // Check if value is within min/max range. + if (self->priorModelPars[0] < self->featureExtractionParams.minLrt) { + self->priorModelPars[0] = self->featureExtractionParams.minLrt; } - if (inst->priorModelPars[0] > inst->featureExtractionParams.maxLrt) { - inst->priorModelPars[0] = inst->featureExtractionParams.maxLrt; + if (self->priorModelPars[0] > self->featureExtractionParams.maxLrt) { + self->priorModelPars[0] = self->featureExtractionParams.maxLrt; } } - // done with lrt feature + // Done with LRT feature. - // - // for spectral flatness and spectral difference: compute the main peaks of histogram + // For spectral flatness and spectral difference: compute the main peaks of + // histogram. maxPeak1 = 0; maxPeak2 = 0; posPeak1SpecFlat = 0.0; @@ -394,222 +382,269 @@ void WebRtcNs_FeatureParameterExtraction(NSinst_t* inst, int flag) { weightPeak1SpecFlat = 0; weightPeak2SpecFlat = 0; - // peaks for flatness + // Peaks for flatness. 
for (i = 0; i < HIST_PAR_EST; i++) { - binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecFlat; - if (inst->histSpecFlat[i] > maxPeak1) { - // Found new "first" peak + binMid = + (i + 0.5f) * self->featureExtractionParams.binSizeSpecFlat; + if (self->histSpecFlat[i] > maxPeak1) { + // Found new "first" peak. maxPeak2 = maxPeak1; weightPeak2SpecFlat = weightPeak1SpecFlat; posPeak2SpecFlat = posPeak1SpecFlat; - maxPeak1 = inst->histSpecFlat[i]; - weightPeak1SpecFlat = inst->histSpecFlat[i]; + maxPeak1 = self->histSpecFlat[i]; + weightPeak1SpecFlat = self->histSpecFlat[i]; posPeak1SpecFlat = binMid; - } else if (inst->histSpecFlat[i] > maxPeak2) { - // Found new "second" peak - maxPeak2 = inst->histSpecFlat[i]; - weightPeak2SpecFlat = inst->histSpecFlat[i]; + } else if (self->histSpecFlat[i] > maxPeak2) { + // Found new "second" peak. + maxPeak2 = self->histSpecFlat[i]; + weightPeak2SpecFlat = self->histSpecFlat[i]; posPeak2SpecFlat = binMid; } } - //compute two peaks for spectral difference + // Compute two peaks for spectral difference. maxPeak1 = 0; maxPeak2 = 0; posPeak1SpecDiff = 0.0; posPeak2SpecDiff = 0.0; weightPeak1SpecDiff = 0; weightPeak2SpecDiff = 0; - // peaks for spectral difference + // Peaks for spectral difference. for (i = 0; i < HIST_PAR_EST; i++) { - binMid = ((float)i + (float)0.5) * inst->featureExtractionParams.binSizeSpecDiff; - if (inst->histSpecDiff[i] > maxPeak1) { - // Found new "first" peak + binMid = + ((float)i + 0.5f) * self->featureExtractionParams.binSizeSpecDiff; + if (self->histSpecDiff[i] > maxPeak1) { + // Found new "first" peak. maxPeak2 = maxPeak1; weightPeak2SpecDiff = weightPeak1SpecDiff; posPeak2SpecDiff = posPeak1SpecDiff; - maxPeak1 = inst->histSpecDiff[i]; - weightPeak1SpecDiff = inst->histSpecDiff[i]; + maxPeak1 = self->histSpecDiff[i]; + weightPeak1SpecDiff = self->histSpecDiff[i]; posPeak1SpecDiff = binMid; - } else if (inst->histSpecDiff[i] > maxPeak2) { - // Found new "second" peak - maxPeak2 = inst->histSpecDiff[i]; - weightPeak2SpecDiff = inst->histSpecDiff[i]; + } else if (self->histSpecDiff[i] > maxPeak2) { + // Found new "second" peak. + maxPeak2 = self->histSpecDiff[i]; + weightPeak2SpecDiff = self->histSpecDiff[i]; posPeak2SpecDiff = binMid; } } - // for spectrum flatness feature + // For spectrum flatness feature. useFeatureSpecFlat = 1; - // merge the two peaks if they are close - if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) - < inst->featureExtractionParams.limitPeakSpacingSpecFlat) - && (weightPeak2SpecFlat - > inst->featureExtractionParams.limitPeakWeightsSpecFlat - * weightPeak1SpecFlat)) { + // Merge the two peaks if they are close. + if ((fabs(posPeak2SpecFlat - posPeak1SpecFlat) < + self->featureExtractionParams.limitPeakSpacingSpecFlat) && + (weightPeak2SpecFlat > + self->featureExtractionParams.limitPeakWeightsSpecFlat * + weightPeak1SpecFlat)) { weightPeak1SpecFlat += weightPeak2SpecFlat; - posPeak1SpecFlat = (float)0.5 * (posPeak1SpecFlat + posPeak2SpecFlat); + posPeak1SpecFlat = 0.5f * (posPeak1SpecFlat + posPeak2SpecFlat); } - //reject if weight of peaks is not large enough, or peak value too small - if (weightPeak1SpecFlat < inst->featureExtractionParams.thresWeightSpecFlat - || posPeak1SpecFlat < inst->featureExtractionParams.thresPosSpecFlat) { + // Reject if weight of peaks is not large enough, or peak value too small. 
+ if (weightPeak1SpecFlat < + self->featureExtractionParams.thresWeightSpecFlat || + posPeak1SpecFlat < self->featureExtractionParams.thresPosSpecFlat) { useFeatureSpecFlat = 0; } - // if selected, get the threshold + // If selected, get the threshold. if (useFeatureSpecFlat == 1) { - // compute the threshold - inst->priorModelPars[1] = inst->featureExtractionParams.factor2ModelPars - * posPeak1SpecFlat; - //check if value is within min/max range - if (inst->priorModelPars[1] < inst->featureExtractionParams.minSpecFlat) { - inst->priorModelPars[1] = inst->featureExtractionParams.minSpecFlat; + // Compute the threshold. + self->priorModelPars[1] = + self->featureExtractionParams.factor2ModelPars * posPeak1SpecFlat; + // Check if value is within min/max range. + if (self->priorModelPars[1] < self->featureExtractionParams.minSpecFlat) { + self->priorModelPars[1] = self->featureExtractionParams.minSpecFlat; } - if (inst->priorModelPars[1] > inst->featureExtractionParams.maxSpecFlat) { - inst->priorModelPars[1] = inst->featureExtractionParams.maxSpecFlat; + if (self->priorModelPars[1] > self->featureExtractionParams.maxSpecFlat) { + self->priorModelPars[1] = self->featureExtractionParams.maxSpecFlat; } } - // done with flatness feature + // Done with flatness feature. - // for template feature + // For template feature. useFeatureSpecDiff = 1; - // merge the two peaks if they are close - if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) - < inst->featureExtractionParams.limitPeakSpacingSpecDiff) - && (weightPeak2SpecDiff - > inst->featureExtractionParams.limitPeakWeightsSpecDiff - * weightPeak1SpecDiff)) { + // Merge the two peaks if they are close. + if ((fabs(posPeak2SpecDiff - posPeak1SpecDiff) < + self->featureExtractionParams.limitPeakSpacingSpecDiff) && + (weightPeak2SpecDiff > + self->featureExtractionParams.limitPeakWeightsSpecDiff * + weightPeak1SpecDiff)) { weightPeak1SpecDiff += weightPeak2SpecDiff; - posPeak1SpecDiff = (float)0.5 * (posPeak1SpecDiff + posPeak2SpecDiff); + posPeak1SpecDiff = 0.5f * (posPeak1SpecDiff + posPeak2SpecDiff); } - // get the threshold value - inst->priorModelPars[3] = inst->featureExtractionParams.factor1ModelPars - * posPeak1SpecDiff; - //reject if weight of peaks is not large enough - if (weightPeak1SpecDiff < inst->featureExtractionParams.thresWeightSpecDiff) { + // Get the threshold value. + self->priorModelPars[3] = + self->featureExtractionParams.factor1ModelPars * posPeak1SpecDiff; + // Reject if weight of peaks is not large enough. + if (weightPeak1SpecDiff < + self->featureExtractionParams.thresWeightSpecDiff) { useFeatureSpecDiff = 0; } - //check if value is within min/max range - if (inst->priorModelPars[3] < inst->featureExtractionParams.minSpecDiff) { - inst->priorModelPars[3] = inst->featureExtractionParams.minSpecDiff; + // Check if value is within min/max range. + if (self->priorModelPars[3] < self->featureExtractionParams.minSpecDiff) { + self->priorModelPars[3] = self->featureExtractionParams.minSpecDiff; } - if (inst->priorModelPars[3] > inst->featureExtractionParams.maxSpecDiff) { - inst->priorModelPars[3] = inst->featureExtractionParams.maxSpecDiff; + if (self->priorModelPars[3] > self->featureExtractionParams.maxSpecDiff) { + self->priorModelPars[3] = self->featureExtractionParams.maxSpecDiff; } - // done with spectral difference feature + // Done with spectral difference feature. 
- // don't use template feature if fluctuation of lrt feature is very low: - // most likely just noise state - if (fluctLrt < inst->featureExtractionParams.thresFluctLrt) { + // Don't use template feature if fluctuation of LRT feature is very low: + // most likely just noise state. + if (fluctLrt < self->featureExtractionParams.thresFluctLrt) { useFeatureSpecDiff = 0; } - // select the weights between the features - // inst->priorModelPars[4] is weight for lrt: always selected - // inst->priorModelPars[5] is weight for spectral flatness - // inst->priorModelPars[6] is weight for spectral difference + // Select the weights between the features. + // self->priorModelPars[4] is weight for LRT: always selected. + // self->priorModelPars[5] is weight for spectral flatness. + // self->priorModelPars[6] is weight for spectral difference. featureSum = (float)(1 + useFeatureSpecFlat + useFeatureSpecDiff); - inst->priorModelPars[4] = (float)1.0 / featureSum; - inst->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; - inst->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; + self->priorModelPars[4] = 1.f / featureSum; + self->priorModelPars[5] = ((float)useFeatureSpecFlat) / featureSum; + self->priorModelPars[6] = ((float)useFeatureSpecDiff) / featureSum; - // set hists to zero for next update - if (inst->modelUpdatePars[0] >= 1) { + // Set hists to zero for next update. + if (self->modelUpdatePars[0] >= 1) { for (i = 0; i < HIST_PAR_EST; i++) { - inst->histLrt[i] = 0; - inst->histSpecFlat[i] = 0; - inst->histSpecDiff[i] = 0; + self->histLrt[i] = 0; + self->histSpecFlat[i] = 0; + self->histSpecDiff[i] = 0; } } - } // end of flag == 1 + } // End of flag == 1. } -// Compute spectral flatness on input spectrum -// magnIn is the magnitude spectrum -// spectral flatness is returned in inst->featureData[0] -void WebRtcNs_ComputeSpectralFlatness(NSinst_t* inst, float* magnIn) { - int i; - int shiftLP = 1; //option to remove first bin(s) from spectral measures +// Compute spectral flatness on input spectrum. +// |magnIn| is the magnitude spectrum. +// Spectral flatness is returned in self->featureData[0]. +static void ComputeSpectralFlatness(NoiseSuppressionC* self, + const float* magnIn) { + size_t i; + size_t shiftLP = 1; // Option to remove first bin(s) from spectral measures. float avgSpectralFlatnessNum, avgSpectralFlatnessDen, spectralTmp; - // comute spectral measures - // for flatness + // Compute spectral measures. + // For flatness. avgSpectralFlatnessNum = 0.0; - avgSpectralFlatnessDen = inst->sumMagn; + avgSpectralFlatnessDen = self->sumMagn; for (i = 0; i < shiftLP; i++) { avgSpectralFlatnessDen -= magnIn[i]; } - // compute log of ratio of the geometric to arithmetic mean: check for log(0) case - for (i = shiftLP; i < inst->magnLen; i++) { + // Compute log of ratio of the geometric to arithmetic mean: check for log(0) + // case. + for (i = shiftLP; i < self->magnLen; i++) { if (magnIn[i] > 0.0) { avgSpectralFlatnessNum += (float)log(magnIn[i]); } else { - inst->featureData[0] -= SPECT_FL_TAVG * inst->featureData[0]; + self->featureData[0] -= SPECT_FL_TAVG * self->featureData[0]; return; } } - //normalize - avgSpectralFlatnessDen = avgSpectralFlatnessDen / inst->magnLen; - avgSpectralFlatnessNum = avgSpectralFlatnessNum / inst->magnLen; + // Normalize. 
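+  // The measure computed below is exp(mean(log(magnIn))) / mean(magnIn)
+  // (up to the skipped low bins), i.e. the ratio of the geometric to the
+  // arithmetic mean: close to 1 for a flat (noise-like) spectrum and closer
+  // to 0 for a peaky (speech-like) one.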
+ avgSpectralFlatnessDen = avgSpectralFlatnessDen / self->magnLen; + avgSpectralFlatnessNum = avgSpectralFlatnessNum / self->magnLen; - //ratio and inverse log: check for case of log(0) + // Ratio and inverse log: check for case of log(0). spectralTmp = (float)exp(avgSpectralFlatnessNum) / avgSpectralFlatnessDen; - //time-avg update of spectral flatness feature - inst->featureData[0] += SPECT_FL_TAVG * (spectralTmp - inst->featureData[0]); - // done with flatness feature + // Time-avg update of spectral flatness feature. + self->featureData[0] += SPECT_FL_TAVG * (spectralTmp - self->featureData[0]); + // Done with flatness feature. } -// Compute the difference measure between input spectrum and a template/learned noise spectrum -// magnIn is the input spectrum -// the reference/template spectrum is inst->magnAvgPause[i] -// returns (normalized) spectral difference in inst->featureData[4] -void WebRtcNs_ComputeSpectralDifference(NSinst_t* inst, float* magnIn) { - // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) - int i; +// Compute prior and post SNR based on quantile noise estimation. +// Compute DD estimate of prior SNR. +// Inputs: +// * |magn| is the signal magnitude spectrum estimate. +// * |noise| is the magnitude noise spectrum estimate. +// Outputs: +// * |snrLocPrior| is the computed prior SNR. +// * |snrLocPost| is the computed post SNR. +static void ComputeSnr(const NoiseSuppressionC* self, + const float* magn, + const float* noise, + float* snrLocPrior, + float* snrLocPost) { + size_t i; + + for (i = 0; i < self->magnLen; i++) { + // Previous post SNR. + // Previous estimate: based on previous frame with gain filter. + float previousEstimateStsa = self->magnPrevAnalyze[i] / + (self->noisePrev[i] + 0.0001f) * self->smooth[i]; + // Post SNR. + snrLocPost[i] = 0.f; + if (magn[i] > noise[i]) { + snrLocPost[i] = magn[i] / (noise[i] + 0.0001f) - 1.f; + } + // DD estimate is sum of two terms: current estimate and previous estimate. + // Directed decision update of snrPrior. + snrLocPrior[i] = + DD_PR_SNR * previousEstimateStsa + (1.f - DD_PR_SNR) * snrLocPost[i]; + } // End of loop over frequencies. +} + +// Compute the difference measure between input spectrum and a template/learned +// noise spectrum. +// |magnIn| is the input spectrum. +// The reference/template spectrum is self->magnAvgPause[i]. +// Returns (normalized) spectral difference in self->featureData[4]. +static void ComputeSpectralDifference(NoiseSuppressionC* self, + const float* magnIn) { + // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / + // var(magnAvgPause) + size_t i; float avgPause, avgMagn, covMagnPause, varPause, varMagn, avgDiffNormMagn; avgPause = 0.0; - avgMagn = inst->sumMagn; - // compute average quantities - for (i = 0; i < inst->magnLen; i++) { - //conservative smooth noise spectrum from pause frames - avgPause += inst->magnAvgPause[i]; + avgMagn = self->sumMagn; + // Compute average quantities. + for (i = 0; i < self->magnLen; i++) { + // Conservative smooth noise spectrum from pause frames. 
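+    // (|magnAvgPause| is only updated while the speech probability is low,
+    // so it tracks the noise-only spectrum; see UpdateNoiseEstimate.)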
+    avgPause += self->magnAvgPause[i];
   }
-  avgPause = avgPause / ((float)inst->magnLen);
-  avgMagn = avgMagn / ((float)inst->magnLen);
+  avgPause /= self->magnLen;
+  avgMagn /= self->magnLen;
   covMagnPause = 0.0;
   varPause = 0.0;
   varMagn = 0.0;
-  // compute variance and covariance quantities
-  for (i = 0; i < inst->magnLen; i++) {
-    covMagnPause += (magnIn[i] - avgMagn) * (inst->magnAvgPause[i] - avgPause);
-    varPause += (inst->magnAvgPause[i] - avgPause) * (inst->magnAvgPause[i] - avgPause);
+  // Compute variance and covariance quantities.
+  for (i = 0; i < self->magnLen; i++) {
+    covMagnPause += (magnIn[i] - avgMagn) * (self->magnAvgPause[i] - avgPause);
+    varPause +=
+        (self->magnAvgPause[i] - avgPause) * (self->magnAvgPause[i] - avgPause);
     varMagn += (magnIn[i] - avgMagn) * (magnIn[i] - avgMagn);
   }
-  covMagnPause = covMagnPause / ((float)inst->magnLen);
-  varPause = varPause / ((float)inst->magnLen);
-  varMagn = varMagn / ((float)inst->magnLen);
-  // update of average magnitude spectrum
-  inst->featureData[6] += inst->signalEnergy;
+  covMagnPause /= self->magnLen;
+  varPause /= self->magnLen;
+  varMagn /= self->magnLen;
+  // Update of average magnitude spectrum.
+  self->featureData[6] += self->signalEnergy;

-  avgDiffNormMagn = varMagn - (covMagnPause * covMagnPause) / (varPause + (float)0.0001);
-  // normalize and compute time-avg update of difference feature
-  avgDiffNormMagn = (float)(avgDiffNormMagn / (inst->featureData[5] + (float)0.0001));
-  inst->featureData[4] += SPECT_DIFF_TAVG * (avgDiffNormMagn - inst->featureData[4]);
+  avgDiffNormMagn =
+      varMagn - (covMagnPause * covMagnPause) / (varPause + 0.0001f);
+  // Normalize and compute time-avg update of difference feature.
+  avgDiffNormMagn = (float)(avgDiffNormMagn / (self->featureData[5] + 0.0001f));
+  self->featureData[4] +=
+      SPECT_DIFF_TAVG * (avgDiffNormMagn - self->featureData[4]);
 }

-// Compute speech/noise probability
-// speech/noise probability is returned in: probSpeechFinal
-//magn is the input magnitude spectrum
-//noise is the noise spectrum
-//snrLocPrior is the prior snr for each freq.
-//snr loc_post is the post snr for each freq.
-void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snrLocPrior,
-                              float* snrLocPost) {
-  int i, sgnMap;
+// Compute speech/noise probability.
+// Speech/noise probability is returned in |probSpeechFinal|.
+// |snrLocPrior| is the prior SNR for each frequency.
+// |snrLocPost| is the post SNR for each frequency.
+static void SpeechNoiseProb(NoiseSuppressionC* self,
+                            float* probSpeechFinal,
+                            const float* snrLocPrior,
+                            const float* snrLocPost) {
+  size_t i;
+  int sgnMap;
   float invLrt, gainPrior, indPrior;
   float logLrtTimeAvgKsum, besselTmp;
   float indicator0, indicator1, indicator2;
@@ -619,687 +654,763 @@ void WebRtcNs_SpeechNoiseProb(NSinst_t* inst, float* probSpeechFinal, float* snr
   float widthPrior, widthPrior0, widthPrior1, widthPrior2;

   widthPrior0 = WIDTH_PR_MAP;
-  widthPrior1 = (float)2.0 * WIDTH_PR_MAP; //width for pause region:
-  // lower range, so increase width in tanh map
-  widthPrior2 = (float)2.0 * WIDTH_PR_MAP; //for spectral-difference measure
+  // Width for pause region: lower range, so increase width in tanh map.
+  widthPrior1 = 2.f * WIDTH_PR_MAP;
+  widthPrior2 = 2.f * WIDTH_PR_MAP;  // For spectral-difference measure.
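+  // Each feature F is mapped to a soft speech indicator in [0, 1] through a
+  // sigmoid, 0.5 * (tanh(width * (F - threshold)) + 1), sign-flipped for the
+  // flatness feature below.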
- //threshold parameters for features - threshPrior0 = inst->priorModelPars[0]; - threshPrior1 = inst->priorModelPars[1]; - threshPrior2 = inst->priorModelPars[3]; + // Threshold parameters for features. + threshPrior0 = self->priorModelPars[0]; + threshPrior1 = self->priorModelPars[1]; + threshPrior2 = self->priorModelPars[3]; - //sign for flatness feature - sgnMap = (int)(inst->priorModelPars[2]); + // Sign for flatness feature. + sgnMap = (int)(self->priorModelPars[2]); - //weight parameters for features - weightIndPrior0 = inst->priorModelPars[4]; - weightIndPrior1 = inst->priorModelPars[5]; - weightIndPrior2 = inst->priorModelPars[6]; + // Weight parameters for features. + weightIndPrior0 = self->priorModelPars[4]; + weightIndPrior1 = self->priorModelPars[5]; + weightIndPrior2 = self->priorModelPars[6]; - // compute feature based on average LR factor - // this is the average over all frequencies of the smooth log lrt + // Compute feature based on average LR factor. + // This is the average over all frequencies of the smooth log LRT. logLrtTimeAvgKsum = 0.0; - for (i = 0; i < inst->magnLen; i++) { - tmpFloat1 = (float)1.0 + (float)2.0 * snrLocPrior[i]; - tmpFloat2 = (float)2.0 * snrLocPrior[i] / (tmpFloat1 + (float)0.0001); - besselTmp = (snrLocPost[i] + (float)1.0) * tmpFloat2; - inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - - inst->logLrtTimeAvg[i]); - logLrtTimeAvgKsum += inst->logLrtTimeAvg[i]; + for (i = 0; i < self->magnLen; i++) { + tmpFloat1 = 1.f + 2.f * snrLocPrior[i]; + tmpFloat2 = 2.f * snrLocPrior[i] / (tmpFloat1 + 0.0001f); + besselTmp = (snrLocPost[i] + 1.f) * tmpFloat2; + self->logLrtTimeAvg[i] += + LRT_TAVG * (besselTmp - (float)log(tmpFloat1) - self->logLrtTimeAvg[i]); + logLrtTimeAvgKsum += self->logLrtTimeAvg[i]; } - logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (inst->magnLen); - inst->featureData[3] = logLrtTimeAvgKsum; - // done with computation of LR factor + logLrtTimeAvgKsum = (float)logLrtTimeAvgKsum / (self->magnLen); + self->featureData[3] = logLrtTimeAvgKsum; + // Done with computation of LR factor. - // - //compute the indicator functions - // - - // average lrt feature + // Compute the indicator functions. + // Average LRT feature. widthPrior = widthPrior0; - //use larger width in tanh map for pause regions + // Use larger width in tanh map for pause regions. if (logLrtTimeAvgKsum < threshPrior0) { widthPrior = widthPrior1; } - // compute indicator function: sigmoid map - indicator0 = (float)0.5 * ((float)tanh(widthPrior * - (logLrtTimeAvgKsum - threshPrior0)) + (float)1.0); + // Compute indicator function: sigmoid map. + indicator0 = + 0.5f * + ((float)tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.f); - //spectral flatness feature - tmpFloat1 = inst->featureData[0]; + // Spectral flatness feature. + tmpFloat1 = self->featureData[0]; widthPrior = widthPrior0; - //use larger width in tanh map for pause regions + // Use larger width in tanh map for pause regions. if (sgnMap == 1 && (tmpFloat1 > threshPrior1)) { widthPrior = widthPrior1; } if (sgnMap == -1 && (tmpFloat1 < threshPrior1)) { widthPrior = widthPrior1; } - // compute indicator function: sigmoid map - indicator1 = (float)0.5 * ((float)tanh((float)sgnMap * - widthPrior * (threshPrior1 - tmpFloat1)) + (float)1.0); + // Compute indicator function: sigmoid map. 
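+  // (|sgnMap| is +/-1 and flips the direction of the tanh map for the
+  // flatness feature.)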
+  indicator1 =
+      0.5f *
+      ((float)tanh((float)sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) +
+       1.f);

-  //for template spectrum-difference
-  tmpFloat1 = inst->featureData[4];
+  // For template spectrum-difference.
+  tmpFloat1 = self->featureData[4];
   widthPrior = widthPrior0;
-  //use larger width in tanh map for pause regions
+  // Use larger width in tanh map for pause regions.
   if (tmpFloat1 < threshPrior2) {
     widthPrior = widthPrior2;
   }
-  // compute indicator function: sigmoid map
-  indicator2 = (float)0.5 * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2))
-                             + (float)1.0);
+  // Compute indicator function: sigmoid map.
+  indicator2 =
+      0.5f * ((float)tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.f);

-  //combine the indicator function with the feature weights
-  indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2
-             * indicator2;
-  // done with computing indicator function
+  // Combine the indicator function with the feature weights.
+  indPrior = weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 +
+             weightIndPrior2 * indicator2;
+  // Done with computing indicator function.

-  //compute the prior probability
-  inst->priorSpeechProb += PRIOR_UPDATE * (indPrior - inst->priorSpeechProb);
-  // make sure probabilities are within range: keep floor to 0.01
-  if (inst->priorSpeechProb > 1.0) {
-    inst->priorSpeechProb = (float)1.0;
+  // Compute the prior probability.
+  self->priorSpeechProb += PRIOR_UPDATE * (indPrior - self->priorSpeechProb);
+  // Make sure probabilities are within range: keep floor to 0.01.
+  if (self->priorSpeechProb > 1.f) {
+    self->priorSpeechProb = 1.f;
   }
-  if (inst->priorSpeechProb < 0.01) {
-    inst->priorSpeechProb = (float)0.01;
+  if (self->priorSpeechProb < 0.01f) {
+    self->priorSpeechProb = 0.01f;
   }

-  //final speech probability: combine prior model with LR factor:
-  gainPrior = ((float)1.0 - inst->priorSpeechProb) / (inst->priorSpeechProb + (float)0.0001);
-  for (i = 0; i < inst->magnLen; i++) {
-    invLrt = (float)exp(-inst->logLrtTimeAvg[i]);
+  // Final speech probability: combine prior model with LR factor.
+  gainPrior = (1.f - self->priorSpeechProb) / (self->priorSpeechProb + 0.0001f);
+  for (i = 0; i < self->magnLen; i++) {
+    invLrt = (float)exp(-self->logLrtTimeAvg[i]);
     invLrt = (float)gainPrior * invLrt;
-    probSpeechFinal[i] = (float)1.0 / ((float)1.0 + invLrt);
+    probSpeechFinal[i] = 1.f / (1.f + invLrt);
   }
 }

-int WebRtcNs_ProcessCore(NSinst_t* inst,
-                         short* speechFrame,
-                         short* speechFrameHB,
-                         short* outFrame,
-                         short* outFrameHB) {
-  // main routine for noise reduction
+// Update the noise features.
+// Inputs:
+// * |magn| is the signal magnitude spectrum estimate.
+// * |updateParsFlag| is an update flag for parameters.
+static void FeatureUpdate(NoiseSuppressionC* self,
+                          const float* magn,
+                          int updateParsFlag) {
+  // Compute spectral flatness on input spectrum.
+  ComputeSpectralFlatness(self, magn);
+  // Compute difference of the input spectrum with the learned/estimated
+  // noise spectrum.
+  ComputeSpectralDifference(self, magn);
+  // Compute histograms for parameter decisions (thresholds and weights for
+  // features). Parameters are extracted once every window time
+  // (= self->modelUpdatePars[1] frames).
+  if (updateParsFlag >= 1) {
+    // Counter update.
+    self->modelUpdatePars[3]--;
+    // Update histogram.
+    if (self->modelUpdatePars[3] > 0) {
+      FeatureParameterExtraction(self, 0);
+    }
+    // Compute model parameters.
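+    // (With flag == 1, FeatureParameterExtraction derives the thresholds and
+    // weights from the accumulated histograms and then resets them.)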
+    if (self->modelUpdatePars[3] == 0) {
+      FeatureParameterExtraction(self, 1);
+      self->modelUpdatePars[3] = self->modelUpdatePars[1];
+      // If the update is only wanted once, set the flag to zero.
+      if (updateParsFlag == 1) {
+        self->modelUpdatePars[0] = 0;
+      } else {
+        // Update every window:
+        // Get normalization for spectral difference for next window estimate.
+        self->featureData[6] =
+            self->featureData[6] / ((float)self->modelUpdatePars[1]);
+        self->featureData[5] =
+            0.5f * (self->featureData[6] + self->featureData[5]);
+        self->featureData[6] = 0.f;
+      }
+    }
+  }
+}

-  int flagHB = 0;
-  int i;
-  const int kStartBand = 5; // Skip first frequency bins during estimation.
-  int updateParsFlag;
+// Update the noise estimate.
+// Inputs:
+// * |magn| is the signal magnitude spectrum estimate.
+// * |snrLocPrior| is the prior SNR.
+// * |snrLocPost| is the post SNR.
+// Output:
+// * |noise| is the updated noise magnitude spectrum estimate.
+static void UpdateNoiseEstimate(NoiseSuppressionC* self,
+                                const float* magn,
+                                const float* snrLocPrior,
+                                const float* snrLocPost,
+                                float* noise) {
+  size_t i;
+  float probSpeech, probNonSpeech;
+  // Time-avg parameter for noise update.
+  float gammaNoiseTmp = NOISE_UPDATE;
+  float gammaNoiseOld;
+  float noiseUpdateTmp;

-  float energy1, energy2, gain, factor, factor1, factor2;
-  float signalEnergy, sumMagn;
-  float snrPrior, currentEstimateStsa;
-  float tmpFloat1, tmpFloat2, tmpFloat3, probSpeech, probNonSpeech;
-  float gammaNoiseTmp, gammaNoiseOld;
-  float noiseUpdateTmp, fTmp, dTmp;
-  float fin[BLOCKL_MAX], fout[BLOCKL_MAX];
-  float winData[ANAL_BLOCKL_MAX];
-  float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL];
-  float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL];
-  float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL];
-  float probSpeechFinal[HALF_ANAL_BLOCKL], previousEstimateStsa[HALF_ANAL_BLOCKL];
-  float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL];
-  // Variables during startup
-  float sum_log_i = 0.0;
-  float sum_log_i_square = 0.0;
-  float sum_log_magn = 0.0;
-  float sum_log_i_log_magn = 0.0;
-  float parametric_noise = 0.0;
-  float parametric_exp = 0.0;
-  float parametric_num = 0.0;
+  for (i = 0; i < self->magnLen; i++) {
+    probSpeech = self->speechProb[i];
+    probNonSpeech = 1.f - probSpeech;
+    // Temporary noise update:
+    // Use it for speech frames if update value is less than previous.
+    noiseUpdateTmp = gammaNoiseTmp * self->noisePrev[i] +
+                     (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
+                                              probSpeech * self->noisePrev[i]);
+    // Time-constant based on speech/noise state.
+    gammaNoiseOld = gammaNoiseTmp;
+    gammaNoiseTmp = NOISE_UPDATE;
+    // Increase gamma (i.e., less noise update) for frame likely to be speech.
+    if (probSpeech > PROB_RANGE) {
+      gammaNoiseTmp = SPEECH_UPDATE;
+    }
+    // Conservative noise update.
+    if (probSpeech < PROB_RANGE) {
+      self->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - self->magnAvgPause[i]);
+    }
+    // Noise update.
+    if (gammaNoiseTmp == gammaNoiseOld) {
+      noise[i] = noiseUpdateTmp;
+    } else {
+      noise[i] = gammaNoiseTmp * self->noisePrev[i] +
+                 (1.f - gammaNoiseTmp) * (probNonSpeech * magn[i] +
+                                          probSpeech * self->noisePrev[i]);
+      // Allow for noise update downwards:
+      // If noise update decreases the noise, it is safe, so allow it to
+      // happen.
+      if (noiseUpdateTmp < noise[i]) {
+        noise[i] = noiseUpdateTmp;
+      }
+    }
+  }  // End of freq loop.
+} - // SWB variables - int deltaBweHB = 1; - int deltaGainHB = 1; - float decayBweHB = 1.0; - float gainMapParHB = 1.0; - float gainTimeDomainHB = 1.0; - float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; +// Updates |buffer| with a new |frame|. +// Inputs: +// * |frame| is a new speech frame or NULL for setting to zero. +// * |frame_length| is the length of the new frame. +// * |buffer_length| is the length of the buffer. +// Output: +// * |buffer| is the updated buffer. +static void UpdateBuffer(const float* frame, + size_t frame_length, + size_t buffer_length, + float* buffer) { + assert(buffer_length < 2 * frame_length); - // Check that initiation has been done - if (inst->initFlag != 1) { + memcpy(buffer, + buffer + frame_length, + sizeof(*buffer) * (buffer_length - frame_length)); + if (frame) { + memcpy(buffer + buffer_length - frame_length, + frame, + sizeof(*buffer) * frame_length); + } else { + memset(buffer + buffer_length - frame_length, + 0, + sizeof(*buffer) * frame_length); + } +} + +// Transforms the signal from time to frequency domain. +// Inputs: +// * |time_data| is the signal in the time domain. +// * |time_data_length| is the length of the analysis buffer. +// * |magnitude_length| is the length of the spectrum magnitude, which equals +// the length of both |real| and |imag| (time_data_length / 2 + 1). +// Outputs: +// * |time_data| is the signal in the frequency domain. +// * |real| is the real part of the frequency domain. +// * |imag| is the imaginary part of the frequency domain. +// * |magn| is the calculated signal magnitude in the frequency domain. +static void FFT(NoiseSuppressionC* self, + float* time_data, + size_t time_data_length, + size_t magnitude_length, + float* real, + float* imag, + float* magn) { + size_t i; + + assert(magnitude_length == time_data_length / 2 + 1); + + WebRtc_rdft(time_data_length, 1, time_data, self->ip, self->wfft); + + imag[0] = 0; + real[0] = time_data[0]; + magn[0] = fabsf(real[0]) + 1.f; + imag[magnitude_length - 1] = 0; + real[magnitude_length - 1] = time_data[1]; + magn[magnitude_length - 1] = fabsf(real[magnitude_length - 1]) + 1.f; + for (i = 1; i < magnitude_length - 1; ++i) { + real[i] = time_data[2 * i]; + imag[i] = time_data[2 * i + 1]; + // Magnitude spectrum. + magn[i] = sqrtf(real[i] * real[i] + imag[i] * imag[i]) + 1.f; + } +} + +// Transforms the signal from frequency to time domain. +// Inputs: +// * |real| is the real part of the frequency domain. +// * |imag| is the imaginary part of the frequency domain. +// * |magnitude_length| is the length of the spectrum magnitude, which equals +// the length of both |real| and |imag|. +// * |time_data_length| is the length of the analysis buffer +// (2 * (magnitude_length - 1)). +// Output: +// * |time_data| is the signal in the time domain. +static void IFFT(NoiseSuppressionC* self, + const float* real, + const float* imag, + size_t magnitude_length, + size_t time_data_length, + float* time_data) { + size_t i; + + assert(time_data_length == 2 * (magnitude_length - 1)); + + time_data[0] = real[0]; + time_data[1] = real[magnitude_length - 1]; + for (i = 1; i < magnitude_length - 1; ++i) { + time_data[2 * i] = real[i]; + time_data[2 * i + 1] = imag[i]; + } + WebRtc_rdft(time_data_length, -1, time_data, self->ip, self->wfft); + + for (i = 0; i < time_data_length; ++i) { + time_data[i] *= 2.f / time_data_length; // FFT scaling. + } +} + +// Calculates the energy of a buffer. +// Inputs: +// * |buffer| is the buffer over which the energy is calculated. 
+// * |length| is the length of the buffer.
+// Returns the calculated energy.
+static float Energy(const float* buffer, size_t length) {
+  size_t i;
+  float energy = 0.f;
+
+  for (i = 0; i < length; ++i) {
+    energy += buffer[i] * buffer[i];
+  }
+
+  return energy;
+}
+
+// Windows a buffer.
+// Inputs:
+// * |window| is the window by which to multiply.
+// * |data| is the data without windowing.
+// * |length| is the length of the window and data.
+// Output:
+// * |data_windowed| is the windowed data.
+static void Windowing(const float* window,
+                      const float* data,
+                      size_t length,
+                      float* data_windowed) {
+  size_t i;
+
+  for (i = 0; i < length; ++i) {
+    data_windowed[i] = window[i] * data[i];
+  }
+}
+
+// Estimate the prior SNR with the decision-directed (DD) approach and compute
+// the corresponding DD-based Wiener filter.
+// Input:
+// * |magn| is the signal magnitude spectrum estimate.
+// Output:
+// * |theFilter| is the frequency response of the computed Wiener filter.
+static void ComputeDdBasedWienerFilter(const NoiseSuppressionC* self,
+                                       const float* magn,
+                                       float* theFilter) {
+  size_t i;
+  float snrPrior, previousEstimateStsa, currentEstimateStsa;
+
+  for (i = 0; i < self->magnLen; i++) {
+    // Previous estimate: based on previous frame with gain filter.
+    previousEstimateStsa = self->magnPrevProcess[i] /
+                           (self->noisePrev[i] + 0.0001f) * self->smooth[i];
+    // Post and prior SNR.
+    currentEstimateStsa = 0.f;
+    if (magn[i] > self->noise[i]) {
+      currentEstimateStsa = magn[i] / (self->noise[i] + 0.0001f) - 1.f;
+    }
+    // DD estimate is sum of two terms: current estimate and previous estimate.
+    // Directed decision update of |snrPrior|.
+    snrPrior = DD_PR_SNR * previousEstimateStsa +
+               (1.f - DD_PR_SNR) * currentEstimateStsa;
+    // Gain filter.
+    theFilter[i] = snrPrior / (self->overdrive + snrPrior);
+  }  // End of loop over frequencies.
+}
+
+// Changes the aggressiveness of the noise suppression method.
+// |mode| = 0 is mild (6dB), |mode| = 1 is medium (10dB), |mode| = 2 is
+// aggressive (15dB), and |mode| = 3 applies the strongest suppression.
+// Returns 0 on success and -1 otherwise.
+int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode) {
+  // Allow for modes: 0, 1, 2, 3.
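+  // The modes map onto the (overdrive, denoiseBound) pairs set below; a
+  // higher mode means no smaller overdrive and a lower gain floor, i.e. more
+  // suppression.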
+ if (mode < 0 || mode > 3) { return (-1); } - // Check for valid pointers based on sampling rate - if (inst->fs == 32000) { - if (speechFrameHB == NULL) { - return -1; - } - flagHB = 1; - // range for averaging low band quantities for H band gain - deltaBweHB = (int)inst->magnLen / 4; - deltaGainHB = deltaBweHB; + + self->aggrMode = mode; + if (mode == 0) { + self->overdrive = 1.f; + self->denoiseBound = 0.5f; + self->gainmap = 0; + } else if (mode == 1) { + // self->overdrive = 1.25f; + self->overdrive = 1.f; + self->denoiseBound = 0.25f; + self->gainmap = 1; + } else if (mode == 2) { + // self->overdrive = 1.25f; + self->overdrive = 1.1f; + self->denoiseBound = 0.125f; + self->gainmap = 1; + } else if (mode == 3) { + // self->overdrive = 1.3f; + self->overdrive = 1.25f; + self->denoiseBound = 0.09f; + self->gainmap = 1; } - // - updateParsFlag = inst->modelUpdatePars[0]; - // - - //for LB do all processing - // convert to float - for (i = 0; i < inst->blockLen10ms; i++) { - fin[i] = (float)speechFrame[i]; - } - // update analysis buffer for L band - memcpy(inst->dataBuf, inst->dataBuf + inst->blockLen10ms, - sizeof(float) * (inst->anaLen - inst->blockLen10ms)); - memcpy(inst->dataBuf + inst->anaLen - inst->blockLen10ms, fin, - sizeof(float) * inst->blockLen10ms); - - if (flagHB == 1) { - // convert to float - for (i = 0; i < inst->blockLen10ms; i++) { - fin[i] = (float)speechFrameHB[i]; - } - // update analysis buffer for H band - memcpy(inst->dataBufHB, inst->dataBufHB + inst->blockLen10ms, - sizeof(float) * (inst->anaLen - inst->blockLen10ms)); - memcpy(inst->dataBufHB + inst->anaLen - inst->blockLen10ms, fin, - sizeof(float) * inst->blockLen10ms); - } - - // check if processing needed - if (inst->outLen == 0) { - // windowing - energy1 = 0.0; - for (i = 0; i < inst->anaLen; i++) { - winData[i] = inst->window[i] * inst->dataBuf[i]; - energy1 += winData[i] * winData[i]; - } - if (energy1 == 0.0) { - // synthesize the special case of zero input - // we want to avoid updating statistics in this case: - // Updating feature statistics when we have zeros only will cause thresholds to - // move towards zero signal situations. This in turn has the effect that once the - // signal is "turned on" (non-zero values) everything will be treated as speech - // and there is no noise suppression effect. Depending on the duration of the - // inactive signal it takes a considerable amount of time for the system to learn - // what is noise and what is speech. 
- - // read out fully processed segment - for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { - fout[i - inst->windShift] = inst->syntBuf[i]; - } - // update synthesis buffer - memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, - sizeof(float) * (inst->anaLen - inst->blockLen)); - memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, - sizeof(float) * inst->blockLen); - - // out buffer - inst->outLen = inst->blockLen - inst->blockLen10ms; - if (inst->blockLen > inst->blockLen10ms) { - for (i = 0; i < inst->outLen; i++) { - inst->outBuf[i] = fout[i + inst->blockLen10ms]; - } - } - // convert to short - for (i = 0; i < inst->blockLen10ms; i++) { - dTmp = fout[i]; - if (dTmp < WEBRTC_SPL_WORD16_MIN) { - dTmp = WEBRTC_SPL_WORD16_MIN; - } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { - dTmp = WEBRTC_SPL_WORD16_MAX; - } - outFrame[i] = (short)dTmp; - } - - // for time-domain gain of HB - if (flagHB == 1) { - for (i = 0; i < inst->blockLen10ms; i++) { - dTmp = inst->dataBufHB[i]; - if (dTmp < WEBRTC_SPL_WORD16_MIN) { - dTmp = WEBRTC_SPL_WORD16_MIN; - } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { - dTmp = WEBRTC_SPL_WORD16_MAX; - } - outFrameHB[i] = (short)dTmp; - } - } // end of H band gain computation - // - return 0; - } - - // - inst->blockInd++; // Update the block index only when we process a block. - // FFT - rdft(inst->anaLen, 1, winData, inst->ip, inst->wfft); - - imag[0] = 0; - real[0] = winData[0]; - magn[0] = (float)(fabs(real[0]) + 1.0f); - imag[inst->magnLen - 1] = 0; - real[inst->magnLen - 1] = winData[1]; - magn[inst->magnLen - 1] = (float)(fabs(real[inst->magnLen - 1]) + 1.0f); - signalEnergy = (float)(real[0] * real[0]) + - (float)(real[inst->magnLen - 1] * real[inst->magnLen - 1]); - sumMagn = magn[0] + magn[inst->magnLen - 1]; - if (inst->blockInd < END_STARTUP_SHORT) { - inst->initMagnEst[0] += magn[0]; - inst->initMagnEst[inst->magnLen - 1] += magn[inst->magnLen - 1]; - tmpFloat2 = log((float)(inst->magnLen - 1)); - sum_log_i = tmpFloat2; - sum_log_i_square = tmpFloat2 * tmpFloat2; - tmpFloat1 = log(magn[inst->magnLen - 1]); - sum_log_magn = tmpFloat1; - sum_log_i_log_magn = tmpFloat2 * tmpFloat1; - } - for (i = 1; i < inst->magnLen - 1; i++) { - real[i] = winData[2 * i]; - imag[i] = winData[2 * i + 1]; - // magnitude spectrum - fTmp = real[i] * real[i]; - fTmp += imag[i] * imag[i]; - signalEnergy += fTmp; - magn[i] = ((float)sqrt(fTmp)) + 1.0f; - sumMagn += magn[i]; - if (inst->blockInd < END_STARTUP_SHORT) { - inst->initMagnEst[i] += magn[i]; - if (i >= kStartBand) { - tmpFloat2 = log((float)i); - sum_log_i += tmpFloat2; - sum_log_i_square += tmpFloat2 * tmpFloat2; - tmpFloat1 = log(magn[i]); - sum_log_magn += tmpFloat1; - sum_log_i_log_magn += tmpFloat2 * tmpFloat1; - } - } - } - signalEnergy = signalEnergy / ((float)inst->magnLen); - inst->signalEnergy = signalEnergy; - inst->sumMagn = sumMagn; - - //compute spectral flatness on input spectrum - WebRtcNs_ComputeSpectralFlatness(inst, magn); - // quantile noise estimate - WebRtcNs_NoiseEstimation(inst, magn, noise); - //compute simplified noise model during startup - if (inst->blockInd < END_STARTUP_SHORT) { - // Estimate White noise - inst->whiteNoiseLevel += sumMagn / ((float)inst->magnLen) * inst->overdrive; - // Estimate Pink noise parameters - tmpFloat1 = sum_log_i_square * ((float)(inst->magnLen - kStartBand)); - tmpFloat1 -= (sum_log_i * sum_log_i); - tmpFloat2 = (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); - tmpFloat3 = tmpFloat2 / tmpFloat1; - // Constrain the estimated 
spectrum to be positive - if (tmpFloat3 < 0.0f) { - tmpFloat3 = 0.0f; - } - inst->pinkNoiseNumerator += tmpFloat3; - tmpFloat2 = (sum_log_i * sum_log_magn); - tmpFloat2 -= ((float)(inst->magnLen - kStartBand)) * sum_log_i_log_magn; - tmpFloat3 = tmpFloat2 / tmpFloat1; - // Constrain the pink noise power to be in the interval [0, 1]; - if (tmpFloat3 < 0.0f) { - tmpFloat3 = 0.0f; - } - if (tmpFloat3 > 1.0f) { - tmpFloat3 = 1.0f; - } - inst->pinkNoiseExp += tmpFloat3; - - // Calculate frequency independent parts of parametric noise estimate. - if (inst->pinkNoiseExp == 0.0f) { - // Use white noise estimate - parametric_noise = inst->whiteNoiseLevel; - } else { - // Use pink noise estimate - parametric_num = exp(inst->pinkNoiseNumerator / (float)(inst->blockInd + 1)); - parametric_num *= (float)(inst->blockInd + 1); - parametric_exp = inst->pinkNoiseExp / (float)(inst->blockInd + 1); - parametric_noise = parametric_num / pow((float)kStartBand, parametric_exp); - } - for (i = 0; i < inst->magnLen; i++) { - // Estimate the background noise using the white and pink noise parameters - if ((inst->pinkNoiseExp > 0.0f) && (i >= kStartBand)) { - // Use pink noise estimate - parametric_noise = parametric_num / pow((float)i, parametric_exp); - } - theFilterTmp[i] = (inst->initMagnEst[i] - inst->overdrive * parametric_noise); - theFilterTmp[i] /= (inst->initMagnEst[i] + (float)0.0001); - // Weight quantile noise with modeled noise - noise[i] *= (inst->blockInd); - tmpFloat2 = parametric_noise * (END_STARTUP_SHORT - inst->blockInd); - noise[i] += (tmpFloat2 / (float)(inst->blockInd + 1)); - noise[i] /= END_STARTUP_SHORT; - } - } - //compute average signal during END_STARTUP_LONG time: - // used to normalize spectral difference measure - if (inst->blockInd < END_STARTUP_LONG) { - inst->featureData[5] *= inst->blockInd; - inst->featureData[5] += signalEnergy; - inst->featureData[5] /= (inst->blockInd + 1); - } - -#ifdef PROCESS_FLOW_0 - if (inst->blockInd > END_STARTUP_LONG) { - //option: average the quantile noise: for check with AEC2 - for (i = 0; i < inst->magnLen; i++) { - noise[i] = (float)0.6 * inst->noisePrev[i] + (float)0.4 * noise[i]; - } - for (i = 0; i < inst->magnLen; i++) { - // Wiener with over sub-substraction: - theFilter[i] = (magn[i] - inst->overdrive * noise[i]) / (magn[i] + (float)0.0001); - } - } -#else - //start processing at frames == converged+1 - // - // STEP 1: compute prior and post snr based on quantile noise est - // - - // compute DD estimate of prior SNR: needed for new method - for (i = 0; i < inst->magnLen; i++) { - // post snr - snrLocPost[i] = (float)0.0; - if (magn[i] > noise[i]) { - snrLocPost[i] = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; - } - // previous post snr - // previous estimate: based on previous frame with gain filter - previousEstimateStsa[i] = inst->magnPrev[i] / (inst->noisePrev[i] + (float)0.0001) - * (inst->smooth[i]); - // DD estimate is sum of two terms: current estimate and previous estimate - // directed decision update of snrPrior - snrLocPrior[i] = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) - * snrLocPost[i]; - // post and prior snr needed for step 2 - } // end of loop over freqs -#ifdef PROCESS_FLOW_1 - for (i = 0; i < inst->magnLen; i++) { - // gain filter - tmpFloat1 = inst->overdrive + snrLocPrior[i]; - tmpFloat2 = (float)snrLocPrior[i] / tmpFloat1; - theFilter[i] = (float)tmpFloat2; - } // end of loop over freqs -#endif - // done with step 1: dd computation of prior and post snr - - // - //STEP 2: compute 
speech/noise likelihood - // -#ifdef PROCESS_FLOW_2 - // compute difference of input spectrum with learned/estimated noise spectrum - WebRtcNs_ComputeSpectralDifference(inst, magn); - // compute histograms for parameter decisions (thresholds and weights for features) - // parameters are extracted once every window time (=inst->modelUpdatePars[1]) - if (updateParsFlag >= 1) { - // counter update - inst->modelUpdatePars[3]--; - // update histogram - if (inst->modelUpdatePars[3] > 0) { - WebRtcNs_FeatureParameterExtraction(inst, 0); - } - // compute model parameters - if (inst->modelUpdatePars[3] == 0) { - WebRtcNs_FeatureParameterExtraction(inst, 1); - inst->modelUpdatePars[3] = inst->modelUpdatePars[1]; - // if wish to update only once, set flag to zero - if (updateParsFlag == 1) { - inst->modelUpdatePars[0] = 0; - } else { - // update every window: - // get normalization for spectral difference for next window estimate - inst->featureData[6] = inst->featureData[6] - / ((float)inst->modelUpdatePars[1]); - inst->featureData[5] = (float)0.5 * (inst->featureData[6] - + inst->featureData[5]); - inst->featureData[6] = (float)0.0; - } - } - } - // compute speech/noise probability - WebRtcNs_SpeechNoiseProb(inst, probSpeechFinal, snrLocPrior, snrLocPost); - // time-avg parameter for noise update - gammaNoiseTmp = NOISE_UPDATE; - for (i = 0; i < inst->magnLen; i++) { - probSpeech = probSpeechFinal[i]; - probNonSpeech = (float)1.0 - probSpeech; - // temporary noise update: - // use it for speech frames if update value is less than previous - noiseUpdateTmp = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) - * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); - // - // time-constant based on speech/noise state - gammaNoiseOld = gammaNoiseTmp; - gammaNoiseTmp = NOISE_UPDATE; - // increase gamma (i.e., less noise update) for frame likely to be speech - if (probSpeech > PROB_RANGE) { - gammaNoiseTmp = SPEECH_UPDATE; - } - // conservative noise update - if (probSpeech < PROB_RANGE) { - inst->magnAvgPause[i] += GAMMA_PAUSE * (magn[i] - inst->magnAvgPause[i]); - } - // noise update - if (gammaNoiseTmp == gammaNoiseOld) { - noise[i] = noiseUpdateTmp; - } else { - noise[i] = gammaNoiseTmp * inst->noisePrev[i] + ((float)1.0 - gammaNoiseTmp) - * (probNonSpeech * magn[i] + probSpeech * inst->noisePrev[i]); - // allow for noise update downwards: - // if noise update decreases the noise, it is safe, so allow it to happen - if (noiseUpdateTmp < noise[i]) { - noise[i] = noiseUpdateTmp; - } - } - } // end of freq loop - // done with step 2: noise update - - // - // STEP 3: compute dd update of prior snr and post snr based on new noise estimate - // - for (i = 0; i < inst->magnLen; i++) { - // post and prior snr - currentEstimateStsa = (float)0.0; - if (magn[i] > noise[i]) { - currentEstimateStsa = magn[i] / (noise[i] + (float)0.0001) - (float)1.0; - } - // DD estimate is sume of two terms: current estimate and previous estimate - // directed decision update of snrPrior - snrPrior = DD_PR_SNR * previousEstimateStsa[i] + ((float)1.0 - DD_PR_SNR) - * currentEstimateStsa; - // gain filter - tmpFloat1 = inst->overdrive + snrPrior; - tmpFloat2 = (float)snrPrior / tmpFloat1; - theFilter[i] = (float)tmpFloat2; - } // end of loop over freqs - // done with step3 -#endif -#endif - - for (i = 0; i < inst->magnLen; i++) { - // flooring bottom - if (theFilter[i] < inst->denoiseBound) { - theFilter[i] = inst->denoiseBound; - } - // flooring top - if (theFilter[i] > (float)1.0) { - theFilter[i] = 1.0; - 
} - if (inst->blockInd < END_STARTUP_SHORT) { - // flooring bottom - if (theFilterTmp[i] < inst->denoiseBound) { - theFilterTmp[i] = inst->denoiseBound; - } - // flooring top - if (theFilterTmp[i] > (float)1.0) { - theFilterTmp[i] = 1.0; - } - // Weight the two suppression filters - theFilter[i] *= (inst->blockInd); - theFilterTmp[i] *= (END_STARTUP_SHORT - inst->blockInd); - theFilter[i] += theFilterTmp[i]; - theFilter[i] /= (END_STARTUP_SHORT); - } - // smoothing -#ifdef PROCESS_FLOW_0 - inst->smooth[i] *= SMOOTH; // value set to 0.7 in define.h file - inst->smooth[i] += ((float)1.0 - SMOOTH) * theFilter[i]; -#else - inst->smooth[i] = theFilter[i]; -#endif - real[i] *= inst->smooth[i]; - imag[i] *= inst->smooth[i]; - } - // keep track of noise and magn spectrum for next frame - for (i = 0; i < inst->magnLen; i++) { - inst->noisePrev[i] = noise[i]; - inst->magnPrev[i] = magn[i]; - } - // back to time domain - winData[0] = real[0]; - winData[1] = real[inst->magnLen - 1]; - for (i = 1; i < inst->magnLen - 1; i++) { - winData[2 * i] = real[i]; - winData[2 * i + 1] = imag[i]; - } - rdft(inst->anaLen, -1, winData, inst->ip, inst->wfft); - - for (i = 0; i < inst->anaLen; i++) { - real[i] = 2.0f * winData[i] / inst->anaLen; // fft scaling - } - - //scale factor: only do it after END_STARTUP_LONG time - factor = (float)1.0; - if (inst->gainmap == 1 && inst->blockInd > END_STARTUP_LONG) { - factor1 = (float)1.0; - factor2 = (float)1.0; - - energy2 = 0.0; - for (i = 0; i < inst->anaLen; i++) { - energy2 += (float)real[i] * (float)real[i]; - } - gain = (float)sqrt(energy2 / (energy1 + (float)1.0)); - -#ifdef PROCESS_FLOW_2 - // scaling for new version - if (gain > B_LIM) { - factor1 = (float)1.0 + (float)1.3 * (gain - B_LIM); - if (gain * factor1 > (float)1.0) { - factor1 = (float)1.0 / gain; - } - } - if (gain < B_LIM) { - //don't reduce scale too much for pause regions: - // attenuation here should be controlled by flooring - if (gain <= inst->denoiseBound) { - gain = inst->denoiseBound; - } - factor2 = (float)1.0 - (float)0.3 * (B_LIM - gain); - } - //combine both scales with speech/noise prob: - // note prior (priorSpeechProb) is not frequency dependent - factor = inst->priorSpeechProb * factor1 + ((float)1.0 - inst->priorSpeechProb) - * factor2; -#else - if (gain > B_LIM) { - factor = (float)1.0 + (float)1.3 * (gain - B_LIM); - } else { - factor = (float)1.0 + (float)2.0 * (gain - B_LIM); - } - if (gain * factor > (float)1.0) { - factor = (float)1.0 / gain; - } -#endif - } // out of inst->gainmap==1 - - // synthesis - for (i = 0; i < inst->anaLen; i++) { - inst->syntBuf[i] += factor * inst->window[i] * (float)real[i]; - } - // read out fully processed segment - for (i = inst->windShift; i < inst->blockLen + inst->windShift; i++) { - fout[i - inst->windShift] = inst->syntBuf[i]; - } - // update synthesis buffer - memcpy(inst->syntBuf, inst->syntBuf + inst->blockLen, - sizeof(float) * (inst->anaLen - inst->blockLen)); - memset(inst->syntBuf + inst->anaLen - inst->blockLen, 0, - sizeof(float) * inst->blockLen); - - // out buffer - inst->outLen = inst->blockLen - inst->blockLen10ms; - if (inst->blockLen > inst->blockLen10ms) { - for (i = 0; i < inst->outLen; i++) { - inst->outBuf[i] = fout[i + inst->blockLen10ms]; - } - } - } // end of if out.len==0 - else { - for (i = 0; i < inst->blockLen10ms; i++) { - fout[i] = inst->outBuf[i]; - } - memcpy(inst->outBuf, inst->outBuf + inst->blockLen10ms, - sizeof(float) * (inst->outLen - inst->blockLen10ms)); - memset(inst->outBuf + inst->outLen - 
inst->blockLen10ms, 0, - sizeof(float) * inst->blockLen10ms); - inst->outLen -= inst->blockLen10ms; - } - - // convert to short - for (i = 0; i < inst->blockLen10ms; i++) { - dTmp = fout[i]; - if (dTmp < WEBRTC_SPL_WORD16_MIN) { - dTmp = WEBRTC_SPL_WORD16_MIN; - } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { - dTmp = WEBRTC_SPL_WORD16_MAX; - } - outFrame[i] = (short)dTmp; - } - - // for time-domain gain of HB - if (flagHB == 1) { - for (i = 0; i < inst->magnLen; i++) { - inst->speechProbHB[i] = probSpeechFinal[i]; - } - if (inst->blockInd > END_STARTUP_LONG) { - // average speech prob from low band - // avg over second half (i.e., 4->8kHz) of freq. spectrum - avgProbSpeechHB = 0.0; - for (i = inst->magnLen - deltaBweHB - 1; i < inst->magnLen - 1; i++) { - avgProbSpeechHB += inst->speechProbHB[i]; - } - avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB); - // average filter gain from low band - // average over second half (i.e., 4->8kHz) of freq. spectrum - avgFilterGainHB = 0.0; - for (i = inst->magnLen - deltaGainHB - 1; i < inst->magnLen - 1; i++) { - avgFilterGainHB += inst->smooth[i]; - } - avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB)); - avgProbSpeechHBTmp = (float)2.0 * avgProbSpeechHB - (float)1.0; - // gain based on speech prob: - gainModHB = (float)0.5 * ((float)1.0 + (float)tanh(gainMapParHB * avgProbSpeechHBTmp)); - //combine gain with low band gain - gainTimeDomainHB = (float)0.5 * gainModHB + (float)0.5 * avgFilterGainHB; - if (avgProbSpeechHB >= (float)0.5) { - gainTimeDomainHB = (float)0.25 * gainModHB + (float)0.75 * avgFilterGainHB; - } - gainTimeDomainHB = gainTimeDomainHB * decayBweHB; - } // end of converged - //make sure gain is within flooring range - // flooring bottom - if (gainTimeDomainHB < inst->denoiseBound) { - gainTimeDomainHB = inst->denoiseBound; - } - // flooring top - if (gainTimeDomainHB > (float)1.0) { - gainTimeDomainHB = 1.0; - } - //apply gain - for (i = 0; i < inst->blockLen10ms; i++) { - dTmp = gainTimeDomainHB * inst->dataBufHB[i]; - if (dTmp < WEBRTC_SPL_WORD16_MIN) { - dTmp = WEBRTC_SPL_WORD16_MIN; - } else if (dTmp > WEBRTC_SPL_WORD16_MAX) { - dTmp = WEBRTC_SPL_WORD16_MAX; - } - outFrameHB[i] = (short)dTmp; - } - } // end of H band gain computation - // - return 0; } + +void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame) { + size_t i; + const size_t kStartBand = 5; // Skip first frequency bins during estimation. + int updateParsFlag; + float energy; + float signalEnergy = 0.f; + float sumMagn = 0.f; + float tmpFloat1, tmpFloat2, tmpFloat3; + float winData[ANAL_BLOCKL_MAX]; + float magn[HALF_ANAL_BLOCKL], noise[HALF_ANAL_BLOCKL]; + float snrLocPost[HALF_ANAL_BLOCKL], snrLocPrior[HALF_ANAL_BLOCKL]; + float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; + // Variables during startup. + float sum_log_i = 0.0; + float sum_log_i_square = 0.0; + float sum_log_magn = 0.0; + float sum_log_i_log_magn = 0.0; + float parametric_exp = 0.0; + float parametric_num = 0.0; + + // Check that initiation has been done. + assert(self->initFlag == 1); + updateParsFlag = self->modelUpdatePars[0]; + + // Update analysis buffer for L band. + UpdateBuffer(speechFrame, self->blockLen, self->anaLen, self->analyzeBuf); + + Windowing(self->window, self->analyzeBuf, self->anaLen, winData); + energy = Energy(winData, self->anaLen); + if (energy == 0.0) { + // We want to avoid updating statistics in this case: + // Updating feature statistics when we have zeros only will cause + // thresholds to move towards zero signal situations. 
This in turn has the + // effect that once the signal is "turned on" (non-zero values) everything + // will be treated as speech and there is no noise suppression effect. + // Depending on the duration of the inactive signal it takes a + // considerable amount of time for the system to learn what is noise and + // what is speech. + return; + } + + self->blockInd++; // Update the block index only when we process a block. + + FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); + + for (i = 0; i < self->magnLen; i++) { + signalEnergy += real[i] * real[i] + imag[i] * imag[i]; + sumMagn += magn[i]; + if (self->blockInd < END_STARTUP_SHORT) { + if (i >= kStartBand) { + tmpFloat2 = logf((float)i); + sum_log_i += tmpFloat2; + sum_log_i_square += tmpFloat2 * tmpFloat2; + tmpFloat1 = logf(magn[i]); + sum_log_magn += tmpFloat1; + sum_log_i_log_magn += tmpFloat2 * tmpFloat1; + } + } + } + signalEnergy /= self->magnLen; + self->signalEnergy = signalEnergy; + self->sumMagn = sumMagn; + + // Quantile noise estimate. + NoiseEstimation(self, magn, noise); + // Compute simplified noise model during startup. + if (self->blockInd < END_STARTUP_SHORT) { + // Estimate White noise. + self->whiteNoiseLevel += sumMagn / self->magnLen * self->overdrive; + // Estimate Pink noise parameters. + tmpFloat1 = sum_log_i_square * (self->magnLen - kStartBand); + tmpFloat1 -= (sum_log_i * sum_log_i); + tmpFloat2 = + (sum_log_i_square * sum_log_magn - sum_log_i * sum_log_i_log_magn); + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the estimated spectrum to be positive. + if (tmpFloat3 < 0.f) { + tmpFloat3 = 0.f; + } + self->pinkNoiseNumerator += tmpFloat3; + tmpFloat2 = (sum_log_i * sum_log_magn); + tmpFloat2 -= (self->magnLen - kStartBand) * sum_log_i_log_magn; + tmpFloat3 = tmpFloat2 / tmpFloat1; + // Constrain the pink noise power to be in the interval [0, 1]. + if (tmpFloat3 < 0.f) { + tmpFloat3 = 0.f; + } + if (tmpFloat3 > 1.f) { + tmpFloat3 = 1.f; + } + self->pinkNoiseExp += tmpFloat3; + + // Calculate frequency independent parts of parametric noise estimate. + if (self->pinkNoiseExp > 0.f) { + // Use pink noise estimate. + parametric_num = + expf(self->pinkNoiseNumerator / (float)(self->blockInd + 1)); + parametric_num *= (float)(self->blockInd + 1); + parametric_exp = self->pinkNoiseExp / (float)(self->blockInd + 1); + } + for (i = 0; i < self->magnLen; i++) { + // Estimate the background noise using the white and pink noise + // parameters. + if (self->pinkNoiseExp == 0.f) { + // Use white noise estimate. + self->parametricNoise[i] = self->whiteNoiseLevel; + } else { + // Use pink noise estimate. + float use_band = (float)(i < kStartBand ? kStartBand : i); + self->parametricNoise[i] = + parametric_num / powf(use_band, parametric_exp); + } + // Weight quantile noise with modeled noise. + noise[i] *= (self->blockInd); + tmpFloat2 = + self->parametricNoise[i] * (END_STARTUP_SHORT - self->blockInd); + noise[i] += (tmpFloat2 / (float)(self->blockInd + 1)); + noise[i] /= END_STARTUP_SHORT; + } + } + // Compute average signal during END_STARTUP_LONG time: + // used to normalize spectral difference measure. + if (self->blockInd < END_STARTUP_LONG) { + self->featureData[5] *= self->blockInd; + self->featureData[5] += signalEnergy; + self->featureData[5] /= (self->blockInd + 1); + } + + // Post and prior SNR needed for SpeechNoiseProb. 
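+  // (ComputeSnr applies the decision-directed update
+  //     snrPrior = DD_PR_SNR * prevEstimate + (1 - DD_PR_SNR) * snrPost
+  // per frequency bin, where prevEstimate comes from the previous frame's
+  // gain-filtered magnitude.)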
+ ComputeSnr(self, magn, noise, snrLocPrior, snrLocPost); + + FeatureUpdate(self, magn, updateParsFlag); + SpeechNoiseProb(self, self->speechProb, snrLocPrior, snrLocPost); + UpdateNoiseEstimate(self, magn, snrLocPrior, snrLocPost, noise); + + // Keep track of noise spectrum for next frame. + memcpy(self->noise, noise, sizeof(*noise) * self->magnLen); + memcpy(self->magnPrevAnalyze, magn, sizeof(*magn) * self->magnLen); +} + +void WebRtcNs_ProcessCore(NoiseSuppressionC* self, + const float* const* speechFrame, + size_t num_bands, + float* const* outFrame) { + // Main routine for noise reduction. + int flagHB = 0; + size_t i, j; + + float energy1, energy2, gain, factor, factor1, factor2; + float fout[BLOCKL_MAX]; + float winData[ANAL_BLOCKL_MAX]; + float magn[HALF_ANAL_BLOCKL]; + float theFilter[HALF_ANAL_BLOCKL], theFilterTmp[HALF_ANAL_BLOCKL]; + float real[ANAL_BLOCKL_MAX], imag[HALF_ANAL_BLOCKL]; + + // SWB variables. + int deltaBweHB = 1; + int deltaGainHB = 1; + float decayBweHB = 1.0; + float gainMapParHB = 1.0; + float gainTimeDomainHB = 1.0; + float avgProbSpeechHB, avgProbSpeechHBTmp, avgFilterGainHB, gainModHB; + float sumMagnAnalyze, sumMagnProcess; + + // Check that initiation has been done. + assert(self->initFlag == 1); + assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX); + + const float* const* speechFrameHB = NULL; + float* const* outFrameHB = NULL; + size_t num_high_bands = 0; + if (num_bands > 1) { + speechFrameHB = &speechFrame[1]; + outFrameHB = &outFrame[1]; + num_high_bands = num_bands - 1; + flagHB = 1; + // Range for averaging low band quantities for H band gain. + deltaBweHB = (int)self->magnLen / 4; + deltaGainHB = deltaBweHB; + } + + // Update analysis buffer for L band. + UpdateBuffer(speechFrame[0], self->blockLen, self->anaLen, self->dataBuf); + + if (flagHB == 1) { + // Update analysis buffer for H bands. + for (i = 0; i < num_high_bands; ++i) { + UpdateBuffer(speechFrameHB[i], + self->blockLen, + self->anaLen, + self->dataBufHB[i]); + } + } + + Windowing(self->window, self->dataBuf, self->anaLen, winData); + energy1 = Energy(winData, self->anaLen); + if (energy1 == 0.0) { + // Synthesize the special case of zero input. + // Read out fully processed segment. + for (i = self->windShift; i < self->blockLen + self->windShift; i++) { + fout[i - self->windShift] = self->syntBuf[i]; + } + // Update synthesis buffer. + UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf); + + for (i = 0; i < self->blockLen; ++i) + outFrame[0][i] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN); + + // For time-domain gain of HB. + if (flagHB == 1) { + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < self->blockLen; ++j) { + outFrameHB[i][j] = WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + self->dataBufHB[i][j], + WEBRTC_SPL_WORD16_MIN); + } + } + } + + return; + } + + FFT(self, winData, self->anaLen, self->magnLen, real, imag, magn); + + if (self->blockInd < END_STARTUP_SHORT) { + for (i = 0; i < self->magnLen; i++) { + self->initMagnEst[i] += magn[i]; + } + } + + ComputeDdBasedWienerFilter(self, magn, theFilter); + + for (i = 0; i < self->magnLen; i++) { + // Flooring bottom. + if (theFilter[i] < self->denoiseBound) { + theFilter[i] = self->denoiseBound; + } + // Flooring top. + if (theFilter[i] > 1.f) { + theFilter[i] = 1.f; + } + if (self->blockInd < END_STARTUP_SHORT) { + theFilterTmp[i] = + (self->initMagnEst[i] - self->overdrive * self->parametricNoise[i]); + theFilterTmp[i] /= (self->initMagnEst[i] + 0.0001f); + // Flooring bottom. 
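+      // (Same clamp to [denoiseBound, 1] as above, applied to the startup
+      // filter before the two filters are blended.)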
+      if (theFilterTmp[i] < self->denoiseBound) {
+        theFilterTmp[i] = self->denoiseBound;
+      }
+      // Flooring top.
+      if (theFilterTmp[i] > 1.f) {
+        theFilterTmp[i] = 1.f;
+      }
+      // Weight the two suppression filters.
+      theFilter[i] *= (self->blockInd);
+      theFilterTmp[i] *= (END_STARTUP_SHORT - self->blockInd);
+      theFilter[i] += theFilterTmp[i];
+      theFilter[i] /= (END_STARTUP_SHORT);
+    }
+
+    self->smooth[i] = theFilter[i];
+    real[i] *= self->smooth[i];
+    imag[i] *= self->smooth[i];
+  }
+  // Keep track of |magn| spectrum for next frame.
+  memcpy(self->magnPrevProcess, magn, sizeof(*magn) * self->magnLen);
+  memcpy(self->noisePrev, self->noise, sizeof(self->noise[0]) * self->magnLen);
+  // Back to time domain.
+  IFFT(self, real, imag, self->magnLen, self->anaLen, winData);
+
+  // Scale factor: only do it after END_STARTUP_LONG time.
+  factor = 1.f;
+  if (self->gainmap == 1 && self->blockInd > END_STARTUP_LONG) {
+    factor1 = 1.f;
+    factor2 = 1.f;
+
+    energy2 = Energy(winData, self->anaLen);
+    gain = (float)sqrt(energy2 / (energy1 + 1.f));
+
+    // Scaling for new version.
+    if (gain > B_LIM) {
+      factor1 = 1.f + 1.3f * (gain - B_LIM);
+      if (gain * factor1 > 1.f) {
+        factor1 = 1.f / gain;
+      }
+    }
+    if (gain < B_LIM) {
+      // Don't reduce scale too much for pause regions:
+      // attenuation here should be controlled by flooring.
+      if (gain <= self->denoiseBound) {
+        gain = self->denoiseBound;
+      }
+      factor2 = 1.f - 0.3f * (B_LIM - gain);
+    }
+    // Combine both scales with speech/noise prob:
+    // note prior (priorSpeechProb) is not frequency dependent.
+    factor = self->priorSpeechProb * factor1 +
+             (1.f - self->priorSpeechProb) * factor2;
+  }  // Out of self->gainmap == 1.
+
+  Windowing(self->window, winData, self->anaLen, winData);
+
+  // Synthesis.
+  for (i = 0; i < self->anaLen; i++) {
+    self->syntBuf[i] += factor * winData[i];
+  }
+  // Read out fully processed segment.
+  for (i = self->windShift; i < self->blockLen + self->windShift; i++) {
+    fout[i - self->windShift] = self->syntBuf[i];
+  }
+  // Update synthesis buffer.
+  UpdateBuffer(NULL, self->blockLen, self->anaLen, self->syntBuf);
+
+  for (i = 0; i < self->blockLen; ++i)
+    outFrame[0][i] =
+        WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fout[i], WEBRTC_SPL_WORD16_MIN);
+
+  // For time-domain gain of HB.
+  if (flagHB == 1) {
+    // Average speech prob from low band.
+    // Average over second half (i.e., 4->8kHz) of the frequency spectrum.
+    avgProbSpeechHB = 0.0;
+    for (i = self->magnLen - deltaBweHB - 1; i < self->magnLen - 1; i++) {
+      avgProbSpeechHB += self->speechProb[i];
+    }
+    avgProbSpeechHB = avgProbSpeechHB / ((float)deltaBweHB);
+    // If the speech was suppressed by a component between Analyze and
+    // Process, for example the AEC, then it should not be considered speech
+    // for high band suppression purposes.
+    sumMagnAnalyze = 0;
+    sumMagnProcess = 0;
+    for (i = 0; i < self->magnLen; ++i) {
+      sumMagnAnalyze += self->magnPrevAnalyze[i];
+      sumMagnProcess += self->magnPrevProcess[i];
+    }
+    avgProbSpeechHB *= sumMagnProcess / sumMagnAnalyze;
+    // Average filter gain from low band.
+    // Average over second half (i.e., 4->8kHz) of the frequency spectrum.
+    avgFilterGainHB = 0.0;
+    for (i = self->magnLen - deltaGainHB - 1; i < self->magnLen - 1; i++) {
+      avgFilterGainHB += self->smooth[i];
+    }
+    avgFilterGainHB = avgFilterGainHB / ((float)(deltaGainHB));
+    avgProbSpeechHBTmp = 2.f * avgProbSpeechHB - 1.f;
+    // Gain based on speech probability.
+    gainModHB = 0.5f * (1.f + (float)tanh(gainMapParHB * avgProbSpeechHBTmp));
+    // Combine gain with low band gain.
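+    // (The weighting below shifts toward the low-band filter gain when
+    // speech is likely.)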
+ gainTimeDomainHB = 0.5f * gainModHB + 0.5f * avgFilterGainHB; + if (avgProbSpeechHB >= 0.5f) { + gainTimeDomainHB = 0.25f * gainModHB + 0.75f * avgFilterGainHB; + } + gainTimeDomainHB = gainTimeDomainHB * decayBweHB; + // Make sure gain is within flooring range. + // Flooring bottom. + if (gainTimeDomainHB < self->denoiseBound) { + gainTimeDomainHB = self->denoiseBound; + } + // Flooring top. + if (gainTimeDomainHB > 1.f) { + gainTimeDomainHB = 1.f; + } + // Apply gain. + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < self->blockLen; j++) { + outFrameHB[i][j] = + WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, + gainTimeDomainHB * self->dataBufHB[i][j], + WEBRTC_SPL_WORD16_MIN); + } + } + } // End of H band gain computation. +} diff --git a/webrtc/modules/audio_processing/ns/ns_core.h b/webrtc/modules/audio_processing/ns/ns_core.h index 2f4c34f..aba1c46 100644 --- a/webrtc/modules/audio_processing/ns/ns_core.h +++ b/webrtc/modules/audio_processing/ns/ns_core.h @@ -8,105 +8,110 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ -#include "defines.h" +#include "webrtc/modules/audio_processing/ns/defines.h" -typedef struct NSParaExtract_t_ { - - //bin size of histogram +typedef struct NSParaExtract_ { + // Bin size of histogram. float binSizeLrt; float binSizeSpecFlat; float binSizeSpecDiff; - //range of histogram over which lrt threshold is computed + // Range of histogram over which LRT threshold is computed. float rangeAvgHistLrt; - //scale parameters: multiply dominant peaks of the histograms by scale factor to obtain - //thresholds for prior model - float factor1ModelPars; //for lrt and spectral difference - float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech - //peak limit for spectral flatness (varies between 0 and 1) + // Scale parameters: multiply dominant peaks of the histograms by scale factor + // to obtain thresholds for prior model. + float factor1ModelPars; // For LRT and spectral difference. + float factor2ModelPars; // For spectral_flatness: used when noise is flatter + // than speech. + // Peak limit for spectral flatness (varies between 0 and 1). float thresPosSpecFlat; - //limit on spacing of two highest peaks in histogram: spacing determined by bin size + // Limit on spacing of two highest peaks in histogram: spacing determined by + // bin size. float limitPeakSpacingSpecFlat; float limitPeakSpacingSpecDiff; - //limit on relevance of second peak: + // Limit on relevance of second peak. float limitPeakWeightsSpecFlat; float limitPeakWeightsSpecDiff; - //limit on fluctuation of lrt feature + // Limit on fluctuation of LRT feature. float thresFluctLrt; - //limit on the max and min values for the feature thresholds + // Limit on the max and min values for the feature thresholds. float maxLrt; float minLrt; float maxSpecFlat; float minSpecFlat; float maxSpecDiff; float minSpecDiff; - //criteria of weight of histogram peak to accept/reject feature + // Criteria of weight of histogram peak to accept/reject feature. 
int thresWeightSpecFlat; int thresWeightSpecDiff; -} NSParaExtract_t; +} NSParaExtract; -typedef struct NSinst_t_ { +typedef struct NoiseSuppressionC_ { + uint32_t fs; + size_t blockLen; + size_t windShift; + size_t anaLen; + size_t magnLen; + int aggrMode; + const float* window; + float analyzeBuf[ANAL_BLOCKL_MAX]; + float dataBuf[ANAL_BLOCKL_MAX]; + float syntBuf[ANAL_BLOCKL_MAX]; - WebRtc_UWord32 fs; - int blockLen; - int blockLen10ms; - int windShift; - int outLen; - int anaLen; - int magnLen; - int aggrMode; - const float* window; - float dataBuf[ANAL_BLOCKL_MAX]; - float syntBuf[ANAL_BLOCKL_MAX]; - float outBuf[3 * BLOCKL_MAX]; + int initFlag; + // Parameters for quantile noise estimation. + float density[SIMULT * HALF_ANAL_BLOCKL]; + float lquantile[SIMULT * HALF_ANAL_BLOCKL]; + float quantile[HALF_ANAL_BLOCKL]; + int counter[SIMULT]; + int updates; + // Parameters for Wiener filter. + float smooth[HALF_ANAL_BLOCKL]; + float overdrive; + float denoiseBound; + int gainmap; + // FFT work arrays. + size_t ip[IP_LENGTH]; + float wfft[W_LENGTH]; - int initFlag; - // parameters for quantile noise estimation - float density[SIMULT* HALF_ANAL_BLOCKL]; - float lquantile[SIMULT* HALF_ANAL_BLOCKL]; - float quantile[HALF_ANAL_BLOCKL]; - int counter[SIMULT]; - int updates; - // parameters for Wiener filter - float smooth[HALF_ANAL_BLOCKL]; - float overdrive; - float denoiseBound; - int gainmap; - // fft work arrays. - int ip[IP_LENGTH]; - float wfft[W_LENGTH]; - - // parameters for new method: some not needed, will reduce/cleanup later - WebRtc_Word32 blockInd; //frame index counter - int modelUpdatePars[4]; //parameters for updating or estimating - // thresholds/weights for prior model - float priorModelPars[7]; //parameters for prior model - float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame - float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame - float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing - float priorSpeechProb; //prior speech/noise probability - float featureData[7]; //data for features - float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate - float signalEnergy; //energy of magn - float sumMagn; //sum of magn - float whiteNoiseLevel; //initial noise estimate - float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate - float pinkNoiseNumerator; //pink noise parameter: numerator - float pinkNoiseExp; //pink noise parameter: power of freq - NSParaExtract_t featureExtractionParams; //parameters for feature extraction - //histograms for parameter estimation - int histLrt[HIST_PAR_EST]; - int histSpecFlat[HIST_PAR_EST]; - int histSpecDiff[HIST_PAR_EST]; - //quantities for high band estimate - float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT - float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB - -} NSinst_t; + // Parameters for new method: some not needed, will reduce/cleanup later. + int32_t blockInd; // Frame index counter. + int modelUpdatePars[4]; // Parameters for updating or estimating. + // Thresholds/weights for prior model. + float priorModelPars[7]; // Parameters for prior model. + float noise[HALF_ANAL_BLOCKL]; // Noise spectrum from current frame. + float noisePrev[HALF_ANAL_BLOCKL]; // Noise spectrum from previous frame. + // Magnitude spectrum of previous analyze frame. + float magnPrevAnalyze[HALF_ANAL_BLOCKL]; + // Magnitude spectrum of previous process frame. 
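Keeping separate magnPrevAnalyze and magnPrevProcess spectra (the latter declared next) is what lets ProcessCore, earlier in this patch, discount speech that was attenuated between Analyze and Process, for example by the AEC. A float sketch of that scaling, with a made-up helper name:

#include <stddef.h>

/* Sketch: scale the low-band speech probability by how much energy survived
 * between Analyze() and Process(). Mirrors the sumMagnProcess/sumMagnAnalyze
 * ratio used in WebRtcNs_ProcessCore; the helper name is hypothetical. */
static float ScaleSpeechProbByAttenuation(float avg_prob_speech_hb,
                                          const float* magn_prev_analyze,
                                          const float* magn_prev_process,
                                          size_t magn_len) {
  float sum_analyze = 0.f;
  float sum_process = 0.f;
  size_t i;
  for (i = 0; i < magn_len; ++i) {
    sum_analyze += magn_prev_analyze[i];
    sum_process += magn_prev_process[i];
  }
  /* Assumes sum_analyze > 0, as the production code does. */
  return avg_prob_speech_hb * (sum_process / sum_analyze);
}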
+ float magnPrevProcess[HALF_ANAL_BLOCKL]; + float logLrtTimeAvg[HALF_ANAL_BLOCKL]; // Log LRT factor with time-smoothing. + float priorSpeechProb; // Prior speech/noise probability. + float featureData[7]; + // Conservative noise spectrum estimate. + float magnAvgPause[HALF_ANAL_BLOCKL]; + float signalEnergy; // Energy of |magn|. + float sumMagn; + float whiteNoiseLevel; // Initial noise estimate. + float initMagnEst[HALF_ANAL_BLOCKL]; // Initial magnitude spectrum estimate. + float pinkNoiseNumerator; // Pink noise parameter: numerator. + float pinkNoiseExp; // Pink noise parameter: power of frequencies. + float parametricNoise[HALF_ANAL_BLOCKL]; + // Parameters for feature extraction. + NSParaExtract featureExtractionParams; + // Histograms for parameter estimation. + int histLrt[HIST_PAR_EST]; + int histSpecFlat[HIST_PAR_EST]; + int histSpecDiff[HIST_PAR_EST]; + // Quantities for high band estimate. + float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT. + // Buffering data for HB. + float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX]; +} NoiseSuppressionC; #ifdef __cplusplus extern "C" { @@ -118,16 +123,16 @@ extern "C" { * This function initializes a noise suppression instance * * Input: - * - inst : Instance that should be initialized + * - self : Instance that should be initialized * - fs : Sampling frequency * * Output: - * - inst : Initialized instance + * - self : Initialized instance * * Return value : 0 - Ok * -1 - Error */ -int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs); +int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs); /**************************************************************************** * WebRtcNs_set_policy_core(...) @@ -135,16 +140,30 @@ int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs); * This changes the aggressiveness of the noise suppression method. * * Input: - * - inst : Instance that should be initialized - * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) + * - self : Instance that should be initialized + * - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB) * * Output: - * - NS_inst : Initialized instance + * - self : Initialized instance * * Return value : 0 - Ok * -1 - Error */ -int WebRtcNs_set_policy_core(NSinst_t* inst, int mode); +int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode); + +/**************************************************************************** + * WebRtcNs_AnalyzeCore + * + * Estimate the background noise. + * + * Input: + * - self : Instance that should be initialized + * - speechFrame : Input speech frame for lower band + * + * Output: + * - self : Updated instance + */ +void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame); /**************************************************************************** * WebRtcNs_ProcessCore @@ -152,28 +171,20 @@ int WebRtcNs_set_policy_core(NSinst_t* inst, int mode); * Do noise suppression. 
* * Input: - * - inst : Instance that should be initialized - * - inFrameLow : Input speech frame for lower band - * - inFrameHigh : Input speech frame for higher band + * - self : Instance that should be initialized + * - inFrame : Input speech frame for each band + * - num_bands : Number of bands * * Output: - * - inst : Updated instance - * - outFrameLow : Output speech frame for lower band - * - outFrameHigh : Output speech frame for higher band - * - * Return value : 0 - OK - * -1 - Error + * - self : Updated instance + * - outFrame : Output speech frame for each band */ - - -int WebRtcNs_ProcessCore(NSinst_t* inst, - short* inFrameLow, - short* inFrameHigh, - short* outFrameLow, - short* outFrameHigh); - +void WebRtcNs_ProcessCore(NoiseSuppressionC* self, + const float* const* inFrame, + size_t num_bands, + float* const* outFrame); #ifdef __cplusplus } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_ diff --git a/webrtc/modules/audio_processing/ns/nsx_core.c b/webrtc/modules/audio_processing/ns/nsx_core.c index 967d849..ed6125a 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core.c +++ b/webrtc/modules/audio_processing/ns/nsx_core.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,197 +8,70 @@ * be found in the AUTHORS file in the root of the source tree. */ -#include "noise_suppression_x.h" +#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h" #include #include -#include #include -#include +#include -#include "nsx_core.h" +#include "webrtc/common_audio/signal_processing/include/real_fft.h" +#include "webrtc/modules/audio_processing/ns/nsx_core.h" +#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h" -// Skip first frequency bins during estimation. (0 <= value < 64) -static const int kStartBand = 5; - -// Constants to compensate for shifting signal log(2^shifts). -const WebRtc_Word16 WebRtcNsx_kLogTable[9] = { +#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON) +/* Tables are defined in ARM assembly files. 
*/ +extern const int16_t WebRtcNsx_kLogTable[9]; +extern const int16_t WebRtcNsx_kCounterDiv[201]; +extern const int16_t WebRtcNsx_kLogTableFrac[256]; +#else +static const int16_t WebRtcNsx_kLogTable[9] = { 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 }; -const WebRtc_Word16 WebRtcNsx_kCounterDiv[201] = { - 32767, 16384, 10923, 8192, 6554, 5461, 4681, - 4096, 3641, 3277, 2979, 2731, 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, - 1489, 1425, 1365, 1311, 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, - 886, 862, 840, 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, - 607, 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, 468, - 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, 386, 381, 377, - 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, 328, 324, 321, 318, 315, - 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, 285, 282, 280, 278, 275, 273, 271, - 269, 266, 264, 262, 260, 258, 256, 254, 252, 250, 248, 246, 245, 243, 241, 239, 237, - 236, 234, 232, 231, 229, 228, 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, - 210, 209, 207, 206, 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, - 189, 188, 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, +static const int16_t WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 }; -const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256] = { +static const int16_t WebRtcNsx_kLogTableFrac[256] = { 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, - 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117, - 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, - 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, - 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, - 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 178, - 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, 188, 189, 190, 191, 192, 192, - 193, 194, 195, 196, 197, 198, 198, 199, 200, 201, 202, 203, 203, 204, 205, 206, - 207, 208, 208, 209, 210, 211, 212, 212, 213, 214, 215, 216, 216, 217, 218, 219, - 220, 220, 221, 222, 223, 224, 224, 225, 
226, 227, 228, 228, 229, 230, 231, 231, - 232, 233, 234, 234, 235, 236, 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, - 244, 245, 246, 247, 247, 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 }; +#endif // WEBRTC_DETECT_NEON || WEBRTC_HAS_NEON -static const WebRtc_Word16 kPowTableFrac[1024] = { - 0, 1, 1, 2, 3, 3, 4, 5, - 6, 6, 7, 8, 8, 9, 10, 10, - 11, 12, 13, 13, 14, 15, 15, 16, - 17, 17, 18, 19, 20, 20, 21, 22, - 22, 23, 24, 25, 25, 26, 27, 27, - 28, 29, 30, 30, 31, 32, 32, 33, - 34, 35, 35, 36, 37, 37, 38, 39, - 40, 40, 41, 42, 42, 43, 44, 45, - 45, 46, 47, 48, 48, 49, 50, 50, - 51, 52, 53, 53, 54, 55, 56, 56, - 57, 58, 58, 59, 60, 61, 61, 62, - 63, 64, 64, 65, 66, 67, 67, 68, - 69, 69, 70, 71, 72, 72, 73, 74, - 75, 75, 76, 77, 78, 78, 79, 80, - 81, 81, 82, 83, 84, 84, 85, 86, - 87, 87, 88, 89, 90, 90, 91, 92, - 93, 93, 94, 95, 96, 96, 97, 98, - 99, 100, 100, 101, 102, 103, 103, 104, - 105, 106, 106, 107, 108, 109, 109, 110, - 111, 112, 113, 113, 114, 115, 116, 116, - 117, 118, 119, 119, 120, 121, 122, 123, - 123, 124, 125, 126, 126, 127, 128, 129, - 130, 130, 131, 132, 133, 133, 134, 135, - 136, 137, 137, 138, 139, 140, 141, 141, - 142, 143, 144, 144, 145, 146, 147, 148, - 148, 149, 150, 151, 152, 152, 153, 154, - 155, 156, 156, 157, 158, 159, 160, 160, - 161, 162, 163, 164, 164, 165, 166, 167, - 168, 168, 169, 170, 171, 172, 173, 173, - 174, 175, 176, 177, 177, 178, 179, 180, - 181, 181, 182, 183, 184, 185, 186, 186, - 187, 188, 189, 190, 190, 191, 192, 193, - 194, 195, 195, 196, 197, 198, 199, 200, - 200, 201, 202, 203, 204, 205, 205, 206, - 207, 208, 209, 210, 210, 211, 212, 213, - 214, 215, 215, 216, 217, 218, 219, 220, - 220, 221, 222, 223, 224, 225, 225, 226, - 227, 228, 229, 230, 231, 231, 232, 233, - 234, 235, 236, 237, 237, 238, 239, 240, - 241, 242, 243, 243, 244, 245, 246, 247, - 248, 249, 249, 250, 251, 252, 253, 254, - 255, 255, 256, 257, 258, 259, 260, 261, - 262, 262, 263, 264, 265, 266, 267, 268, - 268, 269, 270, 271, 272, 273, 274, 275, - 276, 276, 277, 278, 279, 280, 281, 282, - 283, 283, 284, 285, 286, 287, 288, 289, - 290, 291, 291, 292, 293, 294, 295, 296, - 297, 298, 299, 299, 300, 301, 302, 303, - 304, 305, 306, 307, 308, 308, 309, 310, - 311, 312, 313, 314, 315, 316, 317, 318, - 318, 319, 320, 321, 322, 323, 324, 325, - 326, 327, 328, 328, 329, 330, 331, 332, - 333, 334, 335, 336, 337, 338, 339, 339, - 340, 341, 342, 343, 344, 345, 346, 347, - 348, 349, 350, 351, 352, 352, 353, 354, - 355, 356, 357, 358, 359, 360, 361, 362, - 363, 364, 365, 366, 367, 367, 368, 369, - 370, 371, 372, 373, 374, 375, 376, 377, - 378, 379, 380, 381, 382, 383, 384, 385, - 385, 386, 387, 
388, 389, 390, 391, 392, - 393, 394, 395, 396, 397, 398, 399, 400, - 401, 402, 403, 404, 405, 406, 407, 408, - 409, 410, 410, 411, 412, 413, 414, 415, - 416, 417, 418, 419, 420, 421, 422, 423, - 424, 425, 426, 427, 428, 429, 430, 431, - 432, 433, 434, 435, 436, 437, 438, 439, - 440, 441, 442, 443, 444, 445, 446, 447, - 448, 449, 450, 451, 452, 453, 454, 455, - 456, 457, 458, 459, 460, 461, 462, 463, - 464, 465, 466, 467, 468, 469, 470, 471, - 472, 473, 474, 475, 476, 477, 478, 479, - 480, 481, 482, 483, 484, 485, 486, 487, - 488, 489, 490, 491, 492, 493, 494, 495, - 496, 498, 499, 500, 501, 502, 503, 504, - 505, 506, 507, 508, 509, 510, 511, 512, - 513, 514, 515, 516, 517, 518, 519, 520, - 521, 522, 523, 525, 526, 527, 528, 529, - 530, 531, 532, 533, 534, 535, 536, 537, - 538, 539, 540, 541, 542, 544, 545, 546, - 547, 548, 549, 550, 551, 552, 553, 554, - 555, 556, 557, 558, 560, 561, 562, 563, - 564, 565, 566, 567, 568, 569, 570, 571, - 572, 574, 575, 576, 577, 578, 579, 580, - 581, 582, 583, 584, 585, 587, 588, 589, - 590, 591, 592, 593, 594, 595, 596, 597, - 599, 600, 601, 602, 603, 604, 605, 606, - 607, 608, 610, 611, 612, 613, 614, 615, - 616, 617, 618, 620, 621, 622, 623, 624, - 625, 626, 627, 628, 630, 631, 632, 633, - 634, 635, 636, 637, 639, 640, 641, 642, - 643, 644, 645, 646, 648, 649, 650, 651, - 652, 653, 654, 656, 657, 658, 659, 660, - 661, 662, 664, 665, 666, 667, 668, 669, - 670, 672, 673, 674, 675, 676, 677, 678, - 680, 681, 682, 683, 684, 685, 687, 688, - 689, 690, 691, 692, 693, 695, 696, 697, - 698, 699, 700, 702, 703, 704, 705, 706, - 708, 709, 710, 711, 712, 713, 715, 716, - 717, 718, 719, 720, 722, 723, 724, 725, - 726, 728, 729, 730, 731, 732, 733, 735, - 736, 737, 738, 739, 741, 742, 743, 744, - 745, 747, 748, 749, 750, 751, 753, 754, - 755, 756, 757, 759, 760, 761, 762, 763, - 765, 766, 767, 768, 770, 771, 772, 773, - 774, 776, 777, 778, 779, 780, 782, 783, - 784, 785, 787, 788, 789, 790, 792, 793, - 794, 795, 796, 798, 799, 800, 801, 803, - 804, 805, 806, 808, 809, 810, 811, 813, - 814, 815, 816, 818, 819, 820, 821, 823, - 824, 825, 826, 828, 829, 830, 831, 833, - 834, 835, 836, 838, 839, 840, 841, 843, - 844, 845, 846, 848, 849, 850, 851, 853, - 854, 855, 857, 858, 859, 860, 862, 863, - 864, 866, 867, 868, 869, 871, 872, 873, - 874, 876, 877, 878, 880, 881, 882, 883, - 885, 886, 887, 889, 890, 891, 893, 894, - 895, 896, 898, 899, 900, 902, 903, 904, - 906, 907, 908, 909, 911, 912, 913, 915, - 916, 917, 919, 920, 921, 923, 924, 925, - 927, 928, 929, 931, 932, 933, 935, 936, - 937, 938, 940, 941, 942, 944, 945, 946, - 948, 949, 950, 952, 953, 955, 956, 957, - 959, 960, 961, 963, 964, 965, 967, 968, - 969, 971, 972, 973, 975, 976, 977, 979, - 980, 981, 983, 984, 986, 987, 988, 990, - 991, 992, 994, 995, 996, 998, 999, 1001, - 1002, 1003, 1005, 1006, 1007, 1009, 1010, 1012, - 1013, 1014, 1016, 1017, 1018, 1020, 1021, 1023 -}; - -static const WebRtc_Word16 kIndicatorTable[17] = { - 0, 2017, 3809, 5227, 6258, 6963, 7424, 7718, - 7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187 -}; +// Skip first frequency bins during estimation. 
(0 <= value < 64) +static const size_t kStartBand = 5; // hybrib Hanning & flat window -static const WebRtc_Word16 kBlocks80w128x[128] = { +static const int16_t kBlocks80w128x[128] = { 0, 536, 1072, 1606, 2139, 2669, 3196, 3720, 4240, 4756, 5266, 5771, 6270, 6762, 7246, 7723, 8192, 8652, 9102, 9543, 9974, 10394, 10803, 11200, 11585, 11958, 12318, 12665, 12998, 13318, 13623, 13913, 14189, @@ -214,7 +87,7 @@ static const WebRtc_Word16 kBlocks80w128x[128] = { }; // hybrib Hanning & flat window -static const WebRtc_Word16 kBlocks160w256x[256] = { +static const int16_t kBlocks160w256x[256] = { 0, 268, 536, 804, 1072, 1339, 1606, 1872, 2139, 2404, 2669, 2933, 3196, 3459, 3720, 3981, 4240, 4499, 4756, 5012, 5266, 5520, 5771, 6021, @@ -259,7 +132,7 @@ static const WebRtc_Word16 kBlocks160w256x[256] = { // } else { // factor1 = 1.0; // } -static const WebRtc_Word16 kFactor1Table[257] = { +static const int16_t kFactor1Table[257] = { 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, 8192, @@ -294,7 +167,7 @@ static const WebRtc_Word16 kFactor1Table[257] = { // } // // Gain factor table: Input value in Q8 and output value in Q13 -static const WebRtc_Word16 kFactor2Aggressiveness1[257] = { +static const int16_t kFactor2Aggressiveness1[257] = { 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7577, 7596, 7614, 7632, 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, @@ -317,7 +190,7 @@ static const WebRtc_Word16 kFactor2Aggressiveness1[257] = { }; // Gain factor table: Input value in Q8 and output value in Q13 -static const WebRtc_Word16 kFactor2Aggressiveness2[257] = { +static const int16_t kFactor2Aggressiveness2[257] = { 7270, 7270, 7270, 7270, 7270, 7306, 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, @@ -340,7 +213,7 @@ static const WebRtc_Word16 kFactor2Aggressiveness2[257] = { }; // Gain factor table: Input value in Q8 and output value in Q13 -static const WebRtc_Word16 kFactor2Aggressiveness3[257] = { +static const int16_t kFactor2Aggressiveness3[257] = { 7184, 7184, 7184, 7229, 7270, 7306, 7339, 7369, 7397, 7424, 7448, 7472, 7495, 7517, 7537, 7558, 7577, 7596, 7614, 7632, 7650, 7667, 7683, 7699, 7715, 7731, 7746, 7761, 7775, 7790, 7804, 7818, 7832, 7845, @@ -364,7 +237,7 @@ static const WebRtc_Word16 kFactor2Aggressiveness3[257] = { // sum of log2(i) from table index to inst->anaLen2 in Q5 // Note that the first table value is invalid, since log2(0) = -infinity -static const WebRtc_Word16 kSumLogIndex[66] = { +static const int16_t kSumLogIndex[66] = { 0, 22917, 22917, 22885, 22834, 22770, 22696, 22613, 22524, 22428, 22326, 22220, 22109, 21994, 21876, 21754, 21629, 21501, 21370, 21237, 21101, 20963, 20822, 20679, @@ -378,7 +251,7 @@ static const WebRtc_Word16 kSumLogIndex[66] = { // sum of log2(i)^2 from table index to inst->anaLen2 in Q2 // Note that the first table value is invalid, since log2(0) = -infinity -static const WebRtc_Word16 kSumSquareLogIndex[66] = { +static const int16_t kSumSquareLogIndex[66] = { 0, 16959, 16959, 16955, 16945, 16929, 16908, 16881, 16850, 16814, 16773, 16729, 16681, 16630, 16575, 16517, 16456, 16392, 16325, 16256, 16184, 16109, 16032, 15952, @@ -392,7 +265,7 @@ static const WebRtc_Word16 kSumSquareLogIndex[66] = { 
// log2(table index) in Q12 // Note that the first table value is invalid, since log2(0) = -infinity -static const WebRtc_Word16 kLogIndex[129] = { +static const int16_t kLogIndex[129] = { 0, 0, 4096, 6492, 8192, 9511, 10588, 11499, 12288, 12984, 13607, 14170, 14684, 15157, 15595, 16003, 16384, 16742, 17080, 17400, 17703, 17991, 18266, 18529, @@ -414,7 +287,7 @@ static const WebRtc_Word16 kLogIndex[129] = { // determinant of estimation matrix in Q0 corresponding to the log2 tables above // Note that the first table value is invalid, since log2(0) = -infinity -static const WebRtc_Word16 kDeterminantEstMatrix[66] = { +static const int16_t kDeterminantEstMatrix[66] = { 0, 29814, 25574, 22640, 20351, 18469, 16873, 15491, 14277, 13199, 12233, 11362, 10571, 9851, 9192, 8587, 8030, 7515, 7038, 6596, 6186, 5804, 5448, 5115, @@ -426,93 +299,333 @@ static const WebRtc_Word16 kDeterminantEstMatrix[66] = { 355, 330 }; -void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset) { - WebRtc_Word32 tmp32no1 = 0; - WebRtc_Word32 tmp32no2 = 0; +// Update the noise estimation information. +static void UpdateNoiseEstimate(NoiseSuppressionFixedC* inst, int offset) { + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + int16_t tmp16 = 0; + const int16_t kExp2Const = 11819; // Q13 - WebRtc_Word16 tmp16no1 = 0; - WebRtc_Word16 tmp16no2 = 0; - const WebRtc_Word16 kExp2Const = 11819; // Q13 + size_t i = 0; - int i = 0; - - tmp16no2 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, + tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, inst->magnLen); // Guarantee a Q-domain as high as possible and still fit in int16 inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - kExp2Const, tmp16no2, 21); + kExp2Const, tmp16, 21); for (i = 0; i < inst->magnLen; i++) { // inst->quantile[i]=exp(inst->lquantile[offset+i]); // in Q21 - tmp32no2 = WEBRTC_SPL_MUL_16_16(kExp2Const, - inst->noiseEstLogQuantile[offset + i]); + tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i]; tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac - tmp16no1 = -(WebRtc_Word16) WEBRTC_SPL_RSHIFT_W32(tmp32no2, 21); - tmp16no1 += 21;// shift 21 to get result in Q0 - tmp16no1 -= (WebRtc_Word16) inst->qNoise; //shift to get result in Q(qNoise) - if (tmp16no1 > 0) { - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1, tmp16no1); + tmp16 = (int16_t)(tmp32no2 >> 21); + tmp16 -= 21;// shift 21 to get result in Q0 + tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) + if (tmp16 < 0) { + tmp32no1 >>= -tmp16; } else { - tmp32no1 = WEBRTC_SPL_LSHIFT_W32(tmp32no1, -tmp16no1); + tmp32no1 <<= tmp16; } - // TODO(bjornv): Replace with WebRtcSpl_SatW32ToW16(...) when available. 
- if (tmp32no1 > 32767) {
- tmp32no1 = 32767;
- } else if (tmp32no1 < -32768) {
- tmp32no1 = -32768;
- }
- tmp16no1 = (WebRtc_Word16) tmp32no1;
- inst->noiseEstQuantile[i] = tmp16no1;
+ inst->noiseEstQuantile[i] = WebRtcSpl_SatW32ToW16(tmp32no1);
 }
}

-void WebRtcNsx_CalcParametricNoiseEstimate(NsxInst_t* inst,
- WebRtc_Word16 pink_noise_exp_avg,
- WebRtc_Word32 pink_noise_num_avg,
- int freq_index,
- WebRtc_UWord32* noise_estimate,
- WebRtc_UWord32* noise_estimate_avg) {
- WebRtc_Word32 tmp32no1 = 0;
- WebRtc_Word32 tmp32no2 = 0;
+// Noise Estimation
+static void NoiseEstimationC(NoiseSuppressionFixedC* inst,
+ uint16_t* magn,
+ uint32_t* noise,
+ int16_t* q_noise) {
+ int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv;
+ int16_t countProd, delta, zeros, frac;
+ int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
+ const int16_t log2_const = 22713; // Q15
+ const int16_t width_factor = 21845;

- WebRtc_Word16 int_part = 0;
- WebRtc_Word16 frac_part = 0;
+ size_t i, s, offset;
+
+ tabind = inst->stages - inst->normData;
+ assert(tabind < 9);
+ assert(tabind > -9);
+ if (tabind < 0) {
+ logval = -WebRtcNsx_kLogTable[-tabind];
+ } else {
+ logval = WebRtcNsx_kLogTable[tabind];
+ }
+
+ // lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
+ // magn is in Q(-stages), and the real lmagn values are:
+ // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
+ // lmagn in Q8
+ for (i = 0; i < inst->magnLen; i++) {
+ if (magn[i]) {
+ zeros = WebRtcSpl_NormU32((uint32_t)magn[i]);
+ frac = (int16_t)((((uint32_t)magn[i] << zeros)
+ & 0x7FFFFFFF) >> 23);
+ // log2(magn(i))
+ assert(frac < 256);
+ log2 = (int16_t)(((31 - zeros) << 8)
+ + WebRtcNsx_kLogTableFrac[frac]);
+ // log2(magn(i))*log(2)
+ lmagn[i] = (int16_t)((log2 * log2_const) >> 15);
+ // + log(2^stages)
+ lmagn[i] += logval;
+ } else {
+ lmagn[i] = logval;//0;
+ }
+ }
+
+ // loop over simultaneous estimates
+ for (s = 0; s < SIMULT; s++) {
+ offset = s * inst->magnLen;
+
+ // Get counter values from state
+ counter = inst->noiseEstCounter[s];
+ assert(counter < 201);
+ countDiv = WebRtcNsx_kCounterDiv[counter];
+ countProd = (int16_t)(counter * countDiv);
+
+ // quant_est(...)
+ for (i = 0; i < inst->magnLen; i++) {
+ // compute delta
+ if (inst->noiseEstDensity[offset + i] > 512) {
+ // Get the value for delta by shifting instead of dividing.
+ int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]);
+ delta = (int16_t)(FACTOR_Q16 >> (14 - factor));
+ } else {
+ delta = FACTOR_Q7;
+ if (inst->blockIndex < END_STARTUP_LONG) {
+ // Smaller step size during startup. This prevents us from using
+ // unrealistic values causing overflow.
+ delta = FACTOR_Q7_STARTUP;
+ }
+ }
+
+ // update log quantile estimate
+ tmp16 = (int16_t)((delta * countDiv) >> 14);
+ if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
+ // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
+ // CounterDiv=1/(inst->counter[s]+1) in Q15
+ tmp16 += 2;
+ inst->noiseEstLogQuantile[offset + i] += tmp16 / 4;
+ } else {
+ tmp16 += 1;
+ // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
+ // TODO(bjornv): investigate why we need to truncate twice.
+ tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2);
+ inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
+ if (inst->noiseEstLogQuantile[offset + i] < logval) {
+ // This is the smallest fixed point representation we can
+ // have, hence we limit the output.
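The asymmetric update above is, in effect, a fixed-point quantile tracker: the log-quantile estimate moves up by roughly 0.25 of the step when the observed log magnitude lies above it, and down by roughly 0.75 of the step otherwise, so it settles near the 25th percentile of the log spectrum. The next patch line applies the lower clamp the comment describes. A float sketch of the same update rule, with hypothetical names:

/* Float sketch of the quantile update done above in Q-format arithmetic:
 * step toward the observation asymmetrically so the estimate converges to
 * the kQuantile-th quantile of the log-magnitude distribution. */
static float QuantileUpdate(float log_quantile, float log_magn, float delta,
                            float counter_div, float log_floor) {
  const float kQuantile = 0.25f;
  const float step = delta * counter_div;  /* Shrinks as the counter grows. */
  if (log_magn > log_quantile) {
    log_quantile += kQuantile * step;
  } else {
    log_quantile -= (1.f - kQuantile) * step;
    if (log_quantile < log_floor) {
      log_quantile = log_floor;  /* Same clamp as the fixed-point code. */
    }
  }
  return log_quantile;
}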
+ inst->noiseEstLogQuantile[offset + i] = logval;
+ }
+ }
+
+ // update density estimate
+ if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
+ < WIDTH_Q8) {
+ tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ inst->noiseEstDensity[offset + i], countProd, 15);
+ tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ width_factor, countDiv, 15);
+ inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
+ }
+ } // end loop over magnitude spectrum
+
+ if (counter >= END_STARTUP_LONG) {
+ inst->noiseEstCounter[s] = 0;
+ if (inst->blockIndex >= END_STARTUP_LONG) {
+ UpdateNoiseEstimate(inst, offset);
+ }
+ }
+ inst->noiseEstCounter[s]++;
+
+ } // end loop over simultaneous estimates
+
+ // Sequentially update the noise during startup
+ if (inst->blockIndex < END_STARTUP_LONG) {
+ UpdateNoiseEstimate(inst, offset);
+ }
+
+ for (i = 0; i < inst->magnLen; i++) {
+ noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise)
+ }
+ (*q_noise) = (int16_t)inst->qNoise;
+}
+
+// Filter the data in the frequency domain, and create spectrum.
+static void PrepareSpectrumC(NoiseSuppressionFixedC* inst, int16_t* freq_buf) {
+ size_t i = 0, j = 0;
+
+ for (i = 0; i < inst->magnLen; i++) {
+ inst->real[i] = (int16_t)((inst->real[i] *
+ (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
+ inst->imag[i] = (int16_t)((inst->imag[i] *
+ (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages)
+ }
+
+ freq_buf[0] = inst->real[0];
+ freq_buf[1] = -inst->imag[0];
+ for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) {
+ freq_buf[j] = inst->real[i];
+ freq_buf[j + 1] = -inst->imag[i];
+ }
+ freq_buf[inst->anaLen] = inst->real[inst->anaLen2];
+ freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2];
+}
+
+// Denormalize the real-valued signal |in|, the output from inverse FFT.
+static void DenormalizeC(NoiseSuppressionFixedC* inst,
+ int16_t* in,
+ int factor) {
+ size_t i = 0;
+ int32_t tmp32 = 0;
+ for (i = 0; i < inst->anaLen; i += 1) {
+ tmp32 = WEBRTC_SPL_SHIFT_W32((int32_t)in[i],
+ factor - inst->normData);
+ inst->real[i] = WebRtcSpl_SatW32ToW16(tmp32); // Q0
+ }
+}
+
+// For the noise suppression process, synthesis, read out fully processed
+// segment, and update synthesis buffer.
+static void SynthesisUpdateC(NoiseSuppressionFixedC* inst,
+ int16_t* out_frame,
+ int16_t gain_factor) {
+ size_t i = 0;
+ int16_t tmp16a = 0;
+ int16_t tmp16b = 0;
+ int32_t tmp32 = 0;
+
+ // synthesis
+ for (i = 0; i < inst->anaLen; i++) {
+ tmp16a = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
+ inst->window[i], inst->real[i], 14); // Q0, window in Q14
+ tmp32 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16a, gain_factor, 13); // Q0
+ // Down shift with rounding
+ tmp16b = WebRtcSpl_SatW32ToW16(tmp32); // Q0
+ inst->synthesisBuffer[i] = WebRtcSpl_AddSatW16(inst->synthesisBuffer[i],
+ tmp16b); // Q0
+ }
+
+ // read out fully processed segment
+ for (i = 0; i < inst->blockLen10ms; i++) {
+ out_frame[i] = inst->synthesisBuffer[i]; // Q0
+ }
+
+ // update synthesis buffer
+ memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
+ (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer));
+ WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer +
+ inst->anaLen - inst->blockLen10ms, inst->blockLen10ms);
+}
+
+// Update analysis buffer for lower band, and window data before FFT.
+static void AnalysisUpdateC(NoiseSuppressionFixedC* inst,
+ int16_t* out,
+ int16_t* new_speech) {
+ size_t i = 0;
+
+ // For lower band update analysis buffer.
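The memcpy pair that follows slides the analysis buffer by one 10 ms block: drop the oldest samples, append the new ones. A standalone sketch of the same update; it uses memmove since source and destination overlap inside one buffer, while the patch performs the identical forward copy with memcpy.

#include <stdint.h>
#include <string.h>

/* Sketch of the sliding analysis buffer: discard the oldest block_len
 * samples, append block_len new ones, keeping ana_len samples of history. */
static void SlideAnalysisBuffer(int16_t* buf, size_t ana_len,
                                const int16_t* new_speech, size_t block_len) {
  /* memmove, because the two regions overlap within |buf|. */
  memmove(buf, buf + block_len, (ana_len - block_len) * sizeof(*buf));
  memcpy(buf + ana_len - block_len, new_speech, block_len * sizeof(*buf));
}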
+ memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech, + inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + + // Window data before FFT. + for (i = 0; i < inst->anaLen; i++) { + out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->window[i], inst->analysisBuffer[i], 14); // Q0 + } +} + +// Normalize the real-valued signal |in|, the input to forward FFT. +static void NormalizeRealBufferC(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out) { + size_t i = 0; + assert(inst->normData >= 0); + for (i = 0; i < inst->anaLen; ++i) { + out[i] = in[i] << inst->normData; // Q(normData) + } +} + +// Declare function pointers. +NoiseEstimation WebRtcNsx_NoiseEstimation; +PrepareSpectrum WebRtcNsx_PrepareSpectrum; +SynthesisUpdate WebRtcNsx_SynthesisUpdate; +AnalysisUpdate WebRtcNsx_AnalysisUpdate; +Denormalize WebRtcNsx_Denormalize; +NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer; + +#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON) +// Initialize function pointers for ARM Neon platform. +static void WebRtcNsx_InitNeon(void) { + WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon; + WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon; + WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdateNeon; + WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdateNeon; +} +#endif + +#if defined(MIPS32_LE) +// Initialize function pointers for MIPS platform. +static void WebRtcNsx_InitMips(void) { + WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrum_mips; + WebRtcNsx_SynthesisUpdate = WebRtcNsx_SynthesisUpdate_mips; + WebRtcNsx_AnalysisUpdate = WebRtcNsx_AnalysisUpdate_mips; + WebRtcNsx_NormalizeRealBuffer = WebRtcNsx_NormalizeRealBuffer_mips; +#if defined(MIPS_DSP_R1_LE) + WebRtcNsx_Denormalize = WebRtcNsx_Denormalize_mips; +#endif +} +#endif + +void WebRtcNsx_CalcParametricNoiseEstimate(NoiseSuppressionFixedC* inst, + int16_t pink_noise_exp_avg, + int32_t pink_noise_num_avg, + int freq_index, + uint32_t* noise_estimate, + uint32_t* noise_estimate_avg) { + int32_t tmp32no1 = 0; + int32_t tmp32no2 = 0; + + int16_t int_part = 0; + int16_t frac_part = 0; // Use pink noise estimate // noise_estimate = 2^(pinkNoiseNumerator + pinkNoiseExp * log2(j)) assert(freq_index >= 0); assert(freq_index < 129); - tmp32no2 = WEBRTC_SPL_MUL_16_16(pink_noise_exp_avg, kLogIndex[freq_index]); // Q26 - tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 15); // Q11 + tmp32no2 = (pink_noise_exp_avg * kLogIndex[freq_index]) >> 15; // Q11 tmp32no1 = pink_noise_num_avg - tmp32no2; // Q11 // Calculate output: 2^tmp32no1 // Output in Q(minNorm-stages) - tmp32no1 += WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)(inst->minNorm - inst->stages), 11); + tmp32no1 += (inst->minNorm - inst->stages) << 11; if (tmp32no1 > 0) { - int_part = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 11); - frac_part = (WebRtc_Word16)(tmp32no1 & 0x000007ff); // Q11 + int_part = (int16_t)(tmp32no1 >> 11); + frac_part = (int16_t)(tmp32no1 & 0x000007ff); // Q11 // Piecewise linear approximation of 'b' in // 2^(int_part+frac_part) = 2^int_part * (1 + b) // 'b' is given in Q11 and below stored in frac_part. 
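For reference, a float sketch of the piecewise-linear 2^x that the following lines evaluate in Q11: the factors 804/1024 and 1244/1024 match the fixed-point constants below, while the helper name is made up.

#include <math.h>

/* Reference (float) version of the Q11 2^x approximation: split x into
 * integer and fractional parts and approximate 2^frac as 1 + b, with b
 * linear on each half of [0, 1). Exact at frac = 0, 0.5, and 1. */
static float Exp2Approx(float x) {
  float int_part = floorf(x);
  float frac = x - int_part;  /* In [0, 1). */
  float b;
  if (frac >= 0.5f) {
    b = 1.f - (1.f - frac) * (1244.f / 1024.f);  /* Upper half. */
  } else {
    b = frac * (804.f / 1024.f);                 /* Lower half. */
  }
  return ldexpf(1.f + b, (int)int_part);  /* 2^int_part * (1 + b). */
}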
- if (WEBRTC_SPL_RSHIFT_W16(frac_part, 10)) { + if (frac_part >> 10) { // Upper fractional part - tmp32no2 = WEBRTC_SPL_MUL_16_16(2048 - frac_part, 1244); // Q21 - tmp32no2 = 2048 - WEBRTC_SPL_RSHIFT_W32(tmp32no2, 10); + tmp32no2 = (2048 - frac_part) * 1244; // Q21 + tmp32no2 = 2048 - (tmp32no2 >> 10); } else { // Lower fractional part - tmp32no2 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(frac_part, 804), 10); + tmp32no2 = (frac_part * 804) >> 10; } // Shift fractional part to Q(minNorm-stages) tmp32no2 = WEBRTC_SPL_SHIFT_W32(tmp32no2, int_part - 11); - *noise_estimate_avg = WEBRTC_SPL_LSHIFT_U32(1, int_part) + (WebRtc_UWord32)tmp32no2; + *noise_estimate_avg = (1 << int_part) + (uint32_t)tmp32no2; // Scale up to initMagnEst, which is not block averaged - *noise_estimate = (*noise_estimate_avg) * (WebRtc_UWord32)(inst->blockIndex + 1); + *noise_estimate = (*noise_estimate_avg) * (uint32_t)(inst->blockIndex + 1); } } // Initialize state -WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs) { +int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs) { int i; //check for valid pointer @@ -522,7 +635,7 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs) { // // Initialization of struct - if (fs == 8000 || fs == 16000 || fs == 32000) { + if (fs == 8000 || fs == 16000 || fs == 32000 || fs == 48000) { inst->fs = fs; } else { return -1; @@ -536,15 +649,7 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs) { inst->thresholdLogLrt = 131072; //default threshold for LRT feature inst->maxLrt = 0x0040000; inst->minLrt = 52429; - } else if (fs == 16000) { - inst->blockLen10ms = 160; - inst->anaLen = 256; - inst->stages = 8; - inst->window = kBlocks160w256x; - inst->thresholdLogLrt = 212644; //default threshold for LRT feature - inst->maxLrt = 0x0080000; - inst->minLrt = 104858; - } else if (fs == 32000) { + } else { inst->blockLen10ms = 160; inst->anaLen = 256; inst->stages = 8; @@ -553,14 +658,23 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs) { inst->maxLrt = 0x0080000; inst->minLrt = 104858; } - inst->anaLen2 = WEBRTC_SPL_RSHIFT_W16(inst->anaLen, 1); + inst->anaLen2 = inst->anaLen / 2; inst->magnLen = inst->anaLen2 + 1; + if (inst->real_fft != NULL) { + WebRtcSpl_FreeRealFFT(inst->real_fft); + } + inst->real_fft = WebRtcSpl_CreateRealFFT(inst->stages); + if (inst->real_fft == NULL) { + return -1; + } + WebRtcSpl_ZerosArrayW16(inst->analysisBuffer, ANAL_BLOCKL_MAX); WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer, ANAL_BLOCKL_MAX); // for HB processing - WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX, ANAL_BLOCKL_MAX); + WebRtcSpl_ZerosArrayW16(inst->dataBufHBFX[0], + NUM_HIGH_BANDS_MAX * ANAL_BLOCKL_MAX); // for quantile noise estimation WebRtcSpl_ZerosArrayW16(inst->noiseEstQuantile, HALF_ANAL_BLOCKL); for (i = 0; i < SIMULT * HALF_ANAL_BLOCKL; i++) { @@ -568,11 +682,11 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs) { inst->noiseEstDensity[i] = 153; // Q9 } for (i = 0; i < SIMULT; i++) { - inst->noiseEstCounter[i] = (WebRtc_Word16)(END_STARTUP_LONG * (i + 1)) / SIMULT; + inst->noiseEstCounter[i] = (int16_t)(END_STARTUP_LONG * (i + 1)) / SIMULT; } // Initialize suppression filter with ones - WebRtcSpl_MemSetW16((WebRtc_Word16*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL); + WebRtcSpl_MemSetW16((int16_t*)inst->noiseSupFilter, 16384, HALF_ANAL_BLOCKL); // Set the aggressiveness: default inst->aggrMode = 0; @@ -640,12 +754,33 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 
fs) { inst->file5 = fopen("file5.pcm", "wb"); #endif + // Initialize function pointers. + WebRtcNsx_NoiseEstimation = NoiseEstimationC; + WebRtcNsx_PrepareSpectrum = PrepareSpectrumC; + WebRtcNsx_SynthesisUpdate = SynthesisUpdateC; + WebRtcNsx_AnalysisUpdate = AnalysisUpdateC; + WebRtcNsx_Denormalize = DenormalizeC; + WebRtcNsx_NormalizeRealBuffer = NormalizeRealBufferC; + +#ifdef WEBRTC_DETECT_NEON + uint64_t features = WebRtc_GetCPUFeaturesARM(); + if ((features & kCPUFeatureNEON) != 0) { + WebRtcNsx_InitNeon(); + } +#elif defined(WEBRTC_HAS_NEON) + WebRtcNsx_InitNeon(); +#endif + +#if defined(MIPS32_LE) + WebRtcNsx_InitMips(); +#endif + inst->initFlag = 1; return 0; } -int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode) { +int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode) { // allow for modes:0,1,2,3 if (mode < 0 || mode > 3) { return -1; @@ -675,145 +810,24 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode) { return 0; } -#if !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) -void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, - WebRtc_Word16* qNoise) { - WebRtc_Word32 numerator; - - WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac; - WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; - WebRtc_Word16 log2Const = 22713; // Q15 - WebRtc_Word16 widthFactor = 21845; - - int i, s, offset; - - numerator = FACTOR_Q16; - - tabind = inst->stages - inst->normData; - assert(tabind < 9); - assert(tabind > -9); - if (tabind < 0) { - logval = -WebRtcNsx_kLogTable[-tabind]; - } else { - logval = WebRtcNsx_kLogTable[tabind]; - } - - // lmagn(i)=log(magn(i))=log(2)*log2(magn(i)) - // magn is in Q(-stages), and the real lmagn values are: - // real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages) - // lmagn in Q8 - for (i = 0; i < inst->magnLen; i++) { - if (magn[i]) { - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); - frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23); - // log2(magn(i)) - assert(frac < 256); - log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); - // log2(magn(i))*log(2) - lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15); - // + log(2^stages) - lmagn[i] += logval; - } else { - lmagn[i] = logval;//0; - } - } - - // loop over simultaneous estimates - for (s = 0; s < SIMULT; s++) { - offset = s * inst->magnLen; - - // Get counter values from state - counter = inst->noiseEstCounter[s]; - assert(counter < 201); - countDiv = WebRtcNsx_kCounterDiv[counter]; - countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); - - // quant_est(...) - for (i = 0; i < inst->magnLen; i++) { - // compute delta - if (inst->noiseEstDensity[offset + i] > 512) { - delta = WebRtcSpl_DivW32W16ResW16(numerator, - inst->noiseEstDensity[offset + i]); - } else { - delta = FACTOR_Q7; - if (inst->blockIndex < END_STARTUP_LONG) { - // Smaller step size during startup. This prevents from using - // unrealistic values causing overflow. 
- delta = FACTOR_Q7_STARTUP; - } - } - - // update log quantile estimate - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); - if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { - // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 - // CounterDiv=1/(inst->counter[s]+1) in Q15 - tmp16 += 2; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); - inst->noiseEstLogQuantile[offset + i] += tmp16no1; - } else { - tmp16 += 1; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); - // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); - inst->noiseEstLogQuantile[offset + i] -= tmp16no2; - if (inst->noiseEstLogQuantile[offset + i] < logval) { - // This is the smallest fixed point representation we can - // have, hence we limit the output. - inst->noiseEstLogQuantile[offset + i] = logval; - } - } - - // update density estimate - if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) - < WIDTH_Q8) { - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - inst->noiseEstDensity[offset + i], countProd, 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor, - countDiv, 15); - inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; - } - } // end loop over magnitude spectrum - - if (counter >= END_STARTUP_LONG) { - inst->noiseEstCounter[s] = 0; - if (inst->blockIndex >= END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); - } - } - inst->noiseEstCounter[s]++; - - } // end loop over simultaneous estimates - - // Sequentially update the noise during startup - if (inst->blockIndex < END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); - } - - for (i = 0; i < inst->magnLen; i++) { - noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) - } - (*qNoise) = (WebRtc_Word16)inst->qNoise; -} -#endif // !(defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)) - // Extract thresholds for feature parameters // histograms are computed over some window_size (given by window_pars) // thresholds and weights are extracted every window // flag 0 means update histogram only, flag 1 means compute the thresholds/weights // threshold and weights are returned in: inst->priorModelPars -void WebRtcNsx_FeatureParameterExtraction(NsxInst_t* inst, int flag) { - WebRtc_UWord32 tmpU32; - WebRtc_UWord32 histIndex; - WebRtc_UWord32 posPeak1SpecFlatFX, posPeak2SpecFlatFX; - WebRtc_UWord32 posPeak1SpecDiffFX, posPeak2SpecDiffFX; +void WebRtcNsx_FeatureParameterExtraction(NoiseSuppressionFixedC* inst, + int flag) { + uint32_t tmpU32; + uint32_t histIndex; + uint32_t posPeak1SpecFlatFX, posPeak2SpecFlatFX; + uint32_t posPeak1SpecDiffFX, posPeak2SpecDiffFX; - WebRtc_Word32 tmp32; - WebRtc_Word32 fluctLrtFX, thresFluctLrtFX; - WebRtc_Word32 avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX; + int32_t tmp32; + int32_t fluctLrtFX, thresFluctLrtFX; + int32_t avgHistLrtFX, avgSquareHistLrtFX, avgHistLrtComplFX; - WebRtc_Word16 j; - WebRtc_Word16 numHistLrt; + int16_t j; + int16_t numHistLrt; int i; int useFeatureSpecFlat, useFeatureSpecDiff, featureSum; @@ -826,13 +840,13 @@ void WebRtcNsx_FeatureParameterExtraction(NsxInst_t* inst, int flag) { // LRT // Type casting to UWord32 is safe since negative values will not be wrapped to larger // values than HIST_PAR_EST - histIndex = (WebRtc_UWord32)(inst->featureLogLrt); + histIndex = (uint32_t)(inst->featureLogLrt); if (histIndex < HIST_PAR_EST) { inst->histLrt[histIndex]++; } // Spectral flatness 
// (inst->featureSpecFlat*20)>>10 = (inst->featureSpecFlat*5)>>8 - histIndex = WEBRTC_SPL_RSHIFT_U32(inst->featureSpecFlat * 5, 8); + histIndex = (inst->featureSpecFlat * 5) >> 8; if (histIndex < HIST_PAR_EST) { inst->histSpecFlat[histIndex]++; } @@ -842,8 +856,8 @@ void WebRtcNsx_FeatureParameterExtraction(NsxInst_t* inst, int flag) { // Guard against division by zero // If timeAvgMagnEnergy == 0 we have no normalizing statistics and // therefore can't update the histogram - histIndex = WEBRTC_SPL_UDIV((inst->featureSpecDiff * 5) >> inst->stages, - inst->timeAvgMagnEnergy); + histIndex = ((inst->featureSpecDiff * 5) >> inst->stages) / + inst->timeAvgMagnEnergy; } if (histIndex < HIST_PAR_EST) { inst->histSpecDiff[histIndex]++; @@ -860,29 +874,29 @@ void WebRtcNsx_FeatureParameterExtraction(NsxInst_t* inst, int flag) { numHistLrt = 0; for (i = 0; i < BIN_SIZE_LRT; i++) { j = (2 * i + 1); - tmp32 = WEBRTC_SPL_MUL_16_16(inst->histLrt[i], j); + tmp32 = inst->histLrt[i] * j; avgHistLrtFX += tmp32; numHistLrt += inst->histLrt[i]; - avgSquareHistLrtFX += WEBRTC_SPL_MUL_32_16(tmp32, j); + avgSquareHistLrtFX += tmp32 * j; } avgHistLrtComplFX = avgHistLrtFX; for (; i < HIST_PAR_EST; i++) { j = (2 * i + 1); - tmp32 = WEBRTC_SPL_MUL_16_16(inst->histLrt[i], j); + tmp32 = inst->histLrt[i] * j; avgHistLrtComplFX += tmp32; - avgSquareHistLrtFX += WEBRTC_SPL_MUL_32_16(tmp32, j); + avgSquareHistLrtFX += tmp32 * j; } - fluctLrtFX = WEBRTC_SPL_MUL(avgSquareHistLrtFX, numHistLrt); - fluctLrtFX -= WEBRTC_SPL_MUL(avgHistLrtFX, avgHistLrtComplFX); + fluctLrtFX = avgSquareHistLrtFX * numHistLrt - + avgHistLrtFX * avgHistLrtComplFX; thresFluctLrtFX = THRES_FLUCT_LRT * numHistLrt; // get threshold for LRT feature: - tmpU32 = (FACTOR_1_LRT_DIFF * (WebRtc_UWord32)avgHistLrtFX); + tmpU32 = (FACTOR_1_LRT_DIFF * (uint32_t)avgHistLrtFX); if ((fluctLrtFX < thresFluctLrtFX) || (numHistLrt == 0) || - (tmpU32 > (WebRtc_UWord32)(100 * numHistLrt))) { + (tmpU32 > (uint32_t)(100 * numHistLrt))) { //very low fluctuation, so likely noise inst->thresholdLogLrt = inst->maxLrt; } else { - tmp32 = (WebRtc_Word32)((tmpU32 << (9 + inst->stages)) / numHistLrt / + tmp32 = (int32_t)((tmpU32 << (9 + inst->stages)) / numHistLrt / 25); // check if value is within min/max range inst->thresholdLogLrt = WEBRTC_SPL_SAT(inst->maxLrt, @@ -913,12 +927,12 @@ void WebRtcNsx_FeatureParameterExtraction(NsxInst_t* inst, int flag) { maxPeak1 = inst->histSpecFlat[i]; weightPeak1SpecFlat = inst->histSpecFlat[i]; - posPeak1SpecFlatFX = (WebRtc_UWord32)(2 * i + 1); + posPeak1SpecFlatFX = (uint32_t)(2 * i + 1); } else if (inst->histSpecFlat[i] > maxPeak2) { // Found new "second" peak maxPeak2 = inst->histSpecFlat[i]; weightPeak2SpecFlat = inst->histSpecFlat[i]; - posPeak2SpecFlatFX = (WebRtc_UWord32)(2 * i + 1); + posPeak2SpecFlatFX = (uint32_t)(2 * i + 1); } } @@ -959,12 +973,12 @@ void WebRtcNsx_FeatureParameterExtraction(NsxInst_t* inst, int flag) { maxPeak1 = inst->histSpecDiff[i]; weightPeak1SpecDiff = inst->histSpecDiff[i]; - posPeak1SpecDiffFX = (WebRtc_UWord32)(2 * i + 1); + posPeak1SpecDiffFX = (uint32_t)(2 * i + 1); } else if (inst->histSpecDiff[i] > maxPeak2) { // Found new "second" peak maxPeak2 = inst->histSpecDiff[i]; weightPeak2SpecDiff = inst->histSpecDiff[i]; - posPeak2SpecDiffFX = (WebRtc_UWord32)(2 * i + 1); + posPeak2SpecDiffFX = (uint32_t)(2 * i + 1); } } @@ -995,27 +1009,28 @@ void WebRtcNsx_FeatureParameterExtraction(NsxInst_t* inst, int flag) { WebRtcSpl_ZerosArrayW16(inst->histLrt, HIST_PAR_EST); 
WebRtcSpl_ZerosArrayW16(inst->histSpecDiff, HIST_PAR_EST); WebRtcSpl_ZerosArrayW16(inst->histSpecFlat, HIST_PAR_EST); - } // end of flag == 1 + } // end of flag == 1 } // Compute spectral flatness on input spectrum // magn is the magnitude spectrum // spectral flatness is returned in inst->featureSpecFlat -void WebRtcNsx_ComputeSpectralFlatness(NsxInst_t* inst, WebRtc_UWord16* magn) { - WebRtc_UWord32 tmpU32; - WebRtc_UWord32 avgSpectralFlatnessNum, avgSpectralFlatnessDen; +void WebRtcNsx_ComputeSpectralFlatness(NoiseSuppressionFixedC* inst, + uint16_t* magn) { + uint32_t tmpU32; + uint32_t avgSpectralFlatnessNum, avgSpectralFlatnessDen; - WebRtc_Word32 tmp32; - WebRtc_Word32 currentSpectralFlatness, logCurSpectralFlatness; + int32_t tmp32; + int32_t currentSpectralFlatness, logCurSpectralFlatness; - WebRtc_Word16 zeros, frac, intPart; + int16_t zeros, frac, intPart; - int i; + size_t i; // for flatness avgSpectralFlatnessNum = 0; - avgSpectralFlatnessDen = inst->sumMagn - (WebRtc_UWord32)magn[0]; // Q(normData-stages) + avgSpectralFlatnessDen = inst->sumMagn - (uint32_t)magn[0]; // Q(normData-stages) // compute log of ratio of the geometric to arithmetic mean: check for log(0) case // flatness = exp( sum(log(magn[i]))/N - log(sum(magn[i])/N) ) @@ -1024,46 +1039,44 @@ void WebRtcNsx_ComputeSpectralFlatness(NsxInst_t* inst, WebRtc_UWord16* magn) { for (i = 1; i < inst->magnLen; i++) { // First bin is excluded from spectrum measures. Number of bins is now a power of 2 if (magn[i]) { - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); - frac = (WebRtc_Word16)(((WebRtc_UWord32)((WebRtc_UWord32)(magn[i]) << zeros) + zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); + frac = (int16_t)(((uint32_t)((uint32_t)(magn[i]) << zeros) & 0x7FFFFFFF) >> 23); // log2(magn(i)) assert(frac < 256); - tmpU32 = (WebRtc_UWord32)(((31 - zeros) << 8) + tmpU32 = (uint32_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8 avgSpectralFlatnessNum += tmpU32; // Q8 } else { //if at least one frequency component is zero, treat separately tmpU32 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecFlat, SPECT_FLAT_TAVG_Q14); // Q24 - inst->featureSpecFlat -= WEBRTC_SPL_RSHIFT_U32(tmpU32, 14); // Q10 + inst->featureSpecFlat -= tmpU32 >> 14; // Q10 return; } } //ratio and inverse log: check for case of log(0) zeros = WebRtcSpl_NormU32(avgSpectralFlatnessDen); - frac = (WebRtc_Word16)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23); + frac = (int16_t)(((avgSpectralFlatnessDen << zeros) & 0x7FFFFFFF) >> 23); // log2(avgSpectralFlatnessDen) assert(frac < 256); - tmp32 = (WebRtc_Word32)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8 - logCurSpectralFlatness = (WebRtc_Word32)avgSpectralFlatnessNum; - logCurSpectralFlatness += ((WebRtc_Word32)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1) + tmp32 = (int32_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); // Q8 + logCurSpectralFlatness = (int32_t)avgSpectralFlatnessNum; + logCurSpectralFlatness += ((int32_t)(inst->stages - 1) << (inst->stages + 7)); // Q(8+stages-1) logCurSpectralFlatness -= (tmp32 << (inst->stages - 1)); - logCurSpectralFlatness = WEBRTC_SPL_LSHIFT_W32(logCurSpectralFlatness, 10 - inst->stages); // Q17 - tmp32 = (WebRtc_Word32)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness) + logCurSpectralFlatness <<= (10 - inst->stages); // Q17 + tmp32 = (int32_t)(0x00020000 | (WEBRTC_SPL_ABS_W32(logCurSpectralFlatness) & 0x0001FFFF)); //Q17 - intPart = -(WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(logCurSpectralFlatness, 17); - intPart += 
7; // Shift 7 to get the output in Q10 (from Q17 = -17+10) + intPart = 7 - (logCurSpectralFlatness >> 17); // Add 7 for output in Q10. if (intPart > 0) { - currentSpectralFlatness = WEBRTC_SPL_RSHIFT_W32(tmp32, intPart); + currentSpectralFlatness = tmp32 >> intPart; } else { - currentSpectralFlatness = WEBRTC_SPL_LSHIFT_W32(tmp32, -intPart); + currentSpectralFlatness = tmp32 << -intPart; } //time average update of spectral flatness feature - tmp32 = currentSpectralFlatness - (WebRtc_Word32)inst->featureSpecFlat; // Q10 - tmp32 = WEBRTC_SPL_MUL_32_16(SPECT_FLAT_TAVG_Q14, tmp32); // Q24 - inst->featureSpecFlat = (WebRtc_UWord32)((WebRtc_Word32)inst->featureSpecFlat - + WEBRTC_SPL_RSHIFT_W32(tmp32, 14)); // Q10 + tmp32 = currentSpectralFlatness - (int32_t)inst->featureSpecFlat; // Q10 + tmp32 *= SPECT_FLAT_TAVG_Q14; // Q24 + inst->featureSpecFlat += tmp32 >> 14; // Q10 // done with flatness feature } @@ -1072,20 +1085,22 @@ void WebRtcNsx_ComputeSpectralFlatness(NsxInst_t* inst, WebRtc_UWord16* magn) { // magn_tmp is the input spectrum // the reference/template spectrum is inst->magn_avg_pause[i] // returns (normalized) spectral difference in inst->featureSpecDiff -void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, WebRtc_UWord16* magnIn) { +void WebRtcNsx_ComputeSpectralDifference(NoiseSuppressionFixedC* inst, + uint16_t* magnIn) { // This is to be calculated: // avgDiffNormMagn = var(magnIn) - cov(magnIn, magnAvgPause)^2 / var(magnAvgPause) - WebRtc_UWord32 tmpU32no1, tmpU32no2; - WebRtc_UWord32 varMagnUFX, varPauseUFX, avgDiffNormMagnUFX; + uint32_t tmpU32no1, tmpU32no2; + uint32_t varMagnUFX, varPauseUFX, avgDiffNormMagnUFX; - WebRtc_Word32 tmp32no1, tmp32no2; - WebRtc_Word32 avgPauseFX, avgMagnFX, covMagnPauseFX; - WebRtc_Word32 maxPause, minPause; + int32_t tmp32no1, tmp32no2; + int32_t avgPauseFX, avgMagnFX, covMagnPauseFX; + int32_t maxPause, minPause; - WebRtc_Word16 tmp16no1; + int16_t tmp16no1; - int i, norm32, nShifts; + size_t i; + int norm32, nShifts; avgPauseFX = 0; maxPause = 0; @@ -1098,8 +1113,8 @@ void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, WebRtc_UWord16* magnIn minPause = WEBRTC_SPL_MIN(minPause, inst->avgMagnPause[i]); } // normalize by replacing div of "inst->magnLen" with "inst->stages-1" shifts - avgPauseFX = WEBRTC_SPL_RSHIFT_W32(avgPauseFX, inst->stages - 1); - avgMagnFX = (WebRtc_Word32)WEBRTC_SPL_RSHIFT_U32(inst->sumMagn, inst->stages - 1); + avgPauseFX >>= inst->stages - 1; + avgMagnFX = inst->sumMagn >> (inst->stages - 1); // Largest possible deviation in magnPause for (co)var calculations tmp32no1 = WEBRTC_SPL_MAX(maxPause - avgPauseFX, avgPauseFX - minPause); // Get number of shifts to make sure we don't get wrap around in varPause @@ -1110,26 +1125,26 @@ void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, WebRtc_UWord16* magnIn covMagnPauseFX = 0; for (i = 0; i < inst->magnLen; i++) { // Compute var and cov of magn and magn_pause - tmp16no1 = (WebRtc_Word16)((WebRtc_Word32)magnIn[i] - avgMagnFX); + tmp16no1 = (int16_t)((int32_t)magnIn[i] - avgMagnFX); tmp32no2 = inst->avgMagnPause[i] - avgPauseFX; - varMagnUFX += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1); // Q(2*qMagn) - tmp32no1 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16no1); // Q(prevQMagn+qMagn) + varMagnUFX += (uint32_t)(tmp16no1 * tmp16no1); // Q(2*qMagn) + tmp32no1 = tmp32no2 * tmp16no1; // Q(prevQMagn+qMagn) covMagnPauseFX += tmp32no1; // Q(prevQMagn+qMagn) - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, nShifts); // Q(prevQMagn-minPause) - varPauseUFX += 
(WebRtc_UWord32)WEBRTC_SPL_MUL(tmp32no1, tmp32no1); // Q(2*(prevQMagn-minPause)) + tmp32no1 = tmp32no2 >> nShifts; // Q(prevQMagn-minPause). + varPauseUFX += tmp32no1 * tmp32no1; // Q(2*(prevQMagn-minPause)) } //update of average magnitude spectrum: Q(-2*stages) and averaging replaced by shifts - inst->curAvgMagnEnergy += WEBRTC_SPL_RSHIFT_U32(inst->magnEnergy, 2 * inst->normData - + inst->stages - 1); + inst->curAvgMagnEnergy += + inst->magnEnergy >> (2 * inst->normData + inst->stages - 1); avgDiffNormMagnUFX = varMagnUFX; // Q(2*qMagn) if ((varPauseUFX) && (covMagnPauseFX)) { - tmpU32no1 = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn) + tmpU32no1 = (uint32_t)WEBRTC_SPL_ABS_W32(covMagnPauseFX); // Q(prevQMagn+qMagn) norm32 = WebRtcSpl_NormU32(tmpU32no1) - 16; if (norm32 > 0) { - tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(tmpU32no1, norm32); // Q(prevQMagn+qMagn+norm32) + tmpU32no1 <<= norm32; // Q(prevQMagn+qMagn+norm32) } else { - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, -norm32); // Q(prevQMagn+qMagn+norm32) + tmpU32no1 >>= -norm32; // Q(prevQMagn+qMagn+norm32) } tmpU32no2 = WEBRTC_SPL_UMUL(tmpU32no1, tmpU32no1); // Q(2*(prevQMagn+qMagn-norm32)) @@ -1141,8 +1156,8 @@ void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, WebRtc_UWord16* magnIn } if (varPauseUFX > 0) { // Q(2*(qMagn+norm32-16+minPause)) - tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no2, varPauseUFX); - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, nShifts); + tmpU32no1 = tmpU32no2 / varPauseUFX; + tmpU32no1 >>= nShifts; // Q(2*qMagn) avgDiffNormMagnUFX -= WEBRTC_SPL_MIN(avgDiffNormMagnUFX, tmpU32no1); @@ -1151,293 +1166,58 @@ void WebRtcNsx_ComputeSpectralDifference(NsxInst_t* inst, WebRtc_UWord16* magnIn } } //normalize and compute time average update of difference feature - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(avgDiffNormMagnUFX, 2 * inst->normData); + tmpU32no1 = avgDiffNormMagnUFX >> (2 * inst->normData); if (inst->featureSpecDiff > tmpU32no1) { tmpU32no2 = WEBRTC_SPL_UMUL_32_16(inst->featureSpecDiff - tmpU32no1, SPECT_DIFF_TAVG_Q8); // Q(8-2*stages) - inst->featureSpecDiff -= WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 8); // Q(-2*stages) + inst->featureSpecDiff -= tmpU32no2 >> 8; // Q(-2*stages) } else { tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no1 - inst->featureSpecDiff, SPECT_DIFF_TAVG_Q8); // Q(8-2*stages) - inst->featureSpecDiff += WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 8); // Q(-2*stages) - } -} - -// Compute speech/noise probability -// speech/noise probability is returned in: probSpeechFinal -//snrLocPrior is the prior SNR for each frequency (in Q11) -//snrLocPost is the post SNR for each frequency (in Q11) -void WebRtcNsx_SpeechNoiseProb(NsxInst_t* inst, WebRtc_UWord16* nonSpeechProbFinal, - WebRtc_UWord32* priorLocSnr, WebRtc_UWord32* postLocSnr) { - WebRtc_UWord32 zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3; - - WebRtc_Word32 invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32; - WebRtc_Word32 frac32, logTmp; - WebRtc_Word32 logLrtTimeAvgKsumFX; - - WebRtc_Word16 indPriorFX16; - WebRtc_Word16 tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart; - - int i, normTmp, normTmp2, nShifts; - - // compute feature based on average LR factor - // this is the average over all frequencies of the smooth log LRT - logLrtTimeAvgKsumFX = 0; - for (i = 0; i < inst->magnLen; i++) { - besselTmpFX32 = (WebRtc_Word32)postLocSnr[i]; // Q11 - normTmp = WebRtcSpl_NormU32(postLocSnr[i]); - num = WEBRTC_SPL_LSHIFT_U32(postLocSnr[i], normTmp); // Q(11+normTmp) - if (normTmp > 10) { - den = 
WEBRTC_SPL_LSHIFT_U32(priorLocSnr[i], normTmp - 11); // Q(normTmp) - } else { - den = WEBRTC_SPL_RSHIFT_U32(priorLocSnr[i], 11 - normTmp); // Q(normTmp) - } - if (den > 0) { - besselTmpFX32 -= WEBRTC_SPL_UDIV(num, den); // Q11 - } else { - besselTmpFX32 -= num; // Q11 - } - - // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior) - inst->logLrtTimeAvg[i]); - // Here, LRT_TAVG = 0.5 - zeros = WebRtcSpl_NormU32(priorLocSnr[i]); - frac32 = (WebRtc_Word32)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19); - tmp32 = WEBRTC_SPL_MUL(frac32, frac32); - tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(tmp32, -43), 19); - tmp32 += WEBRTC_SPL_MUL_16_16_RSFT((WebRtc_Word16)frac32, 5412, 12); - frac32 = tmp32 + 37; - // tmp32 = log2(priorLocSnr[i]) - tmp32 = (WebRtc_Word32)(((31 - zeros) << 12) + frac32) - (11 << 12); // Q12 - logTmp = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_32_16(tmp32, 178), 8); // log2(priorLocSnr[i])*log(2) - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(logTmp + inst->logLrtTimeAvgW32[i], 1); // Q12 - inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12 - - logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 - } - inst->featureLogLrt = WEBRTC_SPL_RSHIFT_W32(logLrtTimeAvgKsumFX * 5, inst->stages + 10); // 5 = BIN_SIZE_LRT / 2 - // done with computation of LR factor - - // - //compute the indicator functions - // - - // average LRT feature - // FLOAT code - // indicator0 = 0.5 * (tanh(widthPrior * (logLrtTimeAvgKsum - threshPrior0)) + 1.0); - tmpIndFX = 16384; // Q14(1.0) - tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 - nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; - //use larger width in tanh map for pause regions - if (tmp32no1 < 0) { - tmpIndFX = 0; - tmp32no1 = -tmp32no1; - //widthPrior = widthPrior * 2.0; - nShifts++; - } - tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 - // compute indicator function: sigmoid map - tableIndex = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 14); - if ((tableIndex < 16) && (tableIndex >= 0)) { - tmp16no2 = kIndicatorTable[tableIndex]; - tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; - frac = (WebRtc_Word16)(tmp32no1 & 0x00003fff); // Q14 - tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); - if (tmpIndFX == 0) { - tmpIndFX = 8192 - tmp16no2; // Q14 - } else { - tmpIndFX = 8192 + tmp16no2; // Q14 - } - } - indPriorFX = WEBRTC_SPL_MUL_16_16(inst->weightLogLrt, tmpIndFX); // 6*Q14 - - //spectral flatness feature - if (inst->weightSpecFlat) { - tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 - tmpIndFX = 16384; // Q14(1.0) - //use larger width in tanh map for pause regions - tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 - nShifts = 4; - if (inst->thresholdSpecFlat < tmpU32no1) { - tmpIndFX = 0; - tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; - //widthPrior = widthPrior * 2.0; - nShifts++; - } - tmp32no1 = (WebRtc_Word32)WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, - nShifts), 25); //Q14 - tmpU32no1 = WebRtcSpl_DivU32U16(WEBRTC_SPL_LSHIFT_U32(tmpU32no2, nShifts), 25); //Q14 - // compute indicator function: sigmoid map - // FLOAT code - // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * (threshPrior1 - tmpFloat1)) + 1.0); - tableIndex = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); - if (tableIndex < 16) { - tmp16no2 = kIndicatorTable[tableIndex]; - tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; - frac = (WebRtc_Word16)(tmpU32no1 & 0x00003fff); // Q14 - tmp16no2 += 
(WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, frac, 14); - if (tmpIndFX) { - tmpIndFX = 8192 + tmp16no2; // Q14 - } else { - tmpIndFX = 8192 - tmp16no2; // Q14 - } - } - indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecFlat, tmpIndFX); // 6*Q14 - } - - //for template spectral-difference - if (inst->weightSpecDiff) { - tmpU32no1 = 0; - if (inst->featureSpecDiff) { - normTmp = WEBRTC_SPL_MIN(20 - inst->stages, - WebRtcSpl_NormU32(inst->featureSpecDiff)); - tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(inst->featureSpecDiff, normTmp); // Q(normTmp-2*stages) - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->timeAvgMagnEnergy, 20 - inst->stages - - normTmp); - if (tmpU32no2 > 0) { - // Q(20 - inst->stages) - tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); - } else { - tmpU32no1 = (WebRtc_UWord32)(0x7fffffff); - } - } - tmpU32no3 = WEBRTC_SPL_UDIV(WEBRTC_SPL_LSHIFT_U32(inst->thresholdSpecDiff, 17), 25); - tmpU32no2 = tmpU32no1 - tmpU32no3; - nShifts = 1; - tmpIndFX = 16384; // Q14(1.0) - //use larger width in tanh map for pause regions - if (tmpU32no2 & 0x80000000) { - tmpIndFX = 0; - tmpU32no2 = tmpU32no3 - tmpU32no1; - //widthPrior = widthPrior * 2.0; - nShifts--; - } - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, nShifts); - // compute indicator function: sigmoid map - /* FLOAT code - indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); - */ - tableIndex = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 14); - if (tableIndex < 16) { - tmp16no2 = kIndicatorTable[tableIndex]; - tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; - frac = (WebRtc_Word16)(tmpU32no1 & 0x00003fff); // Q14 - tmp16no2 += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - tmp16no1, frac, 14); - if (tmpIndFX) { - tmpIndFX = 8192 + tmp16no2; - } else { - tmpIndFX = 8192 - tmp16no2; - } - } - indPriorFX += WEBRTC_SPL_MUL_16_16(inst->weightSpecDiff, tmpIndFX); // 6*Q14 - } - - //combine the indicator function with the feature weights - // FLOAT code - // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * indicator1 + weightIndPrior2 * indicator2); - indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 - // done with computing indicator function - - //compute the prior probability - // FLOAT code - // inst->priorNonSpeechProb += PRIOR_UPDATE * (indPriorNonSpeech - inst->priorNonSpeechProb); - tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 - inst->priorNonSpeechProb += (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - PRIOR_UPDATE_Q14, tmp16, 14); // Q14 - - //final speech probability: combine prior model with LR factor: - - memset(nonSpeechProbFinal, 0, sizeof(WebRtc_UWord16) * inst->magnLen); - - if (inst->priorNonSpeechProb > 0) { - for (i = 0; i < inst->magnLen; i++) { - // FLOAT code - // invLrt = exp(inst->logLrtTimeAvg[i]); - // invLrt = inst->priorSpeechProb * invLrt; - // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / (1.0 - inst->priorSpeechProb + invLrt); - // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; - // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / (inst->priorNonSpeechProb + invLrt); - if (inst->logLrtTimeAvgW32[i] < 65300) { - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(inst->logLrtTimeAvgW32[i], 23637), - 14); // Q12 - intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 12); - if (intPart < -8) { - intPart = -8; - } - frac = (WebRtc_Word16)(tmp32no1 & 0x00000fff); // Q12 - - // Quadratic approximation of 2^frac - tmp32no2 = WEBRTC_SPL_RSHIFT_W32(frac * frac * 44, 19); // Q12 - tmp32no2 += 
WEBRTC_SPL_MUL_16_16_RSFT(frac, 84, 7); // Q12 - invLrtFX = WEBRTC_SPL_LSHIFT_W32(1, 8 + intPart) - + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 - - normTmp = WebRtcSpl_NormW32(invLrtFX); - normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); - if (normTmp + normTmp2 >= 7) { - if (normTmp + normTmp2 < 15) { - invLrtFX = WEBRTC_SPL_RSHIFT_W32(invLrtFX, 15 - normTmp2 - normTmp); - // Q(normTmp+normTmp2-7) - tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); - // Q(normTmp+normTmp2+7) - invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); // Q14 - } else { - tmp32no1 = WEBRTC_SPL_MUL_32_16(invLrtFX, (16384 - inst->priorNonSpeechProb)); // Q22 - invLrtFX = WEBRTC_SPL_RSHIFT_W32(tmp32no1, 8); // Q14 - } - - tmp32no1 = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)inst->priorNonSpeechProb, 8); // Q22 - - nonSpeechProbFinal[i] = (WebRtc_UWord16)WEBRTC_SPL_DIV(tmp32no1, - (WebRtc_Word32)inst->priorNonSpeechProb + invLrtFX); // Q8 - } - } - } + inst->featureSpecDiff += tmpU32no2 >> 8; // Q(-2*stages) } } // Transform input (speechFrame) to frequency domain magnitude (magnU16) -void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* magnU16) { +void WebRtcNsx_DataAnalysis(NoiseSuppressionFixedC* inst, + short* speechFrame, + uint16_t* magnU16) { + uint32_t tmpU32no1; - WebRtc_UWord32 tmpU32no1, tmpU32no2; + int32_t tmp_1_w32 = 0; + int32_t tmp_2_w32 = 0; + int32_t sum_log_magn = 0; + int32_t sum_log_i_log_magn = 0; - WebRtc_Word32 tmp_1_w32 = 0; - WebRtc_Word32 tmp_2_w32 = 0; - WebRtc_Word32 sum_log_magn = 0; - WebRtc_Word32 sum_log_i_log_magn = 0; + uint16_t sum_log_magn_u16 = 0; + uint16_t tmp_u16 = 0; - WebRtc_UWord16 sum_log_magn_u16 = 0; - WebRtc_UWord16 tmp_u16 = 0; + int16_t sum_log_i = 0; + int16_t sum_log_i_square = 0; + int16_t frac = 0; + int16_t log2 = 0; + int16_t matrix_determinant = 0; + int16_t maxWinData; - WebRtc_Word16 sum_log_i = 0; - WebRtc_Word16 sum_log_i_square = 0; - WebRtc_Word16 frac = 0; - WebRtc_Word16 log2 = 0; - WebRtc_Word16 matrix_determinant = 0; - WebRtc_Word16 winData[ANAL_BLOCKL_MAX], maxWinData; - WebRtc_Word16 realImag[ANAL_BLOCKL_MAX << 1]; - - int i, j; - int outCFFT; + size_t i, j; int zeros; int net_norm = 0; int right_shifts_in_magnU16 = 0; int right_shifts_in_initMagnEst = 0; - // For lower band do all processing - // update analysis buffer for L band - WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, - inst->anaLen - inst->blockLen10ms); - WEBRTC_SPL_MEMCPY_W16(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, - speechFrame, inst->blockLen10ms); + int16_t winData_buff[ANAL_BLOCKL_MAX * 2 + 16]; + int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16]; + + // Align the structures to 32-byte boundary for the FFT function. + int16_t* winData = (int16_t*) (((uintptr_t)winData_buff + 31) & ~31); + int16_t* realImag = (int16_t*) (((uintptr_t) realImag_buff + 31) & ~31); + + // Update analysis buffer for lower band, and window data before FFT. 
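// What the generic WebRtcNsx_AnalysisUpdate call below is expected to do,
// reconstructed from the inline code this hunk removes (illustrative
// sketch, not the actual nsx_core.c definition):
//
//   // Slide the analysis buffer one 10 ms block and append the new frame.
//   memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms,
//          (inst->anaLen - inst->blockLen10ms) * sizeof(int16_t));
//   memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms,
//          speechFrame, inst->blockLen10ms * sizeof(int16_t));
//   // Window in Q14 with rounding: winData = (window * x + 2^13) >> 14, Q0.
//   for (i = 0; i < inst->anaLen; i++) {
//     winData[i] = (int16_t)((inst->window[i] * inst->analysisBuffer[i] +
//                             (1 << 13)) >> 14);
//   }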
+ WebRtcNsx_AnalysisUpdate(inst, winData, speechFrame); - // Window data before FFT - for (i = 0; i < inst->anaLen; i++) { - winData[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - inst->window[i], inst->analysisBuffer[i], 14); // Q0 - } // Get input energy - inst->energyIn = WebRtcSpl_Energy(winData, (int)inst->anaLen, &(inst->scaleEnergyIn)); + inst->energyIn = + WebRtcSpl_Energy(winData, inst->anaLen, &inst->scaleEnergyIn); // Reset zero input flag inst->zeroInputSignal = 0; @@ -1459,41 +1239,36 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* right_shifts_in_magnU16 = WEBRTC_SPL_MAX(right_shifts_in_magnU16, 0); // create realImag as winData interleaved with zeros (= imag. part), normalize it - for (i = 0; i < inst->anaLen; i++) { - j = WEBRTC_SPL_LSHIFT_W16(i, 1); - realImag[j] = WEBRTC_SPL_LSHIFT_W16(winData[i], inst->normData); // Q(normData) - realImag[j + 1] = 0; // Insert zeros in imaginary part - } + WebRtcNsx_NormalizeRealBuffer(inst, winData, realImag); - // bit-reverse position of elements in array and FFT the array - WebRtcSpl_ComplexBitReverse(realImag, inst->stages); // Q(normData-stages) - outCFFT = WebRtcSpl_ComplexFFT(realImag, inst->stages, 1); + // FFT output will be in winData[]. + WebRtcSpl_RealForwardFFT(inst->real_fft, realImag, winData); inst->imag[0] = 0; // Q(normData-stages) inst->imag[inst->anaLen2] = 0; - inst->real[0] = realImag[0]; // Q(normData-stages) - inst->real[inst->anaLen2] = realImag[inst->anaLen]; + inst->real[0] = winData[0]; // Q(normData-stages) + inst->real[inst->anaLen2] = winData[inst->anaLen]; // Q(2*(normData-stages)) - inst->magnEnergy = (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(inst->real[0], inst->real[0]); - inst->magnEnergy += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(inst->real[inst->anaLen2], - inst->real[inst->anaLen2]); - magnU16[0] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages) - magnU16[inst->anaLen2] = (WebRtc_UWord16)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]); - inst->sumMagn = (WebRtc_UWord32)magnU16[0]; // Q(normData-stages) - inst->sumMagn += (WebRtc_UWord32)magnU16[inst->anaLen2]; + inst->magnEnergy = (uint32_t)(inst->real[0] * inst->real[0]); + inst->magnEnergy += (uint32_t)(inst->real[inst->anaLen2] * + inst->real[inst->anaLen2]); + magnU16[0] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[0]); // Q(normData-stages) + magnU16[inst->anaLen2] = (uint16_t)WEBRTC_SPL_ABS_W16(inst->real[inst->anaLen2]); + inst->sumMagn = (uint32_t)magnU16[0]; // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[inst->anaLen2]; if (inst->blockIndex >= END_STARTUP_SHORT) { for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { - inst->real[i] = realImag[j]; - inst->imag[i] = -realImag[j + 1]; + inst->real[i] = winData[j]; + inst->imag[i] = -winData[j + 1]; // magnitude spectrum // energy in Q(2*(normData-stages)) - tmpU32no1 = (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j], realImag[j]); - tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]); + tmpU32no1 = (uint32_t)(winData[j] * winData[j]); + tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]); inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) - magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages) - inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages) + magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages) } } else { // @@ -1501,74 +1276,62 @@ void 
WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* // // Switch initMagnEst to Q(minNorm-stages) - inst->initMagnEst[0] = WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[0], - right_shifts_in_initMagnEst); - inst->initMagnEst[inst->anaLen2] = - WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[inst->anaLen2], - right_shifts_in_initMagnEst); // Q(minNorm-stages) + inst->initMagnEst[0] >>= right_shifts_in_initMagnEst; + inst->initMagnEst[inst->anaLen2] >>= right_shifts_in_initMagnEst; - // Shift magnU16 to same domain as initMagnEst - tmpU32no1 = WEBRTC_SPL_RSHIFT_W32((WebRtc_UWord32)magnU16[0], - right_shifts_in_magnU16); // Q(minNorm-stages) - tmpU32no2 = WEBRTC_SPL_RSHIFT_W32((WebRtc_UWord32)magnU16[inst->anaLen2], - right_shifts_in_magnU16); // Q(minNorm-stages) - - // Update initMagnEst - inst->initMagnEst[0] += tmpU32no1; // Q(minNorm-stages) - inst->initMagnEst[inst->anaLen2] += tmpU32no2; // Q(minNorm-stages) + // Update initMagnEst with magnU16 in Q(minNorm-stages). + inst->initMagnEst[0] += magnU16[0] >> right_shifts_in_magnU16; + inst->initMagnEst[inst->anaLen2] += + magnU16[inst->anaLen2] >> right_shifts_in_magnU16; log2 = 0; if (magnU16[inst->anaLen2]) { // Calculate log2(magnU16[inst->anaLen2]) - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magnU16[inst->anaLen2]); - frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[inst->anaLen2] << zeros) & + zeros = WebRtcSpl_NormU32((uint32_t)magnU16[inst->anaLen2]); + frac = (int16_t)((((uint32_t)magnU16[inst->anaLen2] << zeros) & 0x7FFFFFFF) >> 23); // Q8 // log2(magnU16(i)) in Q8 assert(frac < 256); - log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); + log2 = (int16_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); } - sum_log_magn = (WebRtc_Word32)log2; // Q8 + sum_log_magn = (int32_t)log2; // Q8 // sum_log_i_log_magn in Q17 - sum_log_i_log_magn = (WEBRTC_SPL_MUL_16_16(kLogIndex[inst->anaLen2], log2) >> 3); + sum_log_i_log_magn = (kLogIndex[inst->anaLen2] * log2) >> 3; for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { - inst->real[i] = realImag[j]; - inst->imag[i] = -realImag[j + 1]; + inst->real[i] = winData[j]; + inst->imag[i] = -winData[j + 1]; // magnitude spectrum // energy in Q(2*(normData-stages)) - tmpU32no1 = (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j], realImag[j]); - tmpU32no1 += (WebRtc_UWord32)WEBRTC_SPL_MUL_16_16(realImag[j + 1], realImag[j + 1]); + tmpU32no1 = (uint32_t)(winData[j] * winData[j]); + tmpU32no1 += (uint32_t)(winData[j + 1] * winData[j + 1]); inst->magnEnergy += tmpU32no1; // Q(2*(normData-stages)) - magnU16[i] = (WebRtc_UWord16)WebRtcSpl_Sqrt(tmpU32no1); // Q(normData-stages) - inst->sumMagn += (WebRtc_UWord32)magnU16[i]; // Q(normData-stages) + magnU16[i] = (uint16_t)WebRtcSpl_SqrtFloor(tmpU32no1); // Q(normData-stages) + inst->sumMagn += (uint32_t)magnU16[i]; // Q(normData-stages) // Switch initMagnEst to Q(minNorm-stages) - inst->initMagnEst[i] = WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[i], - right_shifts_in_initMagnEst); + inst->initMagnEst[i] >>= right_shifts_in_initMagnEst; - // Shift magnU16 to same domain as initMagnEst, i.e., Q(minNorm-stages) - tmpU32no1 = WEBRTC_SPL_RSHIFT_W32((WebRtc_UWord32)magnU16[i], - right_shifts_in_magnU16); - // Update initMagnEst - inst->initMagnEst[i] += tmpU32no1; // Q(minNorm-stages) + // Update initMagnEst with magnU16 in Q(minNorm-stages). + inst->initMagnEst[i] += magnU16[i] >> right_shifts_in_magnU16; if (i >= kStartBand) { // For pink noise estimation. 
Collect data neglecting lower frequency band log2 = 0; if (magnU16[i]) { - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magnU16[i]); - frac = (WebRtc_Word16)((((WebRtc_UWord32)magnU16[i] << zeros) & + zeros = WebRtcSpl_NormU32((uint32_t)magnU16[i]); + frac = (int16_t)((((uint32_t)magnU16[i] << zeros) & 0x7FFFFFFF) >> 23); // log2(magnU16(i)) in Q8 assert(frac < 256); - log2 = (WebRtc_Word16)(((31 - zeros) << 8) + log2 = (int16_t)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); } - sum_log_magn += (WebRtc_Word32)log2; // Q8 + sum_log_magn += (int32_t)log2; // Q8 // sum_log_i_log_magn in Q17 - sum_log_i_log_magn += (WEBRTC_SPL_MUL_16_16(kLogIndex[i], log2) >> 3); + sum_log_i_log_magn += (kLogIndex[i] * log2) >> 3; } } @@ -1579,16 +1342,15 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* // Estimate White noise // Switch whiteNoiseLevel to Q(minNorm-stages) - inst->whiteNoiseLevel = WEBRTC_SPL_RSHIFT_U32(inst->whiteNoiseLevel, - right_shifts_in_initMagnEst); + inst->whiteNoiseLevel >>= right_shifts_in_initMagnEst; // Update the average magnitude spectrum, used as noise estimate. tmpU32no1 = WEBRTC_SPL_UMUL_32_16(inst->sumMagn, inst->overdrive); - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages + 8); + tmpU32no1 >>= inst->stages + 8; // Replacing division above with 'stages' shifts // Shift to same Q-domain as whiteNoiseLevel - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, right_shifts_in_magnU16); + tmpU32no1 >>= right_shifts_in_magnU16; // This operation is safe from wrap around as long as END_STARTUP_SHORT < 128 assert(END_STARTUP_SHORT < 128); inst->whiteNoiseLevel += tmpU32no1; // Q(minNorm-stages) @@ -1603,13 +1365,12 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* sum_log_i_square = kSumSquareLogIndex[kStartBand]; // Q2 if (inst->fs == 8000) { // Adjust values to shorter blocks in narrow band. - tmp_1_w32 = (WebRtc_Word32)matrix_determinant; - tmp_1_w32 += WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], sum_log_i, 9); - tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT(kSumLogIndex[65], kSumLogIndex[65], 10); - tmp_1_w32 -= WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)sum_log_i_square, 4); - tmp_1_w32 -= WEBRTC_SPL_MUL_16_16_RSFT( - (WebRtc_Word16)(inst->magnLen - kStartBand), kSumSquareLogIndex[65], 2); - matrix_determinant = (WebRtc_Word16)tmp_1_w32; + tmp_1_w32 = (int32_t)matrix_determinant; + tmp_1_w32 += (kSumLogIndex[65] * sum_log_i) >> 9; + tmp_1_w32 -= (kSumLogIndex[65] * kSumLogIndex[65]) >> 10; + tmp_1_w32 -= (int32_t)sum_log_i_square << 4; + tmp_1_w32 -= ((inst->magnLen - kStartBand) * kSumSquareLogIndex[65]) >> 2; + matrix_determinant = (int16_t)tmp_1_w32; sum_log_i -= kSumLogIndex[65]; // Q5 sum_log_i_square -= kSumSquareLogIndex[65]; // Q2 } @@ -1619,24 +1380,24 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* if (zeros < 0) { zeros = 0; } - tmp_1_w32 = WEBRTC_SPL_LSHIFT_W32(sum_log_magn, 1); // Q9 - sum_log_magn_u16 = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_W32(tmp_1_w32, zeros);//Q(9-zeros) + tmp_1_w32 = sum_log_magn << 1; // Q9 + sum_log_magn_u16 = (uint16_t)(tmp_1_w32 >> zeros); // Q(9-zeros). // Calculate and update pinkNoiseNumerator. Result in Q11. 
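// The block below solves the 2x2 least-squares normal equations of a
// pink-noise model, log2|X(i)| ~= a - b*log2(i) for i >= kStartBand, in
// fixed point: sum_log_i, sum_log_i_square, sum_log_magn and
// sum_log_i_log_magn are the usual regression sums, and
// matrix_determinant is n*sum(x^2) - sum(x)^2. As a floating-point
// reference (hypothetical helper, for illustration only):
//
//   static void PinkNoiseFit(const double* log_magn, size_t start,
//                            size_t len, double* a, double* b) {
//     double n = 0, sx = 0, sxx = 0, sy = 0, sxy = 0;
//     for (size_t i = start; i < len; ++i) {
//       double x = log2((double)i), y = log_magn[i];
//       n += 1; sx += x; sxx += x * x; sy += y; sxy += x * y;
//     }
//     double det = n * sxx - sx * sx;
//     *a = (sxx * sy - sx * sxy) / det;  // -> pinkNoiseNumerator (level)
//     *b = (sx * sy - n * sxy) / det;    // -> pinkNoiseExp (clamped >= 0)
//   }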
tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i_square, sum_log_magn_u16); // Q(11-zeros) - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32((WebRtc_UWord32)sum_log_i_log_magn, 12); // Q5 + tmpU32no1 = sum_log_i_log_magn >> 12; // Q5 // Shift the largest value of sum_log_i and tmp32no3 before multiplication - tmp_u16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)sum_log_i, 1); // Q6 - if ((WebRtc_UWord32)sum_log_i > tmpU32no1) { - tmp_u16 = WEBRTC_SPL_RSHIFT_U16(tmp_u16, zeros); + tmp_u16 = ((uint16_t)sum_log_i << 1); // Q6 + if ((uint32_t)sum_log_i > tmpU32no1) { + tmp_u16 >>= zeros; } else { - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, zeros); + tmpU32no1 >>= zeros; } - tmp_2_w32 -= (WebRtc_Word32)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros) - matrix_determinant = WEBRTC_SPL_RSHIFT_W16(matrix_determinant, zeros); // Q(-zeros) + tmp_2_w32 -= (int32_t)WEBRTC_SPL_UMUL_32_16(tmpU32no1, tmp_u16); // Q(11-zeros) + matrix_determinant >>= zeros; // Q(-zeros) tmp_2_w32 = WebRtcSpl_DivW32W16(tmp_2_w32, matrix_determinant); // Q11 - tmp_2_w32 += WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)net_norm, 11); // Q11 + tmp_2_w32 += (int32_t)net_norm << 11; // Q11 if (tmp_2_w32 < 0) { tmp_2_w32 = 0; } @@ -1644,9 +1405,8 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* // Calculate and update pinkNoiseExp. Result in Q14. tmp_2_w32 = WEBRTC_SPL_MUL_16_U16(sum_log_i, sum_log_magn_u16); // Q(14-zeros) - tmp_1_w32 = WEBRTC_SPL_RSHIFT_W32(sum_log_i_log_magn, 3 + zeros); - tmp_1_w32 = WEBRTC_SPL_MUL((WebRtc_Word32)(inst->magnLen - kStartBand), - tmp_1_w32); + tmp_1_w32 = sum_log_i_log_magn >> (3 + zeros); + tmp_1_w32 *= inst->magnLen - kStartBand; tmp_2_w32 -= tmp_1_w32; // Q(14-zeros) if (tmp_2_w32 > 0) { // If the exponential parameter is negative force it to zero, which means a @@ -1657,16 +1417,21 @@ void WebRtcNsx_DataAnalysis(NsxInst_t* inst, short* speechFrame, WebRtc_UWord16* } } -void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { - WebRtc_Word32 tmp32no1; - WebRtc_Word32 energyOut; +void WebRtcNsx_DataSynthesis(NoiseSuppressionFixedC* inst, short* outFrame) { + int32_t energyOut; - WebRtc_Word16 realImag[ANAL_BLOCKL_MAX << 1]; - WebRtc_Word16 tmp16no1, tmp16no2; - WebRtc_Word16 energyRatio; - WebRtc_Word16 gainFactor, gainFactor1, gainFactor2; + int16_t realImag_buff[ANAL_BLOCKL_MAX * 2 + 16]; + int16_t rfft_out_buff[ANAL_BLOCKL_MAX * 2 + 16]; - int i, j; + // Align the structures to 32-byte boundary for the FFT function. 
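// The two casts below round the start of each stack buffer up to the next
// 32-byte boundary; the arrays carry 16 extra int16_t (32 bytes) of slack
// so the aligned region still holds ANAL_BLOCKL_MAX * 2 samples. The same
// align-up idiom as a stand-alone helper (hypothetical name):
//
//   static int16_t* Align32(void* p) {
//     return (int16_t*)(((uintptr_t)p + 31) & ~(uintptr_t)31);
//   }
//   // int16_t buff[ANAL_BLOCKL_MAX * 2 + 16];
//   // int16_t* aligned = Align32(buff);  // 32-byte aligned, full size.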
+ int16_t* realImag = (int16_t*) (((uintptr_t)realImag_buff + 31) & ~31); + int16_t* rfft_out = (int16_t*) (((uintptr_t) rfft_out_buff + 31) & ~31); + + int16_t tmp16no1, tmp16no2; + int16_t energyRatio; + int16_t gainFactor, gainFactor1, gainFactor2; + + size_t i; int outCIFFT; int scaleEnergyOut = 0; @@ -1677,65 +1442,40 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { outFrame[i] = inst->synthesisBuffer[i]; // Q0 } // update synthesis buffer - WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, - inst->synthesisBuffer + inst->blockLen10ms, - inst->anaLen - inst->blockLen10ms); + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); return; } - // Filter the data in the frequency domain - for (i = 0; i < inst->magnLen; i++) { - inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - inst->real[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) - inst->imag[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT( - inst->imag[i], (WebRtc_Word16)(inst->noiseSupFilter[i]), 14); // Q(normData-stages) - } - // back to time domain - // Create spectrum - realImag[0] = inst->real[0]; - realImag[1] = -inst->imag[0]; - for (i = 1; i < inst->anaLen2; i++) { - j = WEBRTC_SPL_LSHIFT_W16(i, 1); - tmp16no1 = (inst->anaLen << 1) - j; - realImag[j] = inst->real[i]; - realImag[j + 1] = -inst->imag[i]; - realImag[tmp16no1] = inst->real[i]; - realImag[tmp16no1 + 1] = inst->imag[i]; - } - realImag[inst->anaLen] = inst->real[inst->anaLen2]; - realImag[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; - // bit-reverse position of elements in array and IFFT it - WebRtcSpl_ComplexBitReverse(realImag, inst->stages); - outCIFFT = WebRtcSpl_ComplexIFFT(realImag, inst->stages, 1); + // Filter the data in the frequency domain, and create spectrum. + WebRtcNsx_PrepareSpectrum(inst, realImag); - for (i = 0; i < inst->anaLen; i++) { - j = WEBRTC_SPL_LSHIFT_W16(i, 1); - tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)realImag[j], - outCIFFT - inst->normData); - inst->real[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, - tmp32no1, - WEBRTC_SPL_WORD16_MIN); - } + // Inverse FFT output will be in rfft_out[]. + outCIFFT = WebRtcSpl_RealInverseFFT(inst->real_fft, realImag, rfft_out); + + WebRtcNsx_Denormalize(inst, rfft_out, outCIFFT); //scale factor: only do it after END_STARTUP_LONG time gainFactor = 8192; // 8192 = Q13(1.0) if (inst->gainMap == 1 && inst->blockIndex > END_STARTUP_LONG && inst->energyIn > 0) { - energyOut = WebRtcSpl_Energy(inst->real, (int)inst->anaLen, &scaleEnergyOut); // Q(-scaleEnergyOut) + // Q(-scaleEnergyOut) + energyOut = WebRtcSpl_Energy(inst->real, inst->anaLen, &scaleEnergyOut); if (scaleEnergyOut == 0 && !(energyOut & 0x7f800000)) { energyOut = WEBRTC_SPL_SHIFT_W32(energyOut, 8 + scaleEnergyOut - inst->scaleEnergyIn); } else { - inst->energyIn = WEBRTC_SPL_RSHIFT_W32(inst->energyIn, 8 + scaleEnergyOut - - inst->scaleEnergyIn); // Q(-8-scaleEnergyOut) + // |energyIn| is currently in Q(|scaleEnergyIn|), but to later on end up + // with an |energyRatio| in Q8 we need to change the Q-domain to + // Q(-8-scaleEnergyOut). 
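// To spell the Q-domain bookkeeping out: |energyOut| comes back from
// WebRtcSpl_Energy() in Q(-scaleEnergyOut), so once |energyIn| sits in
// Q(-8-scaleEnergyOut) the quotient energyOut/energyIn carries
// Q(-scaleEnergyOut - (-8 - scaleEnergyOut)) = Q8, i.e. the ratio times
// 256. The "+ inst->energyIn / 2" in the division below rounds to nearest
// instead of truncating.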
+ inst->energyIn >>= 8 + scaleEnergyOut - inst->scaleEnergyIn; } assert(inst->energyIn > 0); - energyRatio = (WebRtc_Word16)WEBRTC_SPL_DIV(energyOut - + WEBRTC_SPL_RSHIFT_W32(inst->energyIn, 1), inst->energyIn); // Q8 + energyRatio = (energyOut + inst->energyIn / 2) / inst->energyIn; // Q8 // Limit the ratio to [0, 1] in Q8, i.e., [0, 256] energyRatio = WEBRTC_SPL_SAT(256, energyRatio, 0); @@ -1747,105 +1487,107 @@ void WebRtcNsx_DataSynthesis(NsxInst_t* inst, short* outFrame) { //combine both scales with speech/noise prob: note prior (priorSpeechProb) is not frequency dependent // factor = inst->priorSpeechProb*factor1 + (1.0-inst->priorSpeechProb)*factor2; // original code - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(16384 - inst->priorNonSpeechProb, - gainFactor1, 14); // Q13 16384 = Q14(1.0) - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(inst->priorNonSpeechProb, - gainFactor2, 14); // Q13; + tmp16no1 = (int16_t)(((16384 - inst->priorNonSpeechProb) * gainFactor1) >> + 14); // in Q13, where 16384 = Q14(1.0) + tmp16no2 = (int16_t)((inst->priorNonSpeechProb * gainFactor2) >> 14); gainFactor = tmp16no1 + tmp16no2; // Q13 - } // out of flag_gain_map==1 + } // out of flag_gain_map==1 - // synthesis - for (i = 0; i < inst->anaLen; i++) { - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(inst->window[i], - inst->real[i], 14); // Q0, window in Q14 - tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(tmp16no1, gainFactor, 13); // Q0 - // Down shift with rounding - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, tmp32no1, - WEBRTC_SPL_WORD16_MIN); // Q0 - inst->synthesisBuffer[i] = WEBRTC_SPL_ADD_SAT_W16(inst->synthesisBuffer[i], tmp16no2); // Q0 - } - - // read out fully processed segment - for (i = 0; i < inst->blockLen10ms; i++) { - outFrame[i] = inst->synthesisBuffer[i]; // Q0 - } - // update synthesis buffer - WEBRTC_SPL_MEMCPY_W16(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, - inst->anaLen - inst->blockLen10ms); - WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms, - inst->blockLen10ms); + // Synthesis, read out fully processed segment, and update synthesis buffer. 
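// What the generic WebRtcNsx_SynthesisUpdate is expected to cover,
// paraphrasing the inline code this hunk removes (illustrative sketch,
// not the actual nsx_core.c definition):
//
//   for (i = 0; i < inst->anaLen; i++) {
//     // Window (Q14) and apply the gain (Q13), both with rounding, Q0 out.
//     int16_t s = (int16_t)((inst->window[i] * inst->real[i] +
//                            (1 << 13)) >> 14);
//     int32_t v = (s * gain_factor + (1 << 12)) >> 13;
//     int16_t w = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, v,
//                                         WEBRTC_SPL_WORD16_MIN);
//     // Overlap-add into the synthesis buffer with saturation.
//     inst->synthesisBuffer[i] =
//         WebRtcSpl_AddSatW16(inst->synthesisBuffer[i], w);
//   }
//   for (i = 0; i < inst->blockLen10ms; i++)
//     out_frame[i] = inst->synthesisBuffer[i];            // Q0
//   // Slide the buffer one 10 ms block and zero the tail.
//   memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms,
//          (inst->anaLen - inst->blockLen10ms) * sizeof(int16_t));
//   WebRtcSpl_ZerosArrayW16(
//       inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms,
//       inst->blockLen10ms);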
+ WebRtcNsx_SynthesisUpdate(inst, outFrame, gainFactor); } -int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFrameHB, - short* outFrame, short* outFrameHB) { +void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst, + const short* const* speechFrame, + int num_bands, + short* const* outFrame) { // main routine for noise suppression - WebRtc_UWord32 tmpU32no1, tmpU32no2, tmpU32no3; - WebRtc_UWord32 satMax, maxNoiseU32; - WebRtc_UWord32 tmpMagnU32, tmpNoiseU32; - WebRtc_UWord32 nearMagnEst; - WebRtc_UWord32 noiseUpdateU32; - WebRtc_UWord32 noiseU32[HALF_ANAL_BLOCKL]; - WebRtc_UWord32 postLocSnr[HALF_ANAL_BLOCKL]; - WebRtc_UWord32 priorLocSnr[HALF_ANAL_BLOCKL]; - WebRtc_UWord32 prevNearSnr[HALF_ANAL_BLOCKL]; - WebRtc_UWord32 curNearSnr; - WebRtc_UWord32 priorSnr; - WebRtc_UWord32 noise_estimate = 0; - WebRtc_UWord32 noise_estimate_avg = 0; - WebRtc_UWord32 numerator = 0; + uint32_t tmpU32no1, tmpU32no2, tmpU32no3; + uint32_t satMax, maxNoiseU32; + uint32_t tmpMagnU32, tmpNoiseU32; + uint32_t nearMagnEst; + uint32_t noiseUpdateU32; + uint32_t noiseU32[HALF_ANAL_BLOCKL]; + uint32_t postLocSnr[HALF_ANAL_BLOCKL]; + uint32_t priorLocSnr[HALF_ANAL_BLOCKL]; + uint32_t prevNearSnr[HALF_ANAL_BLOCKL]; + uint32_t curNearSnr; + uint32_t priorSnr; + uint32_t noise_estimate = 0; + uint32_t noise_estimate_avg = 0; + uint32_t numerator = 0; - WebRtc_Word32 tmp32no1, tmp32no2; - WebRtc_Word32 pink_noise_num_avg = 0; + int32_t tmp32no1, tmp32no2; + int32_t pink_noise_num_avg = 0; - WebRtc_UWord16 tmpU16no1; - WebRtc_UWord16 magnU16[HALF_ANAL_BLOCKL]; - WebRtc_UWord16 prevNoiseU16[HALF_ANAL_BLOCKL]; - WebRtc_UWord16 nonSpeechProbFinal[HALF_ANAL_BLOCKL]; - WebRtc_UWord16 gammaNoise, prevGammaNoise; - WebRtc_UWord16 noiseSupFilterTmp[HALF_ANAL_BLOCKL]; + uint16_t tmpU16no1; + uint16_t magnU16[HALF_ANAL_BLOCKL]; + uint16_t prevNoiseU16[HALF_ANAL_BLOCKL]; + uint16_t nonSpeechProbFinal[HALF_ANAL_BLOCKL]; + uint16_t gammaNoise, prevGammaNoise; + uint16_t noiseSupFilterTmp[HALF_ANAL_BLOCKL]; - WebRtc_Word16 qMagn, qNoise; - WebRtc_Word16 avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB; - WebRtc_Word16 pink_noise_exp_avg = 0; + int16_t qMagn, qNoise; + int16_t avgProbSpeechHB, gainModHB, avgFilterGainHB, gainTimeDomainHB; + int16_t pink_noise_exp_avg = 0; - int i; + size_t i, j; int nShifts, postShifts; int norm32no1, norm32no2; int flag, sign; int q_domain_to_use = 0; + // Code for ARMv7-Neon platform assumes the following: + assert(inst->anaLen > 0); + assert(inst->anaLen2 > 0); + assert(inst->anaLen % 16 == 0); + assert(inst->anaLen2 % 8 == 0); + assert(inst->blockLen10ms > 0); + assert(inst->blockLen10ms % 16 == 0); + assert(inst->magnLen == inst->anaLen2 + 1); + #ifdef NS_FILEDEBUG - fwrite(spframe, sizeof(short), inst->blockLen10ms, inst->infile); + if (fwrite(spframe, sizeof(short), + inst->blockLen10ms, inst->infile) != inst->blockLen10ms) { + assert(false); + } #endif // Check that initialization has been done - if (inst->initFlag != 1) { - return -1; - } - // Check for valid pointers based on sampling rate - if ((inst->fs == 32000) && (speechFrameHB == NULL)) { - return -1; + assert(inst->initFlag == 1); + assert((num_bands - 1) <= NUM_HIGH_BANDS_MAX); + + const short* const* speechFrameHB = NULL; + short* const* outFrameHB = NULL; + size_t num_high_bands = 0; + if (num_bands > 1) { + speechFrameHB = &speechFrame[1]; + outFrameHB = &outFrame[1]; + num_high_bands = (size_t)(num_bands - 1); } // Store speechFrame and transform to frequency domain - 
WebRtcNsx_DataAnalysis(inst, speechFrame, magnU16); + WebRtcNsx_DataAnalysis(inst, (short*)speechFrame[0], magnU16); if (inst->zeroInputSignal) { - WebRtcNsx_DataSynthesis(inst, outFrame); + WebRtcNsx_DataSynthesis(inst, outFrame[0]); - if (inst->fs == 32000) { + if (num_bands > 1) { // update analysis buffer for H band // append new data to buffer FX - WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms, - inst->anaLen - inst->blockLen10ms); - WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms, - speechFrameHB, inst->blockLen10ms); - for (i = 0; i < inst->blockLen10ms; i++) { - outFrameHB[i] = inst->dataBufHBFX[i]; // Q0 + for (i = 0; i < num_high_bands; ++i) { + int block_shift = inst->anaLen - inst->blockLen10ms; + memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms, + block_shift * sizeof(*inst->dataBufHBFX[i])); + memcpy(inst->dataBufHBFX[i] + block_shift, speechFrameHB[i], + inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i])); + for (j = 0; j < inst->blockLen10ms; j++) { + outFrameHB[i][j] = inst->dataBufHBFX[i][j]; // Q0 + } } - } // end of H band gain computation - return 0; + } // end of H band gain computation + return; } // Update block index when we have something to process @@ -1863,7 +1605,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram //noise estimate from previous frame for (i = 0; i < inst->magnLen; i++) { - prevNoiseU16[i] = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(inst->prevNoiseU32[i], 11); // Q(prevQNoise) + prevNoiseU16[i] = (uint16_t)(inst->prevNoiseU32[i] >> 11); // Q(prevQNoise) } if (inst->blockIndex < END_STARTUP_SHORT) { @@ -1873,10 +1615,10 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // Calculate frequency independent parts in parametric noise estimate and calculate // the estimate for the lower frequency band (same values for all frequency bins) if (inst->pinkNoiseExp) { - pink_noise_exp_avg = (WebRtc_Word16)WebRtcSpl_DivW32W16(inst->pinkNoiseExp, - (WebRtc_Word16)(inst->blockIndex + 1)); // Q14 + pink_noise_exp_avg = (int16_t)WebRtcSpl_DivW32W16(inst->pinkNoiseExp, + (int16_t)(inst->blockIndex + 1)); // Q14 pink_noise_num_avg = WebRtcSpl_DivW32W16(inst->pinkNoiseNumerator, - (WebRtc_Word16)(inst->blockIndex + 1)); // Q11 + (int16_t)(inst->blockIndex + 1)); // Q11 WebRtcNsx_CalcParametricNoiseEstimate(inst, pink_noise_exp_avg, pink_noise_num_avg, @@ -1908,7 +1650,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // numerator = (initMagnEst - noise_estimate * overdrive) // Result in Q(8+minNorm-stages) tmpU32no1 = WEBRTC_SPL_UMUL_32_16(noise_estimate, inst->overdrive); - numerator = WEBRTC_SPL_LSHIFT_U32(inst->initMagnEst[i], 8); + numerator = inst->initMagnEst[i] << 8; if (numerator > tmpU32no1) { // Suppression filter coefficient larger than zero, so calculate. numerator -= tmpU32no1; @@ -1919,18 +1661,18 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram nShifts = WEBRTC_SPL_SAT(6, nShifts, 0); // Shift numerator to Q(nShifts+8+minNorm-stages) - numerator = WEBRTC_SPL_LSHIFT_U32(numerator, nShifts); + numerator <<= nShifts; // Shift denominator to Q(nShifts-6+minNorm-stages) - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(inst->initMagnEst[i], 6 - nShifts); + tmpU32no1 = inst->initMagnEst[i] >> (6 - nShifts); if (tmpU32no1 == 0) { // This is only possible if numerator = 0, in which case // we don't need any division. 
tmpU32no1 = 1; } - tmpU32no2 = WEBRTC_SPL_UDIV(numerator, tmpU32no1); // Q14 - noiseSupFilterTmp[i] = (WebRtc_UWord16)WEBRTC_SPL_SAT(16384, tmpU32no2, - (WebRtc_UWord32)(inst->denoiseBound)); // Q14 + tmpU32no2 = numerator / tmpU32no1; // Q14 + noiseSupFilterTmp[i] = (uint16_t)WEBRTC_SPL_SAT(16384, tmpU32no2, + (uint32_t)(inst->denoiseBound)); // Q14 } } // Weight quantile noise 'noiseU32' with modeled noise 'noise_estimate_avg' @@ -1941,16 +1683,16 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // may not. // Shift 'noiseU32' to 'q_domain_to_use' - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], (int)qNoise - q_domain_to_use); + tmpU32no1 = noiseU32[i] >> (qNoise - q_domain_to_use); // Shift 'noise_estimate_avg' to 'q_domain_to_use' - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(noise_estimate_avg, inst->minNorm - inst->stages - - q_domain_to_use); + tmpU32no2 = noise_estimate_avg >> + (inst->minNorm - inst->stages - q_domain_to_use); // Make a simple check to see if we have enough room for weighting 'tmpU32no1' // without wrap around nShifts = 0; if (tmpU32no1 & 0xfc000000) { - tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 6); - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); + tmpU32no1 >>= 6; + tmpU32no2 >>= 6; nShifts = 6; } tmpU32no1 *= inst->blockIndex; @@ -1958,7 +1700,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // Add them together and divide by startup length noiseU32[i] = WebRtcSpl_DivU32U16(tmpU32no1 + tmpU32no2, END_STARTUP_SHORT); // Shift back if necessary - noiseU32[i] = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], nShifts); + noiseU32[i] <<= nShifts; } // Update new Q-domain for 'noiseU32' qNoise = q_domain_to_use; @@ -1967,9 +1709,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // used to normalize spectral difference measure if (inst->blockIndex < END_STARTUP_LONG) { // substituting division with shift ending up in Q(-2*stages) - inst->timeAvgMagnEnergyTmp - += WEBRTC_SPL_RSHIFT_U32(inst->magnEnergy, - 2 * inst->normData + inst->stages - 1); + inst->timeAvgMagnEnergyTmp += + inst->magnEnergy >> (2 * inst->normData + inst->stages - 1); inst->timeAvgMagnEnergy = WebRtcSpl_DivU32U16(inst->timeAvgMagnEnergyTmp, inst->blockIndex + 1); } @@ -1978,7 +1719,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // STEP 1: compute prior and post SNR based on quantile noise estimates // compute direct decision (DD) estimate of prior SNR: needed for new method - satMax = (WebRtc_UWord32)1048575;// Largest possible value without getting overflow despite shifting 12 steps + satMax = (uint32_t)1048575;// Largest possible value without getting overflow despite shifting 12 steps postShifts = 6 + qMagn - qNoise; nShifts = 5 - inst->prevQMagn + inst->prevQNoise; for (i = 0; i < inst->magnLen; i++) { @@ -2001,17 +1742,17 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // calculate post SNR: output in Q11 postLocSnr[i] = 2048; // 1.0 in Q11 - tmpU32no1 = WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)magnU16[i], 6); // Q(6+qMagn) + tmpU32no1 = (uint32_t)magnU16[i] << 6; // Q(6+qMagn) if (postShifts < 0) { - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], -postShifts); // Q(6+qMagn) + tmpU32no2 = noiseU32[i] >> -postShifts; // Q(6+qMagn) } else { - tmpU32no2 = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], postShifts); // Q(6+qMagn) + tmpU32no2 = noiseU32[i] << postShifts; // Q(6+qMagn) } if (tmpU32no1 > tmpU32no2) { // Current magnitude larger than noise 
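// Post SNR here is magn/noise in Q11: both operands are in Q(6+qMagn), so
// the numerator is pre-shifted up by 11 before the divide. The cap
// satMax = 1048575 = 2^20 - 1 is chosen so that SNR values can later be
// shifted left by up to 12 bits without wrapping a uint32_t.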
- tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(tmpU32no1, 11); // Q(17+qMagn) + tmpU32no1 <<= 11; // Q(17+qMagn) if (tmpU32no2 > 0) { - tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); // Q11 + tmpU32no1 /= tmpU32no2; // Q11 postLocSnr[i] = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 } else { postLocSnr[i] = satMax; @@ -2019,12 +1760,13 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram } // calculate prevNearSnr[i] and save for later instead of recalculating it later - nearMagnEst = WEBRTC_SPL_UMUL_16_16(inst->prevMagnU16[i], inst->noiseSupFilter[i]); // Q(prevQMagn+14) - tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(nearMagnEst, 3); // Q(prevQMagn+17) - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(inst->prevNoiseU32[i], nShifts); // Q(prevQMagn+6) + // |nearMagnEst| in Q(prevQMagn + 14) + nearMagnEst = inst->prevMagnU16[i] * inst->noiseSupFilter[i]; + tmpU32no1 = nearMagnEst << 3; // Q(prevQMagn+17) + tmpU32no2 = inst->prevNoiseU32[i] >> nShifts; // Q(prevQMagn+6) if (tmpU32no2 > 0) { - tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); // Q11 + tmpU32no1 /= tmpU32no2; // Q11 tmpU32no1 = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 } else { tmpU32no1 = satMax; // Q11 @@ -2036,8 +1778,8 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram tmpU32no2 = WEBRTC_SPL_UMUL_32_16(postLocSnr[i] - 2048, ONE_MINUS_DD_PR_SNR_Q11); // Q22 priorSnr = tmpU32no1 + tmpU32no2 + 512; // Q22 (added 512 for rounding) // priorLocSnr = 1 + 2*priorSnr - priorLocSnr[i] = 2048 + WEBRTC_SPL_RSHIFT_U32(priorSnr, 10); // Q11 - } // end of loop over frequencies + priorLocSnr[i] = 2048 + (priorSnr >> 10); // Q11 + } // end of loop over frequencies // done with step 1: DD computation of prior and post SNR // STEP 2: compute speech/noise likelihood @@ -2058,7 +1800,7 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // get normalization for spectral difference for next window estimate // Shift to Q(-2*stages) - inst->curAvgMagnEnergy = WEBRTC_SPL_RSHIFT_U32(inst->curAvgMagnEnergy, STAT_UPDATES); + inst->curAvgMagnEnergy >>= STAT_UPDATES; tmpU32no1 = (inst->curAvgMagnEnergy + inst->timeAvgMagnEnergy + 1) >> 1; //Q(-2*stages) // Update featureSpecDiff @@ -2076,12 +1818,12 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram norm32no1++; } tmpU32no3 = WEBRTC_SPL_UMUL(tmpU32no3, tmpU32no2); - tmpU32no3 = WEBRTC_SPL_UDIV(tmpU32no3, inst->timeAvgMagnEnergy); + tmpU32no3 /= inst->timeAvgMagnEnergy; if (WebRtcSpl_NormU32(tmpU32no3) < norm32no1) { inst->featureSpecDiff = 0x007FFFFF; } else { - inst->featureSpecDiff = WEBRTC_SPL_MIN( - 0x007FFFFF, WEBRTC_SPL_LSHIFT_U32(tmpU32no3, norm32no1)); + inst->featureSpecDiff = WEBRTC_SPL_MIN(0x007FFFFF, + tmpU32no3 << norm32no1); } } @@ -2104,9 +1846,9 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram // noiseUpdate = noisePrev[i] + (1 - gammaNoise) * nonSpeechProb * (magn[i] - noisePrev[i]) if (postShifts < 0) { - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(magnU16[i], -postShifts); // Q(prevQNoise) + tmpU32no2 = magnU16[i] >> -postShifts; // Q(prevQNoise) } else { - tmpU32no2 = WEBRTC_SPL_LSHIFT_U32(magnU16[i], postShifts); // Q(prevQNoise) + tmpU32no2 = (uint32_t)magnU16[i] << postShifts; // Q(prevQNoise) } if (prevNoiseU16[i] > tmpU32no2) { sign = -1; @@ -2122,12 +1864,10 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram tmpU32no3 = WEBRTC_SPL_UMUL_32_16(tmpU32no1, nonSpeechProbFinal[i]); // Q(prevQNoise+8) if (0x7c000000 & tmpU32no3) { 
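// 0x7c000000 tests bits 26..30: if any is set, multiplying first by the
// Q8 |gammaNoise| could wrap 32 bits, so the operand is shifted down by 5
// before the multiply; otherwise multiplying first and shifting after
// keeps 5 extra bits of precision. Both paths land in Q(prevQNoise+11).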
// Shifting required before multiplication - tmpU32no2 - = WEBRTC_SPL_UMUL_32_16(WEBRTC_SPL_RSHIFT_U32(tmpU32no3, 5), gammaNoise); // Q(prevQNoise+11) + tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11) } else { // We can do shifting after multiplication - tmpU32no2 - = WEBRTC_SPL_RSHIFT_U32(WEBRTC_SPL_UMUL_32_16(tmpU32no3, gammaNoise), 5); // Q(prevQNoise+11) + tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11) } if (sign > 0) { noiseUpdateU32 += tmpU32no2; // Q(prevQNoise+11) @@ -2156,12 +1896,10 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram if (0x7c000000 & tmpU32no3) { // Shifting required before multiplication - tmpU32no2 - = WEBRTC_SPL_UMUL_32_16(WEBRTC_SPL_RSHIFT_U32(tmpU32no3, 5), gammaNoise); // Q(prevQNoise+11) + tmpU32no2 = (tmpU32no3 >> 5) * gammaNoise; // Q(prevQNoise+11) } else { // We can do shifting after multiplication - tmpU32no2 - = WEBRTC_SPL_RSHIFT_U32(WEBRTC_SPL_UMUL_32_16(tmpU32no3, gammaNoise), 5); // Q(prevQNoise+11) + tmpU32no2 = (tmpU32no3 * gammaNoise) >> 5; // Q(prevQNoise+11) } if (sign > 0) { tmpU32no1 = inst->prevNoiseU32[i] + tmpU32no2; // Q(prevQNoise+11) @@ -2186,19 +1924,19 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram tmp32no2 = WEBRTC_SPL_SHIFT_W32(inst->avgMagnPause[i], -nShifts); if (nonSpeechProbFinal[i] > ONE_MINUS_PROB_RANGE_Q8) { if (nShifts < 0) { - tmp32no1 = (WebRtc_Word32)magnU16[i] - tmp32no2; // Q(qMagn) - tmp32no1 = WEBRTC_SPL_MUL_32_16(tmp32no1, ONE_MINUS_GAMMA_PAUSE_Q8); // Q(8+prevQMagn+nShifts) - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1 + 128, 8); // Q(qMagn) + tmp32no1 = (int32_t)magnU16[i] - tmp32no2; // Q(qMagn) + tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts) + tmp32no1 = (tmp32no1 + 128) >> 8; // Q(qMagn). } else { - tmp32no1 = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)magnU16[i], nShifts) - - inst->avgMagnPause[i]; // Q(qMagn+nShifts) - tmp32no1 = WEBRTC_SPL_MUL_32_16(tmp32no1, ONE_MINUS_GAMMA_PAUSE_Q8); // Q(8+prevQMagn+nShifts) - tmp32no1 = WEBRTC_SPL_RSHIFT_W32(tmp32no1 + (128 << nShifts), 8 + nShifts); // Q(qMagn) + // In Q(qMagn+nShifts) + tmp32no1 = ((int32_t)magnU16[i] << nShifts) - inst->avgMagnPause[i]; + tmp32no1 *= ONE_MINUS_GAMMA_PAUSE_Q8; // Q(8+prevQMagn+nShifts) + tmp32no1 = (tmp32no1 + (128 << nShifts)) >> (8 + nShifts); // Q(qMagn). 
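// Adding 128 << nShifts, i.e. half of the final divisor 2^(8+nShifts),
// before the right shift rounds to nearest rather than truncating; the
// same "+ 2^(n-1) before >> n" idiom is used throughout this file.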
} tmp32no2 += tmp32no1; // Q(qMagn) } inst->avgMagnPause[i] = tmp32no2; - } // end of frequency loop + } // end of frequency loop norm32no1 = WebRtcSpl_NormU32(maxNoiseU32); qNoise = inst->prevQNoise + norm32no1 - 5; @@ -2226,22 +1964,22 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram curNearSnr = 0; // Q11 if (nShifts < 0) { // This case is equivalent with magn < noise which implies curNearSnr = 0; - tmpMagnU32 = (WebRtc_UWord32)magnU16[i]; // Q(qMagn) - tmpNoiseU32 = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], -nShifts); // Q(qMagn) + tmpMagnU32 = (uint32_t)magnU16[i]; // Q(qMagn) + tmpNoiseU32 = noiseU32[i] << -nShifts; // Q(qMagn) } else if (nShifts > 17) { - tmpMagnU32 = WEBRTC_SPL_LSHIFT_U32(magnU16[i], 17); // Q(qMagn+17) - tmpNoiseU32 = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], nShifts - 17); // Q(qMagn+17) + tmpMagnU32 = (uint32_t)magnU16[i] << 17; // Q(qMagn+17) + tmpNoiseU32 = noiseU32[i] >> (nShifts - 17); // Q(qMagn+17) } else { - tmpMagnU32 = WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)magnU16[i], nShifts); // Q(qNoise_prev+11) + tmpMagnU32 = (uint32_t)magnU16[i] << nShifts; // Q(qNoise_prev+11) tmpNoiseU32 = noiseU32[i]; // Q(qNoise_prev+11) } if (tmpMagnU32 > tmpNoiseU32) { tmpU32no1 = tmpMagnU32 - tmpNoiseU32; // Q(qCur) norm32no2 = WEBRTC_SPL_MIN(11, WebRtcSpl_NormU32(tmpU32no1)); - tmpU32no1 = WEBRTC_SPL_LSHIFT_U32(tmpU32no1, norm32no2); // Q(qCur+norm32no2) - tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpNoiseU32, 11 - norm32no2); // Q(qCur+norm32no2-11) + tmpU32no1 <<= norm32no2; // Q(qCur+norm32no2) + tmpU32no2 = tmpNoiseU32 >> (11 - norm32no2); // Q(qCur+norm32no2-11) if (tmpU32no2 > 0) { - tmpU32no1 = WEBRTC_SPL_UDIV(tmpU32no1, tmpU32no2); // Q11 + tmpU32no1 /= tmpU32no2; // Q11 } curNearSnr = WEBRTC_SPL_MIN(satMax, tmpU32no1); // Q11 } @@ -2255,25 +1993,22 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram priorSnr = tmpU32no1 + tmpU32no2; // Q22 //gain filter - tmpU32no1 = (WebRtc_UWord32)(inst->overdrive) - + WEBRTC_SPL_RSHIFT_U32(priorSnr + 8192, 14); // Q8 + tmpU32no1 = inst->overdrive + ((priorSnr + 8192) >> 14); // Q8 assert(inst->overdrive > 0); - tmpU16no1 = (WebRtc_UWord16)WEBRTC_SPL_UDIV(priorSnr + (tmpU32no1 >> 1), tmpU32no1); // Q14 + tmpU16no1 = (priorSnr + tmpU32no1 / 2) / tmpU32no1; // Q14 inst->noiseSupFilter[i] = WEBRTC_SPL_SAT(16384, tmpU16no1, inst->denoiseBound); // 16384 = Q14(1.0) // Q14 // Weight in the parametric Wiener filter during startup if (inst->blockIndex < END_STARTUP_SHORT) { // Weight the two suppression filters - tmpU32no1 = WEBRTC_SPL_UMUL_16_16(inst->noiseSupFilter[i], - (WebRtc_UWord16)inst->blockIndex); - tmpU32no2 = WEBRTC_SPL_UMUL_16_16(noiseSupFilterTmp[i], - (WebRtc_UWord16)(END_STARTUP_SHORT - - inst->blockIndex)); + tmpU32no1 = inst->noiseSupFilter[i] * inst->blockIndex; + tmpU32no2 = noiseSupFilterTmp[i] * + (END_STARTUP_SHORT - inst->blockIndex); tmpU32no1 += tmpU32no2; - inst->noiseSupFilter[i] = (WebRtc_UWord16)WebRtcSpl_DivU32U16(tmpU32no1, + inst->noiseSupFilter[i] = (uint16_t)WebRtcSpl_DivU32U16(tmpU32no1, END_STARTUP_SHORT); } - } // end of loop over frequencies + } // end of loop over frequencies //done with step3 // save noise and magnitude spectrum for next frame @@ -2281,28 +2016,35 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram inst->prevQMagn = qMagn; if (norm32no1 > 5) { for (i = 0; i < inst->magnLen; i++) { - inst->prevNoiseU32[i] = WEBRTC_SPL_LSHIFT_U32(noiseU32[i], norm32no1 - 5); // Q(qNoise+11) + inst->prevNoiseU32[i] = 
noiseU32[i] << (norm32no1 - 5); // Q(qNoise+11) inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) } } else { for (i = 0; i < inst->magnLen; i++) { - inst->prevNoiseU32[i] = WEBRTC_SPL_RSHIFT_U32(noiseU32[i], 5 - norm32no1); // Q(qNoise+11) + inst->prevNoiseU32[i] = noiseU32[i] >> (5 - norm32no1); // Q(qNoise+11) inst->prevMagnU16[i] = magnU16[i]; // Q(qMagn) } } - WebRtcNsx_DataSynthesis(inst, outFrame); + WebRtcNsx_DataSynthesis(inst, outFrame[0]); #ifdef NS_FILEDEBUG - fwrite(outframe, sizeof(short), inst->blockLen10ms, inst->outfile); + if (fwrite(outframe, sizeof(short), + inst->blockLen10ms, inst->outfile) != inst->blockLen10ms) { + assert(false); + } #endif //for H band: // only update data buffer, then apply time-domain gain is applied derived from L band - if (inst->fs == 32000) { + if (num_bands > 1) { // update analysis buffer for H band // append new data to buffer FX - WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX, inst->dataBufHBFX + inst->blockLen10ms, inst->anaLen - inst->blockLen10ms); - WEBRTC_SPL_MEMCPY_W16(inst->dataBufHBFX + inst->anaLen - inst->blockLen10ms, speechFrameHB, inst->blockLen10ms); + for (i = 0; i < num_high_bands; ++i) { + memcpy(inst->dataBufHBFX[i], inst->dataBufHBFX[i] + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->dataBufHBFX[i])); + memcpy(inst->dataBufHBFX[i] + inst->anaLen - inst->blockLen10ms, + speechFrameHB[i], inst->blockLen10ms * sizeof(*inst->dataBufHBFX[i])); + } // range for averaging low band quantities for H band gain gainTimeDomainHB = 16384; // 16384 = Q14(1.0) @@ -2313,11 +2055,11 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram tmpU16no1 = 0; // Q8 for (i = inst->anaLen2 - (inst->anaLen2 >> 2); i < inst->anaLen2; i++) { tmpU16no1 += nonSpeechProbFinal[i]; // Q8 - tmpU32no1 += (WebRtc_UWord32)(inst->noiseSupFilter[i]); // Q14 + tmpU32no1 += (uint32_t)(inst->noiseSupFilter[i]); // Q14 } - avgProbSpeechHB = (WebRtc_Word16)(4096 - - WEBRTC_SPL_RSHIFT_U16(tmpU16no1, inst->stages - 7)); // Q12 - avgFilterGainHB = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_U32(tmpU32no1, inst->stages - 3); // Q14 + assert(inst->stages >= 7); + avgProbSpeechHB = (4096 - (tmpU16no1 >> (inst->stages - 7))); // Q12 + avgFilterGainHB = (int16_t)(tmpU32no1 >> (inst->stages - 3)); // Q14 // // original FLOAT code // // gain based on speech probability: @@ -2351,20 +2093,20 @@ int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* speechFrame, short* speechFram gainTimeDomainHB = (gainModHB << 1) + (avgFilterGainHB >> 1); // Q14 } else { // "gain_time_domain = 0.25 * gain_mod + 0.75 * agv_filter_gain;" - gainTimeDomainHB = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(3, avgFilterGainHB, 2); // 3 = Q2(0.75); Q14 + gainTimeDomainHB = (int16_t)((3 * avgFilterGainHB) >> 2); // 3 = Q2(0.75) gainTimeDomainHB += gainModHB; // Q14 } //make sure gain is within flooring range gainTimeDomainHB - = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (WebRtc_Word16)(inst->denoiseBound)); // 16384 = Q14(1.0) + = WEBRTC_SPL_SAT(16384, gainTimeDomainHB, (int16_t)(inst->denoiseBound)); // 16384 = Q14(1.0) //apply gain - for (i = 0; i < inst->blockLen10ms; i++) { - outFrameHB[i] - = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(gainTimeDomainHB, inst->dataBufHBFX[i], 14); // Q0 + for (i = 0; i < num_high_bands; ++i) { + for (j = 0; j < inst->blockLen10ms; j++) { + outFrameHB[i][j] = (int16_t)((gainTimeDomainHB * + inst->dataBufHBFX[i][j]) >> 14); // Q0 + } } - } // end of H band gain computation - - return 0; + } // end of H band gain computation } diff --git 
a/webrtc/modules/audio_processing/ns/nsx_core.h b/webrtc/modules/audio_processing/ns/nsx_core.h index d5766ab..f463dbb 100644 --- a/webrtc/modules/audio_processing/ns/nsx_core.h +++ b/webrtc/modules/audio_processing/ns/nsx_core.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -11,95 +11,103 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ -#include "typedefs.h" -#include "signal_processing_library.h" - -#include "nsx_defines.h" - #ifdef NS_FILEDEBUG #include <stdio.h> #endif -typedef struct NsxInst_t_ { - WebRtc_UWord32 fs; +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/modules/audio_processing/ns/nsx_defines.h" +#include "webrtc/typedefs.h" - const WebRtc_Word16* window; - WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX]; - WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX]; - WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL]; - WebRtc_UWord16 overdrive; /* Q8 */ - WebRtc_UWord16 denoiseBound; /* Q14 */ - const WebRtc_Word16* factor2Table; - WebRtc_Word16 noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL]; - WebRtc_Word16 noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL]; - WebRtc_Word16 noiseEstCounter[SIMULT]; - WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL]; +typedef struct NoiseSuppressionFixedC_ { + uint32_t fs; - WebRtc_Word16 anaLen; - int anaLen2; - int magnLen; + const int16_t* window; + int16_t analysisBuffer[ANAL_BLOCKL_MAX]; + int16_t synthesisBuffer[ANAL_BLOCKL_MAX]; + uint16_t noiseSupFilter[HALF_ANAL_BLOCKL]; + uint16_t overdrive; /* Q8 */ + uint16_t denoiseBound; /* Q14 */ + const int16_t* factor2Table; + int16_t noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL]; + int16_t noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL]; + int16_t noiseEstCounter[SIMULT]; + int16_t noiseEstQuantile[HALF_ANAL_BLOCKL]; + + size_t anaLen; + size_t anaLen2; + size_t magnLen; int aggrMode; int stages; int initFlag; int gainMap; - WebRtc_Word32 maxLrt; - WebRtc_Word32 minLrt; - WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8 - WebRtc_Word32 featureLogLrt; - WebRtc_Word32 thresholdLogLrt; - WebRtc_Word16 weightLogLrt; + int32_t maxLrt; + int32_t minLrt; + // Log LRT factor with time-smoothing in Q8. + int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; + int32_t featureLogLrt; + int32_t thresholdLogLrt; + int16_t weightLogLrt; - WebRtc_UWord32 featureSpecDiff; - WebRtc_UWord32 thresholdSpecDiff; - WebRtc_Word16 weightSpecDiff; + uint32_t featureSpecDiff; + uint32_t thresholdSpecDiff; + int16_t weightSpecDiff; - WebRtc_UWord32 featureSpecFlat; - WebRtc_UWord32 thresholdSpecFlat; - WebRtc_Word16 weightSpecFlat; + uint32_t featureSpecFlat; + uint32_t thresholdSpecFlat; + int16_t weightSpecFlat; - WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum - WebRtc_UWord32 magnEnergy; - WebRtc_UWord32 sumMagn; - WebRtc_UWord32 curAvgMagnEnergy; - WebRtc_UWord32 timeAvgMagnEnergy; - WebRtc_UWord32 timeAvgMagnEnergyTmp; + // Conservative estimate of noise spectrum.
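// (Updated during speech pauses, when nonSpeechProbFinal is high, and used
// as the reference/template spectrum in WebRtcNsx_ComputeSpectralDifference.)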
+ int32_t avgMagnPause[HALF_ANAL_BLOCKL]; + uint32_t magnEnergy; + uint32_t sumMagn; + uint32_t curAvgMagnEnergy; + uint32_t timeAvgMagnEnergy; + uint32_t timeAvgMagnEnergyTmp; - WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate - WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate - WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator - WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq - int minNorm; //smallest normalization factor - int zeroInputSignal; //zero input signal flag + uint32_t whiteNoiseLevel; // Initial noise estimate. + // Initial magnitude spectrum estimate. + uint32_t initMagnEst[HALF_ANAL_BLOCKL]; + // Pink noise parameters: + int32_t pinkNoiseNumerator; // Numerator. + int32_t pinkNoiseExp; // Power of freq. + int minNorm; // Smallest normalization factor. + int zeroInputSignal; // Zero input signal flag. - WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame - WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame - WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14 + // Noise spectrum from previous frame. + uint32_t prevNoiseU32[HALF_ANAL_BLOCKL]; + // Magnitude spectrum from previous frame. + uint16_t prevMagnU16[HALF_ANAL_BLOCKL]; + // Prior speech/noise probability in Q14. + int16_t priorNonSpeechProb; - int blockIndex; //frame index counter - int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model + int blockIndex; // Frame index counter. + // Parameter for updating or estimating thresholds/weights for prior model. + int modelUpdate; int cntThresUpdate; - //histograms for parameter estimation - WebRtc_Word16 histLrt[HIST_PAR_EST]; - WebRtc_Word16 histSpecFlat[HIST_PAR_EST]; - WebRtc_Word16 histSpecDiff[HIST_PAR_EST]; + // Histograms for parameter estimation. + int16_t histLrt[HIST_PAR_EST]; + int16_t histSpecFlat[HIST_PAR_EST]; + int16_t histSpecDiff[HIST_PAR_EST]; - //quantities for high band estimate - WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */ + // Quantities for high band estimate. + int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX]; int qNoise; int prevQNoise; int prevQMagn; - int blockLen10ms; + size_t blockLen10ms; - WebRtc_Word16 real[ANAL_BLOCKL_MAX]; - WebRtc_Word16 imag[ANAL_BLOCKL_MAX]; - WebRtc_Word32 energyIn; + int16_t real[ANAL_BLOCKL_MAX]; + int16_t imag[ANAL_BLOCKL_MAX]; + int32_t energyIn; int scaleEnergyIn; int normData; -} NsxInst_t; + struct RealFFT* real_fft; +} NoiseSuppressionFixedC; #ifdef __cplusplus extern "C" @@ -121,7 +129,7 @@ extern "C" * Return value : 0 - Ok * -1 - Error */ -WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs); +int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs); /**************************************************************************** * WebRtcNsx_set_policy_core(...) @@ -129,16 +137,16 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs); * This changes the aggressiveness of the noise suppression method. 
* * Input: - * - inst : Instance that should be initialized - * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) + * - inst : Instance that should be initialized + * - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB) * * Output: - * - NS_inst : Initialized instance + * - inst : Initialized instance * - * Return value : 0 - Ok - * -1 - Error + * Return value : 0 - Ok + * -1 - Error */ -int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode); +int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode); /**************************************************************************** * WebRtcNsx_ProcessCore @@ -147,34 +155,109 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode); * * Input: * - inst : Instance that should be initialized - * - inFrameLow : Input speech frame for lower band - * - inFrameHigh : Input speech frame for higher band + * - inFrame : Input speech frame for each band + * - num_bands : Number of bands * * Output: * - inst : Updated instance - * - outFrameLow : Output speech frame for lower band - * - outFrameHigh : Output speech frame for higher band - * - * Return value : 0 - OK - * -1 - Error + * - outFrame : Output speech frame for each band */ -int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh, - short* outFrameLow, short* outFrameHigh); +void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst, + const short* const* inFrame, + int num_bands, + short* const* outFrame); /**************************************************************************** - * Internal functions and variable declarations shared with optimized code. + * Some function pointers, for internal functions shared by ARM NEON and + * generic C code. */ -void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset); +// Noise Estimation. +typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise); +extern NoiseEstimation WebRtcNsx_NoiseEstimation; -void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise, - WebRtc_Word16* qNoise); +// Filter the data in the frequency domain, and create spectrum. +typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst, + int16_t* freq_buff); +extern PrepareSpectrum WebRtcNsx_PrepareSpectrum; -extern const WebRtc_Word16 WebRtcNsx_kLogTable[9]; -extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256]; -extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201]; +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. +typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor); +extern SynthesisUpdate WebRtcNsx_SynthesisUpdate; + +// Update analysis buffer for lower band, and window data before FFT. +typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech); +extern AnalysisUpdate WebRtcNsx_AnalysisUpdate; + +// Denormalize the real-valued signal |in|, the output from inverse FFT. +typedef void (*Denormalize)(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor); +extern Denormalize WebRtcNsx_Denormalize; + +// Normalize the real-valued signal |in|, the input to forward FFT. +typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out); +extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer; + +// Compute speech/noise probability. +// Intended to be private. 
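The typedef/extern pairs above replace direct calls with function pointers so that a single binary can route each hot kernel to the generic C, NEON, or MIPS implementation. A minimal sketch of how such selection is typically wired up at initialization follows; the helper name SelectNsxKernels and the default kernel names are illustrative assumptions, not code from this patch (the real wiring lives in nsx_core.c, which is not shown here):

// Illustrative sketch only: assign the kernel pointers once at init time.
// NoiseEstimationC / PrepareSpectrumC stand in for the file-static generic
// implementations in nsx_core.c; the NEON names are the externs declared
// further below in this header.
static void SelectNsxKernels(void) {
  WebRtcNsx_NoiseEstimation = NoiseEstimationC;  // portable default
  WebRtcNsx_PrepareSpectrum = PrepareSpectrumC;  // portable default
#if defined(WEBRTC_HAS_NEON)
  // Compile-time NEON: always use the vectorized kernels.
  WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
  WebRtcNsx_PrepareSpectrum = WebRtcNsx_PrepareSpectrumNeon;
#elif defined(WEBRTC_DETECT_NEON)
  // Run-time detection would gate the same assignments on a CPU probe.
#endif
}

The declaration that follows, by contrast, is a plain function shared by all builds.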
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst, + uint16_t* nonSpeechProbFinal, + uint32_t* priorLocSnr, + uint32_t* postLocSnr); + +#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON) +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file nsx_core.c, while those for ARM Neon platforms +// are declared below and defined in file nsx_core_neon.c. +void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise); +void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor); +void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech); +void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst, + int16_t* freq_buff); +#endif + +#if defined(MIPS32_LE) +// For the above function pointers, functions for generic platforms are declared +// and defined as static in file nsx_core.c, while those for MIPS platforms +// are declared below and defined in file nsx_core_mips.c. +void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor); +void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech); +void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst, + int16_t* freq_buff); +void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst, + const int16_t* in, + int16_t* out); +#if defined(MIPS_DSP_R1_LE) +void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor); +#endif + +#endif #ifdef __cplusplus } #endif -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_ diff --git a/webrtc/modules/audio_processing/ns/nsx_core_c.c b/webrtc/modules/audio_processing/ns/nsx_core_c.c new file mode 100644 index 0000000..14322d3 --- /dev/null +++ b/webrtc/modules/audio_processing/ns/nsx_core_c.c @@ -0,0 +1,261 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <assert.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
+
+static const int16_t kIndicatorTable[17] = {
+  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability
+// speech/noise probability is returned in: probSpeechFinal
+//snrLocPrior is the prior SNR for each frequency (in Q11)
+//snrLocPost is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr) {
+  uint32_t zeros, num, den, tmpU32no1, tmpU32no2, tmpU32no3;
+  int32_t invLrtFX, indPriorFX, tmp32, tmp32no1, tmp32no2, besselTmpFX32;
+  int32_t frac32, logTmp;
+  int32_t logLrtTimeAvgKsumFX;
+  int16_t indPriorFX16;
+  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac, intPart;
+  size_t i;
+  int normTmp, normTmp2, nShifts;
+
+  // compute feature based on average LR factor
+  // this is the average over all frequencies of the smooth log LRT
+  logLrtTimeAvgKsumFX = 0;
+  for (i = 0; i < inst->magnLen; i++) {
+    besselTmpFX32 = (int32_t)postLocSnr[i]; // Q11
+    normTmp = WebRtcSpl_NormU32(postLocSnr[i]);
+    num = postLocSnr[i] << normTmp;  // Q(11+normTmp)
+    if (normTmp > 10) {
+      den = priorLocSnr[i] << (normTmp - 11);  // Q(normTmp)
+    } else {
+      den = priorLocSnr[i] >> (11 - normTmp);  // Q(normTmp)
+    }
+    if (den > 0) {
+      besselTmpFX32 -= num / den;  // Q11
+    } else {
+      besselTmpFX32 = 0;
+    }
+
+    // inst->logLrtTimeAvg[i] += LRT_TAVG * (besselTmp - log(snrLocPrior)
+    //                                       - inst->logLrtTimeAvg[i]);
+    // Here, LRT_TAVG = 0.5
+    zeros = WebRtcSpl_NormU32(priorLocSnr[i]);
+    frac32 = (int32_t)(((priorLocSnr[i] << zeros) & 0x7FFFFFFF) >> 19);
+    tmp32 = (frac32 * frac32 * -43) >> 19;
+    tmp32 += ((int16_t)frac32 * 5412) >> 12;
+    frac32 = tmp32 + 37;
+    // tmp32 = log2(priorLocSnr[i])
+    tmp32 = (int32_t)(((31 - zeros) << 12) + frac32) - (11 << 12);  // Q12
+    logTmp = (tmp32 * 178) >> 8;  // log2(priorLocSnr[i])*log(2)
+    // tmp32no1 = LRT_TAVG * (log(snrLocPrior) + inst->logLrtTimeAvg[i]) in Q12.
+ tmp32no1 = (logTmp + inst->logLrtTimeAvgW32[i]) / 2; + inst->logLrtTimeAvgW32[i] += (besselTmpFX32 - tmp32no1); // Q12 + + logLrtTimeAvgKsumFX += inst->logLrtTimeAvgW32[i]; // Q12 + } + inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >> + (inst->stages + 11); + + // done with computation of LR factor + + // + //compute the indicator functions + // + + // average LRT feature + // FLOAT code + // indicator0 = 0.5 * (tanh(widthPrior * + // (logLrtTimeAvgKsum - threshPrior0)) + 1.0); + tmpIndFX = 16384; // Q14(1.0) + tmp32no1 = logLrtTimeAvgKsumFX - inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + tableIndex = (int16_t)(tmp32no1 >> 14); + if ((tableIndex < 16) && (tableIndex >= 0)) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX == 0) { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); // Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * + // (threshPrior1 - tmpFloat1)) + 1.0); + tableIndex = (int16_t)(tmpU32no1 >> 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) { + tmpU32no1 = 0; + if (inst->featureSpecDiff) { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + assert(normTmp >= 0); + tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages) + tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp); + if (tmpU32no2 > 0) { + // Q(20 - inst->stages) + tmpU32no1 /= tmpU32no2; + } else { + tmpU32no1 = (uint32_t)(0x7fffffff); + } + } + tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25; + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = tmpU32no2 >> nShifts; + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); + */ 
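In all three indicator computations, the kIndicatorTable lookup plus the `frac` term realizes the tanh of the FLOAT reference as a table lookup with linear interpolation: entry i appears to hold approximately round(8192 * tanh(i / 4)), evaluated on the integer part of a Q14 argument. A float reference of the mapping, under that assumption (a sketch, not patch code):

// Reference sketch, assuming kIndicatorTable[i] ~ round(8192 * tanh(i / 4)).
// arg_q14 is the non-negative Q14 tanh argument; sign is +1 or -1 depending
// on which side of the threshold the feature fell; result is Q14.
#include <math.h>
#include <stdint.h>

static int16_t IndicatorRef(int32_t arg_q14, int sign) {
  double x = arg_q14 / 16384.0;                     // Q14 -> linear
  double ind = 0.5 * (sign * tanh(x / 4.0) + 1.0);  // FLOAT-code indicator
  return (int16_t)(ind * 16384.0 + 0.5);            // back to Q14
}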
+ tableIndex = (int16_t)(tmpU32no1 >> 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; + } else { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * + // indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * + // (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14); + + //final speech probability: combine prior model with LR factor: + + memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); + + if (inst->priorNonSpeechProb > 0) { + for (i = 0; i < inst->magnLen; i++) { + // FLOAT code + // invLrt = exp(inst->logLrtTimeAvg[i]); + // invLrt = inst->priorSpeechProb * invLrt; + // nonSpeechProbFinal[i] = (1.0 - inst->priorSpeechProb) / + // (1.0 - inst->priorSpeechProb + invLrt); + // invLrt = (1.0 - inst->priorNonSpeechProb) * invLrt; + // nonSpeechProbFinal[i] = inst->priorNonSpeechProb / + // (inst->priorNonSpeechProb + invLrt); + if (inst->logLrtTimeAvgW32[i] < 65300) { + tmp32no1 = (inst->logLrtTimeAvgW32[i] * 23637) >> 14; // Q12 + intPart = (int16_t)(tmp32no1 >> 12); + if (intPart < -8) { + intPart = -8; + } + frac = (int16_t)(tmp32no1 & 0x00000fff); // Q12 + + // Quadratic approximation of 2^frac + tmp32no2 = (frac * frac * 44) >> 19; // Q12. + tmp32no2 += (frac * 84) >> 7; // Q12 + invLrtFX = (1 << (8 + intPart)) + + WEBRTC_SPL_SHIFT_W32(tmp32no2, intPart - 4); // Q8 + + normTmp = WebRtcSpl_NormW32(invLrtFX); + normTmp2 = WebRtcSpl_NormW16((16384 - inst->priorNonSpeechProb)); + if (normTmp + normTmp2 >= 7) { + if (normTmp + normTmp2 < 15) { + invLrtFX >>= 15 - normTmp2 - normTmp; + // Q(normTmp+normTmp2-7) + tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb); + // Q(normTmp+normTmp2+7) + invLrtFX = WEBRTC_SPL_SHIFT_W32(tmp32no1, 7 - normTmp - normTmp2); + // Q14 + } else { + tmp32no1 = invLrtFX * (16384 - inst->priorNonSpeechProb); + // Q22 + invLrtFX = tmp32no1 >> 8; // Q14. + } + + tmp32no1 = (int32_t)inst->priorNonSpeechProb << 8; // Q22 + + nonSpeechProbFinal[i] = tmp32no1 / + (inst->priorNonSpeechProb + invLrtFX); // Q8 + } + } + } + } +} + diff --git a/webrtc/modules/audio_processing/ns/nsx_core_mips.c b/webrtc/modules/audio_processing/ns/nsx_core_mips.c new file mode 100644 index 0000000..d99be87 --- /dev/null +++ b/webrtc/modules/audio_processing/ns/nsx_core_mips.c @@ -0,0 +1,1002 @@ +/* + * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"
+
+static const int16_t kIndicatorTable[17] = {
+  0, 2017, 3809, 5227, 6258, 6963, 7424, 7718,
+  7901, 8014, 8084, 8126, 8152, 8168, 8177, 8183, 8187
+};
+
+// Compute speech/noise probability
+// speech/noise probability is returned in: probSpeechFinal
+//snrLocPrior is the prior SNR for each frequency (in Q11)
+//snrLocPost is the post SNR for each frequency (in Q11)
+void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
+                               uint16_t* nonSpeechProbFinal,
+                               uint32_t* priorLocSnr,
+                               uint32_t* postLocSnr) {
+  uint32_t tmpU32no1, tmpU32no2, tmpU32no3;
+  int32_t indPriorFX, tmp32no1;
+  int32_t logLrtTimeAvgKsumFX;
+  int16_t indPriorFX16;
+  int16_t tmp16, tmp16no1, tmp16no2, tmpIndFX, tableIndex, frac;
+  size_t i;
+  int normTmp, nShifts;
+
+  int32_t r0, r1, r2, r3, r4, r5, r6, r7, r8, r9;
+  int32_t const_max = 0x7fffffff;
+  int32_t const_neg43 = -43;
+  int32_t const_5412 = 5412;
+  int32_t const_11rsh12 = (11 << 12);
+  int32_t const_178 = 178;
+
+
+  // compute feature based on average LR factor
+  // this is the average over all frequencies of the smooth log LRT
+  logLrtTimeAvgKsumFX = 0;
+  for (i = 0; i < inst->magnLen; i++) {
+    r0 = postLocSnr[i]; // Q11
+    r1 = priorLocSnr[i];
+    r2 = inst->logLrtTimeAvgW32[i];
+
+    __asm __volatile(
+      ".set push \n\t"
+      ".set noreorder \n\t"
+      "clz %[r3], %[r0] \n\t"
+      "clz %[r5], %[r1] \n\t"
+      "slti %[r4], %[r3], 32 \n\t"
+      "slti %[r6], %[r5], 32 \n\t"
+      "movz %[r3], $0, %[r4] \n\t"
+      "movz %[r5], $0, %[r6] \n\t"
+      "slti %[r4], %[r3], 11 \n\t"
+      "addiu %[r6], %[r3], -11 \n\t"
+      "neg %[r7], %[r6] \n\t"
+      "sllv %[r6], %[r1], %[r6] \n\t"
+      "srav %[r7], %[r1], %[r7] \n\t"
+      "movn %[r6], %[r7], %[r4] \n\t"
+      "sllv %[r1], %[r1], %[r5] \n\t"
+      "and %[r1], %[r1], %[const_max] \n\t"
+      "sra %[r1], %[r1], 19 \n\t"
+      "mul %[r7], %[r1], %[r1] \n\t"
+      "sllv %[r3], %[r0], %[r3] \n\t"
+      "divu %[r8], %[r3], %[r6] \n\t"
+      "slti %[r6], %[r6], 1 \n\t"
+      "mul %[r7], %[r7], %[const_neg43] \n\t"
+      "sra %[r7], %[r7], 19 \n\t"
+      "movz %[r3], %[r8], %[r6] \n\t"
+      "subu %[r0], %[r0], %[r3] \n\t"
+      "movn %[r0], $0, %[r6] \n\t"
+      "mul %[r1], %[r1], %[const_5412] \n\t"
+      "sra %[r1], %[r1], 12 \n\t"
+      "addu %[r7], %[r7], %[r1] \n\t"
+      "addiu %[r1], %[r7], 37 \n\t"
+      "addiu %[r5], %[r5], -31 \n\t"
+      "neg %[r5], %[r5] \n\t"
+      "sll %[r5], %[r5], 12 \n\t"
+      "addu %[r5], %[r5], %[r1] \n\t"
+      "subu %[r7], %[r5], %[const_11rsh12] \n\t"
+      "mul %[r7], %[r7], %[const_178] \n\t"
+      "sra %[r7], %[r7], 8 \n\t"
+      "addu %[r7], %[r7], %[r2] \n\t"
+      "sra %[r7], %[r7], 1 \n\t"
+      "subu %[r2], %[r2], %[r7] \n\t"
+      "addu %[r2], %[r2], %[r0] \n\t"
+      ".set pop \n\t"
+      : [r0] "+r" (r0), [r1] "+r" (r1), [r2] "+r" (r2),
+        [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5),
+        [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8)
+      : [const_max] "r" (const_max), [const_neg43] "r" (const_neg43),
+        [const_5412] "r" (const_5412), [const_11rsh12] "r" (const_11rsh12),
+        [const_178] "r" (const_178)
+      : "hi", "lo"
+    );
+    inst->logLrtTimeAvgW32[i] = r2;
+    logLrtTimeAvgKsumFX += r2;
+  }
+
+  inst->featureLogLrt = (logLrtTimeAvgKsumFX * BIN_SIZE_LRT) >>
+      (inst->stages + 11);
+
+  // done with computation of LR factor
+
+  //
+  // compute the indicator functions
+  //
+
+  // average LRT feature
+  // FLOAT code
+  // indicator0 = 0.5 * (tanh(widthPrior *
+  //                          (logLrtTimeAvgKsum - threshPrior0)) + 1.0);
+  tmpIndFX = 16384; // Q14(1.0)
+  tmp32no1 = logLrtTimeAvgKsumFX -
inst->thresholdLogLrt; // Q12 + nShifts = 7 - inst->stages; // WIDTH_PR_MAP_SHIFT - inst->stages + 5; + //use larger width in tanh map for pause regions + if (tmp32no1 < 0) { + tmpIndFX = 0; + tmp32no1 = -tmp32no1; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, nShifts); // Q14 + // compute indicator function: sigmoid map + tableIndex = (int16_t)(tmp32no1 >> 14); + if ((tableIndex < 16) && (tableIndex >= 0)) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmp32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX == 0) { + tmpIndFX = 8192 - tmp16no2; // Q14 + } else { + tmpIndFX = 8192 + tmp16no2; // Q14 + } + } + indPriorFX = inst->weightLogLrt * tmpIndFX; // 6*Q14 + + //spectral flatness feature + if (inst->weightSpecFlat) { + tmpU32no1 = WEBRTC_SPL_UMUL(inst->featureSpecFlat, 400); // Q10 + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + tmpU32no2 = inst->thresholdSpecFlat - tmpU32no1; //Q10 + nShifts = 4; + if (inst->thresholdSpecFlat < tmpU32no1) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no1 - inst->thresholdSpecFlat; + //widthPrior = widthPrior * 2.0; + nShifts++; + } + tmpU32no1 = WebRtcSpl_DivU32U16(tmpU32no2 << nShifts, 25); //Q14 + // compute indicator function: sigmoid map + // FLOAT code + // indicator1 = 0.5 * (tanh(sgnMap * widthPrior * + // (threshPrior1 - tmpFloat1)) + 1.0); + tableIndex = (int16_t)(tmpU32no1 >> 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)((tmp16no1 * frac) >> 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; // Q14 + } else { + tmpIndFX = 8192 - tmp16no2; // Q14 + } + } + indPriorFX += inst->weightSpecFlat * tmpIndFX; // 6*Q14 + } + + //for template spectral-difference + if (inst->weightSpecDiff) { + tmpU32no1 = 0; + if (inst->featureSpecDiff) { + normTmp = WEBRTC_SPL_MIN(20 - inst->stages, + WebRtcSpl_NormU32(inst->featureSpecDiff)); + assert(normTmp >= 0); + tmpU32no1 = inst->featureSpecDiff << normTmp; // Q(normTmp-2*stages) + tmpU32no2 = inst->timeAvgMagnEnergy >> (20 - inst->stages - normTmp); + if (tmpU32no2 > 0) { + // Q(20 - inst->stages) + tmpU32no1 /= tmpU32no2; + } else { + tmpU32no1 = (uint32_t)(0x7fffffff); + } + } + tmpU32no3 = (inst->thresholdSpecDiff << 17) / 25; + tmpU32no2 = tmpU32no1 - tmpU32no3; + nShifts = 1; + tmpIndFX = 16384; // Q14(1.0) + //use larger width in tanh map for pause regions + if (tmpU32no2 & 0x80000000) { + tmpIndFX = 0; + tmpU32no2 = tmpU32no3 - tmpU32no1; + //widthPrior = widthPrior * 2.0; + nShifts--; + } + tmpU32no1 = tmpU32no2 >> nShifts; + // compute indicator function: sigmoid map + /* FLOAT code + indicator2 = 0.5 * (tanh(widthPrior * (tmpFloat1 - threshPrior2)) + 1.0); + */ + tableIndex = (int16_t)(tmpU32no1 >> 14); + if (tableIndex < 16) { + tmp16no2 = kIndicatorTable[tableIndex]; + tmp16no1 = kIndicatorTable[tableIndex + 1] - kIndicatorTable[tableIndex]; + frac = (int16_t)(tmpU32no1 & 0x00003fff); // Q14 + tmp16no2 += (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + tmp16no1, frac, 14); + if (tmpIndFX) { + tmpIndFX = 8192 + tmp16no2; + } else { + tmpIndFX = 8192 - tmp16no2; + } + } + indPriorFX += inst->weightSpecDiff * tmpIndFX; // 6*Q14 + } + + //combine the indicator function with the feature weights + // FLOAT code + // 
indPrior = 1 - (weightIndPrior0 * indicator0 + weightIndPrior1 * + // indicator1 + weightIndPrior2 * indicator2); + indPriorFX16 = WebRtcSpl_DivW32W16ResW16(98307 - indPriorFX, 6); // Q14 + // done with computing indicator function + + //compute the prior probability + // FLOAT code + // inst->priorNonSpeechProb += PRIOR_UPDATE * + // (indPriorNonSpeech - inst->priorNonSpeechProb); + tmp16 = indPriorFX16 - inst->priorNonSpeechProb; // Q14 + inst->priorNonSpeechProb += (int16_t)((PRIOR_UPDATE_Q14 * tmp16) >> 14); + + //final speech probability: combine prior model with LR factor: + + memset(nonSpeechProbFinal, 0, sizeof(uint16_t) * inst->magnLen); + + if (inst->priorNonSpeechProb > 0) { + r0 = inst->priorNonSpeechProb; + r1 = 16384 - r0; + int32_t const_23637 = 23637; + int32_t const_44 = 44; + int32_t const_84 = 84; + int32_t const_1 = 1; + int32_t const_neg8 = -8; + for (i = 0; i < inst->magnLen; i++) { + r2 = inst->logLrtTimeAvgW32[i]; + if (r2 < 65300) { + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "mul %[r2], %[r2], %[const_23637] \n\t" + "sll %[r6], %[r1], 16 \n\t" + "clz %[r7], %[r6] \n\t" + "clo %[r8], %[r6] \n\t" + "slt %[r9], %[r6], $0 \n\t" + "movn %[r7], %[r8], %[r9] \n\t" + "sra %[r2], %[r2], 14 \n\t" + "andi %[r3], %[r2], 0xfff \n\t" + "mul %[r4], %[r3], %[r3] \n\t" + "mul %[r3], %[r3], %[const_84] \n\t" + "sra %[r2], %[r2], 12 \n\t" + "slt %[r5], %[r2], %[const_neg8] \n\t" + "movn %[r2], %[const_neg8], %[r5] \n\t" + "mul %[r4], %[r4], %[const_44] \n\t" + "sra %[r3], %[r3], 7 \n\t" + "addiu %[r7], %[r7], -1 \n\t" + "slti %[r9], %[r7], 31 \n\t" + "movz %[r7], $0, %[r9] \n\t" + "sra %[r4], %[r4], 19 \n\t" + "addu %[r4], %[r4], %[r3] \n\t" + "addiu %[r3], %[r2], 8 \n\t" + "addiu %[r2], %[r2], -4 \n\t" + "neg %[r5], %[r2] \n\t" + "sllv %[r6], %[r4], %[r2] \n\t" + "srav %[r5], %[r4], %[r5] \n\t" + "slt %[r2], %[r2], $0 \n\t" + "movn %[r6], %[r5], %[r2] \n\t" + "sllv %[r3], %[const_1], %[r3] \n\t" + "addu %[r2], %[r3], %[r6] \n\t" + "clz %[r4], %[r2] \n\t" + "clo %[r5], %[r2] \n\t" + "slt %[r8], %[r2], $0 \n\t" + "movn %[r4], %[r5], %[r8] \n\t" + "addiu %[r4], %[r4], -1 \n\t" + "slt %[r5], $0, %[r2] \n\t" + "or %[r5], %[r5], %[r7] \n\t" + "movz %[r4], $0, %[r5] \n\t" + "addiu %[r6], %[r7], -7 \n\t" + "addu %[r6], %[r6], %[r4] \n\t" + "bltz %[r6], 1f \n\t" + " nop \n\t" + "addiu %[r4], %[r6], -8 \n\t" + "neg %[r3], %[r4] \n\t" + "srav %[r5], %[r2], %[r3] \n\t" + "mul %[r5], %[r5], %[r1] \n\t" + "mul %[r2], %[r2], %[r1] \n\t" + "slt %[r4], %[r4], $0 \n\t" + "srav %[r5], %[r5], %[r6] \n\t" + "sra %[r2], %[r2], 8 \n\t" + "movn %[r2], %[r5], %[r4] \n\t" + "sll %[r3], %[r0], 8 \n\t" + "addu %[r2], %[r0], %[r2] \n\t" + "divu %[r3], %[r3], %[r2] \n\t" + "1: \n\t" + ".set pop \n\t" + : [r2] "+r" (r2), [r3] "=&r" (r3), [r4] "=&r" (r4), + [r5] "=&r" (r5), [r6] "=&r" (r6), [r7] "=&r" (r7), + [r8] "=&r" (r8), [r9] "=&r" (r9) + : [r0] "r" (r0), [r1] "r" (r1), [const_23637] "r" (const_23637), + [const_neg8] "r" (const_neg8), [const_84] "r" (const_84), + [const_1] "r" (const_1), [const_44] "r" (const_44) + : "hi", "lo" + ); + nonSpeechProbFinal[i] = r3; + } + } + } +} + +// Update analysis buffer for lower band, and window data before FFT. 
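The assembly that follows is a hand-unrolled form of a rounded Q14 window multiply over the analysis buffer. A plain-C rendering of the same loop, matching the rounding the assembly applies (a reference sketch, not patch code):

// Plain-C equivalent of the windowing loop below: a rounded Q14 multiply
// of the analysis window against the buffered input samples.
#include <stddef.h>
#include <stdint.h>

static void WindowDataRef(const int16_t* window, const int16_t* ana_buf,
                          int16_t* out, size_t ana_len) {
  size_t i;
  for (i = 0; i < ana_len; i++) {
    out[i] = (int16_t)((window[i] * ana_buf[i] + (1 << 13)) >> 14);
  }
}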
+void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech) { + int iters, after; + int anaLen = (int)inst->anaLen; + int *window = (int*)inst->window; + int *anaBuf = (int*)inst->analysisBuffer; + int *outBuf = (int*)out; + int r0, r1, r2, r3, r4, r5, r6, r7; +#if defined(MIPS_DSP_R1_LE) + int r8; +#endif + + // For lower band update analysis buffer. + memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, new_speech, + inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + + // Window data before FFT. +#if defined(MIPS_DSP_R1_LE) + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "sra %[iters], %[anaLen], 3 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lw %[r0], 0(%[window]) \n\t" + "lw %[r1], 0(%[anaBuf]) \n\t" + "lw %[r2], 4(%[window]) \n\t" + "lw %[r3], 4(%[anaBuf]) \n\t" + "lw %[r4], 8(%[window]) \n\t" + "lw %[r5], 8(%[anaBuf]) \n\t" + "lw %[r6], 12(%[window]) \n\t" + "lw %[r7], 12(%[anaBuf]) \n\t" + "muleq_s.w.phl %[r8], %[r0], %[r1] \n\t" + "muleq_s.w.phr %[r0], %[r0], %[r1] \n\t" + "muleq_s.w.phl %[r1], %[r2], %[r3] \n\t" + "muleq_s.w.phr %[r2], %[r2], %[r3] \n\t" + "muleq_s.w.phl %[r3], %[r4], %[r5] \n\t" + "muleq_s.w.phr %[r4], %[r4], %[r5] \n\t" + "muleq_s.w.phl %[r5], %[r6], %[r7] \n\t" + "muleq_s.w.phr %[r6], %[r6], %[r7] \n\t" +#if defined(MIPS_DSP_R2_LE) + "precr_sra_r.ph.w %[r8], %[r0], 15 \n\t" + "precr_sra_r.ph.w %[r1], %[r2], 15 \n\t" + "precr_sra_r.ph.w %[r3], %[r4], 15 \n\t" + "precr_sra_r.ph.w %[r5], %[r6], 15 \n\t" + "sw %[r8], 0(%[outBuf]) \n\t" + "sw %[r1], 4(%[outBuf]) \n\t" + "sw %[r3], 8(%[outBuf]) \n\t" + "sw %[r5], 12(%[outBuf]) \n\t" +#else + "shra_r.w %[r8], %[r8], 15 \n\t" + "shra_r.w %[r0], %[r0], 15 \n\t" + "shra_r.w %[r1], %[r1], 15 \n\t" + "shra_r.w %[r2], %[r2], 15 \n\t" + "shra_r.w %[r3], %[r3], 15 \n\t" + "shra_r.w %[r4], %[r4], 15 \n\t" + "shra_r.w %[r5], %[r5], 15 \n\t" + "shra_r.w %[r6], %[r6], 15 \n\t" + "sll %[r0], %[r0], 16 \n\t" + "sll %[r2], %[r2], 16 \n\t" + "sll %[r4], %[r4], 16 \n\t" + "sll %[r6], %[r6], 16 \n\t" + "packrl.ph %[r0], %[r8], %[r0] \n\t" + "packrl.ph %[r2], %[r1], %[r2] \n\t" + "packrl.ph %[r4], %[r3], %[r4] \n\t" + "packrl.ph %[r6], %[r5], %[r6] \n\t" + "sw %[r0], 0(%[outBuf]) \n\t" + "sw %[r2], 4(%[outBuf]) \n\t" + "sw %[r4], 8(%[outBuf]) \n\t" + "sw %[r6], 12(%[outBuf]) \n\t" +#endif + "addiu %[window], %[window], 16 \n\t" + "addiu %[anaBuf], %[anaBuf], 16 \n\t" + "addiu %[outBuf], %[outBuf], 16 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "andi %[after], %[anaLen], 7 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "shra_r.w %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [r8] "=&r" (r8), + [iters] "=&r" (iters), [after] "=&r" (after), + [window] "+r" (window),[anaBuf] "+r" (anaBuf), + [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#else + __asm __volatile( + ".set push \n\t" + 
".set noreorder \n\t" + "sra %[iters], %[anaLen], 2 \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[anaBuf]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[anaBuf]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[anaBuf], %[anaBuf], 8 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "sh %[r0], 0(%[outBuf]) \n\t" + "sh %[r2], 2(%[outBuf]) \n\t" + "sh %[r4], 4(%[outBuf]) \n\t" + "sh %[r6], 6(%[outBuf]) \n\t" + "addiu %[outBuf], %[outBuf], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "andi %[after], %[anaLen], 3 \n\t" + "3: \n\t" + "blez %[after], 4f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[anaBuf]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[anaBuf], %[anaBuf], 2 \n\t" + "addiu %[outBuf], %[outBuf], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sh %[r0], -2(%[outBuf]) \n\t" + "b 3b \n\t" + " addiu %[after], %[after], -1 \n\t" + "4: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "=&r" (iters), + [after] "=&r" (after), [window] "+r" (window), + [anaBuf] "+r" (anaBuf), [outBuf] "+r" (outBuf) + : [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); +#endif +} + +// For the noise supression process, synthesis, read out fully processed +// segment, and update synthesis buffer. 
+void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor) { + int iters = (int)inst->blockLen10ms >> 2; + int after = inst->blockLen10ms & 3; + int r0, r1, r2, r3, r4, r5, r6, r7; + int16_t *window = (int16_t*)inst->window; + int16_t *real = inst->real; + int16_t *synthBuf = inst->synthesisBuffer; + int16_t *out = out_frame; + int sat_pos = 0x7fff; + int sat_neg = 0xffff8000; + int block10 = (int)inst->blockLen10ms; + int anaLen = (int)inst->anaLen; + + __asm __volatile( + ".set push \n\t" + ".set noreorder \n\t" + "1: \n\t" + "blez %[iters], 2f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], %[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r2], 2(%[out]) \n\t" + "sh %[r4], 4(%[out]) \n\t" + "sh %[r6], 6(%[out]) \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "addiu %[out], %[out], 8 \n\t" + "b 1b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "2: \n\t" + "blez %[after], 3f \n\t" + " subu %[block10], %[anaLen], %[block10] \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "addiu %[window], 
%[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r0], 0(%[out]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "addiu %[out], %[out], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "3: \n\t" + "sra %[iters], %[block10], 2 \n\t" + "4: \n\t" + "blez %[iters], 5f \n\t" + " andi %[after], %[block10], 3 \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "lh %[r2], 2(%[window]) \n\t" + "lh %[r3], 2(%[real]) \n\t" + "lh %[r4], 4(%[window]) \n\t" + "lh %[r5], 4(%[real]) \n\t" + "lh %[r6], 6(%[window]) \n\t" + "lh %[r7], 6(%[real]) \n\t" + "mul %[r0], %[r0], %[r1] \n\t" + "mul %[r2], %[r2], %[r3] \n\t" + "mul %[r4], %[r4], %[r5] \n\t" + "mul %[r6], %[r6], %[r7] \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "addiu %[r2], %[r2], 0x2000 \n\t" + "addiu %[r4], %[r4], 0x2000 \n\t" + "addiu %[r6], %[r6], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "sra %[r2], %[r2], 14 \n\t" + "sra %[r4], %[r4], 14 \n\t" + "sra %[r6], %[r6], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "mul %[r2], %[r2], %[gain_factor] \n\t" + "mul %[r4], %[r4], %[gain_factor] \n\t" + "mul %[r6], %[r6], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "addiu %[r2], %[r2], 0x1000 \n\t" + "addiu %[r4], %[r4], 0x1000 \n\t" + "addiu %[r6], %[r6], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "sra %[r2], %[r2], 13 \n\t" + "sra %[r4], %[r4], 13 \n\t" + "sra %[r6], %[r6], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "lh %[r3], 2(%[synthBuf]) \n\t" + "lh %[r5], 4(%[synthBuf]) \n\t" + "lh %[r7], 6(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "addu %[r2], %[r2], %[r3] \n\t" + "addu %[r4], %[r4], %[r5] \n\t" + "addu %[r6], %[r6], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "slt %[r3], %[r2], %[sat_pos] \n\t" + "slt %[r5], %[r4], %[sat_pos] \n\t" + "slt %[r7], %[r6], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "movz %[r2], %[sat_pos], %[r3] \n\t" + "movz %[r4], %[sat_pos], %[r5] \n\t" + "movz %[r6], %[sat_pos], %[r7] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "slt %[r3], %[r2], %[sat_neg] \n\t" + "slt %[r5], %[r4], %[sat_neg] \n\t" + "slt %[r7], %[r6], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "movn %[r2], %[sat_neg], %[r3] \n\t" + "movn %[r4], %[sat_neg], %[r5] \n\t" + "movn %[r6], %[sat_neg], %[r7] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "sh %[r2], 2(%[synthBuf]) \n\t" + "sh %[r4], 4(%[synthBuf]) \n\t" + "sh %[r6], 6(%[synthBuf]) \n\t" + "addiu %[window], %[window], 8 \n\t" + "addiu %[real], %[real], 8 \n\t" + "addiu %[synthBuf],%[synthBuf], 8 \n\t" + "b 4b \n\t" + " addiu %[iters], %[iters], -1 \n\t" + "5: \n\t" + "blez %[after], 6f \n\t" + " nop \n\t" + "lh %[r0], 0(%[window]) \n\t" + "lh %[r1], 0(%[real]) \n\t" + "mul %[r0], 
%[r0], %[r1] \n\t" + "addiu %[window], %[window], 2 \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[r0], %[r0], 0x2000 \n\t" + "sra %[r0], %[r0], 14 \n\t" + "mul %[r0], %[r0], %[gain_factor] \n\t" + "addiu %[r0], %[r0], 0x1000 \n\t" + "sra %[r0], %[r0], 13 \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "lh %[r1], 0(%[synthBuf]) \n\t" + "addu %[r0], %[r0], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_pos] \n\t" + "movz %[r0], %[sat_pos], %[r1] \n\t" + "slt %[r1], %[r0], %[sat_neg] \n\t" + "movn %[r0], %[sat_neg], %[r1] \n\t" + "sh %[r0], 0(%[synthBuf]) \n\t" + "addiu %[synthBuf],%[synthBuf], 2 \n\t" + "b 2b \n\t" + " addiu %[after], %[after], -1 \n\t" + "6: \n\t" + ".set pop \n\t" + : [r0] "=&r" (r0), [r1] "=&r" (r1), [r2] "=&r" (r2), + [r3] "=&r" (r3), [r4] "=&r" (r4), [r5] "=&r" (r5), + [r6] "=&r" (r6), [r7] "=&r" (r7), [iters] "+r" (iters), + [after] "+r" (after), [block10] "+r" (block10), + [window] "+r" (window), [real] "+r" (real), + [synthBuf] "+r" (synthBuf), [out] "+r" (out) + : [gain_factor] "r" (gain_factor), [sat_pos] "r" (sat_pos), + [sat_neg] "r" (sat_neg), [anaLen] "r" (anaLen) + : "memory", "hi", "lo" + ); + + // update synthesis buffer + memcpy(inst->synthesisBuffer, inst->synthesisBuffer + inst->blockLen10ms, + (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->synthesisBuffer)); + WebRtcSpl_ZerosArrayW16(inst->synthesisBuffer + + inst->anaLen - inst->blockLen10ms, inst->blockLen10ms); +} + +// Filter the data in the frequency domain, and create spectrum. +void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst, + int16_t* freq_buf) { + uint16_t *noiseSupFilter = inst->noiseSupFilter; + int16_t *real = inst->real; + int16_t *imag = inst->imag; + int32_t loop_count = 2; + int16_t tmp_1, tmp_2, tmp_3, tmp_4, tmp_5, tmp_6; + int16_t tmp16 = (int16_t)(inst->anaLen << 1) - 4; + int16_t* freq_buf_f = freq_buf; + int16_t* freq_buf_s = &freq_buf[tmp16]; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + //first sample + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "addiu %[real], %[real], 2 \n\t" + "addiu %[imag], %[imag], 2 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 2 \n\t" + "addiu %[freq_buf_f], %[freq_buf_f], 4 \n\t" + "1: \n\t" + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "addiu %[loop_count], %[loop_count], 2 \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "addiu %[noiseSupFilter], %[noiseSupFilter], 4 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_5], 0(%[freq_buf_s]) \n\t" + "sh %[tmp_6], 
2(%[imag]) \n\t" + "sh %[tmp_6], 2(%[freq_buf_s]) \n\t" + "negu %[tmp_6], %[tmp_6] \n\t" + "addiu %[freq_buf_s], %[freq_buf_s], -8 \n\t" + "addiu %[real], %[real], 4 \n\t" + "addiu %[imag], %[imag], 4 \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "blt %[loop_count], %[loop_size], 1b \n\t" + " addiu %[freq_buf_f], %[freq_buf_f], 8 \n\t" + //last two samples: + "lh %[tmp_1], 0(%[noiseSupFilter]) \n\t" + "lh %[tmp_2], 0(%[real]) \n\t" + "lh %[tmp_3], 0(%[imag]) \n\t" + "lh %[tmp_4], 2(%[noiseSupFilter]) \n\t" + "lh %[tmp_5], 2(%[real]) \n\t" + "lh %[tmp_6], 2(%[imag]) \n\t" + "mul %[tmp_2], %[tmp_2], %[tmp_1] \n\t" + "mul %[tmp_3], %[tmp_3], %[tmp_1] \n\t" + "mul %[tmp_5], %[tmp_5], %[tmp_4] \n\t" + "mul %[tmp_6], %[tmp_6], %[tmp_4] \n\t" + "sra %[tmp_2], %[tmp_2], 14 \n\t" + "sra %[tmp_3], %[tmp_3], 14 \n\t" + "sra %[tmp_5], %[tmp_5], 14 \n\t" + "sra %[tmp_6], %[tmp_6], 14 \n\t" + "sh %[tmp_2], 0(%[real]) \n\t" + "sh %[tmp_2], 4(%[freq_buf_s]) \n\t" + "sh %[tmp_3], 0(%[imag]) \n\t" + "sh %[tmp_3], 6(%[freq_buf_s]) \n\t" + "negu %[tmp_3], %[tmp_3] \n\t" + "sh %[tmp_2], 0(%[freq_buf_f]) \n\t" + "sh %[tmp_3], 2(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 4(%[freq_buf_f]) \n\t" + "sh %[tmp_6], 6(%[freq_buf_f]) \n\t" + "sh %[tmp_5], 2(%[real]) \n\t" + "sh %[tmp_6], 2(%[imag]) \n\t" + ".set pop \n\t" + : [real] "+r" (real), [imag] "+r" (imag), + [freq_buf_f] "+r" (freq_buf_f), [freq_buf_s] "+r" (freq_buf_s), + [loop_count] "+r" (loop_count), [noiseSupFilter] "+r" (noiseSupFilter), + [tmp_1] "=&r" (tmp_1), [tmp_2] "=&r" (tmp_2), [tmp_3] "=&r" (tmp_3), + [tmp_4] "=&r" (tmp_4), [tmp_5] "=&r" (tmp_5), [tmp_6] "=&r" (tmp_6) + : [loop_size] "r" (inst->anaLen2) + : "memory", "hi", "lo" + ); +} + +#if defined(MIPS_DSP_R1_LE) +// Denormalize the real-valued signal |in|, the output from inverse FFT. 
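Denormalization undoes the pre-FFT scaling: with shift = factor - normData, a positive shift becomes a saturating left shift (the shllv_s.ph instruction below) and a negative one a plain arithmetic right shift. The scalar equivalent (a reference sketch, not patch code):

// Scalar equivalent of the denormalization below; 'shift' corresponds to
// factor - inst->normData in the function that follows.
#include <stddef.h>
#include <stdint.h>

static void DenormalizeRef(const int16_t* in, int16_t* real_out,
                           size_t ana_len, int shift) {
  size_t i;
  for (i = 0; i < ana_len; i++) {
    int32_t v = (shift >= 0) ? ((int32_t)in[i] << shift) : (in[i] >> -shift);
    if (v > 32767) v = 32767;    // saturate like shllv_s.ph
    if (v < -32768) v = -32768;
    real_out[i] = (int16_t)v;
  }
}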
+void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst, + int16_t* in, + int factor) { + int32_t r0, r1, r2, r3, t0; + int len = (int)inst->anaLen; + int16_t *out = &inst->real[0]; + int shift = factor - inst->normData; + + __asm __volatile ( + ".set push \n\t" + ".set noreorder \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "bltz %[shift], 4f \n\t" + " sra %[t0], %[len], 2 \n\t" + "beqz %[t0], 2f \n\t" + " andi %[len], %[len], 3 \n\t" + "1: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "shllv_s.ph %[r1], %[r1], %[shift] \n\t" + "shllv_s.ph %[r2], %[r2], %[shift] \n\t" + "shllv_s.ph %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 1b \n\t" + " addiu %[out], %[out], 8 \n\t" + "2: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "3: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "shllv_s.ph %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 3b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "b 8f \n\t" + "4: \n\t" + "negu %[shift], %[shift] \n\t" + "beqz %[t0], 6f \n\t" + " andi %[len], %[len], 3 \n\t" + "5: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "lh %[r1], 2(%[in]) \n\t" + "lh %[r2], 4(%[in]) \n\t" + "lh %[r3], 6(%[in]) \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "srav %[r1], %[r1], %[shift] \n\t" + "srav %[r2], %[r2], %[shift] \n\t" + "srav %[r3], %[r3], %[shift] \n\t" + "addiu %[in], %[in], 8 \n\t" + "addiu %[t0], %[t0], -1 \n\t" + "sh %[r0], 0(%[out]) \n\t" + "sh %[r1], 2(%[out]) \n\t" + "sh %[r2], 4(%[out]) \n\t" + "sh %[r3], 6(%[out]) \n\t" + "bgtz %[t0], 5b \n\t" + " addiu %[out], %[out], 8 \n\t" + "6: \n\t" + "beqz %[len], 8f \n\t" + " nop \n\t" + "7: \n\t" + "lh %[r0], 0(%[in]) \n\t" + "addiu %[in], %[in], 2 \n\t" + "addiu %[len], %[len], -1 \n\t" + "srav %[r0], %[r0], %[shift] \n\t" + "addiu %[out], %[out], 2 \n\t" + "bgtz %[len], 7b \n\t" + " sh %[r0], -2(%[out]) \n\t" + "8: \n\t" + ".set pop \n\t" + : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1), + [r2] "=&r" (r2), [r3] "=&r" (r3) + : [len] "r" (len), [shift] "r" (shift), [in] "r" (in), + [out] "r" (out) + : "memory" + ); +} +#endif + +// Normalize the real-valued signal |in|, the input to forward FFT. 
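Normalization is the mirror image, and simpler: a plain left shift by inst->normData, the headroom measured elsewhere from the block's peak value, so the fixed-point forward FFT runs with maximal precision. No saturation is needed because normData is chosen so the shift cannot overflow. The scalar equivalent (a reference sketch, not patch code):

// Scalar equivalent of the pre-FFT normalization below.
#include <stddef.h>
#include <stdint.h>

static void NormalizeRef(const int16_t* in, int16_t* out,
                         size_t ana_len, int norm_data) {
  size_t i;
  for (i = 0; i < ana_len; i++) {
    out[i] = (int16_t)(in[i] << norm_data);
  }
}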
+void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
+                                        const int16_t* in,
+                                        int16_t* out) {
+  int32_t r0, r1, r2, r3, t0;
+  int len = (int)inst->anaLen;
+  int shift = inst->normData;
+
+  __asm __volatile (
+    ".set push \n\t"
+    ".set noreorder \n\t"
+    "beqz %[len], 4f \n\t"
+    " sra %[t0], %[len], 2 \n\t"
+    "beqz %[t0], 2f \n\t"
+    " andi %[len], %[len], 3 \n\t"
+    "1: \n\t"
+    "lh %[r0], 0(%[in]) \n\t"
+    "lh %[r1], 2(%[in]) \n\t"
+    "lh %[r2], 4(%[in]) \n\t"
+    "lh %[r3], 6(%[in]) \n\t"
+    "sllv %[r0], %[r0], %[shift] \n\t"
+    "sllv %[r1], %[r1], %[shift] \n\t"
+    "sllv %[r2], %[r2], %[shift] \n\t"
+    "sllv %[r3], %[r3], %[shift] \n\t"
+    "addiu %[in], %[in], 8 \n\t"
+    "addiu %[t0], %[t0], -1 \n\t"
+    "sh %[r0], 0(%[out]) \n\t"
+    "sh %[r1], 2(%[out]) \n\t"
+    "sh %[r2], 4(%[out]) \n\t"
+    "sh %[r3], 6(%[out]) \n\t"
+    "bgtz %[t0], 1b \n\t"
+    " addiu %[out], %[out], 8 \n\t"
+    "2: \n\t"
+    "beqz %[len], 4f \n\t"
+    " nop \n\t"
+    "3: \n\t"
+    "lh %[r0], 0(%[in]) \n\t"
+    "addiu %[in], %[in], 2 \n\t"
+    "addiu %[len], %[len], -1 \n\t"
+    "sllv %[r0], %[r0], %[shift] \n\t"
+    "addiu %[out], %[out], 2 \n\t"
+    "bgtz %[len], 3b \n\t"
+    " sh %[r0], -2(%[out]) \n\t"
+    "4: \n\t"
+    ".set pop \n\t"
+    : [t0] "=&r" (t0), [r0] "=&r" (r0), [r1] "=&r" (r1),
+      [r2] "=&r" (r2), [r3] "=&r" (r3)
+    : [len] "r" (len), [shift] "r" (shift), [in] "r" (in),
+      [out] "r" (out)
+    : "memory"
+  );
+}
+
diff --git a/webrtc/modules/audio_processing/ns/nsx_core_neon.c b/webrtc/modules/audio_processing/ns/nsx_core_neon.c
index 82f02ae..65788ae 100644
--- a/webrtc/modules/audio_processing/ns/nsx_core_neon.c
+++ b/webrtc/modules/audio_processing/ns/nsx_core_neon.c
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  * Use of this source code is governed by a BSD-style license
  * that can be found in the LICENSE file in the root of the source
@@ -8,25 +8,140 @@
  * be found in the AUTHORS file in the root of the source tree.
  */

-#if defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)
-
-#include "nsx_core.h"
+#include "webrtc/modules/audio_processing/ns/nsx_core.h"

 #include <arm_neon.h>
 #include <assert.h>

-void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
-                               WebRtc_Word16* qNoise) {
-  WebRtc_Word32 numerator;
+// Constants to compensate for shifting signal log(2^shifts).
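The three tables that follow are precomputed fixed-point constants: kLogTable[i] = round(i * ln 2 * 2^8), the Q8 natural log of 2^i; kCounterDiv[c] = round(2^15 / (c + 1)) in Q15, clamped to 32767 at c = 0; and kLogTableFrac[f] = round(log2(1 + f/256) * 2^8) in Q8. A small generator, for reference only (not part of the patch):

// Regenerates the tables below; compile with -lm.
#include <math.h>
#include <stdio.h>

int main(void) {
  int i;
  for (i = 0; i < 9; i++)      // WebRtcNsx_kLogTable, Q8
    printf("%.0f, ", floor(i * log(2.0) * 256.0 + 0.5));
  printf("\n");
  for (i = 0; i < 201; i++) {  // WebRtcNsx_kCounterDiv, Q15, int16-clamped
    double v = floor(32768.0 / (i + 1) + 0.5);
    printf("%.0f, ", v > 32767.0 ? 32767.0 : v);
  }
  printf("\n");
  for (i = 0; i < 256; i++)    // WebRtcNsx_kLogTableFrac, Q8
    printf("%.0f, ", floor(log2(1.0 + i / 256.0) * 256.0 + 0.5));
  printf("\n");
  return 0;
}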
+const int16_t WebRtcNsx_kLogTable[9] = { + 0, 177, 355, 532, 710, 887, 1065, 1242, 1420 +}; - WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac; - WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2; - WebRtc_Word16 log2Const = 22713; - WebRtc_Word16 widthFactor = 21845; +const int16_t WebRtcNsx_kCounterDiv[201] = { + 32767, 16384, 10923, 8192, 6554, 5461, 4681, 4096, 3641, 3277, 2979, 2731, + 2521, 2341, 2185, 2048, 1928, 1820, 1725, 1638, 1560, 1489, 1425, 1365, 1311, + 1260, 1214, 1170, 1130, 1092, 1057, 1024, 993, 964, 936, 910, 886, 862, 840, + 819, 799, 780, 762, 745, 728, 712, 697, 683, 669, 655, 643, 630, 618, 607, + 596, 585, 575, 565, 555, 546, 537, 529, 520, 512, 504, 496, 489, 482, 475, + 468, 462, 455, 449, 443, 437, 431, 426, 420, 415, 410, 405, 400, 395, 390, + 386, 381, 377, 372, 368, 364, 360, 356, 352, 349, 345, 341, 338, 334, 331, + 328, 324, 321, 318, 315, 312, 309, 306, 303, 301, 298, 295, 293, 290, 287, + 285, 282, 280, 278, 275, 273, 271, 269, 266, 264, 262, 260, 258, 256, 254, + 252, 250, 248, 246, 245, 243, 241, 239, 237, 236, 234, 232, 231, 229, 228, + 226, 224, 223, 221, 220, 218, 217, 216, 214, 213, 211, 210, 209, 207, 206, + 205, 204, 202, 201, 200, 199, 197, 196, 195, 194, 193, 192, 191, 189, 188, + 187, 186, 185, 184, 183, 182, 181, 180, 179, 178, 177, 176, 175, 174, 173, + 172, 172, 171, 170, 169, 168, 167, 166, 165, 165, 164, 163 +}; - int i, s, offset; +const int16_t WebRtcNsx_kLogTableFrac[256] = { + 0, 1, 3, 4, 6, 7, 9, 10, 11, 13, 14, 16, 17, 18, 20, 21, + 22, 24, 25, 26, 28, 29, 30, 32, 33, 34, 36, 37, 38, 40, 41, 42, + 44, 45, 46, 47, 49, 50, 51, 52, 54, 55, 56, 57, 59, 60, 61, 62, + 63, 65, 66, 67, 68, 69, 71, 72, 73, 74, 75, 77, 78, 79, 80, 81, + 82, 84, 85, 86, 87, 88, 89, 90, 92, 93, 94, 95, 96, 97, 98, 99, + 100, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 116, + 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, + 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, + 147, 148, 149, 150, 151, 152, 153, 154, 155, 155, 156, 157, 158, 159, 160, + 161, 162, 163, 164, 165, 166, 167, 168, 169, 169, 170, 171, 172, 173, 174, + 175, 176, 177, 178, 178, 179, 180, 181, 182, 183, 184, 185, 185, 186, 187, + 188, 189, 190, 191, 192, 192, 193, 194, 195, 196, 197, 198, 198, 199, 200, + 201, 202, 203, 203, 204, 205, 206, 207, 208, 208, 209, 210, 211, 212, 212, + 213, 214, 215, 216, 216, 217, 218, 219, 220, 220, 221, 222, 223, 224, 224, + 225, 226, 227, 228, 228, 229, 230, 231, 231, 232, 233, 234, 234, 235, 236, + 237, 238, 238, 239, 240, 241, 241, 242, 243, 244, 244, 245, 246, 247, 247, + 248, 249, 249, 250, 251, 252, 252, 253, 254, 255, 255 +}; - numerator = FACTOR_Q16; +// Update the noise estimation information. 
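The vectorized loop below converts log-domain quantiles back to linear magnitudes: quantile = exp(lquantile), computed as 2^(lquantile * log2 e) with kExp2Const = round(log2(e) * 2^13) = 11819. The scalar form, which also appears as the loop's tail iteration further down (a reference sketch, not patch code):

// Scalar form of the exp() evaluated by the NEON body below. lquantile is
// the Q8 natural-log quantile; its product with kExp2Const (log2(e) in Q13)
// is a Q21 base-2 exponent whose low 21 bits select the mantissa and whose
// integer part becomes a final shift into Q(q_noise).
#include <stdint.h>

static int16_t ExpOfLogQuantileRef(int16_t lquantile_q8, int q_noise) {
  const int16_t kExp2Const = 11819;  // round(log2(e) * 2^13)
  int32_t prod_q21 = kExp2Const * lquantile_q8;
  int32_t mant = 0x00200000 | (prod_q21 & 0x001FFFFF);  // 2^21 * 2^frac
  int shift = (int)(prod_q21 >> 21) - 21 + q_noise;
  int32_t v = (shift < 0) ? (mant >> -shift) : (mant << shift);
  if (v > 32767) v = 32767;          // WebRtcSpl_SatW32ToW16 equivalent
  if (v < -32768) v = -32768;
  return (int16_t)v;
}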
+static void UpdateNoiseEstimateNeon(NoiseSuppressionFixedC* inst, int offset) { + const int16_t kExp2Const = 11819; // Q13 + int16_t* ptr_noiseEstLogQuantile = NULL; + int16_t* ptr_noiseEstQuantile = NULL; + int16x4_t kExp2Const16x4 = vdup_n_s16(kExp2Const); + int32x4_t twentyOne32x4 = vdupq_n_s32(21); + int32x4_t constA32x4 = vdupq_n_s32(0x1fffff); + int32x4_t constB32x4 = vdupq_n_s32(0x200000); + + int16_t tmp16 = WebRtcSpl_MaxValueW16(inst->noiseEstLogQuantile + offset, + inst->magnLen); + + // Guarantee a Q-domain as high as possible and still fit in int16 + inst->qNoise = 14 - (int) WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(kExp2Const, + tmp16, + 21); + + int32x4_t qNoise32x4 = vdupq_n_s32(inst->qNoise); + + for (ptr_noiseEstLogQuantile = &inst->noiseEstLogQuantile[offset], + ptr_noiseEstQuantile = &inst->noiseEstQuantile[0]; + ptr_noiseEstQuantile < &inst->noiseEstQuantile[inst->magnLen - 3]; + ptr_noiseEstQuantile += 4, ptr_noiseEstLogQuantile += 4) { + + // tmp32no2 = kExp2Const * inst->noiseEstLogQuantile[offset + i]; + int16x4_t v16x4 = vld1_s16(ptr_noiseEstLogQuantile); + int32x4_t v32x4B = vmull_s16(v16x4, kExp2Const16x4); + + // tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + int32x4_t v32x4A = vandq_s32(v32x4B, constA32x4); + v32x4A = vorrq_s32(v32x4A, constB32x4); + + // tmp16 = (int16_t)(tmp32no2 >> 21); + v32x4B = vshrq_n_s32(v32x4B, 21); + + // tmp16 -= 21;// shift 21 to get result in Q0 + v32x4B = vsubq_s32(v32x4B, twentyOne32x4); + + // tmp16 += (int16_t) inst->qNoise; + // shift to get result in Q(qNoise) + v32x4B = vaddq_s32(v32x4B, qNoise32x4); + + // if (tmp16 < 0) { + // tmp32no1 >>= -tmp16; + // } else { + // tmp32no1 <<= tmp16; + // } + v32x4B = vshlq_s32(v32x4A, v32x4B); + + // tmp16 = WebRtcSpl_SatW32ToW16(tmp32no1); + v16x4 = vqmovn_s32(v32x4B); + + //inst->noiseEstQuantile[i] = tmp16; + vst1_s16(ptr_noiseEstQuantile, v16x4); + } + + // Last iteration: + + // inst->quantile[i]=exp(inst->lquantile[offset+i]); + // in Q21 + int32_t tmp32no2 = kExp2Const * *ptr_noiseEstLogQuantile; + int32_t tmp32no1 = (0x00200000 | (tmp32no2 & 0x001FFFFF)); // 2^21 + frac + + tmp16 = (int16_t)(tmp32no2 >> 21); + tmp16 -= 21;// shift 21 to get result in Q0 + tmp16 += (int16_t) inst->qNoise; //shift to get result in Q(qNoise) + if (tmp16 < 0) { + tmp32no1 >>= -tmp16; + } else { + tmp32no1 <<= tmp16; + } + *ptr_noiseEstQuantile = WebRtcSpl_SatW32ToW16(tmp32no1); +} + +// Noise Estimation +void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst, + uint16_t* magn, + uint32_t* noise, + int16_t* q_noise) { + int16_t lmagn[HALF_ANAL_BLOCKL], counter, countDiv; + int16_t countProd, delta, zeros, frac; + int16_t log2, tabind, logval, tmp16, tmp16no1, tmp16no2; + const int16_t log2_const = 22713; + const int16_t width_factor = 21845; + + size_t i, s, offset; tabind = inst->stages - inst->normData; assert(tabind < 9); @@ -45,13 +160,15 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo // lmagn in Q8 for (i = 0; i < inst->magnLen; i++) { if (magn[i]) { - zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]); - frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23); + zeros = WebRtcSpl_NormU32((uint32_t)magn[i]); + frac = (int16_t)((((uint32_t)magn[i] << zeros) + & 0x7FFFFFFF) >> 23); assert(frac < 256); // log2(magn(i)) - log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]); + log2 = (int16_t)(((31 - zeros) << 8) + + WebRtcNsx_kLogTableFrac[frac]); // log2(magn(i))*log(2) - lmagn[i] = 
(WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15); + lmagn[i] = (int16_t)((log2 * log2_const) >> 15); // + log(2^stages) lmagn[i] += logval; } else { @@ -61,9 +178,9 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo int16x4_t Q3_16x4 = vdup_n_s16(3); int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8); - int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor); + int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(width_factor); - WebRtc_Word16 factor = FACTOR_Q7; + int16_t factor = FACTOR_Q7; if (inst->blockIndex < END_STARTUP_LONG) factor = FACTOR_Q7_STARTUP; @@ -75,10 +192,10 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo counter = inst->noiseEstCounter[s]; assert(counter < 201); countDiv = WebRtcNsx_kCounterDiv[counter]; - countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv); + countProd = (int16_t)(counter * countDiv); // quant_est(...) - WebRtc_Word16 deltaBuff[8]; + int16_t deltaBuff[8]; int16x4_t tmp16x4_0; int16x4_t tmp16x4_1; int16x4_t countDiv_16x4 = vdup_n_s16(countDiv); @@ -88,11 +205,10 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo int16x8_t tmp16x8_1; int16x8_t tmp16x8_2; int16x8_t tmp16x8_3; - int16x8_t tmp16x8_4; - int16x8_t tmp16x8_5; + uint16x8_t tmp16x8_4; int32x4_t tmp32x4; - for (i = 0; i < inst->magnLen - 7; i += 8) { + for (i = 0; i + 7 < inst->magnLen; i += 8) { // Compute delta. // Smaller step size during startup. This prevents from using // unrealistic values causing overflow. @@ -102,14 +218,15 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo int j; for (j = 0; j < 8; j++) { if (inst->noiseEstDensity[offset + i + j] > 512) { - deltaBuff[j] = WebRtcSpl_DivW32W16ResW16( - numerator, inst->noiseEstDensity[offset + i + j]); + // Get values for deltaBuff by shifting intead of dividing. + int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i + j]); + deltaBuff[j] = (int16_t)(FACTOR_Q16 >> (14 - factor)); } } // Update log quantile estimate - // tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); + // tmp16 = (int16_t)((delta * countDiv) >> 14); tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4); tmp16x4_1 = vshrn_n_s32(tmp32x4, 14); tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4); @@ -130,11 +247,11 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo // tmp16_1 = (Word16)(tmp16>>1); tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1); - // tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1); + // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1); tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4); tmp16x4_1 = vshrn_n_s32(tmp32x4, 1); - // tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1); + // tmp16_2 = (int16_t)((tmp16_1 * 3) >> 1); tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4); tmp16x4_0 = vshrn_n_s32(tmp32x4, 1); @@ -142,17 +259,15 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0); - // logval is the smallest fixed point representation we can have. Values below - // that will correspond to values in the interval [0, 1], which can't possibly - // occur. + // logval is the smallest fixed point representation we can have. Values + // below that will correspond to values in the interval [0, 1], which + // can't possibly occur. 
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8); // Do the if-else branches: tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines - tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2); - __asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5)); - __asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4)); - __asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4)); + tmp16x8_4 = vcgtq_s16(tmp16x8_3, tmp16x8_2); + tmp16x8_2 = vbslq_s16(tmp16x8_4, tmp16x8_1, tmp16x8_0); vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2); // Update density estimate @@ -165,76 +280,319 @@ void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWo tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2); tmp16x8_3 = vabsq_s16(tmp16x8_3); tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3); - __asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4)); + tmp16x8_1 = vbslq_s16(tmp16x8_4, tmp16x8_0, tmp16x8_1); vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1); - } // End loop over magnitude spectrum + } // End loop over magnitude spectrum - for (; i < inst->magnLen; i++) { - // compute delta - if (inst->noiseEstDensity[offset + i] > 512) { - delta = WebRtcSpl_DivW32W16ResW16(numerator, - inst->noiseEstDensity[offset + i]); - } else { - delta = FACTOR_Q7; - if (inst->blockIndex < END_STARTUP_LONG) { - // Smaller step size during startup. This prevents from using - // unrealistic values causing overflow. - delta = FACTOR_Q7_STARTUP; - } + // Last iteration over magnitude spectrum: + // compute delta + if (inst->noiseEstDensity[offset + i] > 512) { + // Get values for deltaBuff by shifting instead of dividing. + int factor = WebRtcSpl_NormW16(inst->noiseEstDensity[offset + i]); + delta = (int16_t)(FACTOR_Q16 >> (14 - factor)); + } else { + delta = FACTOR_Q7; + if (inst->blockIndex < END_STARTUP_LONG) { + // Smaller step size during startup. This prevents from using + // unrealistic values causing overflow. + delta = FACTOR_Q7_STARTUP; } + } // update log quantile estimate + tmp16 = (int16_t)((delta * countDiv) >> 14); + if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { + // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 + // CounterDiv=1/(inst->counter[s]+1) in Q15 + tmp16 += 2; + inst->noiseEstLogQuantile[offset + i] += tmp16 / 4; + } else { + tmp16 += 1; + // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 + // TODO(bjornv): investigate why we need to truncate twice. + tmp16no2 = (int16_t)((tmp16 / 2) * 3 / 2); + inst->noiseEstLogQuantile[offset + i] -= tmp16no2; + if (inst->noiseEstLogQuantile[offset + i] < logval) { + // logval is the smallest fixed point representation we can have. + // Values below that will correspond to values in the interval + // [0, 1], which can't possibly occur. 
+ inst->noiseEstLogQuantile[offset + i] = logval; + } + } - // update log quantile estimate - tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14); - if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) { - // +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2 - // CounterDiv=1/(inst->counter[s]+1) in Q15 - tmp16 += 2; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2); - inst->noiseEstLogQuantile[offset + i] += tmp16no1; - } else { - tmp16 += 1; - tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1); - // *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2 - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1); - inst->noiseEstLogQuantile[offset + i] -= tmp16no2; - if (inst->noiseEstLogQuantile[offset + i] < logval) { - // logval is the smallest fixed point representation we can have. - // Values below that will correspond to values in the interval - // [0, 1], which can't possibly occur. - inst->noiseEstLogQuantile[offset + i] = logval; - } - } + // update density estimate + if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) + < WIDTH_Q8) { + tmp16no1 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + inst->noiseEstDensity[offset + i], countProd, 15); + tmp16no2 = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( + width_factor, countDiv, 15); + inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; + } - // update density estimate - if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i]) - < WIDTH_Q8) { - tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - inst->noiseEstDensity[offset + i], countProd, 15); - tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND( - widthFactor, countDiv, 15); - inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2; - } - } // end loop over magnitude spectrum if (counter >= END_STARTUP_LONG) { inst->noiseEstCounter[s] = 0; if (inst->blockIndex >= END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); + UpdateNoiseEstimateNeon(inst, offset); } } inst->noiseEstCounter[s]++; - } // end loop over simultaneous estimates + } // end loop over simultaneous estimates // Sequentially update the noise during startup if (inst->blockIndex < END_STARTUP_LONG) { - WebRtcNsx_UpdateNoiseEstimate(inst, offset); + UpdateNoiseEstimateNeon(inst, offset); } for (i = 0; i < inst->magnLen; i++) { - noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise) + noise[i] = (uint32_t)(inst->noiseEstQuantile[i]); // Q(qNoise) } - (*qNoise) = (WebRtc_Word16)inst->qNoise; + (*q_noise) = (int16_t)inst->qNoise; } -#endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID) +// Filter the data in the frequency domain, and create spectrum. +void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst, + int16_t* freq_buf) { + assert(inst->magnLen % 8 == 1); + assert(inst->anaLen2 % 16 == 0); + + // (1) Filtering. 
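+ // (The gains in noiseSupFilter are Q14, so 16384 corresponds to 1.0. As a
+ // worked example with a hypothetical gain of 8192, i.e. 0.5 in Q14, a real
+ // part of 1000 becomes (1000 * 8192) >> 14 = 500.)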
+ + // Fixed point C code for the next block is as follows: + // for (i = 0; i < inst->magnLen; i++) { + // inst->real[i] = (int16_t)((inst->real[i] * + // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) + // inst->imag[i] = (int16_t)((inst->imag[i] * + // (int16_t)(inst->noiseSupFilter[i])) >> 14); // Q(normData-stages) + // } + + int16_t* preal = &inst->real[0]; + int16_t* pimag = &inst->imag[0]; + int16_t* pns_filter = (int16_t*)&inst->noiseSupFilter[0]; + int16_t* pimag_end = pimag + inst->magnLen - 4; + + while (pimag < pimag_end) { + int16x8_t real = vld1q_s16(preal); + int16x8_t imag = vld1q_s16(pimag); + int16x8_t ns_filter = vld1q_s16(pns_filter); + + int32x4_t tmp_r_0 = vmull_s16(vget_low_s16(real), vget_low_s16(ns_filter)); + int32x4_t tmp_i_0 = vmull_s16(vget_low_s16(imag), vget_low_s16(ns_filter)); + int32x4_t tmp_r_1 = vmull_s16(vget_high_s16(real), + vget_high_s16(ns_filter)); + int32x4_t tmp_i_1 = vmull_s16(vget_high_s16(imag), + vget_high_s16(ns_filter)); + + int16x4_t result_r_0 = vshrn_n_s32(tmp_r_0, 14); + int16x4_t result_i_0 = vshrn_n_s32(tmp_i_0, 14); + int16x4_t result_r_1 = vshrn_n_s32(tmp_r_1, 14); + int16x4_t result_i_1 = vshrn_n_s32(tmp_i_1, 14); + + vst1q_s16(preal, vcombine_s16(result_r_0, result_r_1)); + vst1q_s16(pimag, vcombine_s16(result_i_0, result_i_1)); + preal += 8; + pimag += 8; + pns_filter += 8; + } + + // Filter the last element + *preal = (int16_t)((*preal * *pns_filter) >> 14); + *pimag = (int16_t)((*pimag * *pns_filter) >> 14); + + // (2) Create spectrum. + + // Fixed point C code for the rest of the function is as follows: + // freq_buf[0] = inst->real[0]; + // freq_buf[1] = -inst->imag[0]; + // for (i = 1, j = 2; i < inst->anaLen2; i += 1, j += 2) { + // freq_buf[j] = inst->real[i]; + // freq_buf[j + 1] = -inst->imag[i]; + // } + // freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; + // freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; + + preal = &inst->real[0]; + pimag = &inst->imag[0]; + pimag_end = pimag + inst->anaLen2; + int16_t * freq_buf_start = freq_buf; + while (pimag < pimag_end) { + // loop unroll + int16x8x2_t real_imag_0; + int16x8x2_t real_imag_1; + real_imag_0.val[1] = vld1q_s16(pimag); + real_imag_0.val[0] = vld1q_s16(preal); + preal += 8; + pimag += 8; + real_imag_1.val[1] = vld1q_s16(pimag); + real_imag_1.val[0] = vld1q_s16(preal); + preal += 8; + pimag += 8; + + real_imag_0.val[1] = vnegq_s16(real_imag_0.val[1]); + real_imag_1.val[1] = vnegq_s16(real_imag_1.val[1]); + vst2q_s16(freq_buf_start, real_imag_0); + freq_buf_start += 16; + vst2q_s16(freq_buf_start, real_imag_1); + freq_buf_start += 16; + } + freq_buf[inst->anaLen] = inst->real[inst->anaLen2]; + freq_buf[inst->anaLen + 1] = -inst->imag[inst->anaLen2]; +} + +// For the noise suppression process, synthesis, read out fully processed segment, +// and update synthesis buffer. +void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out_frame, + int16_t gain_factor) { + assert(inst->anaLen % 16 == 0); + assert(inst->blockLen10ms % 16 == 0); + + int16_t* preal_start = inst->real; + const int16_t* pwindow = inst->window; + int16_t* preal_end = preal_start + inst->anaLen; + int16_t* psynthesis_buffer = inst->synthesisBuffer; + + while (preal_start < preal_end) { + // Loop unroll. 
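+ // Per lane, the unrolled body below computes tmp = (real * window) >> 14
+ // with rounding and saturation (window is Q14), then
+ // out = (tmp * gain_factor) >> 13 (gain_factor is Q13), and adds the
+ // result into the synthesis buffer with the saturating vqaddq_s16.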
+ int16x8_t window_0 = vld1q_s16(pwindow); + int16x8_t real_0 = vld1q_s16(preal_start); + int16x8_t synthesis_buffer_0 = vld1q_s16(psynthesis_buffer); + + int16x8_t window_1 = vld1q_s16(pwindow + 8); + int16x8_t real_1 = vld1q_s16(preal_start + 8); + int16x8_t synthesis_buffer_1 = vld1q_s16(psynthesis_buffer + 8); + + int32x4_t tmp32a_0_low = vmull_s16(vget_low_s16(real_0), + vget_low_s16(window_0)); + int32x4_t tmp32a_0_high = vmull_s16(vget_high_s16(real_0), + vget_high_s16(window_0)); + + int32x4_t tmp32a_1_low = vmull_s16(vget_low_s16(real_1), + vget_low_s16(window_1)); + int32x4_t tmp32a_1_high = vmull_s16(vget_high_s16(real_1), + vget_high_s16(window_1)); + + int16x4_t tmp16a_0_low = vqrshrn_n_s32(tmp32a_0_low, 14); + int16x4_t tmp16a_0_high = vqrshrn_n_s32(tmp32a_0_high, 14); + + int16x4_t tmp16a_1_low = vqrshrn_n_s32(tmp32a_1_low, 14); + int16x4_t tmp16a_1_high = vqrshrn_n_s32(tmp32a_1_high, 14); + + int32x4_t tmp32b_0_low = vmull_n_s16(tmp16a_0_low, gain_factor); + int32x4_t tmp32b_0_high = vmull_n_s16(tmp16a_0_high, gain_factor); + + int32x4_t tmp32b_1_low = vmull_n_s16(tmp16a_1_low, gain_factor); + int32x4_t tmp32b_1_high = vmull_n_s16(tmp16a_1_high, gain_factor); + + int16x4_t tmp16b_0_low = vqrshrn_n_s32(tmp32b_0_low, 13); + int16x4_t tmp16b_0_high = vqrshrn_n_s32(tmp32b_0_high, 13); + + int16x4_t tmp16b_1_low = vqrshrn_n_s32(tmp32b_1_low, 13); + int16x4_t tmp16b_1_high = vqrshrn_n_s32(tmp32b_1_high, 13); + + synthesis_buffer_0 = vqaddq_s16(vcombine_s16(tmp16b_0_low, tmp16b_0_high), + synthesis_buffer_0); + synthesis_buffer_1 = vqaddq_s16(vcombine_s16(tmp16b_1_low, tmp16b_1_high), + synthesis_buffer_1); + vst1q_s16(psynthesis_buffer, synthesis_buffer_0); + vst1q_s16(psynthesis_buffer + 8, synthesis_buffer_1); + + pwindow += 16; + preal_start += 16; + psynthesis_buffer += 16; + } + + // Read out fully processed segment. + int16_t * p_start = inst->synthesisBuffer; + int16_t * p_end = inst->synthesisBuffer + inst->blockLen10ms; + int16_t * p_frame = out_frame; + while (p_start < p_end) { + int16x8_t frame_0 = vld1q_s16(p_start); + vst1q_s16(p_frame, frame_0); + p_start += 8; + p_frame += 8; + } + + // Update synthesis buffer. + int16_t* p_start_src = inst->synthesisBuffer + inst->blockLen10ms; + int16_t* p_end_src = inst->synthesisBuffer + inst->anaLen; + int16_t* p_start_dst = inst->synthesisBuffer; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + p_start_src += 8; + p_start_dst += 8; + } + + p_start = inst->synthesisBuffer + inst->anaLen - inst->blockLen10ms; + p_end = p_start + inst->blockLen10ms; + int16x8_t zero = vdupq_n_s16(0); + for (;p_start < p_end; p_start += 8) { + vst1q_s16(p_start, zero); + } +} + +// Update analysis buffer for lower band, and window data before FFT. +void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst, + int16_t* out, + int16_t* new_speech) { + assert(inst->blockLen10ms % 16 == 0); + assert(inst->anaLen % 16 == 0); + + // For lower band update analysis buffer. 
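+ // (The vector loop below is an unrolled version of the memmove sketched
+ // next: dst starts below src, so a forward copy in 8-sample chunks is safe
+ // even though the source and destination regions may overlap.)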
+ // memcpy(inst->analysisBuffer, inst->analysisBuffer + inst->blockLen10ms, + // (inst->anaLen - inst->blockLen10ms) * sizeof(*inst->analysisBuffer)); + int16_t* p_start_src = inst->analysisBuffer + inst->blockLen10ms; + int16_t* p_end_src = inst->analysisBuffer + inst->anaLen; + int16_t* p_start_dst = inst->analysisBuffer; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + + p_start_src += 8; + p_start_dst += 8; + } + + // memcpy(inst->analysisBuffer + inst->anaLen - inst->blockLen10ms, + // new_speech, inst->blockLen10ms * sizeof(*inst->analysisBuffer)); + p_start_src = new_speech; + p_end_src = new_speech + inst->blockLen10ms; + p_start_dst = inst->analysisBuffer + inst->anaLen - inst->blockLen10ms; + while (p_start_src < p_end_src) { + int16x8_t frame = vld1q_s16(p_start_src); + vst1q_s16(p_start_dst, frame); + + p_start_src += 8; + p_start_dst += 8; + } + + // Window data before FFT. + int16_t* p_start_window = (int16_t*) inst->window; + int16_t* p_start_buffer = inst->analysisBuffer; + int16_t* p_start_out = out; + const int16_t* p_end_out = out + inst->anaLen; + + // Load the first element to reduce pipeline bubble. + int16x8_t window = vld1q_s16(p_start_window); + int16x8_t buffer = vld1q_s16(p_start_buffer); + p_start_window += 8; + p_start_buffer += 8; + + while (p_start_out < p_end_out) { + // Unroll loop. + int32x4_t tmp32_low = vmull_s16(vget_low_s16(window), vget_low_s16(buffer)); + int32x4_t tmp32_high = vmull_s16(vget_high_s16(window), + vget_high_s16(buffer)); + window = vld1q_s16(p_start_window); + buffer = vld1q_s16(p_start_buffer); + + int16x4_t result_low = vrshrn_n_s32(tmp32_low, 14); + int16x4_t result_high = vrshrn_n_s32(tmp32_high, 14); + vst1q_s16(p_start_out, vcombine_s16(result_low, result_high)); + + p_start_buffer += 8; + p_start_window += 8; + p_start_out += 8; + } +} diff --git a/webrtc/modules/audio_processing/ns/nsx_defines.h b/webrtc/modules/audio_processing/ns/nsx_defines.h index cd1e3bf..862dc3c 100644 --- a/webrtc/modules/audio_processing/ns/nsx_defines.h +++ b/webrtc/modules/audio_processing/ns/nsx_defines.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -11,49 +11,54 @@ #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ -#define ANAL_BLOCKL_MAX 256 // max analysis block length -#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1 +#define ANAL_BLOCKL_MAX 256 /* Max analysis block length */ +#define HALF_ANAL_BLOCKL 129 /* Half max analysis block length + 1 */ +#define NUM_HIGH_BANDS_MAX 2 /* Max number of high bands */ #define SIMULT 3 #define END_STARTUP_LONG 200 #define END_STARTUP_SHORT 50 -#define FACTOR_Q16 (WebRtc_Word32)2621440 // 40 in Q16 -#define FACTOR_Q7 (WebRtc_Word16)5120 // 40 in Q7 -#define FACTOR_Q7_STARTUP (WebRtc_Word16)1024 // 8 in Q7 -#define WIDTH_Q8 3 // 0.01 in Q8 (or 25 ) -//PARAMETERS FOR NEW METHOD -#define DD_PR_SNR_Q11 2007 // ~= Q11(0.98) DD update of prior SNR -#define ONE_MINUS_DD_PR_SNR_Q11 41 // DD update of prior SNR -#define SPECT_FLAT_TAVG_Q14 4915 // (0.30) tavg parameter for spectral flatness measure -#define SPECT_DIFF_TAVG_Q8 77 // (0.30) tavg parameter for spectral flatness measure -#define PRIOR_UPDATE_Q14 1638 // Q14(0.1) update parameter of prior model -#define NOISE_UPDATE_Q8 26 // 26 ~= Q8(0.1) update parameter for noise -// probability threshold for noise state in speech/noise likelihood -#define ONE_MINUS_PROB_RANGE_Q8 205 // 205 ~= Q8(0.8) -#define HIST_PAR_EST 1000 // histogram size for estimation of parameters -//FEATURE EXTRACTION CONFIG -//bin size of histogram +#define FACTOR_Q16 2621440 /* 40 in Q16 */ +#define FACTOR_Q7 5120 /* 40 in Q7 */ +#define FACTOR_Q7_STARTUP 1024 /* 8 in Q7 */ +#define WIDTH_Q8 3 /* 0.01 in Q8 (or 25 ) */ + +/* PARAMETERS FOR NEW METHOD */ +#define DD_PR_SNR_Q11 2007 /* ~= Q11(0.98) DD update of prior SNR */ +#define ONE_MINUS_DD_PR_SNR_Q11 41 /* DD update of prior SNR */ +#define SPECT_FLAT_TAVG_Q14 4915 /* (0.30) tavg parameter for spectral flatness measure */ +#define SPECT_DIFF_TAVG_Q8 77 /* (0.30) tavg parameter for spectral flatness measure */ +#define PRIOR_UPDATE_Q14 1638 /* Q14(0.1) Update parameter of prior model */ +#define NOISE_UPDATE_Q8 26 /* 26 ~= Q8(0.1) Update parameter for noise */ + +/* Probability threshold for noise state in speech/noise likelihood. */ +#define ONE_MINUS_PROB_RANGE_Q8 205 /* 205 ~= Q8(0.8) */ +#define HIST_PAR_EST 1000 /* Histogram size for estimation of parameters */ + +/* FEATURE EXTRACTION CONFIG */ +/* Bin size of histogram */ #define BIN_SIZE_LRT 10 -//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain -// thresholds for prior model -#define FACTOR_1_LRT_DIFF 6 //for LRT and spectral difference (5 times bigger) -//for spectral_flatness: used when noise is flatter than speech (10 times bigger) +/* Scale parameters: multiply dominant peaks of the histograms by scale factor to obtain. */ +/* Thresholds for prior model */ +#define FACTOR_1_LRT_DIFF 6 /* For LRT and spectral difference (5 times bigger) */ +/* For spectral_flatness: used when noise is flatter than speech (10 times bigger). 
*/ #define FACTOR_2_FLAT_Q10 922 -//peak limit for spectral flatness (varies between 0 and 1) -#define THRES_PEAK_FLAT 24 // * 2 * BIN_SIZE_FLAT_FX -//limit on spacing of two highest peaks in histogram: spacing determined by bin size -#define LIM_PEAK_SPACE_FLAT_DIFF 4 // * 2 * BIN_SIZE_DIFF_FX -//limit on relevance of second peak: +/* Peak limit for spectral flatness (varies between 0 and 1) */ +#define THRES_PEAK_FLAT 24 /* * 2 * BIN_SIZE_FLAT_FX */ +/* Limit on spacing of two highest peaks in histogram: spacing determined by bin size. */ +#define LIM_PEAK_SPACE_FLAT_DIFF 4 /* * 2 * BIN_SIZE_DIFF_FX */ +/* Limit on relevance of second peak */ #define LIM_PEAK_WEIGHT_FLAT_DIFF 2 -#define THRES_FLUCT_LRT 10240 //=20 * inst->modelUpdate; fluctuation limit of LRT feat. -//limit on the max and min values for the feature thresholds -#define MAX_FLAT_Q10 38912 // * 2 * BIN_SIZE_FLAT_FX -#define MIN_FLAT_Q10 4096 // * 2 * BIN_SIZE_FLAT_FX -#define MAX_DIFF 100 // * 2 * BIN_SIZE_DIFF_FX -#define MIN_DIFF 16 // * 2 * BIN_SIZE_DIFF_FX -//criteria of weight of histogram peak to accept/reject feature -#define THRES_WEIGHT_FLAT_DIFF 154//(int)(0.3*(inst->modelUpdate)) for flatness and difference -// -#define STAT_UPDATES 9 // Update every 512 = 1 << 9 block -#define ONE_MINUS_GAMMA_PAUSE_Q8 13 // ~= Q8(0.05) update for conservative noise estimate -#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 // ~= Q8(0.01) update for transition and noise region -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ +#define THRES_FLUCT_LRT 10240 /* = 20 * inst->modelUpdate; fluctuation limit of LRT feat. */ +/* Limit on the max and min values for the feature thresholds */ +#define MAX_FLAT_Q10 38912 /* * 2 * BIN_SIZE_FLAT_FX */ +#define MIN_FLAT_Q10 4096 /* * 2 * BIN_SIZE_FLAT_FX */ +#define MAX_DIFF 100 /* * 2 * BIN_SIZE_DIFF_FX */ +#define MIN_DIFF 16 /* * 2 * BIN_SIZE_DIFF_FX */ +/* Criteria of weight of histogram peak to accept/reject feature */ +#define THRES_WEIGHT_FLAT_DIFF 154 /*(int)(0.3*(inst->modelUpdate)) for flatness and difference */ + +#define STAT_UPDATES 9 /* Update every 512 = 1 << 9 block */ +#define ONE_MINUS_GAMMA_PAUSE_Q8 13 /* ~= Q8(0.05) Update for conservative noise estimate */ +#define GAMMA_NOISE_TRANS_AND_SPEECH_Q8 3 /* ~= Q8(0.01) Update for transition and noise region */ + +#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_DEFINES_H_ */ diff --git a/webrtc/modules/audio_processing/processing_component.cc b/webrtc/modules/audio_processing/processing_component.cc index 9ac1257..9e16d7c 100644 --- a/webrtc/modules/audio_processing/processing_component.cc +++ b/webrtc/modules/audio_processing/processing_component.cc @@ -8,17 +8,16 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "processing_component.h" +#include "webrtc/modules/audio_processing/processing_component.h" -#include +#include -#include "audio_processing_impl.h" +#include "webrtc/modules/audio_processing/include/audio_processing.h" namespace webrtc { -ProcessingComponent::ProcessingComponent(const AudioProcessingImpl* apm) - : apm_(apm), - initialized_(false), +ProcessingComponent::ProcessingComponent() + : initialized_(false), enabled_(false), num_handles_(0) {} @@ -33,7 +32,7 @@ int ProcessingComponent::Destroy() { } initialized_ = false; - return apm_->kNoError; + return AudioProcessing::kNoError; } int ProcessingComponent::EnableComponent(bool enable) { @@ -41,7 +40,7 @@ int ProcessingComponent::EnableComponent(bool enable) { enabled_ = enable; // Must be set before Initialize() is called. int err = Initialize(); - if (err != apm_->kNoError) { + if (err != AudioProcessing::kNoError) { enabled_ = false; return err; } @@ -49,7 +48,7 @@ int ProcessingComponent::EnableComponent(bool enable) { enabled_ = enable; } - return apm_->kNoError; + return AudioProcessing::kNoError; } bool ProcessingComponent::is_component_enabled() const { @@ -67,7 +66,7 @@ int ProcessingComponent::num_handles() const { int ProcessingComponent::Initialize() { if (!enabled_) { - return apm_->kNoError; + return AudioProcessing::kNoError; } num_handles_ = num_handles_required(); @@ -80,12 +79,12 @@ int ProcessingComponent::Initialize() { if (handles_[i] == NULL) { handles_[i] = CreateHandle(); if (handles_[i] == NULL) { - return apm_->kCreationFailedError; + return AudioProcessing::kCreationFailedError; } } int err = InitializeHandle(handles_[i]); - if (err != apm_->kNoError) { + if (err != AudioProcessing::kNoError) { return GetHandleError(handles_[i]); } } @@ -96,17 +95,17 @@ int ProcessingComponent::Initialize() { int ProcessingComponent::Configure() { if (!initialized_) { - return apm_->kNoError; + return AudioProcessing::kNoError; } assert(static_cast(handles_.size()) >= num_handles_); for (int i = 0; i < num_handles_; i++) { int err = ConfigureHandle(handles_[i]); - if (err != apm_->kNoError) { + if (err != AudioProcessing::kNoError) { return GetHandleError(handles_[i]); } } - return apm_->kNoError; + return AudioProcessing::kNoError; } } // namespace webrtc diff --git a/webrtc/modules/audio_processing/processing_component.h b/webrtc/modules/audio_processing/processing_component.h index 3d8a02b..8ee3ac6 100644 --- a/webrtc/modules/audio_processing/processing_component.h +++ b/webrtc/modules/audio_processing/processing_component.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,39 +8,29 @@ * be found in the AUTHORS file in the root of the source tree. 
 */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_PROCESSING_COMPONENT_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_PROCESSING_COMPONENT_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_PROCESSING_COMPONENT_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_PROCESSING_COMPONENT_H_ #include <vector> -#include "audio_processing.h" +#include "webrtc/common.h" namespace webrtc { -class AudioProcessingImpl; - -/*template <class T> -class ComponentHandle { - public: - ComponentHandle(); - virtual ~ComponentHandle(); - - virtual int Create() = 0; - virtual T* ptr() const = 0; -};*/ class ProcessingComponent { public: - explicit ProcessingComponent(const AudioProcessingImpl* apm); + ProcessingComponent(); virtual ~ProcessingComponent(); virtual int Initialize(); + virtual void SetExtraOptions(const Config& config) {} virtual int Destroy(); - virtual int get_version(char* version, int version_len_bytes) const = 0; + + bool is_component_enabled() const; protected: virtual int Configure(); int EnableComponent(bool enable); - bool is_component_enabled() const; void* handle(int index) const; int num_handles() const; @@ -48,16 +38,16 @@ class ProcessingComponent { virtual void* CreateHandle() const = 0; virtual int InitializeHandle(void* handle) const = 0; virtual int ConfigureHandle(void* handle) const = 0; - virtual int DestroyHandle(void* handle) const = 0; + virtual void DestroyHandle(void* handle) const = 0; virtual int num_handles_required() const = 0; virtual int GetHandleError(void* handle) const = 0; - const AudioProcessingImpl* apm_; std::vector<void*> handles_; bool initialized_; bool enabled_; int num_handles_; }; + } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_PROCESSING_COMPONENT_H__ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_PROCESSING_COMPONENT_H_ diff --git a/webrtc/modules/audio_processing/rms_level.cc b/webrtc/modules/audio_processing/rms_level.cc new file mode 100644 index 0000000..70c4422 --- /dev/null +++ b/webrtc/modules/audio_processing/rms_level.cc @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/rms_level.h" + +#include <assert.h> +#include <math.h> + +namespace webrtc { + +static const float kMaxSquaredLevel = 32768 * 32768; + +RMSLevel::RMSLevel() + : sum_square_(0), + sample_count_(0) {} + +RMSLevel::~RMSLevel() {} + +void RMSLevel::Reset() { + sum_square_ = 0; + sample_count_ = 0; +} + +void RMSLevel::Process(const int16_t* data, size_t length) { + for (size_t i = 0; i < length; ++i) { + sum_square_ += data[i] * data[i]; + } + sample_count_ += length; +} + +void RMSLevel::ProcessMuted(size_t length) { + sample_count_ += length; +} + +int RMSLevel::RMS() { + if (sample_count_ == 0 || sum_square_ == 0) { + Reset(); + return kMinLevel; + } + + // Normalize by the max level. 
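+ // Worked example: an idealized full-scale square wave (amplitude 32768)
+ // makes sum_square_ equal sample_count_ * kMaxSquaredLevel, so the ratio
+ // below is 1.0 and 10 * log10(1.0) = 0 dBFS; RMS() then returns 0. At half
+ // amplitude the ratio is 0.25, about -6 dBFS, and RMS() returns 6.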
+ float rms = sum_square_ / (sample_count_ * kMaxSquaredLevel); + // 20log_10(x^0.5) = 10log_10(x) + rms = 10 * log10(rms); + assert(rms <= 0); + if (rms < -kMinLevel) + rms = -kMinLevel; + + rms = -rms; + Reset(); + return static_cast<int>(rms + 0.5); +} + +} // namespace webrtc diff --git a/webrtc/modules/audio_processing/rms_level.h b/webrtc/modules/audio_processing/rms_level.h new file mode 100644 index 0000000..12fa212 --- /dev/null +++ b/webrtc/modules/audio_processing/rms_level.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_ + +#include <cstddef> + +#include "webrtc/typedefs.h" + +namespace webrtc { + +// Computes the root mean square (RMS) level in dBFs (decibels from digital +// full-scale) of audio data. The computation follows RFC 6465: +// https://tools.ietf.org/html/rfc6465 +// with the intent that it can provide the RTP audio level indication. +// +// The expected approach is to provide constant-sized chunks of audio to +// Process(). When enough chunks have been accumulated to form a packet, call +// RMS() to get the audio level indicator for the RTP header. +class RMSLevel { + public: + static const int kMinLevel = 127; + + RMSLevel(); + ~RMSLevel(); + + // Can be called to reset internal states, but is not required during normal + // operation. + void Reset(); + + // Pass each chunk of audio to Process() to accumulate the level. + void Process(const int16_t* data, size_t length); + + // If all samples with the given |length| have a magnitude of zero, this is + // a shortcut to avoid some computation. + void ProcessMuted(size_t length); + + // Computes the RMS level over all data passed to Process() since the last + // call to RMS(). The returned value is positive but should be interpreted as + // negative as per the RFC. It is constrained to [0, 127]. + int RMS(); + + private: + float sum_square_; + size_t sample_count_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_RMS_LEVEL_H_ + diff --git a/webrtc/modules/audio_processing/splitting_filter.cc b/webrtc/modules/audio_processing/splitting_filter.cc index 1526141..60427e2 100644 --- a/webrtc/modules/audio_processing/splitting_filter.cc +++ b/webrtc/modules/audio_processing/splitting_filter.cc @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,26 +8,102 @@ * be found in the AUTHORS file in the root of the source tree. 
*/ -#include "splitting_filter.h" -#include "signal_processing_library.h" +#include "webrtc/modules/audio_processing/splitting_filter.h" + +#include "webrtc/base/checks.h" +#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" +#include "webrtc/common_audio/channel_buffer.h" namespace webrtc { -void SplittingFilterAnalysis(const WebRtc_Word16* in_data, - WebRtc_Word16* low_band, - WebRtc_Word16* high_band, - WebRtc_Word32* filter_state1, - WebRtc_Word32* filter_state2) -{ - WebRtcSpl_AnalysisQMF(in_data, low_band, high_band, filter_state1, filter_state2); +SplittingFilter::SplittingFilter(int num_channels, + size_t num_bands, + size_t num_frames) + : num_bands_(num_bands) { + RTC_CHECK(num_bands_ == 2 || num_bands_ == 3); + if (num_bands_ == 2) { + two_bands_states_.resize(num_channels); + } else if (num_bands_ == 3) { + for (int i = 0; i < num_channels; ++i) { + three_band_filter_banks_.push_back(new ThreeBandFilterBank(num_frames)); + } + } } -void SplittingFilterSynthesis(const WebRtc_Word16* low_band, - const WebRtc_Word16* high_band, - WebRtc_Word16* out_data, - WebRtc_Word32* filt_state1, - WebRtc_Word32* filt_state2) -{ - WebRtcSpl_SynthesisQMF(low_band, high_band, out_data, filt_state1, filt_state2); +void SplittingFilter::Analysis(const IFChannelBuffer* data, + IFChannelBuffer* bands) { + RTC_DCHECK_EQ(num_bands_, bands->num_bands()); + RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), + bands->num_frames_per_band() * bands->num_bands()); + if (bands->num_bands() == 2) { + TwoBandsAnalysis(data, bands); + } else if (bands->num_bands() == 3) { + ThreeBandsAnalysis(data, bands); + } } + +void SplittingFilter::Synthesis(const IFChannelBuffer* bands, + IFChannelBuffer* data) { + RTC_DCHECK_EQ(num_bands_, bands->num_bands()); + RTC_DCHECK_EQ(data->num_channels(), bands->num_channels()); + RTC_DCHECK_EQ(data->num_frames(), + bands->num_frames_per_band() * bands->num_bands()); + if (bands->num_bands() == 2) { + TwoBandsSynthesis(bands, data); + } else if (bands->num_bands() == 3) { + ThreeBandsSynthesis(bands, data); + } +} + +void SplittingFilter::TwoBandsAnalysis(const IFChannelBuffer* data, + IFChannelBuffer* bands) { + RTC_DCHECK_EQ(static_cast(two_bands_states_.size()), + data->num_channels()); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { + WebRtcSpl_AnalysisQMF(data->ibuf_const()->channels()[i], + data->num_frames(), + bands->ibuf()->channels(0)[i], + bands->ibuf()->channels(1)[i], + two_bands_states_[i].analysis_state1, + two_bands_states_[i].analysis_state2); + } +} + +void SplittingFilter::TwoBandsSynthesis(const IFChannelBuffer* bands, + IFChannelBuffer* data) { + RTC_DCHECK_EQ(static_cast(two_bands_states_.size()), + data->num_channels()); + for (size_t i = 0; i < two_bands_states_.size(); ++i) { + WebRtcSpl_SynthesisQMF(bands->ibuf_const()->channels(0)[i], + bands->ibuf_const()->channels(1)[i], + bands->num_frames_per_band(), + data->ibuf()->channels()[i], + two_bands_states_[i].synthesis_state1, + two_bands_states_[i].synthesis_state2); + } +} + +void SplittingFilter::ThreeBandsAnalysis(const IFChannelBuffer* data, + IFChannelBuffer* bands) { + RTC_DCHECK_EQ(static_cast(three_band_filter_banks_.size()), + data->num_channels()); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i]->Analysis(data->fbuf_const()->channels()[i], + data->num_frames(), + bands->fbuf()->bands(i)); + } +} + +void SplittingFilter::ThreeBandsSynthesis(const 
IFChannelBuffer* bands, + IFChannelBuffer* data) { + RTC_DCHECK_EQ(static_cast<int>(three_band_filter_banks_.size()), + data->num_channels()); + for (size_t i = 0; i < three_band_filter_banks_.size(); ++i) { + three_band_filter_banks_[i]->Synthesis(bands->fbuf_const()->bands(i), + bands->num_frames_per_band(), + data->fbuf()->channels()[i]); + } +} + } // namespace webrtc diff --git a/webrtc/modules/audio_processing/splitting_filter.h b/webrtc/modules/audio_processing/splitting_filter.h index 661bfb2..51088d5 100644 --- a/webrtc/modules/audio_processing/splitting_filter.h +++ b/webrtc/modules/audio_processing/splitting_filter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,56 +8,61 @@ * be found in the AUTHORS file in the root of the source tree. */ -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_SPLITTING_FILTER_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_SPLITTING_FILTER_H_ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ -#include "typedefs.h" -#include "signal_processing_library.h" +#include <cstring> +#include <vector> + +#include "webrtc/modules/audio_processing/three_band_filter_bank.h" +#include "webrtc/system_wrappers/interface/scoped_vector.h" namespace webrtc { -/* - * SplittingFilterbank_analysisQMF(...) - * - * Splits a super-wb signal into two subbands: 0-8 kHz and 8-16 kHz. - * - * Input: - * - in_data : super-wb audio signal - * - * Input & Output: - * - filt_state1: Filter state for first all-pass filter - * - filt_state2: Filter state for second all-pass filter - * - * Output: - * - low_band : The signal from the 0-4 kHz band - * - high_band : The signal from the 4-8 kHz band - */ -void SplittingFilterAnalysis(const WebRtc_Word16* in_data, - WebRtc_Word16* low_band, - WebRtc_Word16* high_band, - WebRtc_Word32* filt_state1, - WebRtc_Word32* filt_state2); -/* - * SplittingFilterbank_synthesisQMF(...) - * - * Combines the two subbands (0-8 and 8-16 kHz) into a super-wb signal. - * - * Input: - * - low_band : The signal with the 0-8 kHz band - * - high_band : The signal with the 8-16 kHz band - * - * Input & Output: - * - filt_state1: Filter state for first all-pass filter - * - filt_state2: Filter state for second all-pass filter - * - * Output: - * - out_data : super-wb speech signal - */ -void SplittingFilterSynthesis(const WebRtc_Word16* low_band, - const WebRtc_Word16* high_band, - WebRtc_Word16* out_data, - WebRtc_Word32* filt_state1, - WebRtc_Word32* filt_state2); +class IFChannelBuffer; + +struct TwoBandsStates { + TwoBandsStates() { + memset(analysis_state1, 0, sizeof(analysis_state1)); + memset(analysis_state2, 0, sizeof(analysis_state2)); + memset(synthesis_state1, 0, sizeof(synthesis_state1)); + memset(synthesis_state2, 0, sizeof(synthesis_state2)); + } + + static const int kStateSize = 6; + int analysis_state1[kStateSize]; + int analysis_state2[kStateSize]; + int synthesis_state1[kStateSize]; + int synthesis_state2[kStateSize]; +}; + +// Splitting filter which is able to split into and merge from 2 or 3 frequency +// bands. The number of channels needs to be provided at construction time. +// +// For each block, Analysis() is called to split into bands and then Synthesis() +// to merge these bands again. 
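+// (For example, with num_frames = 480, i.e. 10 ms at 48 kHz, a three-band
+// split gives num_frames_per_band = 480 / 3 = 160 samples per band.)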
The input and output signals are contained in +// IFChannelBuffers and for the different bands an array of IFChannelBuffers is +// used. +class SplittingFilter { + public: + SplittingFilter(int num_channels, size_t num_bands, size_t num_frames); + + void Analysis(const IFChannelBuffer* data, IFChannelBuffer* bands); + void Synthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + + private: + // Two-band analysis and synthesis work for 640 samples or less. + void TwoBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); + void TwoBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + void ThreeBandsAnalysis(const IFChannelBuffer* data, IFChannelBuffer* bands); + void ThreeBandsSynthesis(const IFChannelBuffer* bands, IFChannelBuffer* data); + void InitBuffers(); + + const size_t num_bands_; + std::vector<TwoBandsStates> two_bands_states_; + ScopedVector<ThreeBandFilterBank> three_band_filter_banks_; +}; + } // namespace webrtc -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_SPLITTING_FILTER_H_ +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_SPLITTING_FILTER_H_ diff --git a/webrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml b/webrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml deleted file mode 100644 index c6063b3..0000000 --- a/webrtc/modules/audio_processing/test/android/apmtest/AndroidManifest.xml +++ /dev/null @@ -1,30 +0,0 @@ diff --git a/webrtc/modules/audio_processing/test/android/apmtest/default.properties b/webrtc/modules/audio_processing/test/android/apmtest/default.properties deleted file mode 100644 index 9a2c9f6..0000000 --- a/webrtc/modules/audio_processing/test/android/apmtest/default.properties +++ /dev/null @@ -1,11 +0,0 @@ -# This file is automatically generated by Android Tools. -# Do not modify this file -- YOUR CHANGES WILL BE ERASED! -# -# This file must be checked in Version Control Systems. -# -# To customize properties used by the Ant build system use, -# "build.properties", and override values to adapt the script to your -# project structure. - -# Project target. -target=android-9 diff --git a/webrtc/modules/audio_processing/test/android/apmtest/jni/Application.mk b/webrtc/modules/audio_processing/test/android/apmtest/jni/Application.mk deleted file mode 100644 index 22d188e..0000000 --- a/webrtc/modules/audio_processing/test/android/apmtest/jni/Application.mk +++ /dev/null @@ -1 +0,0 @@ -APP_PLATFORM := android-9 diff --git a/webrtc/modules/audio_processing/test/android/apmtest/jni/main.c b/webrtc/modules/audio_processing/test/android/apmtest/jni/main.c deleted file mode 100644 index 2e19635..0000000 --- a/webrtc/modules/audio_processing/test/android/apmtest/jni/main.c +++ /dev/null @@ -1,307 +0,0 @@ -/* - * Copyright (C) 2010 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * - */ - -//BEGIN_INCLUDE(all) -#include -#include - -#include -#include - -#include -#include -#include - -#define LOGI(...) 
((void)__android_log_print(ANDROID_LOG_INFO, "native-activity", __VA_ARGS__)) -#define LOGW(...) ((void)__android_log_print(ANDROID_LOG_WARN, "native-activity", __VA_ARGS__)) - -/** - * Our saved state data. - */ -struct saved_state { - float angle; - int32_t x; - int32_t y; -}; - -/** - * Shared state for our app. - */ -struct engine { - struct android_app* app; - - ASensorManager* sensorManager; - const ASensor* accelerometerSensor; - ASensorEventQueue* sensorEventQueue; - - int animating; - EGLDisplay display; - EGLSurface surface; - EGLContext context; - int32_t width; - int32_t height; - struct saved_state state; -}; - -/** - * Initialize an EGL context for the current display. - */ -static int engine_init_display(struct engine* engine) { - // initialize OpenGL ES and EGL - - /* - * Here specify the attributes of the desired configuration. - * Below, we select an EGLConfig with at least 8 bits per color - * component compatible with on-screen windows - */ - const EGLint attribs[] = { - EGL_SURFACE_TYPE, EGL_WINDOW_BIT, - EGL_BLUE_SIZE, 8, - EGL_GREEN_SIZE, 8, - EGL_RED_SIZE, 8, - EGL_NONE - }; - EGLint w, h, dummy, format; - EGLint numConfigs; - EGLConfig config; - EGLSurface surface; - EGLContext context; - - EGLDisplay display = eglGetDisplay(EGL_DEFAULT_DISPLAY); - - eglInitialize(display, 0, 0); - - /* Here, the application chooses the configuration it desires. In this - * sample, we have a very simplified selection process, where we pick - * the first EGLConfig that matches our criteria */ - eglChooseConfig(display, attribs, &config, 1, &numConfigs); - - /* EGL_NATIVE_VISUAL_ID is an attribute of the EGLConfig that is - * guaranteed to be accepted by ANativeWindow_setBuffersGeometry(). - * As soon as we picked a EGLConfig, we can safely reconfigure the - * ANativeWindow buffers to match, using EGL_NATIVE_VISUAL_ID. */ - eglGetConfigAttrib(display, config, EGL_NATIVE_VISUAL_ID, &format); - - ANativeWindow_setBuffersGeometry(engine->app->window, 0, 0, format); - - surface = eglCreateWindowSurface(display, config, engine->app->window, NULL); - context = eglCreateContext(display, config, NULL, NULL); - - if (eglMakeCurrent(display, surface, surface, context) == EGL_FALSE) { - LOGW("Unable to eglMakeCurrent"); - return -1; - } - - eglQuerySurface(display, surface, EGL_WIDTH, &w); - eglQuerySurface(display, surface, EGL_HEIGHT, &h); - - engine->display = display; - engine->context = context; - engine->surface = surface; - engine->width = w; - engine->height = h; - engine->state.angle = 0; - - // Initialize GL state. - glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_FASTEST); - glEnable(GL_CULL_FACE); - glShadeModel(GL_SMOOTH); - glDisable(GL_DEPTH_TEST); - - return 0; -} - -/** - * Just the current frame in the display. - */ -static void engine_draw_frame(struct engine* engine) { - if (engine->display == NULL) { - // No display. - return; - } - - // Just fill the screen with a color. - glClearColor(((float)engine->state.x)/engine->width, engine->state.angle, - ((float)engine->state.y)/engine->height, 1); - glClear(GL_COLOR_BUFFER_BIT); - - eglSwapBuffers(engine->display, engine->surface); -} - -/** - * Tear down the EGL context currently associated with the display. 
- */ -static void engine_term_display(struct engine* engine) { - if (engine->display != EGL_NO_DISPLAY) { - eglMakeCurrent(engine->display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT); - if (engine->context != EGL_NO_CONTEXT) { - eglDestroyContext(engine->display, engine->context); - } - if (engine->surface != EGL_NO_SURFACE) { - eglDestroySurface(engine->display, engine->surface); - } - eglTerminate(engine->display); - } - engine->animating = 0; - engine->display = EGL_NO_DISPLAY; - engine->context = EGL_NO_CONTEXT; - engine->surface = EGL_NO_SURFACE; -} - -/** - * Process the next input event. - */ -static int32_t engine_handle_input(struct android_app* app, AInputEvent* event) { - struct engine* engine = (struct engine*)app->userData; - if (AInputEvent_getType(event) == AINPUT_EVENT_TYPE_MOTION) { - engine->animating = 1; - engine->state.x = AMotionEvent_getX(event, 0); - engine->state.y = AMotionEvent_getY(event, 0); - return 1; - } - return 0; -} - -/** - * Process the next main command. - */ -static void engine_handle_cmd(struct android_app* app, int32_t cmd) { - struct engine* engine = (struct engine*)app->userData; - switch (cmd) { - case APP_CMD_SAVE_STATE: - // The system has asked us to save our current state. Do so. - engine->app->savedState = malloc(sizeof(struct saved_state)); - *((struct saved_state*)engine->app->savedState) = engine->state; - engine->app->savedStateSize = sizeof(struct saved_state); - break; - case APP_CMD_INIT_WINDOW: - // The window is being shown, get it ready. - if (engine->app->window != NULL) { - engine_init_display(engine); - engine_draw_frame(engine); - } - break; - case APP_CMD_TERM_WINDOW: - // The window is being hidden or closed, clean it up. - engine_term_display(engine); - break; - case APP_CMD_GAINED_FOCUS: - // When our app gains focus, we start monitoring the accelerometer. - if (engine->accelerometerSensor != NULL) { - ASensorEventQueue_enableSensor(engine->sensorEventQueue, - engine->accelerometerSensor); - // We'd like to get 60 events per second (in us). - ASensorEventQueue_setEventRate(engine->sensorEventQueue, - engine->accelerometerSensor, (1000L/60)*1000); - } - break; - case APP_CMD_LOST_FOCUS: - // When our app loses focus, we stop monitoring the accelerometer. - // This is to avoid consuming battery while not being used. - if (engine->accelerometerSensor != NULL) { - ASensorEventQueue_disableSensor(engine->sensorEventQueue, - engine->accelerometerSensor); - } - // Also stop animating. - engine->animating = 0; - engine_draw_frame(engine); - break; - } -} - -/** - * This is the main entry point of a native application that is using - * android_native_app_glue. It runs in its own thread, with its own - * event loop for receiving input events and doing other things. - */ -void android_main(struct android_app* state) { - struct engine engine; - - // Make sure glue isn't stripped. - app_dummy(); - - memset(&engine, 0, sizeof(engine)); - state->userData = &engine; - state->onAppCmd = engine_handle_cmd; - state->onInputEvent = engine_handle_input; - engine.app = state; - - // Prepare to monitor accelerometer - engine.sensorManager = ASensorManager_getInstance(); - engine.accelerometerSensor = ASensorManager_getDefaultSensor(engine.sensorManager, - ASENSOR_TYPE_ACCELEROMETER); - engine.sensorEventQueue = ASensorManager_createEventQueue(engine.sensorManager, - state->looper, LOOPER_ID_USER, NULL, NULL); - - if (state->savedState != NULL) { - // We are starting with a previous saved state; restore from it. 
- engine.state = *(struct saved_state*)state->savedState; - } - - // loop waiting for stuff to do. - - while (1) { - // Read all pending events. - int ident; - int events; - struct android_poll_source* source; - - // If not animating, we will block forever waiting for events. - // If animating, we loop until all events are read, then continue - // to draw the next frame of animation. - while ((ident=ALooper_pollAll(engine.animating ? 0 : -1, NULL, &events, - (void**)&source)) >= 0) { - - // Process this event. - if (source != NULL) { - source->process(state, source); - } - - // If a sensor has data, process it now. - if (ident == LOOPER_ID_USER) { - if (engine.accelerometerSensor != NULL) { - ASensorEvent event; - while (ASensorEventQueue_getEvents(engine.sensorEventQueue, - &event, 1) > 0) { - LOGI("accelerometer: x=%f y=%f z=%f", - event.acceleration.x, event.acceleration.y, - event.acceleration.z); - } - } - } - - // Check if we are exiting. - if (state->destroyRequested != 0) { - engine_term_display(&engine); - return; - } - } - - if (engine.animating) { - // Done with events; draw next animation frame. - engine.state.angle += .01f; - if (engine.state.angle > 1) { - engine.state.angle = 0; - } - - // Drawing is throttled to the screen update rate, so there - // is no need to do timing here. - engine_draw_frame(&engine); - } - } -} -//END_INCLUDE(all) diff --git a/webrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml b/webrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml deleted file mode 100644 index d0bd0f3..0000000 --- a/webrtc/modules/audio_processing/test/android/apmtest/res/values/strings.xml +++ /dev/null @@ -1,4 +0,0 @@ - - - apmtest - diff --git a/webrtc/modules/audio_processing/test/apmtest.m b/webrtc/modules/audio_processing/test/apmtest.m deleted file mode 100644 index 3172cd1..0000000 --- a/webrtc/modules/audio_processing/test/apmtest.m +++ /dev/null @@ -1,355 +0,0 @@ -function apmtest(task, testname, filepath, casenumber, legacy) -%APMTEST is a tool to process APM file sets and easily display the output. -% APMTEST(TASK, TESTNAME, CASENUMBER) performs one of several TASKs: -% 'test' Processes the files to produce test output. -% 'list' Prints a list of cases in the test set, preceded by their -% CASENUMBERs. -% 'show' Uses spclab to show the test case specified by the -% CASENUMBER parameter. -% -% using a set of test files determined by TESTNAME: -% 'all' All tests. -% 'apm' The standard APM test set (default). -% 'apmm' The mobile APM test set. -% 'aec' The AEC test set. -% 'aecm' The AECM test set. -% 'agc' The AGC test set. -% 'ns' The NS test set. -% 'vad' The VAD test set. -% -% FILEPATH specifies the path to the test data files. -% -% CASENUMBER can be used to select a single test case. Omit CASENUMBER, -% or set to zero, to use all test cases. -% - -if nargin < 5 || isempty(legacy) - % Set to true to run old VQE recordings. 
- legacy = false; -end - -if nargin < 4 || isempty(casenumber) - casenumber = 0; -end - -if nargin < 3 || isempty(filepath) - filepath = 'data/'; -end - -if nargin < 2 || isempty(testname) - testname = 'all'; -end - -if nargin < 1 || isempty(task) - task = 'test'; -end - -if ~strcmp(task, 'test') && ~strcmp(task, 'list') && ~strcmp(task, 'show') - error(['TASK ' task ' is not recognized']); -end - -if casenumber == 0 && strcmp(task, 'show') - error(['CASENUMBER must be specified for TASK ' task]); -end - -inpath = [filepath 'input/']; -outpath = [filepath 'output/']; -refpath = [filepath 'reference/']; - -if strcmp(testname, 'all') - tests = {'apm','apmm','aec','aecm','agc','ns','vad'}; -else - tests = {testname}; -end - -if legacy - progname = './test'; -else - progname = './process_test'; -end - -global farFile; -global nearFile; -global eventFile; -global delayFile; -global driftFile; - -if legacy - farFile = 'vqeFar.pcm'; - nearFile = 'vqeNear.pcm'; - eventFile = 'vqeEvent.dat'; - delayFile = 'vqeBuf.dat'; - driftFile = 'vqeDrift.dat'; -else - farFile = 'apm_far.pcm'; - nearFile = 'apm_near.pcm'; - eventFile = 'apm_event.dat'; - delayFile = 'apm_delay.dat'; - driftFile = 'apm_drift.dat'; -end - -simulateMode = false; -nErr = 0; -nCases = 0; -for i=1:length(tests) - simulateMode = false; - - if strcmp(tests{i}, 'apm') - testdir = ['apm/']; - outfile = ['out']; - if legacy - opt = ['-ec 1 -agc 2 -nc 2 -vad 3']; - else - opt = ['--no_progress -hpf' ... - ' -aec --drift_compensation -agc --fixed_digital' ... - ' -ns --ns_moderate -vad']; - end - - elseif strcmp(tests{i}, 'apm-swb') - simulateMode = true; - testdir = ['apm-swb/']; - outfile = ['out']; - if legacy - opt = ['-fs 32000 -ec 1 -agc 2 -nc 2']; - else - opt = ['--no_progress -fs 32000 -hpf' ... - ' -aec --drift_compensation -agc --adaptive_digital' ... - ' -ns --ns_moderate -vad']; - end - elseif strcmp(tests{i}, 'apmm') - testdir = ['apmm/']; - outfile = ['out']; - opt = ['-aec --drift_compensation -agc --fixed_digital -hpf -ns ' ... - '--ns_moderate']; - - else - error(['TESTNAME ' tests{i} ' is not recognized']); - end - - inpathtest = [inpath testdir]; - outpathtest = [outpath testdir]; - refpathtest = [refpath testdir]; - - if ~exist(inpathtest,'dir') - error(['Input directory ' inpathtest ' does not exist']); - end - - if ~exist(refpathtest,'dir') - warning(['Reference directory ' refpathtest ' does not exist']); - end - - [status, errMsg] = mkdir(outpathtest); - if (status == 0) - error(errMsg); - end - - [nErr, nCases] = recurseDir(inpathtest, outpathtest, refpathtest, outfile, ... - progname, opt, simulateMode, nErr, nCases, task, casenumber, legacy); - - if strcmp(task, 'test') || strcmp(task, 'show') - system(['rm ' farFile]); - system(['rm ' nearFile]); - if simulateMode == false - system(['rm ' eventFile]); - system(['rm ' delayFile]); - system(['rm ' driftFile]); - end - end -end - -if ~strcmp(task, 'list') - if nErr == 0 - fprintf(1, '\nAll files are bit-exact to reference\n', nErr); - else - fprintf(1, '\n%d files are NOT bit-exact to reference\n', nErr); - end -end - - -function [nErrOut, nCases] = recurseDir(inpath, outpath, refpath, ... - outfile, progname, opt, simulateMode, nErr, nCases, task, casenumber, ... - legacy) - -global farFile; -global nearFile; -global eventFile; -global delayFile; -global driftFile; - -dirs = dir(inpath); -nDirs = 0; -nErrOut = nErr; -for i=3:length(dirs) % skip . and .. 
- nDirs = nDirs + dirs(i).isdir; -end - - -if nDirs == 0 - nCases = nCases + 1; - - if casenumber == nCases || casenumber == 0 - - if strcmp(task, 'list') - fprintf([num2str(nCases) '. ' outfile '\n']) - else - vadoutfile = ['vad_' outfile '.dat']; - outfile = [outfile '.pcm']; - - % Check for VAD test - vadTest = 0; - if ~isempty(findstr(opt, '-vad')) - vadTest = 1; - if legacy - opt = [opt ' ' outpath vadoutfile]; - else - opt = [opt ' --vad_out_file ' outpath vadoutfile]; - end - end - - if exist([inpath 'vqeFar.pcm']) - system(['ln -s -f ' inpath 'vqeFar.pcm ' farFile]); - elseif exist([inpath 'apm_far.pcm']) - system(['ln -s -f ' inpath 'apm_far.pcm ' farFile]); - end - - if exist([inpath 'vqeNear.pcm']) - system(['ln -s -f ' inpath 'vqeNear.pcm ' nearFile]); - elseif exist([inpath 'apm_near.pcm']) - system(['ln -s -f ' inpath 'apm_near.pcm ' nearFile]); - end - - if exist([inpath 'vqeEvent.dat']) - system(['ln -s -f ' inpath 'vqeEvent.dat ' eventFile]); - elseif exist([inpath 'apm_event.dat']) - system(['ln -s -f ' inpath 'apm_event.dat ' eventFile]); - end - - if exist([inpath 'vqeBuf.dat']) - system(['ln -s -f ' inpath 'vqeBuf.dat ' delayFile]); - elseif exist([inpath 'apm_delay.dat']) - system(['ln -s -f ' inpath 'apm_delay.dat ' delayFile]); - end - - if exist([inpath 'vqeSkew.dat']) - system(['ln -s -f ' inpath 'vqeSkew.dat ' driftFile]); - elseif exist([inpath 'vqeDrift.dat']) - system(['ln -s -f ' inpath 'vqeDrift.dat ' driftFile]); - elseif exist([inpath 'apm_drift.dat']) - system(['ln -s -f ' inpath 'apm_drift.dat ' driftFile]); - end - - if simulateMode == false - command = [progname ' -o ' outpath outfile ' ' opt]; - else - if legacy - inputCmd = [' -in ' nearFile]; - else - inputCmd = [' -i ' nearFile]; - end - - if exist([farFile]) - if legacy - inputCmd = [' -if ' farFile inputCmd]; - else - inputCmd = [' -ir ' farFile inputCmd]; - end - end - command = [progname inputCmd ' -o ' outpath outfile ' ' opt]; - end - % This prevents MATLAB from using its own C libraries. - shellcmd = ['bash -c "unset LD_LIBRARY_PATH;']; - fprintf([command '\n']); - [status, result] = system([shellcmd command '"']); - fprintf(result); - - fprintf(['Reference file: ' refpath outfile '\n']); - - if vadTest == 1 - equal_to_ref = are_files_equal([outpath vadoutfile], ... - [refpath vadoutfile], ... - 'int8'); - if ~equal_to_ref - nErr = nErr + 1; - end - end - - [equal_to_ref, diffvector] = are_files_equal([outpath outfile], ... - [refpath outfile], ... - 'int16'); - if ~equal_to_ref - nErr = nErr + 1; - end - - if strcmp(task, 'show') - % Assume the last init gives the sample rate of interest. - str_idx = strfind(result, 'Sample rate:'); - fs = str2num(result(str_idx(end) + 13:str_idx(end) + 17)); - fprintf('Using %d Hz\n', fs); - - if exist([farFile]) - spclab(fs, farFile, nearFile, [refpath outfile], ... - [outpath outfile], diffvector); - %spclab(fs, diffvector); - else - spclab(fs, nearFile, [refpath outfile], [outpath outfile], ... - diffvector); - %spclab(fs, diffvector); - end - end - end - end -else - - for i=3:length(dirs) - if dirs(i).isdir - [nErr, nCases] = recurseDir([inpath dirs(i).name '/'], outpath, ... - refpath,[outfile '_' dirs(i).name], progname, opt, ... - simulateMode, nErr, nCases, task, casenumber, legacy); - end - end -end -nErrOut = nErr; - -function [are_equal, diffvector] = ... 
- are_files_equal(newfile, reffile, precision, diffvector) - -are_equal = false; -diffvector = 0; -if ~exist(newfile,'file') - warning(['Output file ' newfile ' does not exist']); - return -end - -if ~exist(reffile,'file') - warning(['Reference file ' reffile ' does not exist']); - return -end - -fid = fopen(newfile,'rb'); -new = fread(fid,inf,precision); -fclose(fid); - -fid = fopen(reffile,'rb'); -ref = fread(fid,inf,precision); -fclose(fid); - -if length(new) ~= length(ref) - warning('Reference is not the same length as output'); - minlength = min(length(new), length(ref)); - new = new(1:minlength); - ref = ref(1:minlength); -end -diffvector = new - ref; - -if isequal(new, ref) - fprintf([newfile ' is bit-exact to reference\n']); - are_equal = true; -else - if isempty(new) - warning([newfile ' is empty']); - return - end - snr = snrseg(new,ref,80); - fprintf('\n'); - are_equal = false; -end diff --git a/webrtc/modules/audio_processing/test/process_test.cc b/webrtc/modules/audio_processing/test/process_test.cc deleted file mode 100644 index 3e3c059..0000000 --- a/webrtc/modules/audio_processing/test/process_test.cc +++ /dev/null @@ -1,948 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -#include -#include -#ifdef WEBRTC_ANDROID -#include -#endif - -#include "gtest/gtest.h" - -#include "audio_processing.h" -#include "cpu_features_wrapper.h" -#include "module_common_types.h" -#include "tick_util.h" -#ifdef WEBRTC_ANDROID -#include "external/webrtc/src/modules/audio_processing/debug.pb.h" -#else -#include "webrtc/audio_processing/debug.pb.h" -#endif - -using webrtc::AudioFrame; -using webrtc::AudioProcessing; -using webrtc::EchoCancellation; -using webrtc::GainControl; -using webrtc::NoiseSuppression; -using webrtc::TickInterval; -using webrtc::TickTime; -using webrtc::audioproc::Event; -using webrtc::audioproc::Init; -using webrtc::audioproc::ReverseStream; -using webrtc::audioproc::Stream; - -namespace { -// Returns true on success, false on error or end-of-file. -bool ReadMessageFromFile(FILE* file, - ::google::protobuf::MessageLite* msg) { - // The "wire format" for the size is little-endian. - // Assume process_test is running on a little-endian machine. - int32_t size; - if (fread(&size, sizeof(int32_t), 1, file) != 1) { - return false; - } - if (size <= 0) { - return false; - } - size_t usize = static_cast(size); - - char array[usize]; - if (fread(array, sizeof(char), usize, file) != usize) { - return false; - } - - msg->Clear(); - return msg->ParseFromArray(array, usize); -} - -void PrintStat(const AudioProcessing::Statistic& stat) { - printf("%d, %d, %d\n", stat.average, - stat.maximum, - stat.minimum); -} - -void usage() { - printf( - "Usage: process_test [options] [-pb PROTOBUF_FILE]\n" - " [-ir REVERSE_FILE] [-i PRIMARY_FILE] [-o OUT_FILE]\n"); - printf( - "process_test is a test application for AudioProcessing.\n\n" - "When a protobuf debug file is available, specify it with -pb.\n" - "Alternately, when -ir or -i is used, the specified files will be\n" - "processed directly in a simulation mode. 
Otherwise the full set of\n" - "legacy test files is expected to be present in the working directory.\n"); - printf("\n"); - printf("Options\n"); - printf("General configuration (only used for the simulation mode):\n"); - printf(" -fs SAMPLE_RATE_HZ\n"); - printf(" -ch CHANNELS_IN CHANNELS_OUT\n"); - printf(" -rch REVERSE_CHANNELS\n"); - printf("\n"); - printf("Component configuration:\n"); - printf( - "All components are disabled by default. Each block below begins with a\n" - "flag to enable the component with default settings. The subsequent flags\n" - "in the block are used to provide configuration settings.\n"); - printf("\n -aec Echo cancellation\n"); - printf(" --drift_compensation\n"); - printf(" --no_drift_compensation\n"); - printf(" --no_echo_metrics\n"); - printf(" --no_delay_logging\n"); - printf("\n -aecm Echo control mobile\n"); - printf(" --aecm_echo_path_in_file FILE\n"); - printf(" --aecm_echo_path_out_file FILE\n"); - printf("\n -agc Gain control\n"); - printf(" --analog\n"); - printf(" --adaptive_digital\n"); - printf(" --fixed_digital\n"); - printf(" --target_level LEVEL\n"); - printf(" --compression_gain GAIN\n"); - printf(" --limiter\n"); - printf(" --no_limiter\n"); - printf("\n -hpf High pass filter\n"); - printf("\n -ns Noise suppression\n"); - printf(" --ns_low\n"); - printf(" --ns_moderate\n"); - printf(" --ns_high\n"); - printf(" --ns_very_high\n"); - printf("\n -vad Voice activity detection\n"); - printf(" --vad_out_file FILE\n"); - printf("\n"); - printf("Modifiers:\n"); - printf(" --noasm Disable SSE optimization.\n"); - printf(" --perf Measure performance.\n"); - printf(" --quiet Suppress text output.\n"); - printf(" --no_progress Suppress progress.\n"); - printf(" --version Print version information and exit.\n"); -} - -// void function for gtest. 
-void void_main(int argc, char* argv[]) { - if (argc > 1 && strcmp(argv[1], "--help") == 0) { - usage(); - return; - } - - if (argc < 2) { - printf("Did you mean to run without arguments?\n"); - printf("Try `process_test --help' for more information.\n\n"); - } - - AudioProcessing* apm = AudioProcessing::Create(0); - ASSERT_TRUE(apm != NULL); - - WebRtc_Word8 version[1024]; - WebRtc_UWord32 version_bytes_remaining = sizeof(version); - WebRtc_UWord32 version_position = 0; - - const char* pb_filename = NULL; - const char* far_filename = NULL; - const char* near_filename = NULL; - const char* out_filename = NULL; - const char* vad_out_filename = NULL; - const char* aecm_echo_path_in_filename = NULL; - const char* aecm_echo_path_out_filename = NULL; - - int32_t sample_rate_hz = 16000; - int32_t device_sample_rate_hz = 16000; - - int num_capture_input_channels = 1; - int num_capture_output_channels = 1; - int num_render_channels = 1; - - int samples_per_channel = sample_rate_hz / 100; - - bool simulating = false; - bool perf_testing = false; - bool verbose = true; - bool progress = true; - //bool interleaved = true; - - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "-pb") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify protobuf filename after -pb"; - pb_filename = argv[i]; - - } else if (strcmp(argv[i], "-ir") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify filename after -ir"; - far_filename = argv[i]; - simulating = true; - - } else if (strcmp(argv[i], "-i") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify filename after -i"; - near_filename = argv[i]; - simulating = true; - - } else if (strcmp(argv[i], "-o") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify filename after -o"; - out_filename = argv[i]; - - } else if (strcmp(argv[i], "-fs") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify sample rate after -fs"; - ASSERT_EQ(1, sscanf(argv[i], "%d", &sample_rate_hz)); - samples_per_channel = sample_rate_hz / 100; - - ASSERT_EQ(apm->kNoError, - apm->set_sample_rate_hz(sample_rate_hz)); - - } else if (strcmp(argv[i], "-ch") == 0) { - i++; - ASSERT_LT(i + 1, argc) << "Specify number of channels after -ch"; - ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_input_channels)); - i++; - ASSERT_EQ(1, sscanf(argv[i], "%d", &num_capture_output_channels)); - - ASSERT_EQ(apm->kNoError, - apm->set_num_channels(num_capture_input_channels, - num_capture_output_channels)); - - } else if (strcmp(argv[i], "-rch") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify number of channels after -rch"; - ASSERT_EQ(1, sscanf(argv[i], "%d", &num_render_channels)); - - ASSERT_EQ(apm->kNoError, - apm->set_num_reverse_channels(num_render_channels)); - - } else if (strcmp(argv[i], "-aec") == 0) { - ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->enable_metrics(true)); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->enable_delay_logging(true)); - - } else if (strcmp(argv[i], "--drift_compensation") == 0) { - ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); - // TODO(ajm): this is enabled in the VQE test app by default. Investigate - // why it can give better performance despite passing zeros. 
- ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->enable_drift_compensation(true)); - } else if (strcmp(argv[i], "--no_drift_compensation") == 0) { - ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->enable_drift_compensation(false)); - - } else if (strcmp(argv[i], "--no_echo_metrics") == 0) { - ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->enable_metrics(false)); - - } else if (strcmp(argv[i], "--no_delay_logging") == 0) { - ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->enable_delay_logging(false)); - - } else if (strcmp(argv[i], "-aecm") == 0) { - ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true)); - - } else if (strcmp(argv[i], "--aecm_echo_path_in_file") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_in_file"; - aecm_echo_path_in_filename = argv[i]; - - } else if (strcmp(argv[i], "--aecm_echo_path_out_file") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify filename after --aecm_echo_path_out_file"; - aecm_echo_path_out_filename = argv[i]; - - } else if (strcmp(argv[i], "-agc") == 0) { - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - - } else if (strcmp(argv[i], "--analog") == 0) { - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); - - } else if (strcmp(argv[i], "--adaptive_digital") == 0) { - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->gain_control()->set_mode(GainControl::kAdaptiveDigital)); - - } else if (strcmp(argv[i], "--fixed_digital") == 0) { - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->gain_control()->set_mode(GainControl::kFixedDigital)); - - } else if (strcmp(argv[i], "--target_level") == 0) { - i++; - int level; - ASSERT_EQ(1, sscanf(argv[i], "%d", &level)); - - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->gain_control()->set_target_level_dbfs(level)); - - } else if (strcmp(argv[i], "--compression_gain") == 0) { - i++; - int gain; - ASSERT_EQ(1, sscanf(argv[i], "%d", &gain)); - - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->gain_control()->set_compression_gain_db(gain)); - - } else if (strcmp(argv[i], "--limiter") == 0) { - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->gain_control()->enable_limiter(true)); - - } else if (strcmp(argv[i], "--no_limiter") == 0) { - ASSERT_EQ(apm->kNoError, apm->gain_control()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->gain_control()->enable_limiter(false)); - - } else if (strcmp(argv[i], "-hpf") == 0) { - ASSERT_EQ(apm->kNoError, apm->high_pass_filter()->Enable(true)); - - } else if (strcmp(argv[i], "-ns") == 0) { - ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); - - } else if (strcmp(argv[i], "--ns_low") == 0) { - ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->noise_suppression()->set_level(NoiseSuppression::kLow)); - - } else if (strcmp(argv[i], "--ns_moderate") == 0) { - ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->noise_suppression()->set_level(NoiseSuppression::kModerate)); - 
- } else if (strcmp(argv[i], "--ns_high") == 0) { - ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->noise_suppression()->set_level(NoiseSuppression::kHigh)); - - } else if (strcmp(argv[i], "--ns_very_high") == 0) { - ASSERT_EQ(apm->kNoError, apm->noise_suppression()->Enable(true)); - ASSERT_EQ(apm->kNoError, - apm->noise_suppression()->set_level(NoiseSuppression::kVeryHigh)); - - } else if (strcmp(argv[i], "-vad") == 0) { - ASSERT_EQ(apm->kNoError, apm->voice_detection()->Enable(true)); - - } else if (strcmp(argv[i], "--vad_out_file") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify filename after --vad_out_file"; - vad_out_filename = argv[i]; - - } else if (strcmp(argv[i], "--noasm") == 0) { - WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM; - // We need to reinitialize here if components have already been enabled. - ASSERT_EQ(apm->kNoError, apm->Initialize()); - - } else if (strcmp(argv[i], "--perf") == 0) { - perf_testing = true; - - } else if (strcmp(argv[i], "--quiet") == 0) { - verbose = false; - progress = false; - - } else if (strcmp(argv[i], "--no_progress") == 0) { - progress = false; - - } else if (strcmp(argv[i], "--version") == 0) { - ASSERT_EQ(apm->kNoError, apm->Version(version, - version_bytes_remaining, - version_position)); - printf("%s\n", version); - return; - - } else if (strcmp(argv[i], "--debug_recording") == 0) { - i++; - ASSERT_LT(i, argc) << "Specify filename after --debug_recording"; - ASSERT_EQ(apm->kNoError, apm->StartDebugRecording(argv[i])); - } else { - FAIL() << "Unrecognized argument " << argv[i]; - } - } - // If we're reading a protobuf file, ensure a simulation hasn't also - // been requested (which makes no sense...) - ASSERT_FALSE(pb_filename && simulating); - - if (verbose) { - printf("Sample rate: %d Hz\n", sample_rate_hz); - printf("Primary channels: %d (in), %d (out)\n", - num_capture_input_channels, - num_capture_output_channels); - printf("Reverse channels: %d \n", num_render_channels); - } - - const char far_file_default[] = "apm_far.pcm"; - const char near_file_default[] = "apm_near.pcm"; - const char out_file_default[] = "out.pcm"; - const char event_filename[] = "apm_event.dat"; - const char delay_filename[] = "apm_delay.dat"; - const char drift_filename[] = "apm_drift.dat"; - const char vad_file_default[] = "vad_out.dat"; - - if (!simulating) { - far_filename = far_file_default; - near_filename = near_file_default; - } - - if (!out_filename) { - out_filename = out_file_default; - } - - if (!vad_out_filename) { - vad_out_filename = vad_file_default; - } - - FILE* pb_file = NULL; - FILE* far_file = NULL; - FILE* near_file = NULL; - FILE* out_file = NULL; - FILE* event_file = NULL; - FILE* delay_file = NULL; - FILE* drift_file = NULL; - FILE* vad_out_file = NULL; - FILE* aecm_echo_path_in_file = NULL; - FILE* aecm_echo_path_out_file = NULL; - - if (pb_filename) { - pb_file = fopen(pb_filename, "rb"); - ASSERT_TRUE(NULL != pb_file) << "Unable to open protobuf file " - << pb_filename; - } else { - if (far_filename) { - far_file = fopen(far_filename, "rb"); - ASSERT_TRUE(NULL != far_file) << "Unable to open far-end audio file " - << far_filename; - } - - near_file = fopen(near_filename, "rb"); - ASSERT_TRUE(NULL != near_file) << "Unable to open near-end audio file " - << near_filename; - if (!simulating) { - event_file = fopen(event_filename, "rb"); - ASSERT_TRUE(NULL != event_file) << "Unable to open event file " - << event_filename; - - delay_file = fopen(delay_filename, "rb"); - ASSERT_TRUE(NULL != 
delay_file) << "Unable to open buffer file " - << delay_filename; - - drift_file = fopen(drift_filename, "rb"); - ASSERT_TRUE(NULL != drift_file) << "Unable to open drift file " - << drift_filename; - } - } - - out_file = fopen(out_filename, "wb"); - ASSERT_TRUE(NULL != out_file) << "Unable to open output audio file " - << out_filename; - - int near_size_samples = 0; - if (pb_file) { - struct stat st; - stat(pb_filename, &st); - // Crude estimate, but should be good enough. - near_size_samples = st.st_size / 3 / sizeof(int16_t); - } else { - struct stat st; - stat(near_filename, &st); - near_size_samples = st.st_size / sizeof(int16_t); - } - - if (apm->voice_detection()->is_enabled()) { - vad_out_file = fopen(vad_out_filename, "wb"); - ASSERT_TRUE(NULL != vad_out_file) << "Unable to open VAD output file " - << vad_out_file; - } - - if (aecm_echo_path_in_filename != NULL) { - aecm_echo_path_in_file = fopen(aecm_echo_path_in_filename, "rb"); - ASSERT_TRUE(NULL != aecm_echo_path_in_file) << "Unable to open file " - << aecm_echo_path_in_filename; - - const size_t path_size = - apm->echo_control_mobile()->echo_path_size_bytes(); - unsigned char echo_path[path_size]; - ASSERT_EQ(path_size, fread(echo_path, - sizeof(unsigned char), - path_size, - aecm_echo_path_in_file)); - EXPECT_EQ(apm->kNoError, - apm->echo_control_mobile()->SetEchoPath(echo_path, path_size)); - fclose(aecm_echo_path_in_file); - aecm_echo_path_in_file = NULL; - } - - if (aecm_echo_path_out_filename != NULL) { - aecm_echo_path_out_file = fopen(aecm_echo_path_out_filename, "wb"); - ASSERT_TRUE(NULL != aecm_echo_path_out_file) << "Unable to open file " - << aecm_echo_path_out_filename; - } - - size_t read_count = 0; - int reverse_count = 0; - int primary_count = 0; - int near_read_samples = 0; - TickInterval acc_ticks; - - AudioFrame far_frame; - far_frame._frequencyInHz = sample_rate_hz; - - AudioFrame near_frame; - near_frame._frequencyInHz = sample_rate_hz; - - int delay_ms = 0; - int drift_samples = 0; - int capture_level = 127; - int8_t stream_has_voice = 0; - - TickTime t0 = TickTime::Now(); - TickTime t1 = t0; - WebRtc_Word64 max_time_us = 0; - WebRtc_Word64 max_time_reverse_us = 0; - WebRtc_Word64 min_time_us = 1e6; - WebRtc_Word64 min_time_reverse_us = 1e6; - - // TODO(ajm): Ideally we would refactor this block into separate functions, - // but for now we want to share the variables. 
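The block that follows replays a recorded debug session: it loops over length-prefixed protobuf events and dispatches on the event type. Stripped of assertions, progress reporting, and timing, its structure is roughly the following sketch (the calls are the ones used below; error handling is elided):

// Skeleton of the replay loop below.
Event event_msg;
while (ReadMessageFromFile(pb_file, &event_msg)) {
  if (event_msg.type() == Event::INIT) {
    // Reconfigure the APM from the recorded Init message.
    apm->set_sample_rate_hz(event_msg.init().sample_rate());
  } else if (event_msg.type() == Event::REVERSE_STREAM) {
    // Far-end audio is analyzed only; nothing is written back.
    apm->AnalyzeReverseStream(&far_frame);
  } else if (event_msg.type() == Event::STREAM) {
    // Near-end audio: set the per-frame stream parameters, then process.
    apm->gain_control()->set_stream_analog_level(event_msg.stream().level());
    apm->set_stream_delay_ms(event_msg.stream().delay());
    apm->echo_cancellation()->set_stream_drift_samples(
        event_msg.stream().drift());
    apm->ProcessStream(&near_frame);
  }
}

Note also that ReadMessageFromFile() reads the length prefix straight into an int32_t and therefore assumes a little-endian host, as its own comment admits. A portable variant would assemble the size byte by byte, along these lines (a sketch, not part of the original file; the function name is invented):

// Hypothetical endian-safe length-prefix read.
bool ReadLittleEndianSize(FILE* file, int32_t* size) {
  unsigned char buf[4];
  if (fread(buf, 1, sizeof(buf), file) != sizeof(buf)) {
    return false;  // Error or end-of-file.
  }
  // Assemble explicitly as little-endian, independent of host byte order.
  *size = static_cast<int32_t>(buf[0]) |
          (static_cast<int32_t>(buf[1]) << 8) |
          (static_cast<int32_t>(buf[2]) << 16) |
          (static_cast<int32_t>(buf[3]) << 24);
  return *size > 0;
}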
- if (pb_file) { - Event event_msg; - while (ReadMessageFromFile(pb_file, &event_msg)) { - std::ostringstream trace_stream; - trace_stream << "Processed frames: " << reverse_count << " (reverse), " - << primary_count << " (primary)"; - SCOPED_TRACE(trace_stream.str()); - - if (event_msg.type() == Event::INIT) { - ASSERT_TRUE(event_msg.has_init()); - const Init msg = event_msg.init(); - - ASSERT_TRUE(msg.has_sample_rate()); - ASSERT_EQ(apm->kNoError, - apm->set_sample_rate_hz(msg.sample_rate())); - - ASSERT_TRUE(msg.has_device_sample_rate()); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->set_device_sample_rate_hz( - msg.device_sample_rate())); - - ASSERT_TRUE(msg.has_num_input_channels()); - ASSERT_TRUE(msg.has_num_output_channels()); - ASSERT_EQ(apm->kNoError, - apm->set_num_channels(msg.num_input_channels(), - msg.num_output_channels())); - - ASSERT_TRUE(msg.has_num_reverse_channels()); - ASSERT_EQ(apm->kNoError, - apm->set_num_reverse_channels(msg.num_reverse_channels())); - - samples_per_channel = msg.sample_rate() / 100; - far_frame._frequencyInHz = msg.sample_rate(); - far_frame._payloadDataLengthInSamples = - msg.num_reverse_channels() * samples_per_channel; - near_frame._frequencyInHz = msg.sample_rate(); - - if (verbose) { - printf("Init at frame: %d (primary), %d (reverse)\n", - primary_count, reverse_count); - printf(" Sample rate: %d Hz\n", sample_rate_hz); - } - - } else if (event_msg.type() == Event::REVERSE_STREAM) { - ASSERT_TRUE(event_msg.has_reverse_stream()); - const ReverseStream msg = event_msg.reverse_stream(); - reverse_count++; - - ASSERT_TRUE(msg.has_data()); - ASSERT_EQ(sizeof(int16_t) * far_frame._payloadDataLengthInSamples, - msg.data().size()); - memcpy(far_frame._payloadData, msg.data().data(), msg.data().size()); - - if (perf_testing) { - t0 = TickTime::Now(); - } - - ASSERT_EQ(apm->kNoError, - apm->AnalyzeReverseStream(&far_frame)); - - if (perf_testing) { - t1 = TickTime::Now(); - TickInterval tick_diff = t1 - t0; - acc_ticks += tick_diff; - if (tick_diff.Microseconds() > max_time_reverse_us) { - max_time_reverse_us = tick_diff.Microseconds(); - } - if (tick_diff.Microseconds() < min_time_reverse_us) { - min_time_reverse_us = tick_diff.Microseconds(); - } - } - - } else if (event_msg.type() == Event::STREAM) { - ASSERT_TRUE(event_msg.has_stream()); - const Stream msg = event_msg.stream(); - primary_count++; - - near_frame._audioChannel = apm->num_input_channels(); - near_frame._payloadDataLengthInSamples = - apm->num_input_channels() * samples_per_channel; - - ASSERT_TRUE(msg.has_input_data()); - ASSERT_EQ(sizeof(int16_t) * near_frame._payloadDataLengthInSamples, - msg.input_data().size()); - memcpy(near_frame._payloadData, - msg.input_data().data(), - msg.input_data().size()); - - near_read_samples += near_frame._payloadDataLengthInSamples; - if (progress && primary_count % 100 == 0) { - printf("%.0f%% complete\r", - (near_read_samples * 100.0) / near_size_samples); - fflush(stdout); - } - - if (perf_testing) { - t0 = TickTime::Now(); - } - - ASSERT_EQ(apm->kNoError, - apm->gain_control()->set_stream_analog_level(msg.level())); - ASSERT_EQ(apm->kNoError, - apm->set_stream_delay_ms(msg.delay())); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->set_stream_drift_samples(msg.drift())); - - int err = apm->ProcessStream(&near_frame); - if (err == apm->kBadStreamParameterWarning) { - printf("Bad parameter warning. 
%s\n", trace_stream.str().c_str()); - } - ASSERT_TRUE(err == apm->kNoError || - err == apm->kBadStreamParameterWarning); - - capture_level = apm->gain_control()->stream_analog_level(); - - stream_has_voice = - static_cast(apm->voice_detection()->stream_has_voice()); - if (vad_out_file != NULL) { - ASSERT_EQ(1u, fwrite(&stream_has_voice, - sizeof(stream_has_voice), - 1, - vad_out_file)); - } - - if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { - ASSERT_EQ(msg.level(), capture_level); - } - - if (perf_testing) { - t1 = TickTime::Now(); - TickInterval tick_diff = t1 - t0; - acc_ticks += tick_diff; - if (tick_diff.Microseconds() > max_time_us) { - max_time_us = tick_diff.Microseconds(); - } - if (tick_diff.Microseconds() < min_time_us) { - min_time_us = tick_diff.Microseconds(); - } - } - - ASSERT_EQ(near_frame._payloadDataLengthInSamples, - fwrite(near_frame._payloadData, - sizeof(int16_t), - near_frame._payloadDataLengthInSamples, - out_file)); - } - } - - ASSERT_TRUE(feof(pb_file)); - - } else { - enum Events { - kInitializeEvent, - kRenderEvent, - kCaptureEvent, - kResetEventDeprecated - }; - int16_t event = 0; - while (simulating || feof(event_file) == 0) { - std::ostringstream trace_stream; - trace_stream << "Processed frames: " << reverse_count << " (reverse), " - << primary_count << " (primary)"; - SCOPED_TRACE(trace_stream.str()); - - if (simulating) { - if (far_file == NULL) { - event = kCaptureEvent; - } else { - if (event == kRenderEvent) { - event = kCaptureEvent; - } else { - event = kRenderEvent; - } - } - } else { - read_count = fread(&event, sizeof(event), 1, event_file); - if (read_count != 1) { - break; - } - } - - if (event == kInitializeEvent || event == kResetEventDeprecated) { - ASSERT_EQ(1u, - fread(&sample_rate_hz, sizeof(sample_rate_hz), 1, event_file)); - samples_per_channel = sample_rate_hz / 100; - - ASSERT_EQ(1u, - fread(&device_sample_rate_hz, - sizeof(device_sample_rate_hz), - 1, - event_file)); - - ASSERT_EQ(apm->kNoError, - apm->set_sample_rate_hz(sample_rate_hz)); - - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->set_device_sample_rate_hz( - device_sample_rate_hz)); - - far_frame._frequencyInHz = sample_rate_hz; - near_frame._frequencyInHz = sample_rate_hz; - - if (verbose) { - printf("Init at frame: %d (primary), %d (reverse)\n", - primary_count, reverse_count); - printf(" Sample rate: %d Hz\n", sample_rate_hz); - } - - } else if (event == kRenderEvent) { - reverse_count++; - far_frame._audioChannel = num_render_channels; - far_frame._payloadDataLengthInSamples = - num_render_channels * samples_per_channel; - - read_count = fread(far_frame._payloadData, - sizeof(WebRtc_Word16), - far_frame._payloadDataLengthInSamples, - far_file); - - if (simulating) { - if (read_count != far_frame._payloadDataLengthInSamples) { - // Read an equal amount from the near file to avoid errors due to - // not reaching end-of-file. - EXPECT_EQ(0, fseek(near_file, read_count * sizeof(WebRtc_Word16), - SEEK_CUR)); - break; // This is expected. 
- } - } else { - ASSERT_EQ(read_count, - far_frame._payloadDataLengthInSamples); - } - - if (perf_testing) { - t0 = TickTime::Now(); - } - - ASSERT_EQ(apm->kNoError, - apm->AnalyzeReverseStream(&far_frame)); - - if (perf_testing) { - t1 = TickTime::Now(); - TickInterval tick_diff = t1 - t0; - acc_ticks += tick_diff; - if (tick_diff.Microseconds() > max_time_reverse_us) { - max_time_reverse_us = tick_diff.Microseconds(); - } - if (tick_diff.Microseconds() < min_time_reverse_us) { - min_time_reverse_us = tick_diff.Microseconds(); - } - } - - } else if (event == kCaptureEvent) { - primary_count++; - near_frame._audioChannel = num_capture_input_channels; - near_frame._payloadDataLengthInSamples = - num_capture_input_channels * samples_per_channel; - - read_count = fread(near_frame._payloadData, - sizeof(WebRtc_Word16), - near_frame._payloadDataLengthInSamples, - near_file); - - near_read_samples += read_count; - if (progress && primary_count % 100 == 0) { - printf("%.0f%% complete\r", - (near_read_samples * 100.0) / near_size_samples); - fflush(stdout); - } - if (simulating) { - if (read_count != near_frame._payloadDataLengthInSamples) { - break; // This is expected. - } - - delay_ms = 0; - drift_samples = 0; - } else { - ASSERT_EQ(read_count, - near_frame._payloadDataLengthInSamples); - - // TODO(ajm): sizeof(delay_ms) for current files? - ASSERT_EQ(1u, - fread(&delay_ms, 2, 1, delay_file)); - ASSERT_EQ(1u, - fread(&drift_samples, sizeof(drift_samples), 1, drift_file)); - } - - if (perf_testing) { - t0 = TickTime::Now(); - } - - // TODO(ajm): fake an analog gain while simulating. - - int capture_level_in = capture_level; - ASSERT_EQ(apm->kNoError, - apm->gain_control()->set_stream_analog_level(capture_level)); - ASSERT_EQ(apm->kNoError, - apm->set_stream_delay_ms(delay_ms)); - ASSERT_EQ(apm->kNoError, - apm->echo_cancellation()->set_stream_drift_samples(drift_samples)); - - int err = apm->ProcessStream(&near_frame); - if (err == apm->kBadStreamParameterWarning) { - printf("Bad parameter warning. 
%s\n", trace_stream.str().c_str()); - } - ASSERT_TRUE(err == apm->kNoError || - err == apm->kBadStreamParameterWarning); - - capture_level = apm->gain_control()->stream_analog_level(); - - stream_has_voice = - static_cast(apm->voice_detection()->stream_has_voice()); - if (vad_out_file != NULL) { - ASSERT_EQ(1u, fwrite(&stream_has_voice, - sizeof(stream_has_voice), - 1, - vad_out_file)); - } - - if (apm->gain_control()->mode() != GainControl::kAdaptiveAnalog) { - ASSERT_EQ(capture_level_in, capture_level); - } - - if (perf_testing) { - t1 = TickTime::Now(); - TickInterval tick_diff = t1 - t0; - acc_ticks += tick_diff; - if (tick_diff.Microseconds() > max_time_us) { - max_time_us = tick_diff.Microseconds(); - } - if (tick_diff.Microseconds() < min_time_us) { - min_time_us = tick_diff.Microseconds(); - } - } - - ASSERT_EQ(near_frame._payloadDataLengthInSamples, - fwrite(near_frame._payloadData, - sizeof(WebRtc_Word16), - near_frame._payloadDataLengthInSamples, - out_file)); - } - else { - FAIL() << "Event " << event << " is unrecognized"; - } - } - } - printf("100%% complete\r"); - - if (aecm_echo_path_out_file != NULL) { - const size_t path_size = - apm->echo_control_mobile()->echo_path_size_bytes(); - unsigned char echo_path[path_size]; - apm->echo_control_mobile()->GetEchoPath(echo_path, path_size); - ASSERT_EQ(path_size, fwrite(echo_path, - sizeof(unsigned char), - path_size, - aecm_echo_path_out_file)); - fclose(aecm_echo_path_out_file); - aecm_echo_path_out_file = NULL; - } - - if (verbose) { - printf("\nProcessed frames: %d (primary), %d (reverse)\n", - primary_count, reverse_count); - - if (apm->echo_cancellation()->are_metrics_enabled()) { - EchoCancellation::Metrics metrics; - apm->echo_cancellation()->GetMetrics(&metrics); - printf("\n--Echo metrics--\n"); - printf("(avg, max, min)\n"); - printf("ERL: "); - PrintStat(metrics.echo_return_loss); - printf("ERLE: "); - PrintStat(metrics.echo_return_loss_enhancement); - printf("ANLP: "); - PrintStat(metrics.a_nlp); - } - if (apm->echo_cancellation()->is_delay_logging_enabled()) { - int median = 0; - int std = 0; - apm->echo_cancellation()->GetDelayMetrics(&median, &std); - printf("\n--Delay metrics--\n"); - printf("Median: %3d\n", median); - printf("Standard deviation: %3d\n", std); - } - } - - if (!pb_file) { - int8_t temp_int8; - if (far_file) { - read_count = fread(&temp_int8, sizeof(temp_int8), 1, far_file); - EXPECT_NE(0, feof(far_file)) << "Far-end file not fully processed"; - } - - read_count = fread(&temp_int8, sizeof(temp_int8), 1, near_file); - EXPECT_NE(0, feof(near_file)) << "Near-end file not fully processed"; - - if (!simulating) { - read_count = fread(&temp_int8, sizeof(temp_int8), 1, event_file); - EXPECT_NE(0, feof(event_file)) << "Event file not fully processed"; - read_count = fread(&temp_int8, sizeof(temp_int8), 1, delay_file); - EXPECT_NE(0, feof(delay_file)) << "Delay file not fully processed"; - read_count = fread(&temp_int8, sizeof(temp_int8), 1, drift_file); - EXPECT_NE(0, feof(drift_file)) << "Drift file not fully processed"; - } - } - - if (perf_testing) { - if (primary_count > 0) { - WebRtc_Word64 exec_time = acc_ticks.Milliseconds(); - printf("\nTotal time: %.3f s, file time: %.2f s\n", - exec_time * 0.001, primary_count * 0.01); - printf("Time per frame: %.3f ms (average), %.3f ms (max)," - " %.3f ms (min)\n", - (exec_time * 1.0) / primary_count, - (max_time_us + max_time_reverse_us) / 1000.0, - (min_time_us + min_time_reverse_us) / 1000.0); - } else { - printf("Warning: no capture frames\n"); - } - } - - 
-  AudioProcessing::Destroy(apm);
-  apm = NULL;
-}
-}  // namespace
-
-int main(int argc, char* argv[])
-{
-  void_main(argc, argv);
-
-  // Optional, but removes memory leak noise from Valgrind.
-  google::protobuf::ShutdownProtobufLibrary();
-  return 0;
-}
diff --git a/webrtc/modules/audio_processing/test/unit_test.cc b/webrtc/modules/audio_processing/test/unit_test.cc
deleted file mode 100644
index 2a50bc3..0000000
--- a/webrtc/modules/audio_processing/test/unit_test.cc
+++ /dev/null
@@ -1,1045 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include <stdio.h>
-
-#include "gtest/gtest.h"
-
-#include "audio_processing.h"
-#include "event_wrapper.h"
-#include "module_common_types.h"
-#include "signal_processing_library.h"
-#include "testsupport/fileutils.h"
-#include "thread_wrapper.h"
-#include "trace.h"
-#ifdef WEBRTC_ANDROID
-#include "external/webrtc/src/modules/audio_processing/test/unittest.pb.h"
-#else
-#include "webrtc/audio_processing/unittest.pb.h"
-#endif
-
-using webrtc::AudioProcessing;
-using webrtc::AudioFrame;
-using webrtc::GainControl;
-using webrtc::NoiseSuppression;
-using webrtc::EchoCancellation;
-using webrtc::EventWrapper;
-using webrtc::Trace;
-using webrtc::LevelEstimator;
-using webrtc::EchoCancellation;
-using webrtc::EchoControlMobile;
-using webrtc::VoiceDetection;
-
-namespace {
-// When false, this will compare the output data with the results stored to
-// file. This is the typical case. When the file should be updated, it can
-// be set to true with the command-line switch --write_output_data.
-bool write_output_data = false;
-
-class ApmEnvironment : public ::testing::Environment {
- public:
-  virtual void SetUp() {
-    Trace::CreateTrace();
-    ASSERT_EQ(0, Trace::SetTraceFile("apm_trace.txt"));
-  }
-
-  virtual void TearDown() {
-    Trace::ReturnTrace();
-  }
-};
-
-class ApmTest : public ::testing::Test {
- protected:
-  ApmTest();
-  virtual void SetUp();
-  virtual void TearDown();
-  // Path to where the resource files to be used for this test are located.
-  const std::string kResourcePath;
-  const std::string kOutputFileName;
-  webrtc::AudioProcessing* apm_;
-  webrtc::AudioFrame* frame_;
-  webrtc::AudioFrame* revframe_;
-  FILE* far_file_;
-  FILE* near_file_;
-};
-
-ApmTest::ApmTest()
-    : kResourcePath(webrtc::test::GetProjectRootPath() +
-                    "test/data/audio_processing/"),
-#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
-      kOutputFileName(kResourcePath + "output_data_fixed.pb"),
-#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
-      kOutputFileName(kResourcePath + "output_data_float.pb"),
-#endif
-      apm_(NULL),
-      frame_(NULL),
-      revframe_(NULL),
-      far_file_(NULL),
-      near_file_(NULL) {}
-
-void ApmTest::SetUp() {
-  apm_ = AudioProcessing::Create(0);
-  ASSERT_TRUE(apm_ != NULL);
-
-  frame_ = new AudioFrame();
-  revframe_ = new AudioFrame();
-
-  ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(32000));
-  ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(2, 2));
-  ASSERT_EQ(apm_->kNoError, apm_->set_num_reverse_channels(2));
-
-  frame_->_payloadDataLengthInSamples = 320;
-  frame_->_audioChannel = 2;
-  frame_->_frequencyInHz = 32000;
-  revframe_->_payloadDataLengthInSamples = 320;
-  revframe_->_audioChannel = 2;
-  revframe_->_frequencyInHz = 32000;
-
-  std::string input_filename = kResourcePath + "aec_far.pcm";
-  far_file_ = fopen(input_filename.c_str(), "rb");
-  ASSERT_TRUE(far_file_ != NULL) << "Could not open input file " <<
-      input_filename << "\n";
-  input_filename = kResourcePath + "aec_near.pcm";
-  near_file_ = fopen(input_filename.c_str(), "rb");
-  ASSERT_TRUE(near_file_ != NULL) << "Could not open input file " <<
-      input_filename << "\n";
-}
-
-void ApmTest::TearDown() {
-  if (frame_) {
-    delete frame_;
-  }
-  frame_ = NULL;
-
-  if (revframe_) {
-    delete revframe_;
-  }
-  revframe_ = NULL;
-
-  if (far_file_) {
-    ASSERT_EQ(0, fclose(far_file_));
-  }
-  far_file_ = NULL;
-
-  if (near_file_) {
-    ASSERT_EQ(0, fclose(near_file_));
-  }
-  near_file_ = NULL;
-
-  if (apm_ != NULL) {
-    AudioProcessing::Destroy(apm_);
-  }
-  apm_ = NULL;
-}
-
-void MixStereoToMono(const WebRtc_Word16* stereo,
-                     WebRtc_Word16* mono,
-                     int num_samples) {
-  for (int i = 0; i < num_samples; i++) {
-    int int32 = (static_cast<int>(stereo[i * 2]) +
-                 static_cast<int>(stereo[i * 2 + 1])) >> 1;
-    mono[i] = static_cast<WebRtc_Word16>(int32);
-  }
-}
-
-template <typename T>
-T MaxValue(T a, T b) {
-  return a > b ? a : b;
-}
-
-template <typename T>
-T AbsValue(T a) {
-  return a > 0 ? a : -a;
-}
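MixStereoToMono() above averages each left/right sample pair with an arithmetic right shift; a quick illustration with invented sample values:

// Example: downmixing two stereo sample pairs (values are arbitrary).
WebRtc_Word16 stereo[] = {1000, 2000, -500, 500};
WebRtc_Word16 mono[2];
MixStereoToMono(stereo, mono, 2);
EXPECT_EQ(1500, mono[0]);  // (1000 + 2000) >> 1
EXPECT_EQ(0, mono[1]);     // (-500 + 500) >> 1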
-
-WebRtc_Word16 MaxAudioFrame(const AudioFrame& frame) {
-  const int length = frame._payloadDataLengthInSamples * frame._audioChannel;
-  WebRtc_Word16 max = AbsValue(frame._payloadData[0]);
-  for (int i = 1; i < length; i++) {
-    max = MaxValue(max, AbsValue(frame._payloadData[i]));
-  }
-
-  return max;
-}
-
-void TestStats(const AudioProcessing::Statistic& test,
-               const webrtc::audioproc::Test::Statistic& reference) {
-  EXPECT_EQ(reference.instant(), test.instant);
-  EXPECT_EQ(reference.average(), test.average);
-  EXPECT_EQ(reference.maximum(), test.maximum);
-  EXPECT_EQ(reference.minimum(), test.minimum);
-}
-
-void WriteStatsMessage(const AudioProcessing::Statistic& output,
-                       webrtc::audioproc::Test::Statistic* message) {
-  message->set_instant(output.instant);
-  message->set_average(output.average);
-  message->set_maximum(output.maximum);
-  message->set_minimum(output.minimum);
-}
-
-void WriteMessageLiteToFile(const std::string filename,
-                            const ::google::protobuf::MessageLite& message) {
-  FILE* file = fopen(filename.c_str(), "wb");
-  ASSERT_TRUE(file != NULL) << "Could not open " << filename;
-  int size = message.ByteSize();
-  ASSERT_GT(size, 0);
-  unsigned char* array = new unsigned char[size];
-  ASSERT_TRUE(message.SerializeToArray(array, size));
-
-  ASSERT_EQ(1u, fwrite(&size, sizeof(int), 1, file));
-  ASSERT_EQ(static_cast<size_t>(size),
-      fwrite(array, sizeof(unsigned char), size, file));
-
-  delete [] array;
-  fclose(file);
-}
-
-void ReadMessageLiteFromFile(const std::string filename,
-                             ::google::protobuf::MessageLite* message) {
-  assert(message != NULL);
-
-  FILE* file = fopen(filename.c_str(), "rb");
-  ASSERT_TRUE(file != NULL) << "Could not open " << filename;
-  int size = 0;
-  ASSERT_EQ(1u, fread(&size, sizeof(int), 1, file));
-  ASSERT_GT(size, 0);
-  unsigned char* array = new unsigned char[size];
-  ASSERT_EQ(static_cast<size_t>(size),
-      fread(array, sizeof(unsigned char), size, file));
-
-  ASSERT_TRUE(message->ParseFromArray(array, size));
-
-  delete [] array;
-  fclose(file);
-}
-
-struct ThreadData {
-  ThreadData(int thread_num_, AudioProcessing* ap_)
-      : thread_num(thread_num_),
-        error(false),
-        ap(ap_) {}
-  int thread_num;
-  bool error;
-  AudioProcessing* ap;
-};
-
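WriteMessageLiteToFile() and ReadMessageLiteFromFile() above store a message as a native-endian int length followed by the serialized bytes. A minimal round trip through the pair looks like this ("example.pb" is an invented scratch-file name):

// Hypothetical round trip through the two helpers above.
webrtc::audioproc::OutputData data;
data.add_test()->set_sample_rate(16000);
WriteMessageLiteToFile("example.pb", data);

webrtc::audioproc::OutputData read_back;
ReadMessageLiteFromFile("example.pb", &read_back);
EXPECT_EQ(1, read_back.test_size());
EXPECT_EQ(16000, read_back.test(0).sample_rate());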
-// Don't use GTest here; non-thread-safe on Windows (as of 1.5.0).
-bool DeadlockProc(void* thread_object) {
-  ThreadData* thread_data = static_cast<ThreadData*>(thread_object);
-  AudioProcessing* ap = thread_data->ap;
-  int err = ap->kNoError;
-
-  AudioFrame primary_frame;
-  AudioFrame reverse_frame;
-  primary_frame._payloadDataLengthInSamples = 320;
-  primary_frame._audioChannel = 2;
-  primary_frame._frequencyInHz = 32000;
-  reverse_frame._payloadDataLengthInSamples = 320;
-  reverse_frame._audioChannel = 2;
-  reverse_frame._frequencyInHz = 32000;
-
-  ap->echo_cancellation()->Enable(true);
-  ap->gain_control()->Enable(true);
-  ap->high_pass_filter()->Enable(true);
-  ap->level_estimator()->Enable(true);
-  ap->noise_suppression()->Enable(true);
-  ap->voice_detection()->Enable(true);
-
-  if (thread_data->thread_num % 2 == 0) {
-    err = ap->AnalyzeReverseStream(&reverse_frame);
-    if (err != ap->kNoError) {
-      printf("Error in AnalyzeReverseStream(): %d\n", err);
-      thread_data->error = true;
-      return false;
-    }
-  }
-
-  if (thread_data->thread_num % 2 == 1) {
-    ap->set_stream_delay_ms(0);
-    ap->echo_cancellation()->set_stream_drift_samples(0);
-    ap->gain_control()->set_stream_analog_level(0);
-    err = ap->ProcessStream(&primary_frame);
-    if (err == ap->kStreamParameterNotSetError) {
-      printf("Expected kStreamParameterNotSetError in ProcessStream(): %d\n",
-          err);
-    } else if (err != ap->kNoError) {
-      printf("Error in ProcessStream(): %d\n", err);
-      thread_data->error = true;
-      return false;
-    }
-    ap->gain_control()->stream_analog_level();
-  }
-
-  EventWrapper* event = EventWrapper::Create();
-  event->Wait(1);
-  delete event;
-  event = NULL;
-
-  return true;
-}
-
-/*TEST_F(ApmTest, Deadlock) {
-  const int num_threads = 16;
-  std::vector<ThreadWrapper*> threads(num_threads);
-  std::vector<ThreadData*> thread_data(num_threads);
-
-  ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(32000));
-  ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(2, 2));
-  ASSERT_EQ(apm_->kNoError, apm_->set_num_reverse_channels(2));
-
-  for (int i = 0; i < num_threads; i++) {
-    thread_data[i] = new ThreadData(i, apm_);
-    threads[i] = ThreadWrapper::CreateThread(DeadlockProc,
-                                             thread_data[i],
-                                             kNormalPriority,
-                                             0);
-    ASSERT_TRUE(threads[i] != NULL);
-    unsigned int thread_id = 0;
-    threads[i]->Start(thread_id);
-  }
-
-  EventWrapper* event = EventWrapper::Create();
-  ASSERT_EQ(kEventTimeout, event->Wait(5000));
-  delete event;
-  event = NULL;
-
-  for (int i = 0; i < num_threads; i++) {
-    // This will return false if the thread has deadlocked.
-    ASSERT_TRUE(threads[i]->Stop());
-    ASSERT_FALSE(thread_data[i]->error);
-    delete threads[i];
-    threads[i] = NULL;
-    delete thread_data[i];
-    thread_data[i] = NULL;
-  }
-}*/
-
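The StreamParameters test below pins down the per-frame contract: when a component is enabled, ProcessStream() returns kStreamParameterNotSetError unless the matching setter has been called since the previous frame. Pulled together (names such as delay_ms, drift, and mic_level are illustrative), the expected sequence for each capture frame is:

// Per-frame calling convention verified by the test below (sketch).
apm->set_stream_delay_ms(delay_ms);                         // AEC delay
apm->echo_cancellation()->set_stream_drift_samples(drift);  // drift comp.
apm->gain_control()->set_stream_analog_level(mic_level);    // analog AGC
int err = apm->ProcessStream(frame);                        // then process
mic_level = apm->gain_control()->stream_analog_level();     // updated level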
-TEST_F(ApmTest, StreamParameters) {
-  // No errors when the components are disabled.
-  EXPECT_EQ(apm_->kNoError,
-            apm_->ProcessStream(frame_));
-
-  // Missing AGC level
-  EXPECT_EQ(apm_->kNoError, apm_->Initialize());
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->set_stream_drift_samples(0));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
-
-  // Missing delay
-  EXPECT_EQ(apm_->kNoError, apm_->Initialize());
-  EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->set_stream_drift_samples(0));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->gain_control()->set_stream_analog_level(127));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
-
-  // Missing drift
-  EXPECT_EQ(apm_->kNoError, apm_->Initialize());
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_drift_compensation(true));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
-  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->gain_control()->set_stream_analog_level(127));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
-
-  // No stream parameters
-  EXPECT_EQ(apm_->kNoError, apm_->Initialize());
-  EXPECT_EQ(apm_->kNoError,
-            apm_->AnalyzeReverseStream(revframe_));
-  EXPECT_EQ(apm_->kStreamParameterNotSetError,
-            apm_->ProcessStream(frame_));
-
-  // All there
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true));
-  EXPECT_EQ(apm_->kNoError, apm_->Initialize());
-  EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(100));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->set_stream_drift_samples(0));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->gain_control()->set_stream_analog_level(127));
-  EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
-}
-
-TEST_F(ApmTest, Channels) {
-  // Test invalid numbers of channels.
-  EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(0, 1));
-  EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(1, 0));
-  EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(3, 1));
-  EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(1, 3));
-  EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_reverse_channels(0));
-  EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_reverse_channels(3));
-  // Test valid numbers of channels.
-  for (int i = 1; i < 3; i++) {
-    for (int j = 1; j < 3; j++) {
-      if (j > i) {
-        EXPECT_EQ(apm_->kBadParameterError, apm_->set_num_channels(i, j));
-      } else {
-        EXPECT_EQ(apm_->kNoError, apm_->set_num_channels(i, j));
-        EXPECT_EQ(j, apm_->num_output_channels());
-      }
-    }
-    EXPECT_EQ(i, apm_->num_input_channels());
-    EXPECT_EQ(apm_->kNoError, apm_->set_num_reverse_channels(i));
-    EXPECT_EQ(i, apm_->num_reverse_channels());
-  }
-}
-
-TEST_F(ApmTest, SampleRates) {
-  // Testing invalid sample rates
-  EXPECT_EQ(apm_->kBadParameterError, apm_->set_sample_rate_hz(10000));
-  // Testing valid sample rates
-  int fs[] = {8000, 16000, 32000};
-  for (size_t i = 0; i < sizeof(fs) 
/ sizeof(*fs); i++) { - EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(fs[i])); - EXPECT_EQ(fs[i], apm_->sample_rate_hz()); - } -} - -TEST_F(ApmTest, Process) { - GOOGLE_PROTOBUF_VERIFY_VERSION; - webrtc::audioproc::OutputData output_data; - - if (!write_output_data) { - ReadMessageLiteFromFile(kOutputFileName, &output_data); - } else { - // We don't have a file; add the required tests to the protobuf. - // TODO(ajm): vary the output channels as well? - const int channels[] = {1, 2}; - const size_t channels_size = sizeof(channels) / sizeof(*channels); -#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE) - // AECM doesn't support super-wb. - const int sample_rates[] = {8000, 16000}; -#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE) - const int sample_rates[] = {8000, 16000, 32000}; -#endif - const size_t sample_rates_size = sizeof(sample_rates) / sizeof(*sample_rates); - for (size_t i = 0; i < channels_size; i++) { - for (size_t j = 0; j < channels_size; j++) { - for (size_t k = 0; k < sample_rates_size; k++) { - webrtc::audioproc::Test* test = output_data.add_test(); - test->set_num_reverse_channels(channels[i]); - test->set_num_input_channels(channels[j]); - test->set_num_output_channels(channels[j]); - test->set_sample_rate(sample_rates[k]); - } - } - } - } - -#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE) - EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(16000)); - EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true)); - - EXPECT_EQ(apm_->kNoError, - apm_->gain_control()->set_mode(GainControl::kAdaptiveDigital)); - EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); -#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE) - EXPECT_EQ(apm_->kNoError, - apm_->echo_cancellation()->enable_drift_compensation(true)); - EXPECT_EQ(apm_->kNoError, - apm_->echo_cancellation()->enable_metrics(true)); - EXPECT_EQ(apm_->kNoError, - apm_->echo_cancellation()->enable_delay_logging(true)); - EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true)); - - EXPECT_EQ(apm_->kNoError, - apm_->gain_control()->set_mode(GainControl::kAdaptiveAnalog)); - EXPECT_EQ(apm_->kNoError, - apm_->gain_control()->set_analog_level_limits(0, 255)); - EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(true)); -#endif - - EXPECT_EQ(apm_->kNoError, - apm_->high_pass_filter()->Enable(true)); - - //EXPECT_EQ(apm_->kNoError, - // apm_->level_estimator()->Enable(true)); - - EXPECT_EQ(apm_->kNoError, - apm_->noise_suppression()->Enable(true)); - - EXPECT_EQ(apm_->kNoError, - apm_->voice_detection()->Enable(true)); - - for (int i = 0; i < output_data.test_size(); i++) { - printf("Running test %d of %d...\n", i + 1, output_data.test_size()); - - webrtc::audioproc::Test* test = output_data.mutable_test(i); - const int num_samples = test->sample_rate() / 100; - revframe_->_payloadDataLengthInSamples = num_samples; - revframe_->_audioChannel = test->num_reverse_channels(); - revframe_->_frequencyInHz = test->sample_rate(); - frame_->_payloadDataLengthInSamples = num_samples; - frame_->_audioChannel = test->num_input_channels(); - frame_->_frequencyInHz = test->sample_rate(); - - EXPECT_EQ(apm_->kNoError, apm_->Initialize()); - ASSERT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(test->sample_rate())); - ASSERT_EQ(apm_->kNoError, apm_->set_num_channels(frame_->_audioChannel, - frame_->_audioChannel)); - ASSERT_EQ(apm_->kNoError, - apm_->set_num_reverse_channels(revframe_->_audioChannel)); - - int frame_count = 0; - int has_echo_count = 0; - int has_voice_count = 0; - int is_saturated_count = 0; - int 
analog_level = 127;
-    int analog_level_average = 0;
-    int max_output_average = 0;
-
-    while (1) {
-      WebRtc_Word16 temp_data[640];
-
-      // Read far-end frame
-      size_t read_count = fread(temp_data,
-                                sizeof(WebRtc_Word16),
-                                num_samples * 2,
-                                far_file_);
-      if (read_count != static_cast<size_t>(num_samples * 2)) {
-        // Check that the file really ended.
-        ASSERT_NE(0, feof(far_file_));
-        break;  // This is expected.
-      }
-
-      if (revframe_->_audioChannel == 1) {
-        MixStereoToMono(temp_data, revframe_->_payloadData,
-            revframe_->_payloadDataLengthInSamples);
-      } else {
-        memcpy(revframe_->_payloadData,
-               &temp_data[0],
-               sizeof(WebRtc_Word16) * read_count);
-      }
-
-      EXPECT_EQ(apm_->kNoError,
-          apm_->AnalyzeReverseStream(revframe_));
-
-      EXPECT_EQ(apm_->kNoError, apm_->set_stream_delay_ms(0));
-      EXPECT_EQ(apm_->kNoError,
-          apm_->echo_cancellation()->set_stream_drift_samples(0));
-      EXPECT_EQ(apm_->kNoError,
-          apm_->gain_control()->set_stream_analog_level(analog_level));
-
-      // Read near-end frame
-      read_count = fread(temp_data,
-                         sizeof(WebRtc_Word16),
-                         num_samples * 2,
-                         near_file_);
-      if (read_count != static_cast<size_t>(num_samples * 2)) {
-        // Check that the file really ended.
-        ASSERT_NE(0, feof(near_file_));
-        break;  // This is expected.
-      }
-
-      if (frame_->_audioChannel == 1) {
-        MixStereoToMono(temp_data, frame_->_payloadData, num_samples);
-      } else {
-        memcpy(frame_->_payloadData,
-               &temp_data[0],
-               sizeof(WebRtc_Word16) * read_count);
-      }
-      frame_->_vadActivity = AudioFrame::kVadUnknown;
-
-      EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
-
-      max_output_average += MaxAudioFrame(*frame_);
-
-      if (apm_->echo_cancellation()->stream_has_echo()) {
-        has_echo_count++;
-      }
-
-      analog_level = apm_->gain_control()->stream_analog_level();
-      analog_level_average += analog_level;
-      if (apm_->gain_control()->stream_is_saturated()) {
-        is_saturated_count++;
-      }
-      if (apm_->voice_detection()->stream_has_voice()) {
-        has_voice_count++;
-        EXPECT_EQ(AudioFrame::kVadActive, frame_->_vadActivity);
-      } else {
-        EXPECT_EQ(AudioFrame::kVadPassive, frame_->_vadActivity);
-      }
-
-      frame_count++;
-    }
-    max_output_average /= frame_count;
-    analog_level_average /= frame_count;
-
-    //LevelEstimator::Metrics far_metrics;
-    //LevelEstimator::Metrics near_metrics;
-    //EXPECT_EQ(apm_->kNoError,
-    //          apm_->level_estimator()->GetMetrics(&near_metrics,
-
-#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
-    EchoCancellation::Metrics echo_metrics;
-    EXPECT_EQ(apm_->kNoError,
-              apm_->echo_cancellation()->GetMetrics(&echo_metrics));
-    int median = 0;
-    int std = 0;
-    EXPECT_EQ(apm_->kNoError,
-              apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
-#endif
-
-    if (!write_output_data) {
-      EXPECT_EQ(test->has_echo_count(), has_echo_count);
-      EXPECT_EQ(test->has_voice_count(), has_voice_count);
-      EXPECT_EQ(test->is_saturated_count(), is_saturated_count);
-
-      EXPECT_EQ(test->analog_level_average(), analog_level_average);
-      EXPECT_EQ(test->max_output_average(), max_output_average);
-
-#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
-      webrtc::audioproc::Test::EchoMetrics reference =
-          test->echo_metrics();
-      TestStats(echo_metrics.residual_echo_return_loss,
-                reference.residual_echo_return_loss());
-      TestStats(echo_metrics.echo_return_loss,
-                reference.echo_return_loss());
-      TestStats(echo_metrics.echo_return_loss_enhancement,
-                reference.echo_return_loss_enhancement());
-      TestStats(echo_metrics.a_nlp,
-                reference.a_nlp());
-
-      webrtc::audioproc::Test::DelayMetrics reference_delay =
-          test->delay_metrics();
-      EXPECT_EQ(median, reference_delay.median());
-      EXPECT_EQ(std, reference_delay.std());
-#endif
-    } else {
-      test->set_has_echo_count(has_echo_count);
-      test->set_has_voice_count(has_voice_count);
-      test->set_is_saturated_count(is_saturated_count);
-
-      test->set_analog_level_average(analog_level_average);
-      test->set_max_output_average(max_output_average);
-
-#if defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
-      webrtc::audioproc::Test::EchoMetrics* message =
-          test->mutable_echo_metrics();
-      WriteStatsMessage(echo_metrics.residual_echo_return_loss,
-                        message->mutable_residual_echo_return_loss());
-      WriteStatsMessage(echo_metrics.echo_return_loss,
-                        message->mutable_echo_return_loss());
-      WriteStatsMessage(echo_metrics.echo_return_loss_enhancement,
-                        message->mutable_echo_return_loss_enhancement());
-      WriteStatsMessage(echo_metrics.a_nlp,
-                        message->mutable_a_nlp());
-
-      webrtc::audioproc::Test::DelayMetrics* message_delay =
-          test->mutable_delay_metrics();
-      message_delay->set_median(median);
-      message_delay->set_std(std);
-#endif
-    }
-
-    rewind(far_file_);
-    rewind(near_file_);
-  }
-
-  if (write_output_data) {
-    WriteMessageLiteToFile(kOutputFileName, output_data);
-  }
-}
-
-TEST_F(ApmTest, EchoCancellation) {
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_drift_compensation(true));
-  EXPECT_TRUE(apm_->echo_cancellation()->is_drift_compensation_enabled());
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_drift_compensation(false));
-  EXPECT_FALSE(apm_->echo_cancellation()->is_drift_compensation_enabled());
-
-  EXPECT_EQ(apm_->kBadParameterError,
-            apm_->echo_cancellation()->set_device_sample_rate_hz(4000));
-  EXPECT_EQ(apm_->kBadParameterError,
-            apm_->echo_cancellation()->set_device_sample_rate_hz(100000));
-
-  int rate[] = {16000, 44100, 48000};
-  for (size_t i = 0; i < sizeof(rate)/sizeof(*rate); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->echo_cancellation()->set_device_sample_rate_hz(rate[i]));
-    EXPECT_EQ(rate[i],
-        apm_->echo_cancellation()->device_sample_rate_hz());
-  }
-
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->echo_cancellation()->set_suppression_level(
-          static_cast<EchoCancellation::SuppressionLevel>(-1)));
-
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->echo_cancellation()->set_suppression_level(
-          static_cast<EchoCancellation::SuppressionLevel>(4)));
-
-  EchoCancellation::SuppressionLevel level[] = {
-    EchoCancellation::kLowSuppression,
-    EchoCancellation::kModerateSuppression,
-    EchoCancellation::kHighSuppression,
-  };
-  for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->echo_cancellation()->set_suppression_level(level[i]));
-    EXPECT_EQ(level[i],
-        apm_->echo_cancellation()->suppression_level());
-  }
-
-  EchoCancellation::Metrics metrics;
-  EXPECT_EQ(apm_->kNotEnabledError,
-            apm_->echo_cancellation()->GetMetrics(&metrics));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_metrics(true));
-  EXPECT_TRUE(apm_->echo_cancellation()->are_metrics_enabled());
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_metrics(false));
-  EXPECT_FALSE(apm_->echo_cancellation()->are_metrics_enabled());
-
-  int median = 0;
-  int std = 0;
-  EXPECT_EQ(apm_->kNotEnabledError,
-            apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_delay_logging(true));
-  EXPECT_TRUE(apm_->echo_cancellation()->is_delay_logging_enabled());
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_cancellation()->enable_delay_logging(false));
-  EXPECT_FALSE(apm_->echo_cancellation()->is_delay_logging_enabled());
-
-  EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
-  EXPECT_TRUE(apm_->echo_cancellation()->is_enabled());
-  EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false));
-  EXPECT_FALSE(apm_->echo_cancellation()->is_enabled());
-}
-
-TEST_F(ApmTest, EchoControlMobile) {
-  // AECM won't use super-wideband.
-  EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(32000));
-  EXPECT_EQ(apm_->kBadSampleRateError, apm_->echo_control_mobile()->Enable(true));
-  EXPECT_EQ(apm_->kNoError, apm_->set_sample_rate_hz(16000));
-  // Turn AECM on (and AEC off)
-  EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(true));
-  EXPECT_TRUE(apm_->echo_control_mobile()->is_enabled());
-
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->echo_control_mobile()->set_routing_mode(
-          static_cast<EchoControlMobile::RoutingMode>(-1)));
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->echo_control_mobile()->set_routing_mode(
-          static_cast<EchoControlMobile::RoutingMode>(5)));
-
-  // Toggle routing modes
-  EchoControlMobile::RoutingMode mode[] = {
-    EchoControlMobile::kQuietEarpieceOrHeadset,
-    EchoControlMobile::kEarpiece,
-    EchoControlMobile::kLoudEarpiece,
-    EchoControlMobile::kSpeakerphone,
-    EchoControlMobile::kLoudSpeakerphone,
-  };
-  for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->echo_control_mobile()->set_routing_mode(mode[i]));
-    EXPECT_EQ(mode[i],
-        apm_->echo_control_mobile()->routing_mode());
-  }
-  // Turn comfort noise off/on
-  EXPECT_EQ(apm_->kNoError,
-      apm_->echo_control_mobile()->enable_comfort_noise(false));
-  EXPECT_FALSE(apm_->echo_control_mobile()->is_comfort_noise_enabled());
-  EXPECT_EQ(apm_->kNoError,
-      apm_->echo_control_mobile()->enable_comfort_noise(true));
-  EXPECT_TRUE(apm_->echo_control_mobile()->is_comfort_noise_enabled());
-  // Set and get echo path
-  const size_t echo_path_size =
-      apm_->echo_control_mobile()->echo_path_size_bytes();
-  unsigned char echo_path_in[echo_path_size];
-  unsigned char echo_path_out[echo_path_size];
-  EXPECT_EQ(apm_->kNullPointerError,
-            apm_->echo_control_mobile()->SetEchoPath(NULL, echo_path_size));
-  EXPECT_EQ(apm_->kNullPointerError,
-            apm_->echo_control_mobile()->GetEchoPath(NULL, echo_path_size));
-  EXPECT_EQ(apm_->kBadParameterError,
-            apm_->echo_control_mobile()->GetEchoPath(echo_path_out, 1));
-  EXPECT_EQ(apm_->kNoError,
-            apm_->echo_control_mobile()->GetEchoPath(echo_path_out,
-                                                     echo_path_size));
-  for (size_t i = 0; i < echo_path_size; i++) {
-    echo_path_in[i] = echo_path_out[i] + 1;
-  }
-  EXPECT_EQ(apm_->kBadParameterError,
-            apm_->echo_control_mobile()->SetEchoPath(echo_path_in, 1));
-  EXPECT_EQ(apm_->kNoError,
-      apm_->echo_control_mobile()->SetEchoPath(echo_path_in, echo_path_size));
-  EXPECT_EQ(apm_->kNoError,
-      apm_->echo_control_mobile()->GetEchoPath(echo_path_out, echo_path_size));
-  for (size_t i = 0; i < echo_path_size; i++) {
-    EXPECT_EQ(echo_path_in[i], echo_path_out[i]);
-  }
-  // Turn AECM off
-  EXPECT_EQ(apm_->kNoError, apm_->echo_control_mobile()->Enable(false));
-  EXPECT_FALSE(apm_->echo_control_mobile()->is_enabled());
-}
-
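A typical adaptive-analog AGC setup strings together the setters that the GainControl test below validates; the specific values here are illustrative picks from the ranges the test establishes as legal:

// Illustrative AGC configuration; every value is inside the valid ranges
// checked below (target level 0-31 dBFS, compression gain 0-90 dB).
GainControl* agc = apm_->gain_control();
EXPECT_EQ(apm_->kNoError, agc->set_mode(GainControl::kAdaptiveAnalog));
EXPECT_EQ(apm_->kNoError, agc->set_target_level_dbfs(6));
EXPECT_EQ(apm_->kNoError, agc->set_compression_gain_db(10));
EXPECT_EQ(apm_->kNoError, agc->enable_limiter(true));
EXPECT_EQ(apm_->kNoError, agc->set_analog_level_limits(0, 255));
EXPECT_EQ(apm_->kNoError, agc->Enable(true));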
-TEST_F(ApmTest, GainControl) {
-  // Testing gain modes
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_mode(static_cast<GainControl::Mode>(-1)));
-
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_mode(static_cast<GainControl::Mode>(3)));
-
-  EXPECT_EQ(apm_->kNoError,
-            apm_->gain_control()->set_mode(
-            apm_->gain_control()->mode()));
-
-  GainControl::Mode mode[] = {
-    GainControl::kAdaptiveAnalog,
-    GainControl::kAdaptiveDigital,
-    GainControl::kFixedDigital
-  };
-  for (size_t i = 0; i < sizeof(mode)/sizeof(*mode); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->gain_control()->set_mode(mode[i]));
-    EXPECT_EQ(mode[i], apm_->gain_control()->mode());
-  }
-  // Testing invalid target levels
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_target_level_dbfs(-3));
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_target_level_dbfs(-40));
-  // Testing valid target levels
-  EXPECT_EQ(apm_->kNoError,
-      apm_->gain_control()->set_target_level_dbfs(
-      apm_->gain_control()->target_level_dbfs()));
-
-  int level_dbfs[] = {0, 6, 31};
-  for (size_t i = 0; i < sizeof(level_dbfs)/sizeof(*level_dbfs); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->gain_control()->set_target_level_dbfs(level_dbfs[i]));
-    EXPECT_EQ(level_dbfs[i], apm_->gain_control()->target_level_dbfs());
-  }
-
-  // Testing invalid compression gains
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_compression_gain_db(-1));
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_compression_gain_db(100));
-
-  // Testing valid compression gains
-  EXPECT_EQ(apm_->kNoError,
-      apm_->gain_control()->set_compression_gain_db(
-      apm_->gain_control()->compression_gain_db()));
-
-  int gain_db[] = {0, 10, 90};
-  for (size_t i = 0; i < sizeof(gain_db)/sizeof(*gain_db); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->gain_control()->set_compression_gain_db(gain_db[i]));
-    EXPECT_EQ(gain_db[i], apm_->gain_control()->compression_gain_db());
-  }
-
-  // Testing limiter off/on
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(false));
-  EXPECT_FALSE(apm_->gain_control()->is_limiter_enabled());
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->enable_limiter(true));
-  EXPECT_TRUE(apm_->gain_control()->is_limiter_enabled());
-
-  // Testing invalid level limits
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_analog_level_limits(-1, 512));
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_analog_level_limits(100000, 512));
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_analog_level_limits(512, -1));
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_analog_level_limits(512, 100000));
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->gain_control()->set_analog_level_limits(512, 255));
-
-  // Testing valid level limits
-  EXPECT_EQ(apm_->kNoError,
-      apm_->gain_control()->set_analog_level_limits(
-      apm_->gain_control()->analog_level_minimum(),
-      apm_->gain_control()->analog_level_maximum()));
-
-  int min_level[] = {0, 255, 1024};
-  for (size_t i = 0; i < sizeof(min_level)/sizeof(*min_level); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->gain_control()->set_analog_level_limits(min_level[i], 1024));
-    EXPECT_EQ(min_level[i], apm_->gain_control()->analog_level_minimum());
-  }
-
-  int max_level[] = {0, 1024, 65535};
-  for (size_t i = 0; i < sizeof(max_level)/sizeof(*max_level); i++) {
-    EXPECT_EQ(apm_->kNoError,
-        apm_->gain_control()->set_analog_level_limits(0, max_level[i]));
-    EXPECT_EQ(max_level[i], apm_->gain_control()->analog_level_maximum());
-  }
-
-  // TODO(ajm): stream_is_saturated() and stream_analog_level()
-
-  // Turn AGC off
-  EXPECT_EQ(apm_->kNoError, apm_->gain_control()->Enable(false));
-  EXPECT_FALSE(apm_->gain_control()->is_enabled());
-}
-
-TEST_F(ApmTest, NoiseSuppression) {
-  // Testing invalid suppression levels
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->noise_suppression()->set_level(
-          static_cast<NoiseSuppression::Level>(-1)));
-
-  EXPECT_EQ(apm_->kBadParameterError,
-      apm_->noise_suppression()->set_level(
-          static_cast<NoiseSuppression::Level>(5)));
-
-  // Testing valid suppression levels
NoiseSuppression::Level level[] = { - NoiseSuppression::kLow, - NoiseSuppression::kModerate, - NoiseSuppression::kHigh, - NoiseSuppression::kVeryHigh - }; - for (size_t i = 0; i < sizeof(level)/sizeof(*level); i++) { - EXPECT_EQ(apm_->kNoError, - apm_->noise_suppression()->set_level(level[i])); - EXPECT_EQ(level[i], apm_->noise_suppression()->level()); - } - - // Turing NS on/off - EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(true)); - EXPECT_TRUE(apm_->noise_suppression()->is_enabled()); - EXPECT_EQ(apm_->kNoError, apm_->noise_suppression()->Enable(false)); - EXPECT_FALSE(apm_->noise_suppression()->is_enabled()); -} - -TEST_F(ApmTest, HighPassFilter) { - // Turing HP filter on/off - EXPECT_EQ(apm_->kNoError, apm_->high_pass_filter()->Enable(true)); - EXPECT_TRUE(apm_->high_pass_filter()->is_enabled()); - EXPECT_EQ(apm_->kNoError, apm_->high_pass_filter()->Enable(false)); - EXPECT_FALSE(apm_->high_pass_filter()->is_enabled()); -} - -TEST_F(ApmTest, LevelEstimator) { - // Turing Level estimator on/off - EXPECT_EQ(apm_->kUnsupportedComponentError, - apm_->level_estimator()->Enable(true)); - EXPECT_FALSE(apm_->level_estimator()->is_enabled()); - EXPECT_EQ(apm_->kUnsupportedComponentError, - apm_->level_estimator()->Enable(false)); - EXPECT_FALSE(apm_->level_estimator()->is_enabled()); -} - -TEST_F(ApmTest, VoiceDetection) { - // Test external VAD - EXPECT_EQ(apm_->kNoError, - apm_->voice_detection()->set_stream_has_voice(true)); - EXPECT_TRUE(apm_->voice_detection()->stream_has_voice()); - EXPECT_EQ(apm_->kNoError, - apm_->voice_detection()->set_stream_has_voice(false)); - EXPECT_FALSE(apm_->voice_detection()->stream_has_voice()); - - // Tesing invalid likelihoods - EXPECT_EQ(apm_->kBadParameterError, - apm_->voice_detection()->set_likelihood( - static_cast(-1))); - - EXPECT_EQ(apm_->kBadParameterError, - apm_->voice_detection()->set_likelihood( - static_cast(5))); - - // Tesing valid likelihoods - VoiceDetection::Likelihood likelihood[] = { - VoiceDetection::kVeryLowLikelihood, - VoiceDetection::kLowLikelihood, - VoiceDetection::kModerateLikelihood, - VoiceDetection::kHighLikelihood - }; - for (size_t i = 0; i < sizeof(likelihood)/sizeof(*likelihood); i++) { - EXPECT_EQ(apm_->kNoError, - apm_->voice_detection()->set_likelihood(likelihood[i])); - EXPECT_EQ(likelihood[i], apm_->voice_detection()->likelihood()); - } - - /* TODO(bjornv): Enable once VAD supports other frame lengths than 10 ms - // Tesing invalid frame sizes - EXPECT_EQ(apm_->kBadParameterError, - apm_->voice_detection()->set_frame_size_ms(12)); - - // Tesing valid frame sizes - for (int i = 10; i <= 30; i += 10) { - EXPECT_EQ(apm_->kNoError, - apm_->voice_detection()->set_frame_size_ms(i)); - EXPECT_EQ(i, apm_->voice_detection()->frame_size_ms()); - } - */ - - // Turing VAD on/off - EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); - EXPECT_TRUE(apm_->voice_detection()->is_enabled()); - EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); - EXPECT_FALSE(apm_->voice_detection()->is_enabled()); - - // Test that AudioFrame activity is maintained when VAD is disabled. 
- EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false)); - AudioFrame::VADActivity activity[] = { - AudioFrame::kVadActive, - AudioFrame::kVadPassive, - AudioFrame::kVadUnknown - }; - for (size_t i = 0; i < sizeof(activity)/sizeof(*activity); i++) { - frame_->_vadActivity = activity[i]; - EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); - EXPECT_EQ(activity[i], frame_->_vadActivity); - } - - // Test that AudioFrame activity is set when VAD is enabled. - EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true)); - frame_->_vadActivity = AudioFrame::kVadUnknown; - EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_)); - EXPECT_NE(AudioFrame::kVadUnknown, frame_->_vadActivity); - - // TODO(bjornv): Add tests for streamed voice; stream_has_voice() -} -} // namespace - -int main(int argc, char** argv) { - ::testing::InitGoogleTest(&argc, argv); - ApmEnvironment* env = new ApmEnvironment; // GTest takes ownership. - ::testing::AddGlobalTestEnvironment(env); - - for (int i = 1; i < argc; i++) { - if (strcmp(argv[i], "--write_output_data") == 0) { - write_output_data = true; - } - } - - int err = RUN_ALL_TESTS(); - - // Optional, but removes memory leak noise from Valgrind. - google::protobuf::ShutdownProtobufLibrary(); - return err; -} diff --git a/webrtc/modules/audio_processing/test/unittest.proto b/webrtc/modules/audio_processing/test/unittest.proto deleted file mode 100644 index cdfacc4..0000000 --- a/webrtc/modules/audio_processing/test/unittest.proto +++ /dev/null @@ -1,50 +0,0 @@ -syntax = "proto2"; -option optimize_for = LITE_RUNTIME; -package webrtc.audioproc; - -message Test { - optional int32 num_reverse_channels = 1; - optional int32 num_input_channels = 2; - optional int32 num_output_channels = 3; - optional int32 sample_rate = 4; - - message Frame { - } - - repeated Frame frame = 5; - - optional int32 analog_level_average = 6; - optional int32 max_output_average = 7; - - optional int32 has_echo_count = 8; - optional int32 has_voice_count = 9; - optional int32 is_saturated_count = 10; - - message Statistic { - optional int32 instant = 1; - optional int32 average = 2; - optional int32 maximum = 3; - optional int32 minimum = 4; - } - - message EchoMetrics { - optional Statistic residual_echo_return_loss = 1; - optional Statistic echo_return_loss = 2; - optional Statistic echo_return_loss_enhancement = 3; - optional Statistic a_nlp = 4; - } - - optional EchoMetrics echo_metrics = 11; - - message DelayMetrics { - optional int32 median = 1; - optional int32 std = 2; - } - - optional DelayMetrics delay_metrics = 12; -} - -message OutputData { - repeated Test test = 1; -} - diff --git a/webrtc/modules/audio_processing/three_band_filter_bank.cc b/webrtc/modules/audio_processing/three_band_filter_bank.cc new file mode 100644 index 0000000..91e58df --- /dev/null +++ b/webrtc/modules/audio_processing/three_band_filter_bank.cc @@ -0,0 +1,212 @@ +/* + * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// An implementation of a 3-band FIR filter-bank with DCT modulation, similar to +// the proposed in "Multirate Signal Processing for Communication Systems" by +// Fredric J Harris. 
+// +// The idea is to take a heterodyne system and change the order of the +// components to get something which is efficient to implement digitally. +// +// It is possible to separate the filter using the noble identity as follows: +// +// H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3) +// +// This is used in the analysis stage to first downsample serial to parallel +// and then filter each branch with one of these polyphase decompositions of the +// lowpass prototype. Because each filter is only a modulation of the prototype, +// it is enough to multiply each coefficient by the respective cosine value to +// shift it to the desired band. But because the cosine period is 12 samples, +// it requires separating the prototype even further using the noble identity. +// After filtering and modulating for each band, the output of all filters is +// accumulated to get the downsampled bands. +// +// A similar logic can be applied to the synthesis stage. + +// MSVC++ requires this to be set before any other includes to get M_PI. +#define _USE_MATH_DEFINES + +#include "webrtc/modules/audio_processing/three_band_filter_bank.h" + +#include + +#include "webrtc/base/checks.h" + +namespace webrtc { +namespace { + +const size_t kNumBands = 3; +const size_t kSparsity = 4; + +// Factors to take into account when choosing |kNumCoeffs|: +// 1. Higher |kNumCoeffs|, means faster transition, which ensures less +// aliasing. This is especially important when there is non-linear +// processing between the splitting and merging. +// 2. The delay that this filter bank introduces is +// |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly +// with |kNumCoeffs|. +// 3. The computation complexity also increases linearly with |kNumCoeffs|. +const size_t kNumCoeffs = 4; + +// The Matlab code to generate these |kLowpassCoeffs| is: +// +// N = kNumBands * kSparsity * kNumCoeffs - 1; +// h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5)); +// reshape(h, kNumBands * kSparsity, kNumCoeffs); +// +// Because the total bandwidth of the lower and higher band is double the middle +// one (because of the spectrum parity), the low-pass prototype is half the +// bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation +// to the right places. +// A Kaiser window is used because of its flexibility and the alpha is set to +// 3.5, since that sets a stop band attenuation of 40dB ensuring a fast +// transition. +const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] = + {{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f}, + {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f}, + {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f}, + {-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f}, + {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f}, + {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f}, + {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f}, + {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f}, + {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f}, + {+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f}, + {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f}, + {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}}; + +// Downsamples |in| into |out|, taking one every |kNumbands| starting from +// |offset|. |split_length| is the |out| length. |in| has to be at least +// |kNumBands| * |split_length| long. 
+void Downsample(const float* in, + size_t split_length, + size_t offset, + float* out) { + for (size_t i = 0; i < split_length; ++i) { + out[i] = in[kNumBands * i + offset]; + } +} + +// Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every +// |kNumBands| starting from |offset|. |split_length| is the |in| length. |out| +// has to be at least |kNumBands| * |split_length| long. +void Upsample(const float* in, size_t split_length, size_t offset, float* out) { + for (size_t i = 0; i < split_length; ++i) { + out[kNumBands * i + offset] += kNumBands * in[i]; + } +} + +} // namespace + +// Because the low-pass filter prototype has half bandwidth it is possible to +// use a DCT to shift it in both directions at the same time, to the center +// frequencies [1 / 12, 3 / 12, 5 / 12]. +ThreeBandFilterBank::ThreeBandFilterBank(size_t length) + : in_buffer_(rtc::CheckedDivExact(length, kNumBands)), + out_buffer_(in_buffer_.size()) { + for (size_t i = 0; i < kSparsity; ++i) { + for (size_t j = 0; j < kNumBands; ++j) { + analysis_filters_.push_back(new SparseFIRFilter( + kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)); + synthesis_filters_.push_back(new SparseFIRFilter( + kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i)); + } + } + dct_modulation_.resize(kNumBands * kSparsity); + for (size_t i = 0; i < dct_modulation_.size(); ++i) { + dct_modulation_[i].resize(kNumBands); + for (size_t j = 0; j < kNumBands; ++j) { + dct_modulation_[i][j] = + 2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size()); + } + } +} + +// The analysis can be separated in these steps: +// 1. Serial to parallel downsampling by a factor of |kNumBands|. +// 2. Filtering of |kSparsity| different delayed signals with polyphase +// decomposition of the low-pass prototype filter and upsampled by a factor +// of |kSparsity|. +// 3. Modulating with cosines and accumulating to get the desired band. +void ThreeBandFilterBank::Analysis(const float* in, + size_t length, + float* const* out) { + RTC_CHECK_EQ(in_buffer_.size(), rtc::CheckedDivExact(length, kNumBands)); + for (size_t i = 0; i < kNumBands; ++i) { + memset(out[i], 0, in_buffer_.size() * sizeof(*out[i])); + } + for (size_t i = 0; i < kNumBands; ++i) { + Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]); + for (size_t j = 0; j < kSparsity; ++j) { + const size_t offset = i + j * kNumBands; + analysis_filters_[offset]->Filter(&in_buffer_[0], + in_buffer_.size(), + &out_buffer_[0]); + DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out); + } + } +} + +// The synthesis can be separated in these steps: +// 1. Modulating with cosines. +// 2. Filtering each one with a polyphase decomposition of the low-pass +// prototype filter upsampled by a factor of |kSparsity| and accumulating +// |kSparsity| signals with different delays. +// 3. Parallel to serial upsampling by a factor of |kNumBands|. 
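[Editor's note] The constructor above builds the DCT modulation table from the cosine expression the file-header comment describes. As a standalone sketch (not part of the patch), the following program prints that table under the same assumptions as the patch (kNumBands = 3, kSparsity = 4, so the cosine period is 12 samples):

// Prints the DCT modulation factors 2*cos(2*pi*i*(2j+1)/12) used by
// ThreeBandFilterBank. Row i is the position in the 12-sample period;
// column j is the band shifted to center frequency (2j+1)/(2*kNumBands).
#define _USE_MATH_DEFINES  // For M_PI on MSVC, as in the patch.
#include <cmath>
#include <cstdio>

int main() {
  const int kNumBands = 3;
  const int kSparsity = 4;
  const int period = kNumBands * kSparsity;  // 12 samples.
  for (int i = 0; i < period; ++i) {
    for (int j = 0; j < kNumBands; ++j) {
      std::printf("%8.4f ",
                  2.f * std::cos(2.f * M_PI * i * (2.f * j + 1.f) / period));
    }
    std::printf("\n");
  }
  return 0;
}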
+
+// The analysis can be separated in these steps:
+// 1. Serial to parallel downsampling by a factor of |kNumBands|.
+// 2. Filtering of |kSparsity| different delayed signals with polyphase
+//    decomposition of the low-pass prototype filter and upsampled by a factor
+//    of |kSparsity|.
+// 3. Modulating with cosines and accumulating to get the desired band.
+void ThreeBandFilterBank::Analysis(const float* in,
+                                   size_t length,
+                                   float* const* out) {
+  RTC_CHECK_EQ(in_buffer_.size(), rtc::CheckedDivExact(length, kNumBands));
+  for (size_t i = 0; i < kNumBands; ++i) {
+    memset(out[i], 0, in_buffer_.size() * sizeof(*out[i]));
+  }
+  for (size_t i = 0; i < kNumBands; ++i) {
+    Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]);
+    for (size_t j = 0; j < kSparsity; ++j) {
+      const size_t offset = i + j * kNumBands;
+      analysis_filters_[offset]->Filter(&in_buffer_[0],
+                                        in_buffer_.size(),
+                                        &out_buffer_[0]);
+      DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out);
+    }
+  }
+}
+
+// The synthesis can be separated in these steps:
+// 1. Modulating with cosines.
+// 2. Filtering each one with a polyphase decomposition of the low-pass
+//    prototype filter upsampled by a factor of |kSparsity| and accumulating
+//    |kSparsity| signals with different delays.
+// 3. Parallel to serial upsampling by a factor of |kNumBands|.
+void ThreeBandFilterBank::Synthesis(const float* const* in,
+                                    size_t split_length,
+                                    float* out) {
+  RTC_CHECK_EQ(in_buffer_.size(), split_length);
+  memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out));
+  for (size_t i = 0; i < kNumBands; ++i) {
+    for (size_t j = 0; j < kSparsity; ++j) {
+      const size_t offset = i + j * kNumBands;
+      UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]);
+      synthesis_filters_[offset]->Filter(&in_buffer_[0],
+                                         in_buffer_.size(),
+                                         &out_buffer_[0]);
+      Upsample(&out_buffer_[0], out_buffer_.size(), i, out);
+    }
+  }
+}
+
+// Modulates |in| by |dct_modulation_| and accumulates it in each of the
+// |kNumBands| bands of |out|. |offset| is the index in the period of the
+// cosines used for modulation. |split_length| is the length of |in| and each
+// band of |out|.
+void ThreeBandFilterBank::DownModulate(const float* in,
+                                       size_t split_length,
+                                       size_t offset,
+                                       float* const* out) {
+  for (size_t i = 0; i < kNumBands; ++i) {
+    for (size_t j = 0; j < split_length; ++j) {
+      out[i][j] += dct_modulation_[offset][i] * in[j];
+    }
+  }
+}
+
+// Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and
+// accumulates them in |out|. |out| is cleared before starting to accumulate.
+// |offset| is the index in the period of the cosines used for modulation.
+// |split_length| is the length of each band of |in| and |out|.
+void ThreeBandFilterBank::UpModulate(const float* const* in,
+                                     size_t split_length,
+                                     size_t offset,
+                                     float* out) {
+  memset(out, 0, split_length * sizeof(*out));
+  for (size_t i = 0; i < kNumBands; ++i) {
+    for (size_t j = 0; j < split_length; ++j) {
+      out[j] += dct_modulation_[offset][i] * in[i][j];
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/three_band_filter_bank.h b/webrtc/modules/audio_processing/three_band_filter_bank.h
new file mode 100644
index 0000000..18e8aee
--- /dev/null
+++ b/webrtc/modules/audio_processing/three_band_filter_bank.h
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
+
+#include <string.h>
+#include <vector>
+
+#include "webrtc/common_audio/sparse_fir_filter.h"
+#include "webrtc/system_wrappers/interface/scoped_vector.h"
+
+namespace webrtc {
+
+// An implementation of a 3-band FIR filter-bank with DCT modulation, similar
+// to the one proposed in "Multirate Signal Processing for Communication
+// Systems" by Fredric J Harris.
+// The low-pass filter prototype has these characteristics:
+// * Pass-band ripple = 0.3dB
+// * Pass-band frequency = 0.147 (7kHz at 48kHz)
+// * Stop-band attenuation = 40dB
+// * Stop-band frequency = 0.192 (9.2kHz at 48kHz)
+// * Delay = 24 samples (500us at 48kHz)
+// * Linear phase
+// This filter bank does not satisfy perfect reconstruction. The SNR after
+// analysis and synthesis (with no processing in between) is approximately
+// 9.5dB depending on the input signal after compensating for the delay.
+class ThreeBandFilterBank final {
+ public:
+  explicit ThreeBandFilterBank(size_t length);
+
+  // Splits |in| into 3 downsampled frequency bands in |out|.
+  // |length| is the |in| length. Each of the 3 bands of |out| has to have a
+  // length of |length| / 3.
+  void Analysis(const float* in, size_t length, float* const* out);
+
+  // Merges the 3 downsampled frequency bands in |in| into |out|.
+  // |split_length| is the length of each band of |in|. |out| has to have at
+  // least a length of 3 * |split_length|.
+  void Synthesis(const float* const* in, size_t split_length, float* out);
+
+ private:
+  void DownModulate(const float* in,
+                    size_t split_length,
+                    size_t offset,
+                    float* const* out);
+  void UpModulate(const float* const* in,
+                  size_t split_length,
+                  size_t offset,
+                  float* out);
+
+  std::vector<float> in_buffer_;
+  std::vector<float> out_buffer_;
+  ScopedVector<SparseFIRFilter> analysis_filters_;
+  ScopedVector<SparseFIRFilter> synthesis_filters_;
+  std::vector<std::vector<float>> dct_modulation_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_THREE_BAND_FILTER_BANK_H_
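[Editor's note] A minimal sketch (not part of the patch, and only buildable inside this source tree) of how the class declared above would be driven; the 10 ms, 48 kHz buffer sizes are an assumption to make the 3-way split exact:

// Round-trips noise through Analysis/Synthesis; per the header comment the
// reconstruction is imperfect (~9.5dB SNR) and delayed by 24 samples.
#include <cstdio>
#include <cstdlib>
#include <vector>

#include "webrtc/modules/audio_processing/three_band_filter_bank.h"

int main() {
  const size_t kLength = 480;  // 10 ms at 48 kHz; divisible by 3.
  webrtc::ThreeBandFilterBank bank(kLength);
  std::vector<float> in(kLength), out(kLength, 0.f);
  std::vector<std::vector<float>> bands(3, std::vector<float>(kLength / 3));
  float* band_ptrs[3] = {&bands[0][0], &bands[1][0], &bands[2][0]};
  for (size_t i = 0; i < kLength; ++i)
    in[i] = 2.f * std::rand() / RAND_MAX - 1.f;
  bank.Analysis(&in[0], kLength, band_ptrs);
  bank.Synthesis(band_ptrs, kLength / 3, &out[0]);
  // A real SNR measurement would first align |out| with |in| by the
  // 24-sample filter bank delay.
  std::printf("processed %d samples\n", static_cast<int>(kLength));
  return 0;
}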
"); + printf("The resultant array is written to a DATfile.\n\n"); + return 0; + } + + scoped_ptr pcm_file(FileWrapper::Create()); + pcm_file->OpenFile(argv[1], true, false, false); + if (!pcm_file->Open()) { + printf("\nThe %s could not be opened.\n\n", argv[1]); + return -1; + } + + scoped_ptr dat_file(FileWrapper::Create()); + dat_file->OpenFile(argv[2], false, false, false); + if (!dat_file->Open()) { + printf("\nThe %s could not be opened.\n\n", argv[2]); + return -1; + } + + int chunk_size_ms = atoi(argv[3]); + if (chunk_size_ms <= 0) { + printf("\nThe chunkSize must be a positive integer\n\n"); + return -1; + } + + int sample_rate_hz = atoi(argv[4]); + if (sample_rate_hz <= 0) { + printf("\nThe sampleRate must be a positive integer\n\n"); + return -1; + } + + TransientDetector detector(sample_rate_hz); + int lost_packets = 0; + size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000; + scoped_ptr audio_buffer(new float[audio_buffer_length]); + std::vector send_times; + + // Read first buffer from the PCM test file. + size_t file_samples_read = ReadInt16FromFileToFloatBuffer( + pcm_file.get(), + audio_buffer_length, + audio_buffer.get()); + for (int time = 0; file_samples_read > 0; time += chunk_size_ms) { + // Pad the rest of the buffer with zeros. + for (size_t i = file_samples_read; i < audio_buffer_length; ++i) { + audio_buffer[i] = 0.0; + } + float value = + detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0); + if (value < 0.5f) { + value = time; + } else { + value = FLT_MAX; + ++lost_packets; + } + send_times.push_back(value); + + // Read next buffer from the PCM test file. + file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(), + audio_buffer_length, + audio_buffer.get()); + } + + size_t floats_written = WriteFloatBufferToFile(dat_file.get(), + send_times.size(), + &send_times[0]); + + if (floats_written == 0) { + printf("\nThe send times could not be written to DAT file\n\n"); + return -1; + } + + pcm_file->CloseFile(); + dat_file->CloseFile(); + + return lost_packets; +} diff --git a/webrtc/modules/audio_processing/transient/common.h b/webrtc/modules/audio_processing/transient/common.h new file mode 100644 index 0000000..92194e9 --- /dev/null +++ b/webrtc/modules/audio_processing/transient/common.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
diff --git a/webrtc/modules/audio_processing/transient/common.h b/webrtc/modules/audio_processing/transient/common.h
new file mode 100644
index 0000000..92194e9
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/common.h
@@ -0,0 +1,27 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
+namespace webrtc {
+namespace ts {
+
+static const float kPi = 3.14159265358979323846f;
+static const int kChunkSizeMs = 10;
+enum {
+  kSampleRate8kHz = 8000,
+  kSampleRate16kHz = 16000,
+  kSampleRate32kHz = 32000,
+  kSampleRate48kHz = 48000
+};
+
+}  // namespace ts
+}  // namespace webrtc
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
diff --git a/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h b/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h
new file mode 100644
index 0000000..b1236ac
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h
@@ -0,0 +1,63 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This header file defines the coefficients of the FIR based approximation of
+// the Meyer Wavelet
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
+
+// Decomposition coefficients Daubechies 8.
+
+namespace webrtc {
+
+const int kDaubechies8CoefficientsLength = 16;
+
+const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength]
+    = {
+  -5.44158422430816093862e-02f,
+  3.12871590914465924627e-01f,
+  -6.75630736298012846142e-01f,
+  5.85354683654869090148e-01f,
+  1.58291052560238926228e-02f,
+  -2.84015542962428091389e-01f,
+  -4.72484573997972536787e-04f,
+  1.28747426620186011803e-01f,
+  1.73693010020221083600e-02f,
+  -4.40882539310647192377e-02f,
+  -1.39810279170155156436e-02f,
+  8.74609404701565465445e-03f,
+  4.87035299301066034600e-03f,
+  -3.91740372995977108837e-04f,
+  -6.75449405998556772109e-04f,
+  -1.17476784002281916305e-04f
+};
+
+const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
+  -1.17476784002281916305e-04f,
+  6.75449405998556772109e-04f,
+  -3.91740372995977108837e-04f,
+  -4.87035299301066034600e-03f,
+  8.74609404701565465445e-03f,
+  1.39810279170155156436e-02f,
+  -4.40882539310647192377e-02f,
+  -1.73693010020221083600e-02f,
+  1.28747426620186011803e-01f,
+  4.72484573997972536787e-04f,
+  -2.84015542962428091389e-01f,
+  -1.58291052560238926228e-02f,
+  5.85354683654869090148e-01f,
+  6.75630736298012846142e-01f,
+  3.12871590914465924627e-01f,
+  5.44158422430816093862e-02f
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
diff --git a/webrtc/modules/audio_processing/transient/dyadic_decimator.h b/webrtc/modules/audio_processing/transient/dyadic_decimator.h
new file mode 100644
index 0000000..c1046f2
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/dyadic_decimator.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
+
+#include <cstdlib>
+
+#include "webrtc/typedefs.h"
+
+// Provides a set of static methods to perform dyadic decimations.
+
+namespace webrtc {
+
+// Returns the proper length of the output buffer that you should use for the
+// given |in_length| and decimation |odd_sequence|.
+inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
+                                           bool odd_sequence) {
+  size_t out_length = in_length / 2;
+
+  if (in_length % 2 == 1 && !odd_sequence) {
+    ++out_length;
+  }
+
+  return out_length;
+}
+
+// Performs a dyadic decimation: removes every odd/even member of a sequence,
+// halving its overall length.
+// Arguments:
+//    in: array of |in_length|.
+//    odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
+//                  if true, the even members will be removed (0, 2, 4, ...).
+//    out: array of |out_length|. |out_length| must be large enough to
+//         hold the decimated output. The necessary length can be provided by
+//         GetOutLengthToDyadicDecimate().
+//         Must be previously allocated.
+// Returns the number of output samples, or 0 on error.
+template <typename T>
+static size_t DyadicDecimate(const T* in,
+                             size_t in_length,
+                             bool odd_sequence,
+                             T* out,
+                             size_t out_length) {
+  size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
+
+  if (!in || !out || in_length <= 0 || out_length < half_length) {
+    return 0;
+  }
+
+  size_t output_samples = 0;
+  size_t index_adjustment = odd_sequence ? 1 : 0;
+  for (output_samples = 0; output_samples < half_length; ++output_samples) {
+    out[output_samples] = in[output_samples * 2 + index_adjustment];
+  }
+
+  return output_samples;
+}
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
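[Editor's note] A minimal sketch (not part of the patch) exercising the template above on a 5-sample sequence; it relies only on the two functions just defined:

#include <cstdio>

#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"

int main() {
  const int in[] = {10, 11, 12, 13, 14};
  int out[3] = {0};
  // Keep the even-indexed members (odd_sequence == false): 10, 12, 14.
  size_t n = webrtc::DyadicDecimate(
      in, 5, false, out, webrtc::GetOutLengthToDyadicDecimate(5, false));
  std::printf("%d samples: %d %d %d\n", static_cast<int>(n),
              out[0], out[1], out[2]);
  // Keep the odd-indexed members (odd_sequence == true): 11, 13.
  n = webrtc::DyadicDecimate(
      in, 5, true, out, webrtc::GetOutLengthToDyadicDecimate(5, true));
  std::printf("%d samples: %d %d\n", static_cast<int>(n), out[0], out[1]);
  return 0;
}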
diff --git a/webrtc/modules/audio_processing/transient/file_utils.cc b/webrtc/modules/audio_processing/transient/file_utils.cc
new file mode 100644
index 0000000..2325bd6
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/file_utils.cc
@@ -0,0 +1,257 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/file_utils.h"
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
+  if (!bytes || !out) {
+    return -1;
+  }
+
+  uint32_t binary_value = 0;
+  for (int i = 3; i >= 0; --i) {
+    binary_value <<= 8;
+    binary_value += bytes[i];
+  }
+
+  *out = bit_cast<float>(binary_value);
+
+  return 0;
+}
+
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
+  if (!bytes || !out) {
+    return -1;
+  }
+
+  uint64_t binary_value = 0;
+  for (int i = 7; i >= 0; --i) {
+    binary_value <<= 8;
+    binary_value += bytes[i];
+  }
+
+  *out = bit_cast<double>(binary_value);
+
+  return 0;
+}
+
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
+  if (!out_bytes) {
+    return -1;
+  }
+
+  uint32_t binary_value = bit_cast<uint32_t>(value);
+  for (size_t i = 0; i < 4; ++i) {
+    out_bytes[i] = binary_value;
+    binary_value >>= 8;
+  }
+
+  return 0;
+}
+
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
+  if (!out_bytes) {
+    return -1;
+  }
+
+  uint64_t binary_value = bit_cast<uint64_t>(value);
+  for (size_t i = 0; i < 8; ++i) {
+    out_bytes[i] = binary_value;
+    binary_value >>= 8;
+  }
+
+  return 0;
+}
+
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+                               size_t length,
+                               int16_t* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+  size_t int16s_read = 0;
+
+  while (int16s_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 2);
+    if (bytes_read < 2) {
+      break;
+    }
+    int16_t value = byte_array[1];
+    value <<= 8;
+    value += byte_array[0];
+    buffer[int16s_read] = value;
+    ++int16s_read;
+  }
+
+  return int16s_read;
+}
+
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+                                      size_t length,
+                                      float* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+  size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+  for (size_t i = 0; i < int16s_read; ++i) {
+    buffer[i] = buffer16[i];
+  }
+
+  return int16s_read;
+}
+
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+                                       size_t length,
+                                       double* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<int16_t[]> buffer16(new int16_t[length]);
+
+  size_t int16s_read = ReadInt16BufferFromFile(file, length, buffer16.get());
+
+  for (size_t i = 0; i < int16s_read; ++i) {
+    buffer[i] = buffer16[i];
+  }
+
+  return int16s_read;
+}
+
+size_t ReadFloatBufferFromFile(FileWrapper* file,
+                               size_t length,
+                               float* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+  size_t floats_read = 0;
+
+  while (floats_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 4);
+    if (bytes_read < 4) {
+      break;
+    }
+    ConvertByteArrayToFloat(byte_array.get(), &buffer[floats_read]);
+    ++floats_read;
+  }
+
+  return floats_read;
+}
+
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+                                size_t length,
+                                double* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+  size_t doubles_read = 0;
+
+  while (doubles_read < length) {
+    size_t bytes_read = file->Read(byte_array.get(), 8);
+    if (bytes_read < 8) {
+      break;
+    }
+    ConvertByteArrayToDouble(byte_array.get(), &buffer[doubles_read]);
+    ++doubles_read;
+  }
+
+  return doubles_read;
+}
+
+size_t WriteInt16BufferToFile(FileWrapper* file,
+                              size_t length,
+                              const int16_t* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<uint8_t[]> byte_array(new uint8_t[2]);
+
+  size_t int16s_written = 0;
+
+  for (int16s_written = 0; int16s_written < length; ++int16s_written) {
+    // Get byte representation.
+    byte_array[0] = buffer[int16s_written] & 0xFF;
+    byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF;
+
+    file->Write(byte_array.get(), 2);
+  }
+
+  file->Flush();
+
+  return int16s_written;
+}
+
+size_t WriteFloatBufferToFile(FileWrapper* file,
+                              size_t length,
+                              const float* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<uint8_t[]> byte_array(new uint8_t[4]);
+
+  size_t floats_written = 0;
+
+  for (floats_written = 0; floats_written < length; ++floats_written) {
+    // Get byte representation.
+    ConvertFloatToByteArray(buffer[floats_written], byte_array.get());
+
+    file->Write(byte_array.get(), 4);
+  }
+
+  file->Flush();
+
+  return floats_written;
+}
+
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+                               size_t length,
+                               const double* buffer) {
+  if (!file || !file->Open() || !buffer || length <= 0) {
+    return 0;
+  }
+
+  rtc::scoped_ptr<uint8_t[]> byte_array(new uint8_t[8]);
+
+  size_t doubles_written = 0;
+
+  for (doubles_written = 0; doubles_written < length; ++doubles_written) {
+    // Get byte representation.
+    ConvertDoubleToByteArray(buffer[doubles_written], byte_array.get());
+
+    file->Write(byte_array.get(), 8);
+  }
+
+  file->Flush();
+
+  return doubles_written;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/file_utils.h b/webrtc/modules/audio_processing/transient/file_utils.h
new file mode 100644
index 0000000..dbc3b5f
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/file_utils.h
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
+
+#include <string.h>
+
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// This is a copy of the cast included in the Chromium codebase here:
+// http://cs.chromium.org/src/third_party/cld/base/casts.h
+template <typename Dest, typename Source>
+inline Dest bit_cast(const Source& source) {
+  // A compile error here means your Dest and Source have different sizes.
+  static_assert(sizeof(Dest) == sizeof(Source),
+                "Dest and Source have different sizes");
+
+  Dest dest;
+  memcpy(&dest, &source, sizeof(dest));
+  return dest;
+}
+
+// Converts the byte array with binary float representation to float.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
+
+// Converts the byte array with binary double representation to double.
+// Bytes must be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
+
+// Converts a float to a byte array with binary float representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
+
+// Converts a double to a byte array with binary double representation.
+// Bytes will be in little-endian order.
+// Returns 0 if correct, -1 on error.
+int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
+
+// Reads |length| 16-bit integers from |file| to |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16BufferFromFile(FileWrapper* file,
+                               size_t length,
+                               int16_t* buffer);
+
+// Reads |length| 16-bit integers from |file| and stores those values
+// (converting them) in |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
+                                      size_t length,
+                                      float* buffer);
+
+// Reads |length| 16-bit integers from |file| and stores those values
+// (converting them) in |buffer|.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers read, or 0 on error.
+size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
+                                       size_t length,
+                                       double* buffer);
+
+// Reads |length| floats in binary representation (4 bytes) from |file| to
+// |buffer|.
+// |file| must be previously opened.
+// Returns the number of floats read, or 0 on error.
+size_t ReadFloatBufferFromFile(FileWrapper* file,
+                               size_t length,
+                               float* buffer);
+
+// Reads |length| doubles in binary representation (8 bytes) from |file| to
+// |buffer|.
+// |file| must be previously opened.
+// Returns the number of doubles read, or 0 on error.
+size_t ReadDoubleBufferFromFile(FileWrapper* file,
+                                size_t length,
+                                double* buffer);
+
+// Writes |length| 16-bit integers from |buffer| in binary representation (2
+// bytes) to |file|. It flushes |file|, so after this call there are no
+// writings pending.
+// |file| must be previously opened.
+// Returns the number of 16-bit integers written, or 0 on error.
+size_t WriteInt16BufferToFile(FileWrapper* file,
+                              size_t length,
+                              const int16_t* buffer);
+
+// Writes |length| floats from |buffer| in binary representation (4 bytes) to
+// |file|. It flushes |file|, so after this call there are no writings pending.
+// |file| must be previously opened.
+// Returns the number of floats written, or 0 on error.
+size_t WriteFloatBufferToFile(FileWrapper* file,
+                              size_t length,
+                              const float* buffer);
+
+// Writes |length| doubles from |buffer| in binary representation (8 bytes) to
+// |file|. It flushes |file|, so after this call there are no writings pending.
+// |file| must be previously opened.
+// Returns the number of doubles written, or 0 on error.
+size_t WriteDoubleBufferToFile(FileWrapper* file,
+                               size_t length,
+                               const double* buffer);
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
diff --git a/webrtc/modules/audio_processing/transient/moving_moments.cc b/webrtc/modules/audio_processing/transient/moving_moments.cc
new file mode 100644
index 0000000..aa47522
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/moving_moments.cc
@@ -0,0 +1,49 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/moving_moments.h"
+
+#include <assert.h>
+#include <math.h>
+
+#include "webrtc/base/scoped_ptr.h"
+
+namespace webrtc {
+
+MovingMoments::MovingMoments(size_t length)
+    : length_(length),
+      queue_(),
+      sum_(0.0),
+      sum_of_squares_(0.0) {
+  assert(length > 0);
+  for (size_t i = 0; i < length; ++i) {
+    queue_.push(0.0);
+  }
+}
+
+MovingMoments::~MovingMoments() {}
+
+void MovingMoments::CalculateMoments(const float* in, size_t in_length,
+                                     float* first, float* second) {
+  assert(in && in_length > 0 && first && second);
+
+  for (size_t i = 0; i < in_length; ++i) {
+    const float old_value = queue_.front();
+    queue_.pop();
+    queue_.push(in[i]);
+
+    sum_ += in[i] - old_value;
+    sum_of_squares_ += in[i] * in[i] - old_value * old_value;
+    first[i] = sum_ / length_;
+    second[i] = sum_of_squares_ / length_;
+  }
+}
+
+}  // namespace webrtc
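[Editor's note] A minimal sketch (not part of the patch) feeding a short ramp through the running-moment update implemented above; the window length of 3 is arbitrary:

#include <cstdio>

#include "webrtc/modules/audio_processing/transient/moving_moments.h"

int main() {
  webrtc::MovingMoments moments(3);  // Averages over the last 3 values.
  const float in[5] = {3.f, 3.f, 3.f, 0.f, 0.f};
  float first[5];
  float second[5];
  moments.CalculateMoments(in, 5, first, second);
  // The queue starts zero-filled, so first[] ramps up: 1, 2, 3, 2, 1,
  // and second[] is the matching mean of squares: 3, 6, 9, 6, 3.
  for (int i = 0; i < 5; ++i) {
    std::printf("mean %.2f, mean square %.2f\n", first[i], second[i]);
  }
  return 0;
}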
diff --git a/webrtc/modules/audio_processing/transient/moving_moments.h b/webrtc/modules/audio_processing/transient/moving_moments.h
new file mode 100644
index 0000000..6e3ad5b
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/moving_moments.h
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
+
+#include <queue>
+
+#include "webrtc/base/scoped_ptr.h"
+
+namespace webrtc {
+
+// Calculates the first and second moments for each value of a buffer taking
+// into account a given number of previous values.
+// It preserves its state, so it can be called multiple times.
+// TODO(chadan): Implement a function that takes a buffer of first moments and
+// a buffer of second moments and calculates the variances, when needed.
+// TODO(chadan): Add functionality to update with a buffer and only output the
+// last values of the moments, when needed.
+class MovingMoments {
+ public:
+  // Creates a Moving Moments object, that uses the last |length| values
+  // (including the new value introduced in every new calculation).
+  explicit MovingMoments(size_t length);
+  ~MovingMoments();
+
+  // Calculates the new values using |in|. Results will be in the out buffers.
+  // |first| and |second| must be allocated with at least |in_length|.
+  void CalculateMoments(const float* in, size_t in_length,
+                        float* first, float* second);
+
+ private:
+  size_t length_;
+  // A queue holding the |length_| latest input values.
+  std::queue<float> queue_;
+  // Sum of the values of the queue.
+  float sum_;
+  // Sum of the squares of the values of the queue.
+  float sum_of_squares_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
diff --git a/webrtc/modules/audio_processing/transient/transient_detector.cc b/webrtc/modules/audio_processing/transient/transient_detector.cc
new file mode 100644
index 0000000..7f021ac
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_detector.cc
@@ -0,0 +1,173 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/transient_detector.h"
+
+#include <assert.h>
+#include <float.h>
+#include <math.h>
+#include <algorithm>
+
+#include "webrtc/modules/audio_processing/transient/common.h"
+#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
+#include "webrtc/modules/audio_processing/transient/moving_moments.h"
+#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
+
+namespace webrtc {
+
+static const int kTransientLengthMs = 30;
+static const int kChunksAtStartupLeftToDelete =
+    kTransientLengthMs / ts::kChunkSizeMs;
+static const float kDetectThreshold = 16.f;
+
+TransientDetector::TransientDetector(int sample_rate_hz)
+    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
+      last_first_moment_(),
+      last_second_moment_(),
+      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
+      reference_energy_(1.f),
+      using_reference_(false) {
+  assert(sample_rate_hz == ts::kSampleRate8kHz ||
+         sample_rate_hz == ts::kSampleRate16kHz ||
+         sample_rate_hz == ts::kSampleRate32kHz ||
+         sample_rate_hz == ts::kSampleRate48kHz);
+  int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
+  // Adjustment to avoid data loss while downsampling, making
+  // |samples_per_chunk_| and |samples_per_transient| always divisible by
+  // |kLeaves|.
+  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
+  samples_per_transient -= samples_per_transient % kLeaves;
+
+  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
+  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
+                              kDaubechies8HighPassCoefficients,
+                              kDaubechies8LowPassCoefficients,
+                              kDaubechies8CoefficientsLength,
+                              kLevels));
+  for (size_t i = 0; i < kLeaves; ++i) {
+    moving_moments_[i].reset(
+        new MovingMoments(samples_per_transient / kLeaves));
+  }
+
+  first_moments_.reset(new float[tree_leaves_data_length_]);
+  second_moments_.reset(new float[tree_leaves_data_length_]);
+
+  for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
+    previous_results_.push_back(0.f);
+  }
+}
+
+TransientDetector::~TransientDetector() {}
+
+float TransientDetector::Detect(const float* data,
+                                size_t data_length,
+                                const float* reference_data,
+                                size_t reference_length) {
+  assert(data && data_length == samples_per_chunk_);
+
+  // TODO(aluebs): Check if these errors can logically happen and if not assert
+  // on them.
+  if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
+    return -1.f;
+  }
+
+  float result = 0.f;
+
+  for (size_t i = 0; i < kLeaves; ++i) {
+    WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
+
+    moving_moments_[i]->CalculateMoments(leaf->data(),
+                                         tree_leaves_data_length_,
+                                         first_moments_.get(),
+                                         second_moments_.get());
+
+    // Add value delayed (Use the last moments from the last call to Detect).
+    float unbiased_data = leaf->data()[0] - last_first_moment_[i];
+    result +=
+        unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
+
+    // Add new values.
+    for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
+      unbiased_data = leaf->data()[j] - first_moments_[j - 1];
+      result +=
+          unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
+    }
+
+    last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
+    last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
+  }
+
+  result /= tree_leaves_data_length_;
+
+  result *= ReferenceDetectionValue(reference_data, reference_length);
+
+  if (chunks_at_startup_left_to_delete_ > 0) {
+    chunks_at_startup_left_to_delete_--;
+    result = 0.f;
+  }
+
+  if (result >= kDetectThreshold) {
+    result = 1.f;
+  } else {
+    // Get proportional value.
+    // Proportion achieved with a squared raised cosine function with domain
+    // [0, kDetectThreshold) and image [0, 1); it's always increasing.
+    const float horizontal_scaling = ts::kPi / kDetectThreshold;
+    const float kHorizontalShift = ts::kPi;
+    const float kVerticalScaling = 0.5f;
+    const float kVerticalShift = 1.f;
+
+    result = (cos(result * horizontal_scaling + kHorizontalShift) +
+              kVerticalShift) * kVerticalScaling;
+    result *= result;
+  }
+
+  previous_results_.pop_front();
+  previous_results_.push_back(result);
+
+  // In the current implementation we return the max of the current result and
+  // the previous results, so the high results have a width equal to
+  // |transient_length|.
+  return *std::max_element(previous_results_.begin(), previous_results_.end());
+}
+
+// Looks for the highest slope and compares it with the previous ones.
+// An exponential transformation takes this to the [0, 1] range. This value is
+// multiplied by the detection result to avoid false positives.
+float TransientDetector::ReferenceDetectionValue(const float* data,
+                                                 size_t length) {
+  if (data == NULL) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  static const float kEnergyRatioThreshold = 0.2f;
+  static const float kReferenceNonLinearity = 20.f;
+  static const float kMemory = 0.99f;
+  float reference_energy = 0.f;
+  for (size_t i = 1; i < length; ++i) {
+    reference_energy += data[i] * data[i];
+  }
+  if (reference_energy == 0.f) {
+    using_reference_ = false;
+    return 1.f;
+  }
+  assert(reference_energy_ != 0);
+  float result = 1.f / (1.f + exp(kReferenceNonLinearity *
+                                  (kEnergyRatioThreshold -
+                                   reference_energy / reference_energy_)));
+  reference_energy_ =
+      kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
+
+  using_reference_ = true;
+
+  return result;
+}
+
+}  // namespace webrtc
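[Editor's note] A minimal sketch (not part of the patch) of the squared raised cosine used in Detect() above, mapping a raw detection value in [0, kDetectThreshold) onto a smooth likelihood in [0, 1):

#include <cmath>
#include <cstdio>

int main() {
  const float kPi = 3.14159265358979323846f;
  const float kDetectThreshold = 16.f;  // Same threshold as the detector.
  for (float raw = 0.f; raw < kDetectThreshold; raw += 4.f) {
    // (cos(x * pi/T + pi) + 1) * 0.5, squared; monotonically increasing.
    float likelihood =
        (std::cos(raw * kPi / kDetectThreshold + kPi) + 1.f) * 0.5f;
    likelihood *= likelihood;
    // raw 0 -> 0.00, raw 8 -> 0.25, raw 12 -> ~0.73.
    std::printf("raw %5.1f -> likelihood %.2f\n", raw, likelihood);
  }
  return 0;
}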
diff --git a/webrtc/modules/audio_processing/transient/transient_detector.h b/webrtc/modules/audio_processing/transient/transient_detector.h
new file mode 100644
index 0000000..3f96582
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_detector.h
@@ -0,0 +1,87 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
+
+#include <deque>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/audio_processing/transient/moving_moments.h"
+#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
+
+namespace webrtc {
+
+// This is an implementation of the transient detector described in "Causal
+// Wavelet based transient detector".
+// Calculates the log-likelihood of a transient to happen on a signal at any
+// given time based on the previous samples; it uses a WPD tree to analyze the
+// signal. It preserves its state, so it can be called multiple times.
+class TransientDetector {
+ public:
+  // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
+  // of 3 levels. Make an overloaded constructor to allow different wavelets
+  // and depths of the tree, when needed.
+
+  // Creates a wavelet based transient detector.
+  TransientDetector(int sample_rate_hz);
+
+  ~TransientDetector();
+
+  // Calculates the log-likelihood of the existence of a transient in |data|.
+  // |data_length| has to be equal to |samples_per_chunk_|.
+  // Returns a value between 0 and 1, as a non linear representation of this
+  // likelihood.
+  // Returns a negative value on error.
+  float Detect(const float* data,
+               size_t data_length,
+               const float* reference_data,
+               size_t reference_length);
+
+  bool using_reference() { return using_reference_; }
+
+ private:
+  float ReferenceDetectionValue(const float* data, size_t length);
+
+  static const size_t kLevels = 3;
+  static const size_t kLeaves = 1 << kLevels;
+
+  size_t samples_per_chunk_;
+
+  rtc::scoped_ptr<WPDTree> wpd_tree_;
+  size_t tree_leaves_data_length_;
+
+  // A MovingMoments object is needed for each leaf in the WPD tree.
+  rtc::scoped_ptr<MovingMoments> moving_moments_[kLeaves];
+
+  rtc::scoped_ptr<float[]> first_moments_;
+  rtc::scoped_ptr<float[]> second_moments_;
+
+  // Stores the last calculated moments from the previous detection.
+  float last_first_moment_[kLeaves];
+  float last_second_moment_[kLeaves];
+
+  // We keep track of the previous results from the previous chunks, so it can
+  // be used to effectively give results according to the |transient_length|.
+  std::deque<float> previous_results_;
+
+  // Number of chunks that are going to return only zeros at the beginning of
+  // the detection. It helps to avoid infs and nans due to the lack of
+  // information.
+  int chunks_at_startup_left_to_delete_;
+
+  float reference_energy_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor.cc b/webrtc/modules/audio_processing/transient/transient_suppressor.cc
new file mode 100644
index 0000000..206d14d
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_suppressor.cc
@@ -0,0 +1,424 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+#include <algorithm>
+#include <cmath>
+#include <limits>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/common_audio/fft4g.h"
+#include "webrtc/common_audio/include/audio_util.h"
+#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
+#include "webrtc/modules/audio_processing/transient/common.h"
+#include "webrtc/modules/audio_processing/transient/transient_detector.h"
+#include "webrtc/modules/audio_processing/ns/windows_private.h"
+#include "webrtc/system_wrappers/interface/logging.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+static const float kMeanIIRCoefficient = 0.5f;
+static const float kVoiceThreshold = 0.02f;
+
+// TODO(aluebs): Check if these values work also for 48kHz.
+static const size_t kMinVoiceBin = 3;
+static const size_t kMaxVoiceBin = 60;
+
+namespace {
+
+float ComplexMagnitude(float a, float b) {
+  return std::abs(a) + std::abs(b);
+}
+
+}  // namespace
+
+TransientSuppressor::TransientSuppressor()
+    : data_length_(0),
+      detection_length_(0),
+      analysis_length_(0),
+      buffer_delay_(0),
+      complex_analysis_length_(0),
+      num_channels_(0),
+      window_(NULL),
+      detector_smoothed_(0.f),
+      keypress_counter_(0),
+      chunks_since_keypress_(0),
+      detection_enabled_(false),
+      suppression_enabled_(false),
+      use_hard_restoration_(false),
+      chunks_since_voice_change_(0),
+      seed_(182),
+      using_reference_(false) {
+}
+
+TransientSuppressor::~TransientSuppressor() {}
+
+int TransientSuppressor::Initialize(int sample_rate_hz,
+                                    int detection_rate_hz,
+                                    int num_channels) {
+  switch (sample_rate_hz) {
+    case ts::kSampleRate8kHz:
+      analysis_length_ = 128u;
+      window_ = kBlocks80w128;
+      break;
+    case ts::kSampleRate16kHz:
+      analysis_length_ = 256u;
+      window_ = kBlocks160w256;
+      break;
+    case ts::kSampleRate32kHz:
+      analysis_length_ = 512u;
+      window_ = kBlocks320w512;
+      break;
+    case ts::kSampleRate48kHz:
+      analysis_length_ = 1024u;
+      window_ = kBlocks480w1024;
+      break;
+    default:
+      return -1;
+  }
+  if (detection_rate_hz != ts::kSampleRate8kHz &&
+      detection_rate_hz != ts::kSampleRate16kHz &&
+      detection_rate_hz != ts::kSampleRate32kHz &&
+      detection_rate_hz != ts::kSampleRate48kHz) {
+    return -1;
+  }
+  if (num_channels <= 0) {
+    return -1;
+  }
+
+  detector_.reset(new TransientDetector(detection_rate_hz));
+  data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000;
+  if (data_length_ > analysis_length_) {
+    assert(false);
+    return -1;
+  }
+  buffer_delay_ = analysis_length_ - data_length_;
+
+  complex_analysis_length_ = analysis_length_ / 2 + 1;
+  assert(complex_analysis_length_ >= kMaxVoiceBin);
+  num_channels_ = num_channels;
+  in_buffer_.reset(new float[analysis_length_ * num_channels_]);
+  memset(in_buffer_.get(),
+         0,
+         analysis_length_ * num_channels_ * sizeof(in_buffer_[0]));
+  detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000;
+  detection_buffer_.reset(new float[detection_length_]);
+  memset(detection_buffer_.get(),
+         0,
+         detection_length_ * sizeof(detection_buffer_[0]));
+  out_buffer_.reset(new float[analysis_length_ * num_channels_]);
+  memset(out_buffer_.get(),
+         0,
+         analysis_length_ * num_channels_ * sizeof(out_buffer_[0]));
+  // ip[0] must be zero to trigger initialization using rdft().
+  size_t ip_length = 2 + sqrtf(analysis_length_);
+  ip_.reset(new size_t[ip_length]());
+  memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
+  wfft_.reset(new float[complex_analysis_length_ - 1]);
+  memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
+  spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
+  memset(spectral_mean_.get(),
+         0,
+         complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
+  fft_buffer_.reset(new float[analysis_length_ + 2]);
+  memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
+  magnitudes_.reset(new float[complex_analysis_length_]);
+  memset(magnitudes_.get(),
+         0,
+         complex_analysis_length_ * sizeof(magnitudes_[0]));
+  mean_factor_.reset(new float[complex_analysis_length_]);
+
+  static const float kFactorHeight = 10.f;
+  static const float kLowSlope = 1.f;
+  static const float kHighSlope = 0.3f;
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    mean_factor_[i] =
+        kFactorHeight /
+            (1.f + exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
+        kFactorHeight /
+            (1.f + exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
+  }
+  detector_smoothed_ = 0.f;
+  keypress_counter_ = 0;
+  chunks_since_keypress_ = 0;
+  detection_enabled_ = false;
+  suppression_enabled_ = false;
+  use_hard_restoration_ = false;
+  chunks_since_voice_change_ = 0;
+  seed_ = 182;
+  using_reference_ = false;
+  return 0;
+}
+
+int TransientSuppressor::Suppress(float* data,
+                                  size_t data_length,
+                                  int num_channels,
+                                  const float* detection_data,
+                                  size_t detection_length,
+                                  const float* reference_data,
+                                  size_t reference_length,
+                                  float voice_probability,
+                                  bool key_pressed) {
+  if (!data || data_length != data_length_ || num_channels != num_channels_ ||
+      detection_length != detection_length_ || voice_probability < 0 ||
+      voice_probability > 1) {
+    return -1;
+  }
+
+  UpdateKeypress(key_pressed);
+  UpdateBuffers(data);
+
+  int result = 0;
+  if (detection_enabled_) {
+    UpdateRestoration(voice_probability);
+
+    if (!detection_data) {
+      // Use the input data of the first channel if special detection data is
+      // not supplied.
+      detection_data = &in_buffer_[buffer_delay_];
+    }
+
+    float detector_result = detector_->Detect(
+        detection_data, detection_length, reference_data, reference_length);
+    if (detector_result < 0) {
+      return -1;
+    }
+
+    using_reference_ = detector_->using_reference();
+
+    // |detector_smoothed_| follows the |detector_result| when this last one is
+    // increasing, but has an exponential decaying tail to be able to suppress
+    // the ringing of keyclicks.
+    float smooth_factor = using_reference_ ? 0.6 : 0.1;
+    detector_smoothed_ = detector_result >= detector_smoothed_
+                             ? detector_result
+                             : smooth_factor * detector_smoothed_ +
+                                   (1 - smooth_factor) * detector_result;
+
+    for (int i = 0; i < num_channels_; ++i) {
+      Suppress(&in_buffer_[i * analysis_length_],
+               &spectral_mean_[i * complex_analysis_length_],
+               &out_buffer_[i * analysis_length_]);
+    }
+  }
+
+  // If the suppression isn't enabled, we use the in buffer to delay the signal
+  // appropriately. This also gives time for the out buffer to be refreshed
+  // with new data between detection and suppression getting enabled.
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&data[i * data_length_],
+           suppression_enabled_ ? &out_buffer_[i * analysis_length_]
+                                : &in_buffer_[i * analysis_length_],
+           data_length_ * sizeof(*data));
+  }
+  return result;
+}
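[Editor's note] A minimal sketch (not part of the patch) of the asymmetric smoother used in Suppress() above: it tracks a rising detector value instantly and decays exponentially otherwise (factor 0.1 when no reference signal is in use):

#include <cstdio>

int main() {
  const float kSmoothFactor = 0.1f;
  float smoothed = 0.f;
  const float detector[6] = {0.f, 1.f, 0.f, 0.f, 0.f, 0.f};
  for (int i = 0; i < 6; ++i) {
    smoothed = detector[i] >= smoothed
                   ? detector[i]
                   : kSmoothFactor * smoothed +
                         (1.f - kSmoothFactor) * detector[i];
    // Jumps to 1 at i == 1, then decays: 0.1, 0.01, ...
    std::printf("chunk %d: %.4f\n", i, smoothed);
  }
  return 0;
}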
UpdateBuffers() must +// have been called. At return, |out_buffer_| will be filled with the +// processed output. +void TransientSuppressor::Suppress(float* in_ptr, + float* spectral_mean, + float* out_ptr) { + // Go to frequency domain. + for (size_t i = 0; i < analysis_length_; ++i) { + // TODO(aluebs): Rename windows + fft_buffer_[i] = in_ptr[i] * window_[i]; + } + + WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get()); + + // Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end + // for convenience. + fft_buffer_[analysis_length_] = fft_buffer_[1]; + fft_buffer_[analysis_length_ + 1] = 0.f; + fft_buffer_[1] = 0.f; + + for (size_t i = 0; i < complex_analysis_length_; ++i) { + magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2], + fft_buffer_[i * 2 + 1]); + } + // Restore audio if necessary. + if (suppression_enabled_) { + if (use_hard_restoration_) { + HardRestoration(spectral_mean); + } else { + SoftRestoration(spectral_mean); + } + } + + // Update the spectral mean. + for (size_t i = 0; i < complex_analysis_length_; ++i) { + spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] + + kMeanIIRCoefficient * magnitudes_[i]; + } + + // Back to time domain. + // Put R[n/2] back in fft_buffer_[1]. + fft_buffer_[1] = fft_buffer_[analysis_length_]; + + WebRtc_rdft(analysis_length_, + -1, + fft_buffer_.get(), + ip_.get(), + wfft_.get()); + const float fft_scaling = 2.f / analysis_length_; + + for (size_t i = 0; i < analysis_length_; ++i) { + out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling; + } +} + +void TransientSuppressor::UpdateKeypress(bool key_pressed) { + const int kKeypressPenalty = 1000 / ts::kChunkSizeMs; + const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs; + const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds. + + if (key_pressed) { + keypress_counter_ += kKeypressPenalty; + chunks_since_keypress_ = 0; + detection_enabled_ = true; + } + keypress_counter_ = std::max(0, keypress_counter_ - 1); + + if (keypress_counter_ > kIsTypingThreshold) { + if (!suppression_enabled_) { + LOG(LS_INFO) << "[ts] Transient suppression is now enabled."; + } + suppression_enabled_ = true; + keypress_counter_ = 0; + } + + if (detection_enabled_ && + ++chunks_since_keypress_ > kChunksUntilNotTyping) { + if (suppression_enabled_) { + LOG(LS_INFO) << "[ts] Transient suppression is now disabled."; + } + detection_enabled_ = false; + suppression_enabled_ = false; + keypress_counter_ = 0; + } +} + +void TransientSuppressor::UpdateRestoration(float voice_probability) { + const int kHardRestorationOffsetDelay = 3; + const int kHardRestorationOnsetDelay = 80; + + bool not_voiced = voice_probability < kVoiceThreshold; + + if (not_voiced == use_hard_restoration_) { + chunks_since_voice_change_ = 0; + } else { + ++chunks_since_voice_change_; + + if ((use_hard_restoration_ && + chunks_since_voice_change_ > kHardRestorationOffsetDelay) || + (!use_hard_restoration_ && + chunks_since_voice_change_ > kHardRestorationOnsetDelay)) { + use_hard_restoration_ = not_voiced; + chunks_since_voice_change_ = 0; + } + } +} + +// Shift buffers to make way for new data. Must be called after +// |detection_enabled_| is updated by UpdateKeypress(). +void TransientSuppressor::UpdateBuffers(float* data) { + // TODO(aluebs): Change to ring buffer. + memmove(in_buffer_.get(), + &in_buffer_[data_length_], + (buffer_delay_ + (num_channels_ - 1) * analysis_length_) * + sizeof(in_buffer_[0])); + // Copy new chunk to buffer. 
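+  // Buffer layout illustration (editor's note, not part of the sources).
+  // For each channel i, in_buffer_ holds analysis_length_ samples: the first
+  // buffer_delay_ are history, the last data_length_ are the new chunk:
+  //
+  //   channel 0: [ history (buffer_delay_) | new chunk (data_length_) ]
+  //   channel 1: [ history (buffer_delay_) | new chunk (data_length_) ]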
+  for (int i = 0; i < num_channels_; ++i) {
+    memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
+           &data[i * data_length_],
+           data_length_ * sizeof(*data));
+  }
+  if (detection_enabled_) {
+    // Shift previous chunk in out buffer.
+    memmove(out_buffer_.get(),
+            &out_buffer_[data_length_],
+            (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
+                sizeof(out_buffer_[0]));
+    // Initialize new chunk in out buffer.
+    for (int i = 0; i < num_channels_; ++i) {
+      memset(&out_buffer_[buffer_delay_ + i * analysis_length_],
+             0,
+             data_length_ * sizeof(out_buffer_[0]));
+    }
+  }
+}
+
+// Restores the unvoiced signal if a click is present.
+// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
+// the spectral mean. The attenuation depends on |detector_smoothed_|.
+// If a restoration takes place, the |magnitudes_| are updated to the new
+// value.
+void TransientSuppressor::HardRestoration(float* spectral_mean) {
+  const float detector_result =
+      1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
+  // To restore, we get the peaks in the spectrum. If higher than the previous
+  // spectral mean we adjust them.
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
+      // RandU() generates values on [0, int16::max()]
+      const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
+                          std::numeric_limits<int16_t>::max();
+      const float scaled_mean = detector_result * spectral_mean[i];
+
+      fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
+                           scaled_mean * cosf(phase);
+      fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
+                               scaled_mean * sinf(phase);
+      magnitudes_[i] = magnitudes_[i] -
+                       detector_result * (magnitudes_[i] - spectral_mean[i]);
+    }
+  }
+}
+
+// Restores the voiced signal if a click is present.
+// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
+// the spectral mean and that is lower than some function of the current block
+// frequency mean. The attenuation depends on |detector_smoothed_|.
+// If a restoration takes place, the |magnitudes_| are updated to the new
+// value.
+void TransientSuppressor::SoftRestoration(float* spectral_mean) {
+  // Get the spectral magnitude mean of the current block.
+  float block_frequency_mean = 0;
+  for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
+    block_frequency_mean += magnitudes_[i];
+  }
+  block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);
+
+  // To restore, we get the peaks in the spectrum. If higher than the
+  // previous spectral mean and lower than a factor of the block mean
+  // we adjust them. The factor is a double sigmoid that has a minimum in the
+  // voice frequency range (300Hz - 3kHz).
+  for (size_t i = 0; i < complex_analysis_length_; ++i) {
+    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
+        (using_reference_ ||
+         magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
+      const float new_magnitude =
+          magnitudes_[i] -
+          detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
+      const float magnitude_ratio = new_magnitude / magnitudes_[i];
+
+      fft_buffer_[i * 2] *= magnitude_ratio;
+      fft_buffer_[i * 2 + 1] *= magnitude_ratio;
+      magnitudes_[i] = new_magnitude;
+    }
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/transient_suppressor.h b/webrtc/modules/audio_processing/transient/transient_suppressor.h
new file mode 100644
index 0000000..5a6f117
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/transient_suppressor.h
@@ -0,0 +1,120 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
+
+#include <deque>
+#include <set>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/test/testsupport/gtest_prod_util.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class TransientDetector;
+
+// Detects transients in an audio stream and suppresses them using a simple
+// restoration algorithm that attenuates unexpected spikes in the spectrum.
+class TransientSuppressor {
+ public:
+  TransientSuppressor();
+  ~TransientSuppressor();
+
+  int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
+
+  // Processes a |data| chunk, and returns it with keystrokes suppressed from
+  // it. The float format is assumed to be int16 ranged. If there is more than
+  // one channel, the chunks are concatenated one after the other in |data|.
+  // |data_length| must be equal to |data_length_|.
+  // |num_channels| must be equal to |num_channels_|.
+  // A sub-band, ideally the higher, can be used as |detection_data|. If it is
+  // NULL, |data| is used for the detection too. The |detection_data| is
+  // always assumed mono.
+  // If a reference signal (e.g. keyboard microphone) is available, it can be
+  // passed in as |reference_data|. It is assumed mono and must have the same
+  // length as |data|. NULL is accepted if unavailable.
+  // This suppressor performs better if voice information is available.
+  // |voice_probability| is the probability of voice being present in this
+  // chunk of audio. If voice information is not available,
+  // |voice_probability| must always be set to 1.
+  // |key_pressed| determines if a key was pressed on this audio chunk.
+  // Returns 0 on success and -1 otherwise.
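+  // Minimal usage sketch (editor's illustration; the 48 kHz rates, |chunk|
+  // and |key_pressed| are assumptions, not part of this header):
+  //
+  //   TransientSuppressor ts;
+  //   if (ts.Initialize(48000, 48000, 1) == 0) {
+  //     // |chunk| holds 480 int16-ranged float samples (10 ms at 48 kHz).
+  //     ts.Suppress(chunk, 480, 1, NULL, 480, NULL, 0, 1.f, key_pressed);
+  //   }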
+  int Suppress(float* data,
+               size_t data_length,
+               int num_channels,
+               const float* detection_data,
+               size_t detection_length,
+               const float* reference_data,
+               size_t reference_length,
+               float voice_probability,
+               bool key_pressed);
+
+ private:
+  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
+                           TypingDetectionLogicWorksAsExpectedForMono);
+  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
+
+  void UpdateKeypress(bool key_pressed);
+  void UpdateRestoration(float voice_probability);
+
+  void UpdateBuffers(float* data);
+
+  void HardRestoration(float* spectral_mean);
+  void SoftRestoration(float* spectral_mean);
+
+  rtc::scoped_ptr<TransientDetector> detector_;
+
+  size_t data_length_;
+  size_t detection_length_;
+  size_t analysis_length_;
+  size_t buffer_delay_;
+  size_t complex_analysis_length_;
+  int num_channels_;
+  // Input buffer where the original samples are stored.
+  rtc::scoped_ptr<float[]> in_buffer_;
+  rtc::scoped_ptr<float[]> detection_buffer_;
+  // Output buffer where the restored samples are stored.
+  rtc::scoped_ptr<float[]> out_buffer_;
+
+  // Arrays for fft.
+  rtc::scoped_ptr<size_t[]> ip_;
+  rtc::scoped_ptr<float[]> wfft_;
+
+  rtc::scoped_ptr<float[]> spectral_mean_;
+
+  // Stores the data for the fft.
+  rtc::scoped_ptr<float[]> fft_buffer_;
+
+  rtc::scoped_ptr<float[]> magnitudes_;
+
+  const float* window_;
+
+  rtc::scoped_ptr<float[]> mean_factor_;
+
+  float detector_smoothed_;
+
+  int keypress_counter_;
+  int chunks_since_keypress_;
+  bool detection_enabled_;
+  bool suppression_enabled_;
+
+  bool use_hard_restoration_;
+  int chunks_since_voice_change_;
+
+  uint32_t seed_;
+
+  bool using_reference_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
diff --git a/webrtc/modules/audio_processing/transient/wpd_node.cc b/webrtc/modules/audio_processing/transient/wpd_node.cc
new file mode 100644
index 0000000..8114a70
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_node.cc
@@ -0,0 +1,71 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/wpd_node.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/common_audio/fir_filter.h"
+#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
+
+namespace webrtc {
+
+WPDNode::WPDNode(size_t length,
+                 const float* coefficients,
+                 size_t coefficients_length)
+    : // The data buffer has parent data length to be able to contain and
+      // filter it.
+      data_(new float[2 * length + 1]),
+      length_(length),
+      filter_(FIRFilter::Create(coefficients,
+                                coefficients_length,
+                                2 * length + 1)) {
+  assert(length > 0 && coefficients && coefficients_length > 0);
+  memset(data_.get(), 0.f, (2 * length + 1) * sizeof(data_[0]));
+}
+
+WPDNode::~WPDNode() {}
+
+int WPDNode::Update(const float* parent_data, size_t parent_data_length) {
+  if (!parent_data || (parent_data_length / 2) != length_) {
+    return -1;
+  }
+
+  // Filter data.
+  filter_->Filter(parent_data, parent_data_length, data_.get());
+
+  // Decimate data.
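+  // Dyadic decimation keeps every other sample; with an odd sequence the
+  // odd-indexed samples survive, e.g. (editor's illustration)
+  //   in:  x0 x1 x2 x3 x4 x5   ->   out: x1 x3 x5
+  // which halves |parent_data_length| down to |length_|.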
+  const bool kOddSequence = true;
+  size_t output_samples = DyadicDecimate(
+      data_.get(), parent_data_length, kOddSequence, data_.get(), length_);
+  if (output_samples != length_) {
+    return -1;
+  }
+
+  // Take the absolute value of all the samples.
+  for (size_t i = 0; i < length_; ++i) {
+    data_[i] = fabs(data_[i]);
+  }
+
+  return 0;
+}
+
+int WPDNode::set_data(const float* new_data, size_t length) {
+  if (!new_data || length != length_) {
+    return -1;
+  }
+  memcpy(data_.get(), new_data, length * sizeof(data_[0]));
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/wpd_node.h b/webrtc/modules/audio_processing/transient/wpd_node.h
new file mode 100644
index 0000000..f66cad9
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_node.h
@@ -0,0 +1,46 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class FIRFilter;
+
+// A single node of a Wavelet Packet Decomposition (WPD) tree.
+class WPDNode {
+ public:
+  // Creates a WPDNode. The data vector will contain zeros. The filter will
+  // have the coefficients provided.
+  WPDNode(size_t length, const float* coefficients, size_t coefficients_length);
+  ~WPDNode();
+
+  // Updates the node data. |parent_data_length| / 2 must be equal to
+  // |length_|.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* parent_data, size_t parent_data_length);
+
+  const float* data() const { return data_.get(); }
+  // Returns 0 if correct, and -1 otherwise.
+  int set_data(const float* new_data, size_t length);
+  size_t length() const { return length_; }
+
+ private:
+  rtc::scoped_ptr<float[]> data_;
+  size_t length_;
+  rtc::scoped_ptr<FIRFilter> filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
diff --git a/webrtc/modules/audio_processing/transient/wpd_tree.cc b/webrtc/modules/audio_processing/transient/wpd_tree.cc
new file mode 100644
index 0000000..40a37a0
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_tree.cc
@@ -0,0 +1,119 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
+#include "webrtc/modules/audio_processing/transient/wpd_node.h"
+
+namespace webrtc {
+
+WPDTree::WPDTree(size_t data_length, const float* high_pass_coefficients,
+                 const float* low_pass_coefficients,
+                 size_t coefficients_length, int levels)
+    : data_length_(data_length),
+      levels_(levels),
+      num_nodes_((1 << (levels + 1)) - 1) {
+  assert(data_length > (static_cast<size_t>(1) << levels) &&
+         high_pass_coefficients &&
+         low_pass_coefficients &&
+         levels > 0);
+  // Size is 1 more, so we can use the array as 1-based. nodes_[0] is never
+  // allocated.
+  nodes_.reset(new rtc::scoped_ptr<WPDNode>[num_nodes_ + 1]);
+
+  // Create the first node.
+  const float kRootCoefficient = 1.f;  // Identity Coefficient.
+  nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1));
+  // Variables used to create the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  // Branch each node in each level to create its children. The last level is
+  // not branched (all the nodes of that level are leaves).
+  for (int current_level = 0; current_level < levels; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+      nodes_[index_left_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                 low_pass_coefficients,
+                                                 coefficients_length));
+      nodes_[index_right_child].reset(new WPDNode(nodes_[index]->length() / 2,
+                                                  high_pass_coefficients,
+                                                  coefficients_length));
+    }
+  }
+}
+
+WPDTree::~WPDTree() {}
+
+WPDNode* WPDTree::NodeAt(int level, int index) {
+  const int kNumNodesAtLevel = 1 << level;
+  if (level < 0 || level > levels_ || index < 0 || index >= kNumNodesAtLevel) {
+    return NULL;
+  }
+  return nodes_[(1 << level) + index].get();
+}
+
+int WPDTree::Update(const float* data, size_t data_length) {
+  if (!data || data_length != data_length_) {
+    return -1;
+  }
+
+  // Update the root node.
+  int update_result = nodes_[1]->set_data(data, data_length);
+  if (update_result != 0) {
+    return -1;
+  }
+
+  // Variables used to update the rest of the nodes.
+  size_t index = 1;
+  size_t index_left_child = 0;
+  size_t index_right_child = 0;
+
+  int num_nodes_at_curr_level = 0;
+
+  for (int current_level = 0; current_level < levels_; ++current_level) {
+    num_nodes_at_curr_level = 1 << current_level;
+    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
+      index = (1 << current_level) + i;
+      // Obtain the index of the current node children.
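+      // Index arithmetic example (editor's note): at current_level = 2 and
+      // i = 1 the node index is (1 << 2) + 1 = 5, its children are 10 and 11,
+      // and its parent is 5 / 2 = 2, matching the formulas in wpd_tree.h.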
+      index_left_child = index * 2;
+      index_right_child = index_left_child + 1;
+
+      update_result = nodes_[index_left_child]->Update(
+          nodes_[index]->data(), nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+
+      update_result = nodes_[index_right_child]->Update(
+          nodes_[index]->data(), nodes_[index]->length());
+      if (update_result != 0) {
+        return -1;
+      }
+    }
+  }
+
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/transient/wpd_tree.h b/webrtc/modules/audio_processing/transient/wpd_tree.h
new file mode 100644
index 0000000..7f0fc79
--- /dev/null
+++ b/webrtc/modules/audio_processing/transient/wpd_tree.h
@@ -0,0 +1,91 @@
+/*
+ *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/audio_processing/transient/wpd_node.h"
+
+namespace webrtc {
+
+// Tree of a Wavelet Packet Decomposition (WPD).
+//
+// The root node contains all the data provided; for each node in the tree,
+// the left child contains the approximation coefficients extracted from the
+// node, and the right child contains the detail coefficients.
+// It preserves its state between calls, so it can be called repeatedly.
+//
+// The number of nodes in the tree will be 2 ^ (levels + 1) - 1.
+//
+// Implementation details: Since the tree will always be a complete binary
+// tree, it is implemented using a single linear array instead of managing the
+// relationships in each node. For convenience, it is better to use an array
+// that starts at 1 (instead of 0). Taking that into account, the following
+// formulas apply:
+// Root node index: 1.
+// Node(Level, Index in that level): 2 ^ Level + (Index in that level).
+// Left Child: Current node index * 2.
+// Right Child: Current node index * 2 + 1.
+// Parent: Current Node Index / 2 (Integer division).
+class WPDTree {
+ public:
+  // Creates a WPD tree using the data length and coefficients provided.
+  WPDTree(size_t data_length,
+          const float* high_pass_coefficients,
+          const float* low_pass_coefficients,
+          size_t coefficients_length,
+          int levels);
+  ~WPDTree();
+
+  // Returns the number of nodes at any given level.
+  static int NumberOfNodesAtLevel(int level) {
+    return 1 << level;
+  }
+
+  // Returns a pointer to the node at the given level and index (of that
+  // level).
+  // Level goes from 0 to levels().
+  // Index goes from 0 to NumberOfNodesAtLevel(level) - 1.
+  //
+  // You can use the following formulas to get any node within the tree:
+  // Notation: (Level, Index of node in that level).
+  // Root node: (0, 0).
+  // Left Child: (Current node level + 1, Current node index * 2).
+  // Right Child: (Current node level + 1, Current node index * 2 + 1).
+  // Parent: (Current node level - 1, Current node index / 2)
+  // (Integer division).
+  //
+  // If level or index are out of bounds the function will return NULL.
+  WPDNode* NodeAt(int level, int index);
+
+  // Updates all the nodes of the tree with the new data. |data_length| must
+  // be the same that was used for the creation of the tree.
+  // Returns 0 if correct, and -1 otherwise.
+  int Update(const float* data, size_t data_length);
+
+  // Returns the total number of levels below the root. Root is considered
+  // level 0.
+  int levels() const { return levels_; }
+
+  // Returns the total number of nodes.
+  int num_nodes() const { return num_nodes_; }
+
+  // Returns the total number of leaves.
+  int num_leaves() const { return 1 << levels_; }
+
+ private:
+  size_t data_length_;
+  int levels_;
+  int num_nodes_;
+  rtc::scoped_ptr<rtc::scoped_ptr<WPDNode>[]> nodes_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
diff --git a/webrtc/modules/audio_processing/typing_detection.cc b/webrtc/modules/audio_processing/typing_detection.cc
new file mode 100644
index 0000000..5f5ce0a
--- /dev/null
+++ b/webrtc/modules/audio_processing/typing_detection.cc
@@ -0,0 +1,90 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/typing_detection.h"
+
+namespace webrtc {
+
+TypingDetection::TypingDetection()
+    : time_active_(0),
+      time_since_last_typing_(0),
+      penalty_counter_(0),
+      counter_since_last_detection_update_(0),
+      detection_to_report_(false),
+      new_detection_to_report_(false),
+      time_window_(10),
+      cost_per_typing_(100),
+      reporting_threshold_(300),
+      penalty_decay_(1),
+      type_event_delay_(2),
+      report_detection_update_period_(1) {
+}
+
+TypingDetection::~TypingDetection() {}
+
+bool TypingDetection::Process(bool key_pressed, bool vad_activity) {
+  if (vad_activity)
+    time_active_++;
+  else
+    time_active_ = 0;
+
+  // Keep track of the time since the last typing event.
+  if (key_pressed)
+    time_since_last_typing_ = 0;
+  else
+    ++time_since_last_typing_;
+
+  if (time_since_last_typing_ < type_event_delay_ &&
+      vad_activity &&
+      time_active_ < time_window_) {
+    penalty_counter_ += cost_per_typing_;
+    if (penalty_counter_ > reporting_threshold_)
+      new_detection_to_report_ = true;
+  }
+
+  if (penalty_counter_ > 0)
+    penalty_counter_ -= penalty_decay_;
+
+  if (++counter_since_last_detection_update_ ==
+      report_detection_update_period_) {
+    detection_to_report_ = new_detection_to_report_;
+    new_detection_to_report_ = false;
+    counter_since_last_detection_update_ = 0;
+  }
+
+  return detection_to_report_;
+}
+
+int TypingDetection::TimeSinceLastDetectionInSeconds() {
+  // Round to whole seconds.
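+  // Process() is called every 10 ms, so |time_since_last_typing_| counts
+  // 10 ms ticks. Adding 50 before dividing by 100 rounds to the nearest
+  // second, e.g. 149 ticks (1.49 s) -> (149 + 50) / 100 = 1, while 150 ticks
+  // (1.5 s) -> 2 (editor's illustration).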
+  return (time_since_last_typing_ + 50) / 100;
+}
+
+void TypingDetection::SetParameters(int time_window,
+                                    int cost_per_typing,
+                                    int reporting_threshold,
+                                    int penalty_decay,
+                                    int type_event_delay,
+                                    int report_detection_update_period) {
+  if (time_window) time_window_ = time_window;
+
+  if (cost_per_typing) cost_per_typing_ = cost_per_typing;
+
+  if (reporting_threshold) reporting_threshold_ = reporting_threshold;
+
+  if (penalty_decay) penalty_decay_ = penalty_decay;
+
+  if (type_event_delay) type_event_delay_ = type_event_delay;
+
+  if (report_detection_update_period)
+    report_detection_update_period_ = report_detection_update_period;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/typing_detection.h b/webrtc/modules/audio_processing/typing_detection.h
new file mode 100644
index 0000000..5fa6456
--- /dev/null
+++ b/webrtc/modules/audio_processing/typing_detection.h
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
+
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class TypingDetection {
+ public:
+  TypingDetection();
+  virtual ~TypingDetection();
+
+  // Runs the detection algorithm. Shall be called every 10 ms. Returns true
+  // if typing is detected, or false otherwise, based on the update period as
+  // set with SetParameters(). See |report_detection_update_period_|
+  // description below.
+  bool Process(bool key_pressed, bool vad_activity);
+
+  // Gets the time in seconds since the last detection.
+  int TimeSinceLastDetectionInSeconds();
+
+  // Sets the algorithm parameters. A parameter value of 0 leaves it
+  // unchanged. See the corresponding member variables below for descriptions.
+  void SetParameters(int time_window,
+                     int cost_per_typing,
+                     int reporting_threshold,
+                     int penalty_decay,
+                     int type_event_delay,
+                     int report_detection_update_period);
+
+ private:
+  int time_active_;
+  int time_since_last_typing_;
+  int penalty_counter_;
+
+  // Counter since last time the detection status reported by Process() was
+  // updated. See also |report_detection_update_period_|.
+  int counter_since_last_detection_update_;
+
+  // The detection status to report. Updated every
+  // |report_detection_update_period_| call to Process().
+  bool detection_to_report_;
+
+  // What |detection_to_report_| should be set to next time it is updated.
+  bool new_detection_to_report_;
+
+  // Settable threshold values.
+
+  // Number of 10 ms slots accepted to count as a hit.
+  int time_window_;
+
+  // Penalty added when a typing event coincides with voice activity.
+  int cost_per_typing_;
+
+  // Threshold for |penalty_counter_|.
+  int reporting_threshold_;
+
+  // How much we reduce |penalty_counter_| every 10 ms.
+  int penalty_decay_;
+
+  // How old typing events we allow (in 10 ms slots).
+  int type_event_delay_;
+
+  // Settable update period.
+
+  // Number of 10 ms slots between each update of the detection status
+  // returned by Process().
+  // This inertia added to the algorithm is usually desirable and provided so
+  // that consumers of the class don't have to implement it themselves if
+  // they don't wish.
+  // If set to 1, each call to Process() will return the detection status for
+  // that 10 ms slot.
+  // If set to N (where N > 1), the detection status returned from Process()
+  // will remain the same until Process() has been called N times. Then, if
+  // none of the last N calls to Process() has detected typing for each
+  // respective 10 ms slot, Process() will return false. If at least one of
+  // the last N calls has detected typing, Process() will return true. That
+  // returned status will then remain the same until the next N calls have
+  // been made.
+  int report_detection_update_period_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TYPING_DETECTION_H_
diff --git a/webrtc/modules/audio_processing/utility/Makefile.am b/webrtc/modules/audio_processing/utility/Makefile.am
deleted file mode 100644
index b1376be..0000000
--- a/webrtc/modules/audio_processing/utility/Makefile.am
+++ /dev/null
@@ -1,12 +0,0 @@
-noinst_LTLIBRARIES = libapm_util.la
-
-libapm_util_la_SOURCES = delay_estimator_float.c \
-	delay_estimator_float.h \
-	delay_estimator.c \
-	delay_estimator.h \
-	fft4g.c \
-	fft4g.h \
-	ring_buffer.c \
-	ring_buffer.h
-libapm_util_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-	-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface
diff --git a/webrtc/modules/audio_processing/utility/delay_estimator.c b/webrtc/modules/audio_processing/utility/delay_estimator.c
index 044d545..f9f3dc2 100644
--- a/webrtc/modules/audio_processing/utility/delay_estimator.c
+++ b/webrtc/modules/audio_processing/utility/delay_estimator.c
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
  *
  *  Use of this source code is governed by a BSD-style license
  *  that can be found in the LICENSE file in the root of the source
@@ -8,61 +8,42 @@
  *  be found in the AUTHORS file in the root of the source tree.
  */
 
-#include "delay_estimator.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
 
 #include <assert.h>
 #include <stdlib.h>
 #include <string.h>
 
-#include "signal_processing_library.h"
+// The number of right shifts used for scaling depends linearly on the number
+// of bits in the far-end binary spectrum.
+static const int kShiftsAtZero = 13;  // Right shifts at zero binary spectrum.
+static const int kShiftsLinearSlope = 3;
 
-typedef struct {
-  // Pointers to mean values of spectrum and bit counts
-  int32_t* mean_far_spectrum;
-  int32_t* mean_near_spectrum;
-  int32_t* mean_bit_counts;
+static const int32_t kProbabilityOffset = 1024;  // 2 in Q9.
+static const int32_t kProbabilityLowerLimit = 8704;  // 17 in Q9.
+static const int32_t kProbabilityMinSpread = 2816;  // 5.5 in Q9.
 
-  // Arrays only used locally in DelayEstimatorProcess() but whose size
-  // is determined at run-time.
-  int32_t* bit_counts;
-  int32_t* far_spectrum_32;
-  int32_t* near_spectrum_32;
+// Robust validation settings.
+static const float kHistogramMax = 3000.f;
+static const float kLastHistogramMax = 250.f;
+static const float kMinHistogramThreshold = 1.5f;
+static const int kMinRequiredHits = 10;
+static const int kMaxHitsWhenPossiblyNonCausal = 10;
+static const int kMaxHitsWhenPossiblyCausal = 1000;
+static const float kQ14Scaling = 1.f / (1 << 14);  // Scaling by 2^14 to get Q0.
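+// Q-format illustration (editor's note, not part of the sources): a QN
+// constant stores x as x * 2^N, so 2 in Q9 is 2 * 512 = 1024 and 5.5 in Q9 is
+// 5.5 * 512 = 2816, matching the values above; kQ14Scaling maps a Q14 value
+// back to Q0 by multiplying with 1 / 2^14.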
+static const float kFractionSlope = 0.05f; +static const float kMinFractionWhenPossiblyCausal = 0.5f; +static const float kMinFractionWhenPossiblyNonCausal = 0.25f; - // Binary history variables - uint32_t* binary_far_history; +// Counts and returns number of bits of a 32-bit word. +static int BitCount(uint32_t u32) { + uint32_t tmp = u32 - ((u32 >> 1) & 033333333333) - + ((u32 >> 2) & 011111111111); + tmp = ((tmp + (tmp >> 3)) & 030707070707); + tmp = (tmp + (tmp >> 6)); + tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; - // Far end history variables - uint16_t* far_history; - int far_history_pos; - int* far_q_domains; - - // Delay histogram variables - int* delay_histogram; - int vad_counter; - - // Delay memory - int last_delay; - - // Used to enable far end alignment. If it is disabled, only delay values are - // produced - int alignment_enabled; - - // Buffer size parameters - int history_size; - int spectrum_size; - -} DelayEstimator_t; - -// Only bit |kBandFirst| through bit |kBandLast| are processed -// |kBandFirst| - |kBandLast| must be < 32 -static const int kBandFirst = 12; -static const int kBandLast = 43; - -static __inline uint32_t SetBit(uint32_t in, int32_t pos) { - uint32_t mask = WEBRTC_SPL_LSHIFT_W32(1, pos); - uint32_t out = (in | mask); - - return out; + return ((int) tmp); } // Compares the |binary_vector| with all rows of the |binary_matrix| and counts @@ -83,468 +64,621 @@ static void BitCountComparison(uint32_t binary_vector, int matrix_size, int32_t* bit_counts) { int n = 0; - uint32_t a = binary_vector; - register uint32_t tmp; - // compare |binary_vector| with all rows of the |binary_matrix| + // Compare |binary_vector| with all rows of the |binary_matrix| for (; n < matrix_size; n++) { - a = (binary_vector ^ binary_matrix[n]); - // Returns bit counts in tmp - tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111); - tmp = ((tmp + (tmp >> 3)) & 030707070707); - tmp = (tmp + (tmp >> 6)); - tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077; - - bit_counts[n] = (int32_t) tmp; + bit_counts[n] = (int32_t) BitCount(binary_vector ^ binary_matrix[n]); } } -// Computes the binary spectrum by comparing the input |spectrum| with a -// |threshold_spectrum|. +// Collects necessary statistics for the HistogramBasedValidation(). This +// function has to be called prior to calling HistogramBasedValidation(). The +// statistics updated and used by the HistogramBasedValidation() are: +// 1. the number of |candidate_hits|, which states for how long we have had the +// same |candidate_delay| +// 2. the |histogram| of candidate delays over time. This histogram is +// weighted with respect to a reliability measure and time-varying to cope +// with possible delay shifts. +// For further description see commented code. // // Inputs: -// - spectrum : Spectrum of which the binary spectrum should be -// calculated. -// - threshold_spectrum : Threshold spectrum with which the input -// spectrum is compared. -// Return: -// - out : Binary spectrum -// -static uint32_t BinarySpectrum(int32_t* spectrum, int32_t* threshold_spectrum) { - int k = kBandFirst; - uint32_t out = 0; +// - candidate_delay : The delay to validate. +// - valley_depth_q14 : The cost function has a valley/minimum at the +// |candidate_delay| location. |valley_depth_q14| is the +// cost function difference between the minimum and +// maximum locations. The value is in the Q14 domain. +// - valley_level_q14 : Is the cost function value at the minimum, in Q14. 
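+// Numeric example (editor's illustration): if the cost function bottoms out
+// at valley_level_q14 = 4096 (0.25 in Q14) for the candidate delay while its
+// maximum is 12288 (0.75 in Q14), then valley_depth_q14 = 8192 and the
+// candidate's histogram bin grows by 8192 * kQ14Scaling = 0.5 per call.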
+static void UpdateRobustValidationStatistics(BinaryDelayEstimator* self, + int candidate_delay, + int32_t valley_depth_q14, + int32_t valley_level_q14) { + const float valley_depth = valley_depth_q14 * kQ14Scaling; + float decrease_in_last_set = valley_depth; + const int max_hits_for_slow_change = (candidate_delay < self->last_delay) ? + kMaxHitsWhenPossiblyNonCausal : kMaxHitsWhenPossiblyCausal; + int i = 0; - for (; k <= kBandLast; k++) { - if (spectrum[k] > threshold_spectrum[k]) { - out = SetBit(out, k - kBandFirst); + assert(self->history_size == self->farend->history_size); + // Reset |candidate_hits| if we have a new candidate. + if (candidate_delay != self->last_candidate_delay) { + self->candidate_hits = 0; + self->last_candidate_delay = candidate_delay; + } + self->candidate_hits++; + + // The |histogram| is updated differently across the bins. + // 1. The |candidate_delay| histogram bin is increased with the + // |valley_depth|, which is a simple measure of how reliable the + // |candidate_delay| is. The histogram is not increased above + // |kHistogramMax|. + self->histogram[candidate_delay] += valley_depth; + if (self->histogram[candidate_delay] > kHistogramMax) { + self->histogram[candidate_delay] = kHistogramMax; + } + // 2. The histogram bins in the neighborhood of |candidate_delay| are + // unaffected. The neighborhood is defined as x + {-2, -1, 0, 1}. + // 3. The histogram bins in the neighborhood of |last_delay| are decreased + // with |decrease_in_last_set|. This value equals the difference between + // the cost function values at the locations |candidate_delay| and + // |last_delay| until we reach |max_hits_for_slow_change| consecutive hits + // at the |candidate_delay|. If we exceed this amount of hits the + // |candidate_delay| is a "potential" candidate and we start decreasing + // these histogram bins more rapidly with |valley_depth|. + if (self->candidate_hits < max_hits_for_slow_change) { + decrease_in_last_set = (self->mean_bit_counts[self->compare_delay] - + valley_level_q14) * kQ14Scaling; + } + // 4. All other bins are decreased with |valley_depth|. + // TODO(bjornv): Investigate how to make this loop more efficient. Split up + // the loop? Remove parts that doesn't add too much. + for (i = 0; i < self->history_size; ++i) { + int is_in_last_set = (i >= self->last_delay - 2) && + (i <= self->last_delay + 1) && (i != candidate_delay); + int is_in_candidate_set = (i >= candidate_delay - 2) && + (i <= candidate_delay + 1); + self->histogram[i] -= decrease_in_last_set * is_in_last_set + + valley_depth * (!is_in_last_set && !is_in_candidate_set); + // 5. No histogram bin can go below 0. + if (self->histogram[i] < 0) { + self->histogram[i] = 0; } } - - return out; } -// Calculates the mean recursively. +// Validates the |candidate_delay|, estimated in WebRtc_ProcessBinarySpectrum(), +// based on a mix of counting concurring hits with a modified histogram +// of recent delay estimates. In brief a candidate is valid (returns 1) if it +// is the most likely according to the histogram. There are a couple of +// exceptions that are worth mentioning: +// 1. If the |candidate_delay| < |last_delay| it can be that we are in a +// non-causal state, breaking a possible echo control algorithm. Hence, we +// open up for a quicker change by allowing the change even if the +// |candidate_delay| is not the most likely one according to the histogram. +// 2. 
There's a minimum number of hits (kMinRequiredHits) and the histogram
+//    value has to reach a minimum (kMinHistogramThreshold) to be valid.
+// 3. The action also depends on the filter length used for echo control. If
+//    the delay difference is larger than what the filter can capture, we
+//    also move quicker towards a change.
+// For further description see commented code.
+//
+// Input:
+//    - candidate_delay     : The delay to validate.
+//
+// Return value:
+//    - is_histogram_valid  : 1 - The |candidate_delay| is valid.
+//                            0 - Otherwise.
+static int HistogramBasedValidation(const BinaryDelayEstimator* self,
+                                    int candidate_delay) {
+  float fraction = 1.f;
+  float histogram_threshold = self->histogram[self->compare_delay];
+  const int delay_difference = candidate_delay - self->last_delay;
+  int is_histogram_valid = 0;
+
+  // The histogram based validation of |candidate_delay| is done by comparing
+  // the |histogram| at bin |candidate_delay| with a |histogram_threshold|.
+  // This |histogram_threshold| equals a |fraction| of the |histogram| at bin
+  // |last_delay|. The |fraction| is a piecewise linear function of the
+  // |delay_difference| between the |candidate_delay| and the |last_delay|,
+  // allowing for a quicker move if
+  //   i) a potential echo control filter can not handle these large
+  //      differences.
+  //  ii) keeping |last_delay| instead of updating to |candidate_delay| could
+  //      force an echo control into a non-causal state.
+  // We further require the histogram to have reached a minimum value of
+  // |kMinHistogramThreshold|. In addition, we also require the number of
+  // |candidate_hits| to be more than |kMinRequiredHits| to remove spurious
+  // values.
+
+  // Calculate a comparison histogram value (|histogram_threshold|) that
+  // depends on the distance between the |candidate_delay| and |last_delay|.
+  // TODO(bjornv): How much can we gain by turning the fraction calculation
+  // into tables?
+  if (delay_difference > self->allowed_offset) {
+    fraction = 1.f - kFractionSlope * (delay_difference - self->allowed_offset);
+    fraction = (fraction > kMinFractionWhenPossiblyCausal ? fraction :
+        kMinFractionWhenPossiblyCausal);
+  } else if (delay_difference < 0) {
+    fraction = kMinFractionWhenPossiblyNonCausal -
+        kFractionSlope * delay_difference;
+    fraction = (fraction > 1.f ? 1.f : fraction);
+  }
+  histogram_threshold *= fraction;
+  histogram_threshold = (histogram_threshold > kMinHistogramThreshold ?
+      histogram_threshold : kMinHistogramThreshold);
+
+  is_histogram_valid =
+      (self->histogram[candidate_delay] >= histogram_threshold) &&
+      (self->candidate_hits > kMinRequiredHits);
+
+  return is_histogram_valid;
+}
+
+// Performs a robust validation of the |candidate_delay| estimated in
+// WebRtc_ProcessBinarySpectrum(). 
The algorithm takes the +// |is_instantaneous_valid| and the |is_histogram_valid| and combines them +// into a robust validation. The HistogramBasedValidation() has to be called +// prior to this call. +// For further description on how the combination is done, see commented code. // // Inputs: -// - self : Pointer to the delay estimation instance -// - far_spectrum : Pointer to the far end spectrum -// - far_q : Q-domain of far end spectrum +// - candidate_delay : The delay to validate. +// - is_instantaneous_valid : The instantaneous validation performed in +// WebRtc_ProcessBinarySpectrum(). +// - is_histogram_valid : The histogram based validation. // -static void UpdateFarHistory(DelayEstimator_t* self, - uint16_t* far_spectrum, - int far_q) { - // Get new buffer position - self->far_history_pos++; - if (self->far_history_pos >= self->history_size) { - self->far_history_pos = 0; - } - // Update Q-domain buffer - self->far_q_domains[self->far_history_pos] = far_q; - // Update far end spectrum buffer - memcpy(&(self->far_history[self->far_history_pos * self->spectrum_size]), - far_spectrum, - sizeof(uint16_t) * self->spectrum_size); +// Return value: +// - is_robust : 1 - The candidate_delay is valid according to a +// combination of the two inputs. +// : 0 - Otherwise. +static int RobustValidation(const BinaryDelayEstimator* self, + int candidate_delay, + int is_instantaneous_valid, + int is_histogram_valid) { + int is_robust = 0; + + // The final robust validation is based on the two algorithms; 1) the + // |is_instantaneous_valid| and 2) the histogram based with result stored in + // |is_histogram_valid|. + // i) Before we actually have a valid estimate (|last_delay| == -2), we say + // a candidate is valid if either algorithm states so + // (|is_instantaneous_valid| OR |is_histogram_valid|). + is_robust = (self->last_delay < 0) && + (is_instantaneous_valid || is_histogram_valid); + // ii) Otherwise, we need both algorithms to be certain + // (|is_instantaneous_valid| AND |is_histogram_valid|) + is_robust |= is_instantaneous_valid && is_histogram_valid; + // iii) With one exception, i.e., the histogram based algorithm can overrule + // the instantaneous one if |is_histogram_valid| = 1 and the histogram + // is significantly strong. 
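+  // Summarized as a truth table (editor's note), with I = instantaneous
+  // validation and H = histogram validation:
+  //   last_delay < 0 (no estimate yet): robust = I || H
+  //   otherwise:                        robust = (I && H) || (H && histogram
+  //                                     stronger than when |last_delay| was
+  //                                     accepted)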
+ is_robust |= is_histogram_valid && + (self->histogram[candidate_delay] > self->last_delay_histogram); + + return is_robust; } -int WebRtc_FreeDelayEstimator(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; +void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) { if (self == NULL) { - return -1; + return; } - if (self->mean_far_spectrum != NULL) { - free(self->mean_far_spectrum); - self->mean_far_spectrum = NULL; - } - if (self->mean_near_spectrum != NULL) { - free(self->mean_near_spectrum); - self->mean_near_spectrum = NULL; - } - if (self->mean_bit_counts != NULL) { - free(self->mean_bit_counts); - self->mean_bit_counts = NULL; - } - if (self->bit_counts != NULL) { - free(self->bit_counts); - self->bit_counts = NULL; - } - if (self->far_spectrum_32 != NULL) { - free(self->far_spectrum_32); - self->far_spectrum_32 = NULL; - } - if (self->near_spectrum_32 != NULL) { - free(self->near_spectrum_32); - self->near_spectrum_32 = NULL; - } - if (self->binary_far_history != NULL) { - free(self->binary_far_history); - self->binary_far_history = NULL; - } - if (self->far_history != NULL) { - free(self->far_history); - self->far_history = NULL; - } - if (self->far_q_domains != NULL) { - free(self->far_q_domains); - self->far_q_domains = NULL; - } - if (self->delay_histogram != NULL) { - free(self->delay_histogram); - self->delay_histogram = NULL; - } + free(self->binary_far_history); + self->binary_far_history = NULL; + + free(self->far_bit_counts); + self->far_bit_counts = NULL; free(self); - - return 0; } -int WebRtc_CreateDelayEstimator(void** handle, - int spectrum_size, - int history_size, - int enable_alignment) { - DelayEstimator_t *self = NULL; +BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend( + int history_size) { + BinaryDelayEstimatorFarend* self = NULL; - // Check if the sub band used in the delay estimation is small enough to - // fit the binary spectra in a uint32. - assert(kBandLast - kBandFirst < 32); - - if (spectrum_size < kBandLast) { - return -1; + if (history_size > 1) { + // Sanity conditions fulfilled. 
+ self = malloc(sizeof(BinaryDelayEstimatorFarend)); } - if (history_size < 0) { - return -1; - } - if ((enable_alignment != 0) && (enable_alignment != 1)) { - return -1; - } - - self = malloc(sizeof(DelayEstimator_t)); - *handle = self; if (self == NULL) { - return -1; + return NULL; } - self->mean_far_spectrum = NULL; - self->mean_near_spectrum = NULL; - self->mean_bit_counts = NULL; - self->bit_counts = NULL; - self->far_spectrum_32 = NULL; - self->near_spectrum_32 = NULL; + self->history_size = 0; self->binary_far_history = NULL; - self->far_history = NULL; - self->far_q_domains = NULL; - self->delay_histogram = NULL; - - // Allocate memory for spectrum buffers - self->mean_far_spectrum = malloc(spectrum_size * sizeof(int32_t)); - if (self->mean_far_spectrum == NULL) { - WebRtc_FreeDelayEstimator(self); + self->far_bit_counts = NULL; + if (WebRtc_AllocateFarendBufferMemory(self, history_size) == 0) { + WebRtc_FreeBinaryDelayEstimatorFarend(self); self = NULL; - return -1; } - self->mean_near_spectrum = malloc(spectrum_size * sizeof(int32_t)); - if (self->mean_near_spectrum == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->mean_bit_counts = malloc(history_size * sizeof(int32_t)); - if (self->mean_bit_counts == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->bit_counts = malloc(history_size * sizeof(int32_t)); - if (self->bit_counts == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->far_spectrum_32 = malloc(spectrum_size * sizeof(int32_t)); - if (self->far_spectrum_32 == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->near_spectrum_32 = malloc(spectrum_size * sizeof(int32_t)); - if (self->near_spectrum_32 == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - // Allocate memory for history buffers - self->binary_far_history = malloc(history_size * sizeof(uint32_t)); - if (self->binary_far_history == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - if (enable_alignment) { - self->far_history = malloc(spectrum_size * history_size * sizeof(uint16_t)); - if (self->far_history == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - self->far_q_domains = malloc(history_size * sizeof(int)); - if (self->far_q_domains == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - } - self->delay_histogram = malloc(history_size * sizeof(int)); - if (self->delay_histogram == NULL) { - WebRtc_FreeDelayEstimator(self); - self = NULL; - return -1; - } - - self->spectrum_size = spectrum_size; - self->history_size = history_size; - self->alignment_enabled = enable_alignment; - - return 0; + return self; } -int WebRtc_InitDelayEstimator(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; - - if (self == NULL) { - return -1; +int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self, + int history_size) { + assert(self != NULL); + // (Re-)Allocate memory for history buffers. 
+ self->binary_far_history = + realloc(self->binary_far_history, + history_size * sizeof(*self->binary_far_history)); + self->far_bit_counts = realloc(self->far_bit_counts, + history_size * sizeof(*self->far_bit_counts)); + if ((self->binary_far_history == NULL) || (self->far_bit_counts == NULL)) { + history_size = 0; } - // Set averaged far and near end spectra to zero - memset(self->mean_far_spectrum, 0, sizeof(int32_t) * self->spectrum_size); - memset(self->mean_near_spectrum, 0, sizeof(int32_t) * self->spectrum_size); - // Set averaged bit counts to zero - memset(self->mean_bit_counts, 0, sizeof(int32_t) * self->history_size); - memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size); - memset(self->far_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size); - memset(self->near_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size); - // Set far end histories to zero - memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size); - if (self->alignment_enabled) { - memset(self->far_history, + // Fill with zeros if we have expanded the buffers. + if (history_size > self->history_size) { + int size_diff = history_size - self->history_size; + memset(&self->binary_far_history[self->history_size], 0, - sizeof(uint16_t) * self->spectrum_size * self->history_size); - memset(self->far_q_domains, 0, sizeof(int) * self->history_size); - self->far_history_pos = self->history_size; - } - // Set delay histogram to zero - memset(self->delay_histogram, 0, sizeof(int) * self->history_size); - // Set VAD counter to zero - self->vad_counter = 0; - // Set delay memory to zero - self->last_delay = 0; - - return 0; -} - -int WebRtc_DelayEstimatorProcess(void* handle, - uint16_t* far_spectrum, - uint16_t* near_spectrum, - int spectrum_size, - int far_q, - int vad_value) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; - - const int kVadCountThreshold = 25; - const int kMaxHistogram = 600; - - int histogram_bin = 0; - int i = 0; - int max_histogram_level = 0; - int min_position = -1; - - uint32_t binary_far_spectrum = 0; - uint32_t binary_near_spectrum = 0; - - int32_t bit_counts_tmp = 0; - - if (self == NULL) { - return -1; - } - - if (spectrum_size != self->spectrum_size) { - // Data sizes don't match - return -1; - } - if (far_q > 15) { - // If |far_q| is larger than 15 we cannot guarantee no wrap around - return -1; - } - - if (self->alignment_enabled) { - // Update far end history - UpdateFarHistory(self, far_spectrum, far_q); - } // Update the far and near end means - for (i = 0; i < self->spectrum_size; i++) { - self->far_spectrum_32[i] = (int32_t) far_spectrum[i]; - MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i])); - - self->near_spectrum_32[i] = (int32_t) near_spectrum[i]; - MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i])); - } - - // Shift binary spectrum history - memmove(&(self->binary_far_history[1]), &(self->binary_far_history[0]), - (self->history_size - 1) * sizeof(uint32_t)); - - // Get binary spectra - binary_far_spectrum = BinarySpectrum(self->far_spectrum_32, - self->mean_far_spectrum); - binary_near_spectrum = BinarySpectrum(self->near_spectrum_32, - self->mean_near_spectrum); - // Insert new binary spectrum - self->binary_far_history[0] = binary_far_spectrum; - - // Compare with delayed spectra - BitCountComparison(binary_near_spectrum, - self->binary_far_history, - self->history_size, - self->bit_counts); - - // Smooth bit count curve - for (i = 0; i < self->history_size; i++) { - // Update sum - // 
|bit_counts| is constrained to [0, 32], meaning we can smooth with a - // factor up to 2^26. We use Q9. - bit_counts_tmp = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9); // Q9 - MeanEstimator(bit_counts_tmp, 9, &(self->mean_bit_counts[i])); - } - - // Find minimum position of bit count curve - min_position = (int) WebRtcSpl_MinIndexW32(self->mean_bit_counts, - (int16_t) self->history_size); - - // If the far end has been active sufficiently long, begin accumulating a - // histogram of the minimum positions. Search for the maximum bin to - // determine the delay. - if (vad_value == 1) { - if (self->vad_counter >= kVadCountThreshold) { - // Increment the histogram at the current minimum position. - if (self->delay_histogram[min_position] < kMaxHistogram) { - self->delay_histogram[min_position] += 3; - } - - self->last_delay = 0; - for (i = 0; i < self->history_size; i++) { - histogram_bin = self->delay_histogram[i]; - - // Decrement the histogram bin. - if (histogram_bin > 0) { - histogram_bin--; - self->delay_histogram[i] = histogram_bin; - // Select the histogram index corresponding to the maximum bin as the - // delay. - if (histogram_bin > max_histogram_level) { - max_histogram_level = histogram_bin; - self->last_delay = i; - } - } - } - } else { - self->vad_counter++; - } - } else { - self->vad_counter = 0; - } - - return self->last_delay; -} - -const uint16_t* WebRtc_AlignedFarend(void* handle, - int far_spectrum_size, - int* far_q) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; - int buffer_position = 0; - - if (self == NULL) { - return NULL; - } - if (far_spectrum_size != self->spectrum_size) { - return NULL; - } - if (self->alignment_enabled == 0) { - return NULL; - } - - // Get buffer position - buffer_position = self->far_history_pos - self->last_delay; - if (buffer_position < 0) { - buffer_position += self->history_size; - } - // Get Q-domain - *far_q = self->far_q_domains[buffer_position]; - // Return far end spectrum - return (self->far_history + (buffer_position * far_spectrum_size)); - -} - -int WebRtc_last_delay(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; - - if (self == NULL) { - return -1; - } - - return self->last_delay; -} - -int WebRtc_history_size(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; - - if (self == NULL) { - return -1; + sizeof(*self->binary_far_history) * size_diff); + memset(&self->far_bit_counts[self->history_size], + 0, + sizeof(*self->far_bit_counts) * size_diff); } + self->history_size = history_size; return self->history_size; } -int WebRtc_spectrum_size(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; - - if (self == NULL) { - return -1; - } - - return self->spectrum_size; +void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self) { + assert(self != NULL); + memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size); + memset(self->far_bit_counts, 0, sizeof(int) * self->history_size); } -int WebRtc_is_alignment_enabled(void* handle) { - DelayEstimator_t* self = (DelayEstimator_t*) handle; +void WebRtc_SoftResetBinaryDelayEstimatorFarend( + BinaryDelayEstimatorFarend* self, int delay_shift) { + int abs_shift = abs(delay_shift); + int shift_size = 0; + int dest_index = 0; + int src_index = 0; + int padding_index = 0; - if (self == NULL) { - return -1; + assert(self != NULL); + shift_size = self->history_size - abs_shift; + assert(shift_size > 0); + if (delay_shift == 0) { + return; + } else if (delay_shift > 0) { + dest_index = 
abs_shift; + } else if (delay_shift < 0) { + src_index = abs_shift; + padding_index = shift_size; } - return self->alignment_enabled; + // Shift and zero pad buffers. + memmove(&self->binary_far_history[dest_index], + &self->binary_far_history[src_index], + sizeof(*self->binary_far_history) * shift_size); + memset(&self->binary_far_history[padding_index], 0, + sizeof(*self->binary_far_history) * abs_shift); + memmove(&self->far_bit_counts[dest_index], + &self->far_bit_counts[src_index], + sizeof(*self->far_bit_counts) * shift_size); + memset(&self->far_bit_counts[padding_index], 0, + sizeof(*self->far_bit_counts) * abs_shift); +} + +void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* handle, + uint32_t binary_far_spectrum) { + assert(handle != NULL); + // Shift binary spectrum history and insert current |binary_far_spectrum|. + memmove(&(handle->binary_far_history[1]), &(handle->binary_far_history[0]), + (handle->history_size - 1) * sizeof(uint32_t)); + handle->binary_far_history[0] = binary_far_spectrum; + + // Shift history of far-end binary spectrum bit counts and insert bit count + // of current |binary_far_spectrum|. + memmove(&(handle->far_bit_counts[1]), &(handle->far_bit_counts[0]), + (handle->history_size - 1) * sizeof(int)); + handle->far_bit_counts[0] = BitCount(binary_far_spectrum); +} + +void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self) { + + if (self == NULL) { + return; + } + + free(self->mean_bit_counts); + self->mean_bit_counts = NULL; + + free(self->bit_counts); + self->bit_counts = NULL; + + free(self->binary_near_history); + self->binary_near_history = NULL; + + free(self->histogram); + self->histogram = NULL; + + // BinaryDelayEstimator does not have ownership of |farend|, hence we do not + // free the memory here. That should be handled separately by the user. + self->farend = NULL; + + free(self); +} + +BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( + BinaryDelayEstimatorFarend* farend, int max_lookahead) { + BinaryDelayEstimator* self = NULL; + + if ((farend != NULL) && (max_lookahead >= 0)) { + // Sanity conditions fulfilled. + self = malloc(sizeof(BinaryDelayEstimator)); + } + if (self == NULL) { + return NULL; + } + + self->farend = farend; + self->near_history_size = max_lookahead + 1; + self->history_size = 0; + self->robust_validation_enabled = 0; // Disabled by default. + self->allowed_offset = 0; + + self->lookahead = max_lookahead; + + // Allocate memory for spectrum and history buffers. + self->mean_bit_counts = NULL; + self->bit_counts = NULL; + self->histogram = NULL; + self->binary_near_history = + malloc((max_lookahead + 1) * sizeof(*self->binary_near_history)); + if (self->binary_near_history == NULL || + WebRtc_AllocateHistoryBufferMemory(self, farend->history_size) == 0) { + WebRtc_FreeBinaryDelayEstimator(self); + self = NULL; + } + + return self; +} + +int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self, + int history_size) { + BinaryDelayEstimatorFarend* far = self->farend; + // (Re-)Allocate memory for spectrum and history buffers. + if (history_size != far->history_size) { + // Only update far-end buffers if we need. + history_size = WebRtc_AllocateFarendBufferMemory(far, history_size); + } + // The extra array element in |mean_bit_counts| and |histogram| is a dummy + // element only used while |last_delay| == -2, i.e., before we have a valid + // estimate. 
+ self->mean_bit_counts = + realloc(self->mean_bit_counts, + (history_size + 1) * sizeof(*self->mean_bit_counts)); + self->bit_counts = + realloc(self->bit_counts, history_size * sizeof(*self->bit_counts)); + self->histogram = + realloc(self->histogram, (history_size + 1) * sizeof(*self->histogram)); + + if ((self->mean_bit_counts == NULL) || + (self->bit_counts == NULL) || + (self->histogram == NULL)) { + history_size = 0; + } + // Fill with zeros if we have expanded the buffers. + if (history_size > self->history_size) { + int size_diff = history_size - self->history_size; + memset(&self->mean_bit_counts[self->history_size], + 0, + sizeof(*self->mean_bit_counts) * size_diff); + memset(&self->bit_counts[self->history_size], + 0, + sizeof(*self->bit_counts) * size_diff); + memset(&self->histogram[self->history_size], + 0, + sizeof(*self->histogram) * size_diff); + } + self->history_size = history_size; + + return self->history_size; +} + +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self) { + int i = 0; + assert(self != NULL); + + memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size); + memset(self->binary_near_history, + 0, + sizeof(uint32_t) * self->near_history_size); + for (i = 0; i <= self->history_size; ++i) { + self->mean_bit_counts[i] = (20 << 9); // 20 in Q9. + self->histogram[i] = 0.f; + } + self->minimum_probability = kMaxBitCountsQ9; // 32 in Q9. + self->last_delay_probability = (int) kMaxBitCountsQ9; // 32 in Q9. + + // Default return value if we're unable to estimate. -1 is used for errors. + self->last_delay = -2; + + self->last_candidate_delay = -2; + self->compare_delay = self->history_size; + self->candidate_hits = 0; + self->last_delay_histogram = 0.f; +} + +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift) { + int lookahead = 0; + assert(self != NULL); + lookahead = self->lookahead; + self->lookahead -= delay_shift; + if (self->lookahead < 0) { + self->lookahead = 0; + } + if (self->lookahead > self->near_history_size - 1) { + self->lookahead = self->near_history_size - 1; + } + return lookahead - self->lookahead; +} + +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum) { + int i = 0; + int candidate_delay = -1; + int valid_candidate = 0; + + int32_t value_best_candidate = kMaxBitCountsQ9; + int32_t value_worst_candidate = 0; + int32_t valley_depth = 0; + + assert(self != NULL); + if (self->farend->history_size != self->history_size) { + // Non matching history sizes. + return -1; + } + if (self->near_history_size > 1) { + // If we apply lookahead, shift near-end binary spectrum history. Insert + // current |binary_near_spectrum| and pull out the delayed one. + memmove(&(self->binary_near_history[1]), &(self->binary_near_history[0]), + (self->near_history_size - 1) * sizeof(uint32_t)); + self->binary_near_history[0] = binary_near_spectrum; + binary_near_spectrum = self->binary_near_history[self->lookahead]; + } + + // Compare with delayed spectra and store the |bit_counts| for each delay. + BitCountComparison(binary_near_spectrum, self->farend->binary_far_history, + self->history_size, self->bit_counts); + + // Update |mean_bit_counts|, which is the smoothed version of |bit_counts|. + for (i = 0; i < self->history_size; i++) { + // |bit_counts| is constrained to [0, 32], meaning we can smooth with a + // factor up to 2^26. We use Q9. + int32_t bit_count = (self->bit_counts[i] << 9); // Q9. 
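+    // Example: with |mean_bit_counts[i]| == (20 << 9) and |bit_count| ==
+    // (10 << 9), a smoothing factor of e.g. five right shifts updates the
+    // mean by -(5120 >> 5) == -160, i.e. from 10240 to 10080 (19.69 in Q9).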
+ + // Update |mean_bit_counts| only when far-end signal has something to + // contribute. If |far_bit_counts| is zero the far-end signal is weak and + // we likely have a poor echo condition, hence don't update. + if (self->farend->far_bit_counts[i] > 0) { + // Make number of right shifts piecewise linear w.r.t. |far_bit_counts|. + int shifts = kShiftsAtZero; + shifts -= (kShiftsLinearSlope * self->farend->far_bit_counts[i]) >> 4; + WebRtc_MeanEstimatorFix(bit_count, shifts, &(self->mean_bit_counts[i])); + } + } + + // Find |candidate_delay|, |value_best_candidate| and |value_worst_candidate| + // of |mean_bit_counts|. + for (i = 0; i < self->history_size; i++) { + if (self->mean_bit_counts[i] < value_best_candidate) { + value_best_candidate = self->mean_bit_counts[i]; + candidate_delay = i; + } + if (self->mean_bit_counts[i] > value_worst_candidate) { + value_worst_candidate = self->mean_bit_counts[i]; + } + } + valley_depth = value_worst_candidate - value_best_candidate; + + // The |value_best_candidate| is a good indicator on the probability of + // |candidate_delay| being an accurate delay (a small |value_best_candidate| + // means a good binary match). In the following sections we make a decision + // whether to update |last_delay| or not. + // 1) If the difference bit counts between the best and the worst delay + // candidates is too small we consider the situation to be unreliable and + // don't update |last_delay|. + // 2) If the situation is reliable we update |last_delay| if the value of the + // best candidate delay has a value less than + // i) an adaptive threshold |minimum_probability|, or + // ii) this corresponding value |last_delay_probability|, but updated at + // this time instant. + + // Update |minimum_probability|. + if ((self->minimum_probability > kProbabilityLowerLimit) && + (valley_depth > kProbabilityMinSpread)) { + // The "hard" threshold can't be lower than 17 (in Q9). + // The valley in the curve also has to be distinct, i.e., the + // difference between |value_worst_candidate| and |value_best_candidate| has + // to be large enough. + int32_t threshold = value_best_candidate + kProbabilityOffset; + if (threshold < kProbabilityLowerLimit) { + threshold = kProbabilityLowerLimit; + } + if (self->minimum_probability > threshold) { + self->minimum_probability = threshold; + } + } + // Update |last_delay_probability|. + // We use a Markov type model, i.e., a slowly increasing level over time. + self->last_delay_probability++; + // Validate |candidate_delay|. We have a reliable instantaneous delay + // estimate if + // 1) The valley is distinct enough (|valley_depth| > |kProbabilityOffset|) + // and + // 2) The depth of the valley is deep enough + // (|value_best_candidate| < |minimum_probability|) + // and deeper than the best estimate so far + // (|value_best_candidate| < |last_delay_probability|) + valid_candidate = ((valley_depth > kProbabilityOffset) && + ((value_best_candidate < self->minimum_probability) || + (value_best_candidate < self->last_delay_probability))); + + UpdateRobustValidationStatistics(self, candidate_delay, valley_depth, + value_best_candidate); + if (self->robust_validation_enabled) { + int is_histogram_valid = HistogramBasedValidation(self, candidate_delay); + valid_candidate = RobustValidation(self, candidate_delay, valid_candidate, + is_histogram_valid); + + } + if (valid_candidate) { + if (candidate_delay != self->last_delay) { + self->last_delay_histogram = + (self->histogram[candidate_delay] > kLastHistogramMax ? 
+ kLastHistogramMax : self->histogram[candidate_delay]); + // Adjust the histogram if we made a change to |last_delay|, though it was + // not the most likely one according to the histogram. + if (self->histogram[candidate_delay] < + self->histogram[self->compare_delay]) { + self->histogram[self->compare_delay] = self->histogram[candidate_delay]; + } + } + self->last_delay = candidate_delay; + if (value_best_candidate < self->last_delay_probability) { + self->last_delay_probability = value_best_candidate; + } + self->compare_delay = self->last_delay; + } + + return self->last_delay; +} + +int WebRtc_binary_last_delay(BinaryDelayEstimator* self) { + assert(self != NULL); + return self->last_delay; +} + +float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self) { + float quality = 0; + assert(self != NULL); + + if (self->robust_validation_enabled) { + // Simply a linear function of the histogram height at delay estimate. + quality = self->histogram[self->compare_delay] / kHistogramMax; + } else { + // Note that |last_delay_probability| states how deep the minimum of the + // cost function is, so it is rather an error probability. + quality = (float) (kMaxBitCountsQ9 - self->last_delay_probability) / + kMaxBitCountsQ9; + if (quality < 0) { + quality = 0; + } + } + return quality; +} + +void WebRtc_MeanEstimatorFix(int32_t new_value, + int factor, + int32_t* mean_value) { + int32_t diff = new_value - *mean_value; + + // mean_new = mean_value + ((new_value - mean_value) >> factor); + if (diff < 0) { + diff = -((-diff) >> factor); + } else { + diff = (diff >> factor); + } + *mean_value += diff; } diff --git a/webrtc/modules/audio_processing/utility/delay_estimator.h b/webrtc/modules/audio_processing/utility/delay_estimator.h index 9b77b55..65c3f03 100644 --- a/webrtc/modules/audio_processing/utility/delay_estimator.h +++ b/webrtc/modules/audio_processing/utility/delay_estimator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source @@ -8,147 +8,244 @@ * be found in the AUTHORS file in the root of the source tree. */ -// Performs delay estimation on a block by block basis +// Performs delay estimation on binary converted spectra. // The return value is 0 - OK and -1 - Error, unless otherwise stated. #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ #define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_ -#include "typedefs.h" +#include "webrtc/typedefs.h" -// Releases the memory allocated by WebRtc_CreateDelayEstimator(...) +static const int32_t kMaxBitCountsQ9 = (32 << 9); // 32 matching bits in Q9. + +typedef struct { + // Pointer to bit counts. + int* far_bit_counts; + // Binary history variables. + uint32_t* binary_far_history; + int history_size; +} BinaryDelayEstimatorFarend; + +typedef struct { + // Pointer to bit counts. + int32_t* mean_bit_counts; + // Array only used locally in ProcessBinarySpectrum() but whose size is + // determined at run-time. + int32_t* bit_counts; + + // Binary history variables. + uint32_t* binary_near_history; + int near_history_size; + int history_size; + + // Delay estimation variables. + int32_t minimum_probability; + int last_delay_probability; + + // Delay memory. 
+ int last_delay; + + // Robust validation + int robust_validation_enabled; + int allowed_offset; + int last_candidate_delay; + int compare_delay; + int candidate_hits; + float* histogram; + float last_delay_histogram; + + // For dynamically changing the lookahead when using SoftReset...(). + int lookahead; + + // Far-end binary spectrum history buffer etc. + BinaryDelayEstimatorFarend* farend; +} BinaryDelayEstimator; + +// Releases the memory allocated by +// WebRtc_CreateBinaryDelayEstimatorFarend(...). // Input: -// - handle : Pointer to the delay estimation instance +// - self : Pointer to the binary delay estimation far-end +// instance which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). // -int WebRtc_FreeDelayEstimator(void* handle); +void WebRtc_FreeBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); -// Allocates the memory needed by the delay estimation. The memory needs to be -// initialized separately using the WebRtc_InitDelayEstimator(...) -// function. +// Allocates the memory needed by the far-end part of the binary delay +// estimation. The memory needs to be initialized separately through +// WebRtc_InitBinaryDelayEstimatorFarend(...). // // Inputs: -// - handle : Instance that should be created -// - spectrum_size : Size of the spectrum used both in far end and -// near end. Used to allocate memory for spectrum -// specific buffers. -// - history_size : Size of the far end history used to estimate the -// delay from. Used to allocate memory for history -// specific buffers. -// - enable_alignment : With this mode set to 1, a far end history is -// created, so that the user can retrieve aligned -// far end spectra using -// WebRtc_AlignedFarend(...). Otherwise, only delay -// values are calculated. +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - BinaryDelayEstimatorFarend* +// : Created |handle|. If the memory can't be allocated +// or if any of the input parameters are invalid NULL +// is returned. +// +BinaryDelayEstimatorFarend* WebRtc_CreateBinaryDelayEstimatorFarend( + int history_size); + +// Re-allocates the buffers. +// +// Inputs: +// - self : Pointer to the binary estimation far-end instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimatorFarend(). +// - history_size : Size of the far-end binary spectrum history. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateFarendBufferMemory(BinaryDelayEstimatorFarend* self, + int history_size); + +// Initializes the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - self : Pointer to the delay estimation far-end instance. // // Output: -// - handle : Created instance +// - self : Initialized far-end instance. // -int WebRtc_CreateDelayEstimator(void** handle, - int spectrum_size, - int history_size, - int enable_alignment); +void WebRtc_InitBinaryDelayEstimatorFarend(BinaryDelayEstimatorFarend* self); + +// Soft resets the delay estimation far-end instance created with +// WebRtc_CreateBinaryDelayEstimatorFarend(...). +// +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +void WebRtc_SoftResetBinaryDelayEstimatorFarend( + BinaryDelayEstimatorFarend* self, int delay_shift); + +// Adds the binary far-end spectrum to the internal far-end history buffer. This +// spectrum is used as reference when calculating the delay using +// WebRtc_ProcessBinarySpectrum(). 
+// +// Inputs: +// - self : Pointer to the delay estimation far-end +// instance. +// - binary_far_spectrum : Far-end binary spectrum. +// +// Output: +// - self : Updated far-end instance. +// +void WebRtc_AddBinaryFarSpectrum(BinaryDelayEstimatorFarend* self, + uint32_t binary_far_spectrum); + +// Releases the memory allocated by WebRtc_CreateBinaryDelayEstimator(...). +// +// Note that BinaryDelayEstimator utilizes BinaryDelayEstimatorFarend, but does +// not take ownership of it, hence the BinaryDelayEstimator has to be torn down +// before the far-end. +// +// Input: +// - self : Pointer to the binary delay estimation instance +// which is the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// +void WebRtc_FreeBinaryDelayEstimator(BinaryDelayEstimator* self); + +// Allocates the memory needed by the binary delay estimation. The memory needs +// to be initialized separately through WebRtc_InitBinaryDelayEstimator(...). +// +// See WebRtc_CreateDelayEstimator(..) in delay_estimator_wrapper.c for detailed +// description. +BinaryDelayEstimator* WebRtc_CreateBinaryDelayEstimator( + BinaryDelayEstimatorFarend* farend, int max_lookahead); + +// Re-allocates |history_size| dependent buffers. The far-end buffers will be +// updated at the same time if needed. +// +// Input: +// - self : Pointer to the binary estimation instance which is +// the return value of +// WebRtc_CreateBinaryDelayEstimator(). +// - history_size : Size of the history buffers. +// +// Return value: +// - history_size : The history size allocated. +int WebRtc_AllocateHistoryBufferMemory(BinaryDelayEstimator* self, + int history_size); // Initializes the delay estimation instance created with -// WebRtc_CreateDelayEstimator(...) +// WebRtc_CreateBinaryDelayEstimator(...). +// // Input: -// - handle : Pointer to the delay estimation instance +// - self : Pointer to the delay estimation instance. // // Output: -// - handle : Initialized instance +// - self : Initialized instance. // -int WebRtc_InitDelayEstimator(void* handle); +void WebRtc_InitBinaryDelayEstimator(BinaryDelayEstimator* self); -// Estimates and returns the delay between the far end and near end blocks. -// Inputs: -// - handle : Pointer to the delay estimation instance -// - far_spectrum : Pointer to the far end spectrum data -// - near_spectrum : Pointer to the near end spectrum data of the current -// block -// - spectrum_size : The size of the data arrays (same for both far and -// near end) -// - far_q : The Q-domain of the far end data -// - vad_value : The VAD decision of the current block +// Soft resets the delay estimation instance created with +// WebRtc_CreateBinaryDelayEstimator(...). // -// Output: -// - handle : Updated instance +// Input: +// - delay_shift : The amount of blocks to shift history buffers. // // Return value: -// - delay : >= 0 - Calculated delay value -// -1 - Error +// - actual_shifts : The actual number of shifts performed. // -int WebRtc_DelayEstimatorProcess(void* handle, - uint16_t* far_spectrum, - uint16_t* near_spectrum, - int spectrum_size, - int far_q, - int vad_value); +int WebRtc_SoftResetBinaryDelayEstimator(BinaryDelayEstimator* self, + int delay_shift); -// Returns a pointer to the far end spectrum aligned to current near end -// spectrum. The function WebRtc_DelayEstimatorProcess(...) should have been -// called before WebRtc_AlignedFarend(...). Otherwise, you get the pointer to -// the previous frame. The memory is only valid until the next call of -// WebRtc_DelayEstimatorProcess(...). 
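As a reading aid, here is a minimal usage sketch of the two-handle binary API this patch introduces. It is illustrative only: ExampleUsage(), kHistorySize and the bit-pattern inputs are made up, and in practice the 32-bit binary spectra come from the BinarySpectrumFix()/BinarySpectrumFloat() helpers in delay_estimator_wrapper.c further down.

  #include "webrtc/modules/audio_processing/utility/delay_estimator.h"

  enum { kHistorySize = 100 };  // Arbitrary example size.

  void ExampleUsage(const uint32_t* far_bits, const uint32_t* near_bits,
                    int num_blocks) {
    int i;
    int delay;
    // The far-end instance owns the reference history; the estimator only
    // borrows it.
    BinaryDelayEstimatorFarend* farend =
        WebRtc_CreateBinaryDelayEstimatorFarend(kHistorySize);
    BinaryDelayEstimator* estimator =
        WebRtc_CreateBinaryDelayEstimator(farend, 0);  // No lookahead.
    if (farend == NULL || estimator == NULL) {
      WebRtc_FreeBinaryDelayEstimator(estimator);
      WebRtc_FreeBinaryDelayEstimatorFarend(farend);
      return;
    }
    WebRtc_InitBinaryDelayEstimatorFarend(farend);
    WebRtc_InitBinaryDelayEstimator(estimator);
    for (i = 0; i < num_blocks; i++) {
      // Far end first, so the reference history is up to date when the
      // corresponding near-end block is processed.
      WebRtc_AddBinaryFarSpectrum(farend, far_bits[i]);
      delay = WebRtc_ProcessBinarySpectrum(estimator, near_bits[i]);
      (void) delay;  // -2 until enough data has been seen.
    }
    // The estimator does not own |farend|; tear it down first.
    WebRtc_FreeBinaryDelayEstimator(estimator);
    WebRtc_FreeBinaryDelayEstimatorFarend(farend);
  }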
+// Estimates and returns the delay between the binary far-end and binary near- +// end spectra. It is assumed the binary far-end spectrum has been added using +// WebRtc_AddBinaryFarSpectrum() prior to this call. The value will be offset by +// the lookahead (i.e. the lookahead should be subtracted from the returned +// value). // // Inputs: -// - handle : Pointer to the delay estimation instance -// - far_spectrum_size : Size of far_spectrum allocated by the caller +// - self : Pointer to the delay estimation instance. +// - binary_near_spectrum : Near-end binary spectrum of the current block. // // Output: -// - far_q : The Q-domain of the aligned far end spectrum +// - self : Updated instance. // // Return value: -// - far_spectrum : Pointer to the aligned far end spectrum -// NULL - Error +// - delay : >= 0 - Calculated delay value. +// -2 - Insufficient data for estimation. // -const uint16_t* WebRtc_AlignedFarend(void* handle, - int far_spectrum_size, - int* far_q); +int WebRtc_ProcessBinarySpectrum(BinaryDelayEstimator* self, + uint32_t binary_near_spectrum); // Returns the last calculated delay updated by the function -// WebRtc_DelayEstimatorProcess(...) +// WebRtc_ProcessBinarySpectrum(...). // // Input: -// - handle : Pointer to the delay estimation instance +// - self : Pointer to the delay estimation instance. // // Return value: -// - delay : >= 0 - Last calculated delay value -// -1 - Error +// - delay : >= 0 - Last calculated delay value +// -2 - Insufficient data for estimation. // -int WebRtc_last_delay(void* handle); +int WebRtc_binary_last_delay(BinaryDelayEstimator* self); -// Returns the history size used in the far end buffers to calculate the delay -// over. -// -// Input: -// - handle : Pointer to the delay estimation instance +// Returns the estimation quality of the last calculated delay updated by the +// function WebRtc_ProcessBinarySpectrum(...). The estimation quality is a value +// in the interval [0, 1]. The higher the value, the better the quality. // // Return value: -// - history_size : > 0 - Far end history size -// -1 - Error -// -int WebRtc_history_size(void* handle); +// - delay_quality : >= 0 - Estimation quality of last calculated +// delay value. +float WebRtc_binary_last_delay_quality(BinaryDelayEstimator* self); -// Returns the fixed spectrum size used in the algorithm. +// Updates the |mean_value| recursively with a step size of 2^-|factor|. This +// function is used internally in the Binary Delay Estimator as well as the +// Fixed point wrapper. // -// Input: -// - handle : Pointer to the delay estimation instance +// Inputs: +// - new_value : The new value the mean should be updated with. +// - factor : The step size, in number of right shifts. // -// Return value: -// - spectrum_size : > 0 - Spectrum size -// -1 - Error +// Input/Output: +// - mean_value : Pointer to the mean value. // -int WebRtc_spectrum_size(void* handle); - -// Returns 1 if the far end alignment is enabled and 0 otherwise. 
-//
-// Input:
-//      - handle            : Pointer to the delay estimation instance
-//
-// Return value:
-//      - alignment_enabled : 1 - Enabled
-//                            0 - Disabled
-//                           -1 - Error
-//
-int WebRtc_is_alignment_enabled(void* handle);
+void WebRtc_MeanEstimatorFix(int32_t new_value,
+                             int factor,
+                             int32_t* mean_value);
 
 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
diff --git a/webrtc/modules/audio_processing/utility/delay_estimator_float.c b/webrtc/modules/audio_processing/utility/delay_estimator_float.c
deleted file mode 100644
index 5633521..0000000
--- a/webrtc/modules/audio_processing/utility/delay_estimator_float.c
+++ /dev/null
@@ -1,288 +0,0 @@
-/*
- * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
- *
- * Use of this source code is governed by a BSD-style license
- * that can be found in the LICENSE file in the root of the source
- * tree. An additional intellectual property rights grant can be found
- * in the file PATENTS. All contributing project authors may
- * be found in the AUTHORS file in the root of the source tree.
- */
-
-#include "delay_estimator_float.h"
-
-#include <assert.h>
-#include <math.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include "delay_estimator.h"
-#include "signal_processing_library.h"
-
-typedef struct {
-  // Fixed point spectra
-  uint16_t* far_spectrum_u16;
-  uint16_t* near_spectrum_u16;
-
-  // Far end history variables
-  float* far_history;
-  int far_history_pos;
-
-  // Fixed point delay estimator
-  void* fixed_handle;
-
-} DelayEstimatorFloat_t;
-
-// Moves the pointer to the next buffer entry and inserts new far end spectrum.
-// Only used when alignment is enabled.
-//
-// Inputs:
-//      - self          : Pointer to the delay estimation instance
-//      - far_spectrum  : Pointer to the far end spectrum
-//
-static void UpdateFarHistory(DelayEstimatorFloat_t* self, float* far_spectrum) {
-  int spectrum_size = WebRtc_spectrum_size(self->fixed_handle);
-  // Get new buffer position
-  self->far_history_pos++;
-  if (self->far_history_pos >= WebRtc_history_size(self->fixed_handle)) {
-    self->far_history_pos = 0;
-  }
-  // Update far end spectrum buffer
-  memcpy(&(self->far_history[self->far_history_pos * spectrum_size]),
-         far_spectrum,
-         sizeof(float) * spectrum_size);
-}
-
-int WebRtc_FreeDelayEstimatorFloat(void* handle) {
-  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
-
-  if (self == NULL) {
-    return -1;
-  }
-
-  if (self->far_history != NULL) {
-    free(self->far_history);
-    self->far_history = NULL;
-  }
-  if (self->far_spectrum_u16 != NULL) {
-    free(self->far_spectrum_u16);
-    self->far_spectrum_u16 = NULL;
-  }
-  if (self->near_spectrum_u16 != NULL) {
-    free(self->near_spectrum_u16);
-    self->near_spectrum_u16 = NULL;
-  }
-
-  WebRtc_FreeDelayEstimator(self->fixed_handle);
-  free(self);
-
-  return 0;
-}
-
-int WebRtc_CreateDelayEstimatorFloat(void** handle,
-                                     int spectrum_size,
-                                     int history_size,
-                                     int enable_alignment) {
-  DelayEstimatorFloat_t *self = NULL;
-  if ((enable_alignment != 0) && (enable_alignment != 1)) {
-    return -1;
-  }
-
-  self = malloc(sizeof(DelayEstimatorFloat_t));
-  *handle = self;
-  if (self == NULL) {
-    return -1;
-  }
-
-  self->far_history = NULL;
-  self->far_spectrum_u16 = NULL;
-  self->near_spectrum_u16 = NULL;
-
-  // Create fixed point core delay estimator
-  if (WebRtc_CreateDelayEstimator(&self->fixed_handle,
-                                  spectrum_size,
-                                  history_size,
-                                  enable_alignment) != 0) {
-    WebRtc_FreeDelayEstimatorFloat(self);
-    self = NULL;
-    return -1;
-  }
-
-  // Allocate memory for far history buffer
-  if (enable_alignment) {
-    self->far_history = 
malloc(spectrum_size * history_size * sizeof(float)); - if (self->far_history == NULL) { - WebRtc_FreeDelayEstimatorFloat(self); - self = NULL; - return -1; - } - } - // Allocate memory for fixed point spectra - self->far_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t)); - if (self->far_spectrum_u16 == NULL) { - WebRtc_FreeDelayEstimatorFloat(self); - self = NULL; - return -1; - } - self->near_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t)); - if (self->near_spectrum_u16 == NULL) { - WebRtc_FreeDelayEstimatorFloat(self); - self = NULL; - return -1; - } - - return 0; -} - -int WebRtc_InitDelayEstimatorFloat(void* handle) { - DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; - - if (self == NULL) { - return -1; - } - - if (WebRtc_InitDelayEstimator(self->fixed_handle) != 0) { - return -1; - } - - { - int history_size = WebRtc_history_size(self->fixed_handle); - int spectrum_size = WebRtc_spectrum_size(self->fixed_handle); - if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) { - // Set far end histories to zero - memset(self->far_history, - 0, - sizeof(float) * spectrum_size * history_size); - self->far_history_pos = history_size; - } - // Set fixed point spectra to zero - memset(self->far_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size); - memset(self->near_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size); - } - - return 0; -} - -int WebRtc_DelayEstimatorProcessFloat(void* handle, - float* far_spectrum, - float* near_spectrum, - int spectrum_size, - int vad_value) { - DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; - - const float kFftSize = (float) (2 * (spectrum_size - 1)); - const float kLogOf2Inverse = 1.4426950f; - float max_value = 0.0f; - float scaling = 0; - - int far_q = 0; - int scaling_log = 0; - int i = 0; - - if (self == NULL) { - return -1; - } - if (far_spectrum == NULL) { - // Empty far end spectrum - return -1; - } - if (near_spectrum == NULL) { - // Empty near end spectrum - return -1; - } - if (spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) { - // Data sizes don't match - return -1; - } - - // Convert floating point spectrum to fixed point - // 1) Find largest value - // 2) Scale largest value to fit in Word16 - for (i = 0; i < spectrum_size; ++i) { - if (near_spectrum[i] > max_value) { - max_value = near_spectrum[i]; - } - } - // Find the largest possible scaling that is a multiple of two. - // With largest we mean to fit in a Word16. - // TODO(bjornv): I've taken the size of FFT into account, since there is a - // different scaling in float vs fixed point FFTs. I'm not completely sure - // this is necessary. - scaling_log = 14 - (int) (log(max_value / kFftSize + 1) * kLogOf2Inverse); - scaling = (float) (1 << scaling_log) / kFftSize; - for (i = 0; i < spectrum_size; ++i) { - self->near_spectrum_u16[i] = (uint16_t) (near_spectrum[i] * scaling); - } - - // Same for far end - max_value = 0.0f; - for (i = 0; i < spectrum_size; ++i) { - if (far_spectrum[i] > max_value) { - max_value = far_spectrum[i]; - } - } - // Find the largest possible scaling that is a multiple of two. - // With largest we mean to fit in a Word16. - scaling_log = 14 - (int) (log(max_value / kFftSize + 1) * kLogOf2Inverse); - scaling = (float) (1 << scaling_log) / kFftSize; - for (i = 0; i < spectrum_size; ++i) { - self->far_spectrum_u16[i] = (uint16_t) (far_spectrum[i] * scaling); - } - far_q = (int) scaling_log; - assert(far_q < 16); // Catch too large scaling, which should never be able to - // occur. 
- - if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) { - // Update far end history - UpdateFarHistory(self, far_spectrum); - } - - return WebRtc_DelayEstimatorProcess(self->fixed_handle, - self->far_spectrum_u16, - self->near_spectrum_u16, - spectrum_size, - far_q, - vad_value); -} - -const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size) { - DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; - int buffer_pos = 0; - - if (self == NULL) { - return NULL; - } - if (far_spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) { - return NULL; - } - if (WebRtc_is_alignment_enabled(self->fixed_handle) != 1) { - return NULL; - } - - // Get buffer position - buffer_pos = self->far_history_pos - WebRtc_last_delay(self->fixed_handle); - if (buffer_pos < 0) { - buffer_pos += WebRtc_history_size(self->fixed_handle); - } - // Return pointer to far end spectrum - return (self->far_history + (buffer_pos * far_spectrum_size)); -} - -int WebRtc_last_delay_float(void* handle) { - DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; - - if (self == NULL) { - return -1; - } - - return WebRtc_last_delay(self->fixed_handle); -} - -int WebRtc_is_alignment_enabled_float(void* handle) { - DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle; - - if (self == NULL) { - return -1; - } - - return WebRtc_is_alignment_enabled(self->fixed_handle); -} diff --git a/webrtc/modules/audio_processing/utility/delay_estimator_float.h b/webrtc/modules/audio_processing/utility/delay_estimator_float.h deleted file mode 100644 index 3089965..0000000 --- a/webrtc/modules/audio_processing/utility/delay_estimator_float.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -// Performs delay estimation on a block by block basis -// The return value is 0 - OK and -1 - Error, unless otherwise stated. - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_ - -// Releases the memory allocated by WebRtc_CreateDelayEstimatorFloat(...) -// Input: -// - handle : Pointer to the delay estimation instance -// -int WebRtc_FreeDelayEstimatorFloat(void* handle); - -// Allocates the memory needed by the delay estimation. The memory needs to be -// initialized separately using the WebRtc_InitDelayEstimatorFloat(...) -// function. -// -// Inputs: -// - handle : Instance that should be created -// - spectrum_size : Size of the spectrum used both in far end and -// near end. Used to allocate memory for spectrum -// specific buffers. -// - history_size : Size of the far end history used to estimate the -// delay from. Used to allocate memory for history -// specific buffers. -// - enable_alignment : With this mode set to 1, a far end history is -// created, so that the user can retrieve aligned -// far end spectra using -// WebRtc_AlignedFarendFloat(...). Otherwise, only -// delay values are calculated. 
-// -// Output: -// - handle : Created instance -// -int WebRtc_CreateDelayEstimatorFloat(void** handle, - int spectrum_size, - int history_size, - int enable_alignment); - -// Initializes the delay estimation instance created with -// WebRtc_CreateDelayEstimatorFloat(...) -// Input: -// - handle : Pointer to the delay estimation instance -// -// Output: -// - handle : Initialized instance -// -int WebRtc_InitDelayEstimatorFloat(void* handle); - -// Estimates and returns the delay between the far end and near end blocks. -// Inputs: -// - handle : Pointer to the delay estimation instance -// - far_spectrum : Pointer to the far end spectrum data -// - near_spectrum : Pointer to the near end spectrum data of the current -// block -// - spectrum_size : The size of the data arrays (same for both far and -// near end) -// - far_q : The Q-domain of the far end data -// - vad_value : The VAD decision of the current block -// -// Output: -// - handle : Updated instance -// -// Return value: -// - delay : >= 0 - Calculated delay value -// -1 - Error -// -int WebRtc_DelayEstimatorProcessFloat(void* handle, - float* far_spectrum, - float* near_spectrum, - int spectrum_size, - int vad_value); - -// Returns a pointer to the far end spectrum aligned to current near end -// spectrum. The function WebRtc_DelayEstimatorProcessFloat(...) should -// have been called before WebRtc_AlignedFarendFloat(...). Otherwise, you get -// the pointer to the previous frame. The memory is only valid until the -// next call of WebRtc_DelayEstimatorProcessFloat(...). -// -// Inputs: -// - handle : Pointer to the delay estimation instance -// - far_spectrum_size : Size of far_spectrum allocated by the caller -// -// Output: -// -// Return value: -// - far_spectrum : Pointer to the aligned far end spectrum -// NULL - Error -// -const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size); - -// Returns the last calculated delay updated by the function -// WebRtcApm_DelayEstimatorProcessFloat(...) -// -// Inputs: -// - handle : Pointer to the delay estimation instance -// -// Return value: -// - delay : >= 0 - Last calculated delay value -// -1 - Error -// -int WebRtc_last_delay_float(void* handle); - -// Returns 1 if the far end alignment is enabled and 0 otherwise. -// -// Input: -// - handle : Pointer to the delay estimation instance -// -// Return value: -// - alignment_enabled : 1 - Enabled -// 0 - Disabled -// -1 - Error -// -int WebRtc_is_alignment_enabled_float(void* handle); - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_ diff --git a/webrtc/modules/audio_processing/utility/delay_estimator_internal.h b/webrtc/modules/audio_processing/utility/delay_estimator_internal.h new file mode 100644 index 0000000..fd11028 --- /dev/null +++ b/webrtc/modules/audio_processing/utility/delay_estimator_internal.h @@ -0,0 +1,48 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Header file including the delay estimator handle used for testing. 
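Before the internal handles below, a sketch of how the public wrapper API (declared in delay_estimator_wrapper.h further down in this patch) fits together may help. It is illustrative only: EstimateDelayBlock(), kSpectrumSize (65 bins, e.g. for a 128-point FFT) and kHistorySize are assumed values, not defaults taken from the patch.

  #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"

  enum { kSpectrumSize = 65 };   // Must be >= kBandLast (43); assumed value.
  enum { kHistorySize = 100 };   // Assumed number of history blocks.

  // Feeds one far-end/near-end block pair and returns the delay estimate:
  // >= 0 on success, -1 on error, -2 while there is not yet enough data.
  int EstimateDelayBlock(void* farend, void* estimator,
                         const float* far_spectrum,
                         const float* near_spectrum) {
    if (WebRtc_AddFarSpectrumFloat(farend, far_spectrum, kSpectrumSize) != 0) {
      return -1;
    }
    return WebRtc_DelayEstimatorProcessFloat(estimator, near_spectrum,
                                             kSpectrumSize);
  }

The two handles would be created once up front, e.g. with WebRtc_CreateDelayEstimatorFarend(kSpectrumSize, kHistorySize) followed by WebRtc_InitDelayEstimatorFarend(), and WebRtc_CreateDelayEstimator(farend, 0) followed by WebRtc_InitDelayEstimator().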
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/typedefs.h"
+
+typedef union {
+  float float_;
+  int32_t int32_;
+} SpectrumType;
+
+typedef struct {
+  // Pointers to mean values of spectrum.
+  SpectrumType* mean_far_spectrum;
+  // |mean_far_spectrum| initialization indicator.
+  int far_spectrum_initialized;
+
+  int spectrum_size;
+
+  // Far-end part of binary spectrum based delay estimation.
+  BinaryDelayEstimatorFarend* binary_farend;
+} DelayEstimatorFarend;
+
+typedef struct {
+  // Pointers to mean values of spectrum.
+  SpectrumType* mean_near_spectrum;
+  // |mean_near_spectrum| initialization indicator.
+  int near_spectrum_initialized;
+
+  int spectrum_size;
+
+  // Binary spectrum based delay estimator
+  BinaryDelayEstimator* binary_handle;
+} DelayEstimator;
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c b/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c
new file mode 100644
index 0000000..270588f
--- /dev/null
+++ b/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.c
@@ -0,0 +1,485 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/utility/delay_estimator.h"
+#include "webrtc/modules/audio_processing/utility/delay_estimator_internal.h"
+#include "webrtc/system_wrappers/interface/compile_assert_c.h"
+
+// Only bit |kBandFirst| through bit |kBandLast| are processed and
+// |kBandLast| - |kBandFirst| must be < 32.
+enum { kBandFirst = 12 };
+enum { kBandLast = 43 };
+
+static __inline uint32_t SetBit(uint32_t in, int pos) {
+  uint32_t mask = (1 << pos);
+  uint32_t out = (in | mask);
+
+  return out;
+}
+
+// Calculates the mean recursively. Same version as WebRtc_MeanEstimatorFix(),
+// but for float.
+//
+// Inputs:
+//    - new_value             : New additional value.
+//    - scale                 : Scale for smoothing (should be less than 1.0).
+//
+// Input/Output:
+//    - mean_value            : Pointer to the mean value for updating.
+//
+static void MeanEstimatorFloat(float new_value,
+                               float scale,
+                               float* mean_value) {
+  assert(scale < 1.0f);
+  *mean_value += (new_value - *mean_value) * scale;
+}
+
+// Computes the binary spectrum by comparing the input |spectrum| with a
+// |threshold_spectrum|. Float and fixed point versions.
+//
+// Inputs:
+//      - spectrum            : Spectrum of which the binary spectrum should be
+//                              calculated.
+//      - threshold_spectrum  : Threshold spectrum with which the input
+//                              spectrum is compared.
+// Return:
+//      - out                 : Binary spectrum.
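+//                              Bit |i - kBandFirst| is set when |spectrum[i]|
+//                              exceeds the running threshold, covering the 32
+//                              bands |kBandFirst| through |kBandLast|.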
+// +static uint32_t BinarySpectrumFix(const uint16_t* spectrum, + SpectrumType* threshold_spectrum, + int q_domain, + int* threshold_initialized) { + int i = kBandFirst; + uint32_t out = 0; + + assert(q_domain < 16); + + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0) { + // Convert input spectrum from Q(|q_domain|) to Q15. + int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + threshold_spectrum[i].int32_ = (spectrum_q15 >> 1); + *threshold_initialized = 1; + } + } + } + for (i = kBandFirst; i <= kBandLast; i++) { + // Convert input spectrum from Q(|q_domain|) to Q15. + int32_t spectrum_q15 = ((int32_t) spectrum[i]) << (15 - q_domain); + // Update the |threshold_spectrum|. + WebRtc_MeanEstimatorFix(spectrum_q15, 6, &(threshold_spectrum[i].int32_)); + // Convert |spectrum| at current frequency bin to a binary value. + if (spectrum_q15 > threshold_spectrum[i].int32_) { + out = SetBit(out, i - kBandFirst); + } + } + + return out; +} + +static uint32_t BinarySpectrumFloat(const float* spectrum, + SpectrumType* threshold_spectrum, + int* threshold_initialized) { + int i = kBandFirst; + uint32_t out = 0; + const float kScale = 1 / 64.0; + + if (!(*threshold_initialized)) { + // Set the |threshold_spectrum| to half the input |spectrum| as starting + // value. This speeds up the convergence. + for (i = kBandFirst; i <= kBandLast; i++) { + if (spectrum[i] > 0.0f) { + threshold_spectrum[i].float_ = (spectrum[i] / 2); + *threshold_initialized = 1; + } + } + } + + for (i = kBandFirst; i <= kBandLast; i++) { + // Update the |threshold_spectrum|. + MeanEstimatorFloat(spectrum[i], kScale, &(threshold_spectrum[i].float_)); + // Convert |spectrum| at current frequency bin to a binary value. + if (spectrum[i] > threshold_spectrum[i].float_) { + out = SetBit(out, i - kBandFirst); + } + } + + return out; +} + +void WebRtc_FreeDelayEstimatorFarend(void* handle) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + + if (handle == NULL) { + return; + } + + free(self->mean_far_spectrum); + self->mean_far_spectrum = NULL; + + WebRtc_FreeBinaryDelayEstimatorFarend(self->binary_farend); + self->binary_farend = NULL; + + free(self); +} + +void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size) { + DelayEstimatorFarend* self = NULL; + + // Check if the sub band used in the delay estimation is small enough to fit + // the binary spectra in a uint32_t. + COMPILE_ASSERT(kBandLast - kBandFirst < 32); + + if (spectrum_size >= kBandLast) { + self = malloc(sizeof(DelayEstimatorFarend)); + } + + if (self != NULL) { + int memory_fail = 0; + + // Allocate memory for the binary far-end spectrum handling. + self->binary_farend = WebRtc_CreateBinaryDelayEstimatorFarend(history_size); + memory_fail |= (self->binary_farend == NULL); + + // Allocate memory for spectrum buffers. + self->mean_far_spectrum = malloc(spectrum_size * sizeof(SpectrumType)); + memory_fail |= (self->mean_far_spectrum == NULL); + + self->spectrum_size = spectrum_size; + + if (memory_fail) { + WebRtc_FreeDelayEstimatorFarend(self); + self = NULL; + } + } + + return self; +} + +int WebRtc_InitDelayEstimatorFarend(void* handle) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + + if (self == NULL) { + return -1; + } + + // Initialize far-end part of binary delay estimator. 
+ WebRtc_InitBinaryDelayEstimatorFarend(self->binary_farend); + + // Set averaged far and near end spectra to zero. + memset(self->mean_far_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. + self->far_spectrum_initialized = 0; + + return 0; +} + +void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + assert(self != NULL); + WebRtc_SoftResetBinaryDelayEstimatorFarend(self->binary_farend, delay_shift); +} + +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (far_spectrum == NULL) { + // Empty far end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (far_q > 15) { + // If |far_q| is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectrum. + binary_spectrum = BinarySpectrumFix(far_spectrum, self->mean_far_spectrum, + far_q, &(self->far_spectrum_initialized)); + WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum); + + return 0; +} + +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size) { + DelayEstimatorFarend* self = (DelayEstimatorFarend*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (far_spectrum == NULL) { + // Empty far end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. + binary_spectrum = BinarySpectrumFloat(far_spectrum, self->mean_far_spectrum, + &(self->far_spectrum_initialized)); + WebRtc_AddBinaryFarSpectrum(self->binary_farend, binary_spectrum); + + return 0; +} + +void WebRtc_FreeDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (handle == NULL) { + return; + } + + free(self->mean_near_spectrum); + self->mean_near_spectrum = NULL; + + WebRtc_FreeBinaryDelayEstimator(self->binary_handle); + self->binary_handle = NULL; + + free(self); +} + +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead) { + DelayEstimator* self = NULL; + DelayEstimatorFarend* farend = (DelayEstimatorFarend*) farend_handle; + + if (farend_handle != NULL) { + self = malloc(sizeof(DelayEstimator)); + } + + if (self != NULL) { + int memory_fail = 0; + + // Allocate memory for the farend spectrum handling. + self->binary_handle = + WebRtc_CreateBinaryDelayEstimator(farend->binary_farend, max_lookahead); + memory_fail |= (self->binary_handle == NULL); + + // Allocate memory for spectrum buffers. + self->mean_near_spectrum = malloc(farend->spectrum_size * + sizeof(SpectrumType)); + memory_fail |= (self->mean_near_spectrum == NULL); + + self->spectrum_size = farend->spectrum_size; + + if (memory_fail) { + WebRtc_FreeDelayEstimator(self); + self = NULL; + } + } + + return self; +} + +int WebRtc_InitDelayEstimator(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + + // Initialize binary delay estimator. + WebRtc_InitBinaryDelayEstimator(self->binary_handle); + + // Set averaged far and near end spectra to zero. + memset(self->mean_near_spectrum, 0, + sizeof(SpectrumType) * self->spectrum_size); + // Reset initialization indicators. 
+ self->near_spectrum_initialized = 0; + + return 0; +} + +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + return WebRtc_SoftResetBinaryDelayEstimator(self->binary_handle, delay_shift); +} + +int WebRtc_set_history_size(void* handle, int history_size) { + DelayEstimator* self = handle; + + if ((self == NULL) || (history_size <= 1)) { + return -1; + } + return WebRtc_AllocateHistoryBufferMemory(self->binary_handle, history_size); +} + +int WebRtc_history_size(const void* handle) { + const DelayEstimator* self = handle; + + if (self == NULL) { + return -1; + } + if (self->binary_handle->farend->history_size != + self->binary_handle->history_size) { + // Non matching history sizes. + return -1; + } + return self->binary_handle->history_size; +} + +int WebRtc_set_lookahead(void* handle, int lookahead) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + assert(self->binary_handle != NULL); + if ((lookahead > self->binary_handle->near_history_size - 1) || + (lookahead < 0)) { + return -1; + } + self->binary_handle->lookahead = lookahead; + return self->binary_handle->lookahead; +} + +int WebRtc_lookahead(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + assert(self->binary_handle != NULL); + return self->binary_handle->lookahead; +} + +int WebRtc_set_allowed_offset(void* handle, int allowed_offset) { + DelayEstimator* self = (DelayEstimator*) handle; + + if ((self == NULL) || (allowed_offset < 0)) { + return -1; + } + self->binary_handle->allowed_offset = allowed_offset; + return 0; +} + +int WebRtc_get_allowed_offset(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->allowed_offset; +} + +int WebRtc_enable_robust_validation(void* handle, int enable) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + if ((enable < 0) || (enable > 1)) { + return -1; + } + assert(self->binary_handle != NULL); + self->binary_handle->robust_validation_enabled = enable; + return 0; +} + +int WebRtc_is_robust_validation_enabled(const void* handle) { + const DelayEstimator* self = (const DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + return self->binary_handle->robust_validation_enabled; +} + +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q) { + DelayEstimator* self = (DelayEstimator*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. + return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + if (near_q > 15) { + // If |near_q| is larger than 15 we cannot guarantee no wrap around. + return -1; + } + + // Get binary spectra. + binary_spectrum = BinarySpectrumFix(near_spectrum, + self->mean_near_spectrum, + near_q, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size) { + DelayEstimator* self = (DelayEstimator*) handle; + uint32_t binary_spectrum = 0; + + if (self == NULL) { + return -1; + } + if (near_spectrum == NULL) { + // Empty near end spectrum. 
+ return -1; + } + if (spectrum_size != self->spectrum_size) { + // Data sizes don't match. + return -1; + } + + // Get binary spectrum. + binary_spectrum = BinarySpectrumFloat(near_spectrum, self->mean_near_spectrum, + &(self->near_spectrum_initialized)); + + return WebRtc_ProcessBinarySpectrum(self->binary_handle, binary_spectrum); +} + +int WebRtc_last_delay(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + + if (self == NULL) { + return -1; + } + + return WebRtc_binary_last_delay(self->binary_handle); +} + +float WebRtc_last_delay_quality(void* handle) { + DelayEstimator* self = (DelayEstimator*) handle; + assert(self != NULL); + return WebRtc_binary_last_delay_quality(self->binary_handle); +} diff --git a/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h b/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h new file mode 100644 index 0000000..fdadebe --- /dev/null +++ b/webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// Performs delay estimation on block by block basis. +// The return value is 0 - OK and -1 - Error, unless otherwise stated. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ + +#include "webrtc/typedefs.h" + +// Releases the memory allocated by WebRtc_CreateDelayEstimatorFarend(...) +void WebRtc_FreeDelayEstimatorFarend(void* handle); + +// Allocates the memory needed by the far-end part of the delay estimation. The +// memory needs to be initialized separately through +// WebRtc_InitDelayEstimatorFarend(...). +// +// Inputs: +// - spectrum_size : Size of the spectrum used both in far-end and +// near-end. Used to allocate memory for spectrum +// specific buffers. +// - history_size : The far-end history buffer size. A change in buffer +// size can be forced with WebRtc_set_history_size(). +// Note that the maximum delay which can be estimated is +// determined together with WebRtc_set_lookahead(). +// +// Return value: +// - void* : Created |handle|. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimatorFarend(int spectrum_size, int history_size); + +// Initializes the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...) +int WebRtc_InitDelayEstimatorFarend(void* handle); + +// Soft resets the far-end part of the delay estimation instance returned by +// WebRtc_CreateDelayEstimatorFarend(...). +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +void WebRtc_SoftResetDelayEstimatorFarend(void* handle, int delay_shift); + +// Adds the far-end spectrum to the far-end history buffer. This spectrum is +// used as reference when calculating the delay using +// WebRtc_ProcessSpectrum(). +// +// Inputs: +// - far_spectrum : Far-end spectrum. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - far_q : The Q-domain of the far-end data. 
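+//                            |far_q| must be at most 15; larger values could
+//                            wrap around in the Q15 conversion and make the
+//                            call return -1.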
+// +// Output: +// - handle : Updated far-end instance. +// +int WebRtc_AddFarSpectrumFix(void* handle, + const uint16_t* far_spectrum, + int spectrum_size, + int far_q); + +// See WebRtc_AddFarSpectrumFix() for description. +int WebRtc_AddFarSpectrumFloat(void* handle, + const float* far_spectrum, + int spectrum_size); + +// Releases the memory allocated by WebRtc_CreateDelayEstimator(...) +void WebRtc_FreeDelayEstimator(void* handle); + +// Allocates the memory needed by the delay estimation. The memory needs to be +// initialized separately through WebRtc_InitDelayEstimator(...). +// +// Inputs: +// - farend_handle : Pointer to the far-end part of the delay estimation +// instance created prior to this call using +// WebRtc_CreateDelayEstimatorFarend(). +// +// Note that WebRtc_CreateDelayEstimator does not take +// ownership of |farend_handle|, which has to be torn +// down properly after this instance. +// +// - max_lookahead : Maximum amount of non-causal lookahead allowed. The +// actual amount of lookahead used can be controlled by +// WebRtc_set_lookahead(...). The default |lookahead| is +// set to |max_lookahead| at create time. Use +// WebRtc_set_lookahead(...) before start if a different +// value is desired. +// +// Using lookahead can detect cases in which a near-end +// signal occurs before the corresponding far-end signal. +// It will delay the estimate for the current block by an +// equal amount, and the returned values will be offset +// by it. +// +// A value of zero is the typical no-lookahead case. +// This also represents the minimum delay which can be +// estimated. +// +// Note that the effective range of delay estimates is +// [-|lookahead|,... ,|history_size|-|lookahead|) +// where |history_size| is set through +// WebRtc_set_history_size(). +// +// Return value: +// - void* : Created |handle|. If the memory can't be allocated or +// if any of the input parameters are invalid NULL is +// returned. +void* WebRtc_CreateDelayEstimator(void* farend_handle, int max_lookahead); + +// Initializes the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +int WebRtc_InitDelayEstimator(void* handle); + +// Soft resets the delay estimation instance returned by +// WebRtc_CreateDelayEstimator(...) +// Input: +// - delay_shift : The amount of blocks to shift history buffers. +// +// Return value: +// - actual_shifts : The actual number of shifts performed. +int WebRtc_SoftResetDelayEstimator(void* handle, int delay_shift); + +// Sets the effective |history_size| used. Valid values from 2. We simply need +// at least two delays to compare to perform an estimate. If |history_size| is +// changed, buffers are reallocated filling in with zeros if necessary. +// Note that changing the |history_size| affects both buffers in far-end and +// near-end. Hence it is important to change all DelayEstimators that use the +// same reference far-end, to the same |history_size| value. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - history_size : Effective history size to be used. +// Return value: +// - new_history_size : The new history size used. If the memory was not able +// to be allocated 0 is returned. +int WebRtc_set_history_size(void* handle, int history_size); + +// Returns the history_size currently used. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_history_size(const void* handle); + +// Sets the amount of |lookahead| to use. 
Valid values are [0, max_lookahead] +// where |max_lookahead| was set at create time through +// WebRtc_CreateDelayEstimator(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// - lookahead : The amount of lookahead to be used. +// +// Return value: +// - new_lookahead : The actual amount of lookahead set, unless |handle| is +// a NULL pointer or |lookahead| is invalid, for which an +// error is returned. +int WebRtc_set_lookahead(void* handle, int lookahead); + +// Returns the amount of lookahead we currently use. +// Input: +// - handle : Pointer to the delay estimation instance. +int WebRtc_lookahead(void* handle); + +// Sets the |allowed_offset| used in the robust validation scheme. If the +// delay estimator is used in an echo control component, this parameter is +// related to the filter length. In principle |allowed_offset| should be set to +// the echo control filter length minus the expected echo duration, i.e., the +// delay offset the echo control can handle without quality regression. The +// default value, used if not set manually, is zero. Note that |allowed_offset| +// has to be non-negative. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - allowed_offset : The amount of delay offset, measured in partitions, +// the echo control filter can handle. +int WebRtc_set_allowed_offset(void* handle, int allowed_offset); + +// Returns the |allowed_offset| in number of partitions. +int WebRtc_get_allowed_offset(const void* handle); + +// Enables/Disables a robust validation functionality in the delay estimation. +// This is by default set to disabled at create time. The state is preserved +// over a reset. +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - enable : Enable (1) or disable (0) this feature. +int WebRtc_enable_robust_validation(void* handle, int enable); + +// Returns 1 if robust validation is enabled and 0 if disabled. +int WebRtc_is_robust_validation_enabled(const void* handle); + +// Estimates and returns the delay between the far-end and near-end blocks. The +// value will be offset by the lookahead (i.e. the lookahead should be +// subtracted from the returned value). +// Inputs: +// - handle : Pointer to the delay estimation instance. +// - near_spectrum : Pointer to the near-end spectrum data of the current +// block. +// - spectrum_size : The size of the data arrays (same for both far- and +// near-end). +// - near_q : The Q-domain of the near-end data. +// +// Output: +// - handle : Updated instance. +// +// Return value: +// - delay : >= 0 - Calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_DelayEstimatorProcessFix(void* handle, + const uint16_t* near_spectrum, + int spectrum_size, + int near_q); + +// See WebRtc_DelayEstimatorProcessFix() for description. +int WebRtc_DelayEstimatorProcessFloat(void* handle, + const float* near_spectrum, + int spectrum_size); + +// Returns the last calculated delay updated by the function +// WebRtc_DelayEstimatorProcess(...). +// +// Input: +// - handle : Pointer to the delay estimation instance. +// +// Return value: +// - delay : >= 0 - Last calculated delay value. +// -1 - Error. +// -2 - Insufficient data for estimation. +int WebRtc_last_delay(void* handle); + +// Returns the estimation quality/probability of the last calculated delay +// updated by the function WebRtc_DelayEstimatorProcess(...). The estimation +// quality is a value in the interval [0, 1]. 
The higher the value, the better +// the quality. +// +// Return value: +// - delay_quality : >= 0 - Estimation quality of last calculated delay. +float WebRtc_last_delay_quality(void* handle); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_WRAPPER_H_ diff --git a/webrtc/modules/audio_processing/utility/ring_buffer.c b/webrtc/modules/audio_processing/utility/ring_buffer.c deleted file mode 100644 index ea2e354..0000000 --- a/webrtc/modules/audio_processing/utility/ring_buffer.c +++ /dev/null @@ -1,239 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -/* - * Provides a generic ring buffer that can be written to and read from with - * arbitrarily sized blocks. The AEC uses this for several different tasks. - */ - -#include -#include -#include "ring_buffer.h" - -typedef struct { - int readPos; - int writePos; - int size; - char rwWrap; - bufdata_t *data; -} buf_t; - -enum {SAME_WRAP, DIFF_WRAP}; - -int WebRtcApm_CreateBuffer(void **bufInst, int size) -{ - buf_t *buf = NULL; - - if (size < 0) { - return -1; - } - - buf = malloc(sizeof(buf_t)); - *bufInst = buf; - if (buf == NULL) { - return -1; - } - - buf->data = malloc(size*sizeof(bufdata_t)); - if (buf->data == NULL) { - free(buf); - buf = NULL; - return -1; - } - - buf->size = size; - return 0; -} - -int WebRtcApm_InitBuffer(void *bufInst) -{ - buf_t *buf = (buf_t*)bufInst; - - buf->readPos = 0; - buf->writePos = 0; - buf->rwWrap = SAME_WRAP; - - // Initialize buffer to zeros - memset(buf->data, 0, sizeof(bufdata_t)*buf->size); - - return 0; -} - -int WebRtcApm_FreeBuffer(void *bufInst) -{ - buf_t *buf = (buf_t*)bufInst; - - if (buf == NULL) { - return -1; - } - - free(buf->data); - free(buf); - - return 0; -} - -int WebRtcApm_ReadBuffer(void *bufInst, bufdata_t *data, int size) -{ - buf_t *buf = (buf_t*)bufInst; - int n = 0, margin = 0; - - if (size <= 0 || size > buf->size) { - return -1; - } - - n = size; - if (buf->rwWrap == DIFF_WRAP) { - margin = buf->size - buf->readPos; - if (n > margin) { - buf->rwWrap = SAME_WRAP; - memcpy(data, buf->data + buf->readPos, - sizeof(bufdata_t)*margin); - buf->readPos = 0; - n = size - margin; - } - else { - memcpy(data, buf->data + buf->readPos, - sizeof(bufdata_t)*n); - buf->readPos += n; - return n; - } - } - - if (buf->rwWrap == SAME_WRAP) { - margin = buf->writePos - buf->readPos; - if (margin > n) - margin = n; - memcpy(data + size - n, buf->data + buf->readPos, - sizeof(bufdata_t)*margin); - buf->readPos += margin; - n -= margin; - } - - return size - n; -} - -int WebRtcApm_WriteBuffer(void *bufInst, const bufdata_t *data, int size) -{ - buf_t *buf = (buf_t*)bufInst; - int n = 0, margin = 0; - - if (size < 0 || size > buf->size) { - return -1; - } - - n = size; - if (buf->rwWrap == SAME_WRAP) { - margin = buf->size - buf->writePos; - if (n > margin) { - buf->rwWrap = DIFF_WRAP; - memcpy(buf->data + buf->writePos, data, - sizeof(bufdata_t)*margin); - buf->writePos = 0; - n = size - margin; - } - else { - memcpy(buf->data + buf->writePos, data, - sizeof(bufdata_t)*n); - buf->writePos += n; - return n; - } - } - - if (buf->rwWrap == DIFF_WRAP) { - margin = buf->readPos - buf->writePos; - if (margin > n) - 
margin = n; - memcpy(buf->data + buf->writePos, data + size - n, - sizeof(bufdata_t)*margin); - buf->writePos += margin; - n -= margin; - } - - return size - n; -} - -int WebRtcApm_FlushBuffer(void *bufInst, int size) -{ - buf_t *buf = (buf_t*)bufInst; - int n = 0, margin = 0; - - if (size <= 0 || size > buf->size) { - return -1; - } - - n = size; - if (buf->rwWrap == DIFF_WRAP) { - margin = buf->size - buf->readPos; - if (n > margin) { - buf->rwWrap = SAME_WRAP; - buf->readPos = 0; - n = size - margin; - } - else { - buf->readPos += n; - return n; - } - } - - if (buf->rwWrap == SAME_WRAP) { - margin = buf->writePos - buf->readPos; - if (margin > n) - margin = n; - buf->readPos += margin; - n -= margin; - } - - return size - n; -} - -int WebRtcApm_StuffBuffer(void *bufInst, int size) -{ - buf_t *buf = (buf_t*)bufInst; - int n = 0, margin = 0; - - if (size <= 0 || size > buf->size) { - return -1; - } - - n = size; - if (buf->rwWrap == SAME_WRAP) { - margin = buf->readPos; - if (n > margin) { - buf->rwWrap = DIFF_WRAP; - buf->readPos = buf->size - 1; - n -= margin + 1; - } - else { - buf->readPos -= n; - return n; - } - } - - if (buf->rwWrap == DIFF_WRAP) { - margin = buf->readPos - buf->writePos; - if (margin > n) - margin = n; - buf->readPos -= margin; - n -= margin; - } - - return size - n; -} - -int WebRtcApm_get_buffer_size(const void *bufInst) -{ - const buf_t *buf = (buf_t*)bufInst; - - if (buf->rwWrap == SAME_WRAP) - return buf->writePos - buf->readPos; - else - return buf->size - buf->readPos + buf->writePos; -} diff --git a/webrtc/modules/audio_processing/utility/ring_buffer.h b/webrtc/modules/audio_processing/utility/ring_buffer.h deleted file mode 100644 index 0fd261d..0000000 --- a/webrtc/modules/audio_processing/utility/ring_buffer.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. - * - * Use of this source code is governed by a BSD-style license - * that can be found in the LICENSE file in the root of the source - * tree. An additional intellectual property rights grant can be found - * in the file PATENTS. All contributing project authors may - * be found in the AUTHORS file in the root of the source tree. - */ - -/* - * Specifies the interface for the AEC generic buffer. 
- */ - -#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_RING_BUFFER_H_ -#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_RING_BUFFER_H_ - -// Determines buffer datatype -typedef short bufdata_t; - -// Unless otherwise specified, functions return 0 on success and -1 on error -int WebRtcApm_CreateBuffer(void **bufInst, int size); -int WebRtcApm_InitBuffer(void *bufInst); -int WebRtcApm_FreeBuffer(void *bufInst); - -// Returns number of samples read -int WebRtcApm_ReadBuffer(void *bufInst, bufdata_t *data, int size); - -// Returns number of samples written -int WebRtcApm_WriteBuffer(void *bufInst, const bufdata_t *data, int size); - -// Returns number of samples flushed -int WebRtcApm_FlushBuffer(void *bufInst, int size); - -// Returns number of samples stuffed -int WebRtcApm_StuffBuffer(void *bufInst, int size); - -// Returns number of samples in buffer -int WebRtcApm_get_buffer_size(const void *bufInst); - -#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_RING_BUFFER_H_ diff --git a/webrtc/modules/audio_processing/utility/util.gypi b/webrtc/modules/audio_processing/utility/util.gypi deleted file mode 100644 index c088e98..0000000 --- a/webrtc/modules/audio_processing/utility/util.gypi +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. -# -# Use of this source code is governed by a BSD-style license -# that can be found in the LICENSE file in the root of the source -# tree. An additional intellectual property rights grant can be found -# in the file PATENTS. All contributing project authors may -# be found in the AUTHORS file in the root of the source tree. - -{ - 'targets': [ - { - 'target_name': 'apm_util', - 'type': '<(library)', - 'dependencies': [ - '<(webrtc_root)/common_audio/common_audio.gyp:spl', - ], - 'direct_dependent_settings': { - 'include_dirs': [ - '.', - ], - }, - 'sources': [ - 'delay_estimator_float.c', - 'delay_estimator_float.h', - 'delay_estimator.c', - 'delay_estimator.h', - 'fft4g.c', - 'fft4g.h', - 'ring_buffer.c', - 'ring_buffer.h', - ], - }, - ], -} - -# Local Variables: -# tab-width:2 -# indent-tabs-mode:nil -# End: -# vim: set expandtab tabstop=2 shiftwidth=2: diff --git a/webrtc/modules/audio_processing/vad/common.h b/webrtc/modules/audio_processing/vad/common.h new file mode 100644 index 0000000..be99c1c --- /dev/null +++ b/webrtc/modules/audio_processing/vad/common.h @@ -0,0 +1,27 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
+
+static const int kSampleRateHz = 16000;
+static const size_t kLength10Ms = kSampleRateHz / 100;
+static const size_t kMaxNumFrames = 4;
+
+struct AudioFeatures {
+  double log_pitch_gain[kMaxNumFrames];
+  double pitch_lag_hz[kMaxNumFrames];
+  double spectral_peak[kMaxNumFrames];
+  double rms[kMaxNumFrames];
+  size_t num_frames;
+  bool silence;
+};
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
diff --git a/webrtc/modules/audio_processing/vad/gmm.cc b/webrtc/modules/audio_processing/vad/gmm.cc
new file mode 100644
index 0000000..9651975
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/gmm.cc
@@ -0,0 +1,64 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/vad/gmm.h"
+
+#include <math.h>
+#include <stdlib.h>
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+static const int kMaxDimension = 10;
+
+static void RemoveMean(const double* in,
+                       const double* mean_vec,
+                       int dimension,
+                       double* out) {
+  for (int n = 0; n < dimension; ++n)
+    out[n] = in[n] - mean_vec[n];
+}
+
+static double ComputeExponent(const double* in,
+                              const double* covar_inv,
+                              int dimension) {
+  double q = 0;
+  for (int i = 0; i < dimension; ++i) {
+    double v = 0;
+    for (int j = 0; j < dimension; j++)
+      v += (*covar_inv++) * in[j];
+    q += v * in[i];
+  }
+  q *= -0.5;
+  return q;
+}
+
+double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) {
+  if (gmm_parameters.dimension > kMaxDimension) {
+    return -1;  // An invalid PDF value, so the caller can detect the error.
+  }
+  double f = 0;
+  double v[kMaxDimension];
+  const double* mean_vec = gmm_parameters.mean;
+  const double* covar_inv = gmm_parameters.covar_inverse;
+
+  for (int n = 0; n < gmm_parameters.num_mixtures; n++) {
+    RemoveMean(x, mean_vec, gmm_parameters.dimension, v);
+    double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) +
+               gmm_parameters.weight[n];
+    f += exp(q);
+    mean_vec += gmm_parameters.dimension;
+    covar_inv += gmm_parameters.dimension * gmm_parameters.dimension;
+  }
+  return f;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/vad/gmm.h b/webrtc/modules/audio_processing/vad/gmm.h
new file mode 100644
index 0000000..9f3e578
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/gmm.h
@@ -0,0 +1,45 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
+
+namespace webrtc {
+
+// A structure that specifies a GMM.
+// A GMM is formulated as
+//  f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... +
+//         w[num_mixtures - 1] * mixture[num_mixtures - 1];
+// where a 'mixture' is a Gaussian density.
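
Editor's note (illustration only, not part of the patch): the log-domain weight convention documented in this header is easy to get wrong, so here is a minimal sketch that fills in the `GmmParameters` struct declared just below for a 1-D, two-mixture model and evaluates it with `EvaluateGmm()`. All numeric values are invented.

```cpp
// Minimal sketch: a 1-D, two-mixture GMM evaluated with webrtc::EvaluateGmm().
// Illustrative values only; not part of the WebRTC sources.
#include <math.h>
#include <stdio.h>

#include "webrtc/modules/audio_processing/vad/gmm.h"

int main() {
  const double kPi = 3.14159265358979323846;
  // Plain mixture parameters: weights {0.6, 0.4}, means {0, 5}, variances {1, 4}.
  const double kMean[] = {0.0, 5.0};
  const double kCovarInverse[] = {1.0 / 1.0, 1.0 / 4.0};
  // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n])),
  // matching the convention documented in gmm.h.
  const double kWeight[] = {
      log(0.6) - 0.5 * log(2.0 * kPi) - 0.5 * log(1.0),
      log(0.4) - 0.5 * log(2.0 * kPi) - 0.5 * log(4.0)};

  webrtc::GmmParameters gmm;
  gmm.weight = kWeight;
  gmm.mean = kMean;
  gmm.covar_inverse = kCovarInverse;
  gmm.dimension = 1;
  gmm.num_mixtures = 2;

  const double x = 1.0;
  printf("f(%.1f) = %f\n", x, webrtc::EvaluateGmm(&x, gmm));  // Mixture pdf at x.
  return 0;
}
```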
+ +struct GmmParameters { + // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n])); + // where cov[n] is the covariance matrix of mixture n; + const double* weight; + // pointer to the first element of a |num_mixtures|x|dimension| matrix + // where kth row is the mean of the kth mixture. + const double* mean; + // pointer to the first element of a |num_mixtures|x|dimension|x|dimension| + // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance + // matrix of the kth mixture. + const double* covar_inverse; + // Dimensionality of the mixtures. + int dimension; + // number of the mixtures. + int num_mixtures; +}; + +// Evaluate the given GMM, according to |gmm_parameters|, at the given point +// |x|. If the dimensionality of the given GMM is larger that the maximum +// acceptable dimension by the following function -1 is returned. +double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters); + +} // namespace webrtc +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_ diff --git a/webrtc/modules/audio_processing/vad/noise_gmm_tables.h b/webrtc/modules/audio_processing/vad/noise_gmm_tables.h new file mode 100644 index 0000000..293af57 --- /dev/null +++ b/webrtc/modules/audio_processing/vad/noise_gmm_tables.h @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +// GMM tables for inactive segments. Generated by MakeGmmTables.m. + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ + +static const int kNoiseGmmNumMixtures = 12; +static const int kNoiseGmmDim = 3; + +static const double + kNoiseGmmCovarInverse[kNoiseGmmNumMixtures][kNoiseGmmDim][kNoiseGmmDim] = { + {{7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02}, + {4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04}, + {1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}}, + {{8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03}, + {-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04}, + {5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}}, + {{4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03}, + {-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05}, + {-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}}, + {{9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03}, + {-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07}, + {-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}}, + {{7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02}, + {-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06}, + {2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}}, + {{8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02}, + {-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06}, + {-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}}, + {{9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03}, + {5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07}, + 
{-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}}, + {{8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03}, + {5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07}, + {6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}}, + {{6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03}, + {-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05}, + {5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}}, + {{6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03}, + {4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08}, + {-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}}, + {{1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02}, + {-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07}, + {-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}}, + {{4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03}, + {-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07}, + {5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}}; + +static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = { + {-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01}, + {-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02}, + {-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02}, + {-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02}, + {-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01}, + {-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02}, + {-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02}, + {-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02}, + {-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02}, + {-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02}, + {-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02}, + {-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}}; + +static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = { + -1.09422832086193e+01, + -1.10847897513425e+01, + -1.36767587732187e+01, + -1.79789356118641e+01, + -1.42830169160894e+01, + -1.56500228061379e+01, + -1.83124990950113e+01, + -1.69979436177477e+01, + -1.12329424387828e+01, + -1.41311785780639e+01, + -1.47171861448585e+01, + -1.35963362781839e+01}; +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ diff --git a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc b/webrtc/modules/audio_processing/vad/pitch_based_vad.cc new file mode 100644 index 0000000..39ec37e --- /dev/null +++ b/webrtc/modules/audio_processing/vad/pitch_based_vad.cc @@ -0,0 +1,124 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
+#include "webrtc/modules/audio_processing/vad/common.h"
+#include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
+#include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+static_assert(kNoiseGmmDim == kVoiceGmmDim,
+              "noise and voice gmm dimension not equal");
+
+// These values should match the MATLAB counterparts for the unit tests to pass.
+static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
+static const double kInitialPriorProbability = 0.3;
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static double LimitProbability(double p) {
+  const double kLimHigh = 0.99;
+  const double kLimLow = 0.01;
+
+  if (p > kLimHigh)
+    p = kLimHigh;
+  else if (p < kLimLow)
+    p = kLimLow;
+  return p;
+}
+
+PitchBasedVad::PitchBasedVad()
+    : p_prior_(kInitialPriorProbability),
+      circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
+  // Setup noise GMM.
+  noise_gmm_.dimension = kNoiseGmmDim;
+  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
+  noise_gmm_.weight = kNoiseGmmWeights;
+  noise_gmm_.mean = &kNoiseGmmMean[0][0];
+  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+  // Setup voice GMM.
+  voice_gmm_.dimension = kVoiceGmmDim;
+  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
+  voice_gmm_.weight = kVoiceGmmWeights;
+  voice_gmm_.mean = &kVoiceGmmMean[0][0];
+  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+}
+
+PitchBasedVad::~PitchBasedVad() {
+}
+
+int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
+                                      double* p_combined) {
+  double p;
+  double gmm_features[3];
+  double pdf_features_given_voice;
+  double pdf_features_given_noise;
+  // These limits match the MATLAB implementation, 'VoicingProbGMM()'.
+  const double kLimLowLogPitchGain = -2.0;
+  const double kLimHighLogPitchGain = -0.9;
+  const double kLimLowSpectralPeak = 200;
+  const double kLimHighSpectralPeak = 2000;
+  const double kEps = 1e-12;
+  for (size_t n = 0; n < features.num_frames; n++) {
+    gmm_features[0] = features.log_pitch_gain[n];
+    gmm_features[1] = features.spectral_peak[n];
+    gmm_features[2] = features.pitch_lag_hz[n];
+
+    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+        features.spectral_peak[n] > kLimHighSpectralPeak ||
+        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+      pdf_features_given_voice = kEps * pdf_features_given_noise;
+    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+      pdf_features_given_noise = kEps * pdf_features_given_voice;
+    }
+
+    p = p_prior_ * pdf_features_given_voice /
+        (pdf_features_given_voice * p_prior_ +
+         pdf_features_given_noise * (1 - p_prior_));
+
+    p = LimitProbability(p);
+
+    // Combine the pitch-based probability with the standalone probability
+    // before updating the prior probabilities.
+    double prod_active = p * p_combined[n];
+    double prod_inactive = (1 - p) * (1 - p_combined[n]);
+    p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+    if (UpdatePrior(p_combined[n]) < 0)
+      return -1;
+    // Limit the prior probability: with a prior of zero, the posterior
+    // probability would be stuck at zero.
+    p_prior_ = LimitProbability(p_prior_);
+  }
+  return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+  circular_buffer_->Insert(p);
+  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+                                        kLowProbabilityThreshold) < 0)
+    return -1;
+  p_prior_ = circular_buffer_->Mean();
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/vad/pitch_based_vad.h b/webrtc/modules/audio_processing/vad/pitch_based_vad.h
new file mode 100644
index 0000000..c502184
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/pitch_based_vad.h
@@ -0,0 +1,57 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/audio_processing/vad/common.h"
+#include "webrtc/modules/audio_processing/vad/gmm.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class VadCircularBuffer;
+
+// Computes the probability of the input audio frame being active, given the
+// corresponding pitch gain and lag of the frame.
+class PitchBasedVad {
+ public:
+  PitchBasedVad();
+  ~PitchBasedVad();
+
+  // Compute the pitch-based voicing probability, given the features.
+  // features: a structure containing the features required for computing
+  //           the voicing probabilities.
+  //
+  // p_combined: an array which contains the combined activity probabilities
+  //             computed prior to the call of this function. The method
+  //             computes the voicing probabilities, combines them with the
+  //             given values, and returns the results in |p_combined|.
+  int VoicingProbability(const AudioFeatures& features, double* p_combined);
+
+ private:
+  int UpdatePrior(double p);
+
+  // TODO(turajs): maybe define this at a higher level (maybe enum) so that
+  // all the code recognizes it as "no-error."
+  static const int kNoError = 0;
+
+  GmmParameters noise_gmm_;
+  GmmParameters voice_gmm_;
+
+  double p_prior_;
+
+  rtc::scoped_ptr<VadCircularBuffer> circular_buffer_;
+};
+
+}  // namespace webrtc
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
diff --git a/webrtc/modules/audio_processing/vad/pitch_internal.cc b/webrtc/modules/audio_processing/vad/pitch_internal.cc
new file mode 100644
index 0000000..309b45a
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/pitch_internal.cc
@@ -0,0 +1,51 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/vad/pitch_internal.h"
+
+#include <cmath>
+
+// A 4-to-3 linear interpolation.
+// The interpolation constants are derived as follows:
+// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
+// we are interested in pitch parameters of 0-5 ms, 10-15 ms and 20-25 ms.
This is +// like interpolating 4-to-6 and keep the odd samples. +// The reason behind this is that LPC coefficients are computed for the first +// half of each 10ms interval. +static void PitchInterpolation(double old_val, const double* in, double* out) { + out[0] = 1. / 6. * old_val + 5. / 6. * in[0]; + out[1] = 5. / 6. * in[1] + 1. / 6. * in[2]; + out[2] = 0.5 * in[2] + 0.5 * in[3]; +} + +void GetSubframesPitchParameters(int sampling_rate_hz, + double* gains, + double* lags, + int num_in_frames, + int num_out_frames, + double* log_old_gain, + double* old_lag, + double* log_pitch_gain, + double* pitch_lag_hz) { + // Gain interpolation is in log-domain, also returned in log-domain. + for (int n = 0; n < num_in_frames; n++) + gains[n] = log(gains[n] + 1e-12); + + // Interpolate lags and gains. + PitchInterpolation(*log_old_gain, gains, log_pitch_gain); + *log_old_gain = gains[num_in_frames - 1]; + PitchInterpolation(*old_lag, lags, pitch_lag_hz); + *old_lag = lags[num_in_frames - 1]; + + // Convert pitch-lags to Hertz. + for (int n = 0; n < num_out_frames; n++) { + pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]); + } +} diff --git a/webrtc/modules/audio_processing/vad/pitch_internal.h b/webrtc/modules/audio_processing/vad/pitch_internal.h new file mode 100644 index 0000000..b25b1a8 --- /dev/null +++ b/webrtc/modules/audio_processing/vad/pitch_internal.h @@ -0,0 +1,26 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ + +// TODO(turajs): Write a description of this function. Also be consistent with +// usage of |sampling_rate_hz| vs |kSamplingFreqHz|. +void GetSubframesPitchParameters(int sampling_rate_hz, + double* gains, + double* lags, + int num_in_frames, + int num_out_frames, + double* log_old_gain, + double* old_lag, + double* log_pitch_gain, + double* pitch_lag_hz); + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ diff --git a/webrtc/modules/audio_processing/vad/pole_zero_filter.cc b/webrtc/modules/audio_processing/vad/pole_zero_filter.cc new file mode 100644 index 0000000..9769515 --- /dev/null +++ b/webrtc/modules/audio_processing/vad/pole_zero_filter.cc @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */
+
+#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
+
+#include <stdlib.h>
+#include <string.h>
+
+#include <algorithm>
+
+namespace webrtc {
+
+PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients,
+                                       size_t order_numerator,
+                                       const float* denominator_coefficients,
+                                       size_t order_denominator) {
+  // Check the pointers before dereferencing |denominator_coefficients[0]|.
+  if (order_numerator > kMaxFilterOrder ||
+      order_denominator > kMaxFilterOrder ||
+      numerator_coefficients == NULL || denominator_coefficients == NULL ||
+      denominator_coefficients[0] == 0)
+    return NULL;
+  return new PoleZeroFilter(numerator_coefficients, order_numerator,
+                            denominator_coefficients, order_denominator);
+}
+
+PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
+                               size_t order_numerator,
+                               const float* denominator_coefficients,
+                               size_t order_denominator)
+    : past_input_(),
+      past_output_(),
+      numerator_coefficients_(),
+      denominator_coefficients_(),
+      order_numerator_(order_numerator),
+      order_denominator_(order_denominator),
+      highest_order_(std::max(order_denominator, order_numerator)) {
+  memcpy(numerator_coefficients_, numerator_coefficients,
+         sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1));
+  memcpy(denominator_coefficients_, denominator_coefficients,
+         sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1));
+
+  // Normalize so that the leading denominator coefficient is 1.
+  if (denominator_coefficients_[0] != 1) {
+    for (size_t n = 0; n <= order_numerator_; n++)
+      numerator_coefficients_[n] /= denominator_coefficients_[0];
+    for (size_t n = 0; n <= order_denominator_; n++)
+      denominator_coefficients_[n] /= denominator_coefficients_[0];
+  }
+}
+
+template <typename T>
+static float FilterArPast(const T* past, size_t order,
+                          const float* coefficients) {
+  float sum = 0.0f;
+  size_t past_index = order - 1;
+  for (size_t k = 1; k <= order; k++, past_index--)
+    sum += coefficients[k] * past[past_index];
+  return sum;
+}
+
+int PoleZeroFilter::Filter(const int16_t* in,
+                           size_t num_input_samples,
+                           float* output) {
+  if (in == NULL || output == NULL)
+    return -1;
+  // The first |k| samples need the past signal stored in the member buffers.
+  const size_t k = std::min(num_input_samples, highest_order_);
+  size_t n;
+  for (n = 0; n < k; n++) {
+    output[n] = in[n] * numerator_coefficients_[0];
+    output[n] += FilterArPast(&past_input_[n], order_numerator_,
+                              numerator_coefficients_);
+    output[n] -= FilterArPast(&past_output_[n], order_denominator_,
+                              denominator_coefficients_);
+
+    past_input_[n + order_numerator_] = in[n];
+    past_output_[n + order_denominator_] = output[n];
+  }
+  if (highest_order_ < num_input_samples) {
+    for (size_t m = 0; n < num_input_samples; n++, m++) {
+      output[n] = in[n] * numerator_coefficients_[0];
+      output[n] +=
+          FilterArPast(&in[m], order_numerator_, numerator_coefficients_);
+      output[n] -= FilterArPast(&output[m], order_denominator_,
+                                denominator_coefficients_);
+    }
+    // Record the tail of the signals into the past buffers.
+    memcpy(past_input_, &in[num_input_samples - order_numerator_],
+           sizeof(in[0]) * order_numerator_);
+    memcpy(past_output_, &output[num_input_samples - order_denominator_],
+           sizeof(output[0]) * order_denominator_);
+  } else {
+    // Odd case: the length of the input is shorter than the filter order.
+    memmove(past_input_, &past_input_[num_input_samples],
+            order_numerator_ * sizeof(past_input_[0]));
+    memmove(past_output_, &past_output_[num_input_samples],
+            order_denominator_ * sizeof(past_output_[0]));
+  }
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/vad/pole_zero_filter.h b/webrtc/modules/audio_processing/vad/pole_zero_filter.h
new file mode 100644
index 0000000..bd13050
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/pole_zero_filter.h
@@ -0,0 +1,52 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
+
+#include <stddef.h>
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class PoleZeroFilter {
+ public:
+  ~PoleZeroFilter() {}
+
+  static PoleZeroFilter* Create(const float* numerator_coefficients,
+                                size_t order_numerator,
+                                const float* denominator_coefficients,
+                                size_t order_denominator);
+
+  int Filter(const int16_t* in, size_t num_input_samples, float* output);
+
+ private:
+  PoleZeroFilter(const float* numerator_coefficients,
+                 size_t order_numerator,
+                 const float* denominator_coefficients,
+                 size_t order_denominator);
+
+  static const int kMaxFilterOrder = 24;
+
+  int16_t past_input_[kMaxFilterOrder * 2];
+  float past_output_[kMaxFilterOrder * 2];
+
+  float numerator_coefficients_[kMaxFilterOrder + 1];
+  float denominator_coefficients_[kMaxFilterOrder + 1];
+
+  size_t order_numerator_;
+  size_t order_denominator_;
+  size_t highest_order_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
diff --git a/webrtc/modules/audio_processing/vad/standalone_vad.cc b/webrtc/modules/audio_processing/vad/standalone_vad.cc
new file mode 100644
index 0000000..468b8ff
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/standalone_vad.cc
@@ -0,0 +1,93 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/vad/standalone_vad.h"
+
+#include <assert.h>
+
+#include "webrtc/modules/interface/module_common_types.h"
+#include "webrtc/modules/utility/interface/audio_frame_operations.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+static const int kDefaultStandaloneVadMode = 3;
+
+StandaloneVad::StandaloneVad(VadInst* vad)
+    : vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {
+}
+
+StandaloneVad::~StandaloneVad() {
+  WebRtcVad_Free(vad_);
+}
+
+StandaloneVad* StandaloneVad::Create() {
+  VadInst* vad = WebRtcVad_Create();
+  if (!vad)
+    return nullptr;
+
+  int err = WebRtcVad_Init(vad);
+  err |= WebRtcVad_set_mode(vad, kDefaultStandaloneVadMode);
+  if (err != 0) {
+    WebRtcVad_Free(vad);
+    return nullptr;
+  }
+  return new StandaloneVad(vad);
+}
+
+int StandaloneVad::AddAudio(const int16_t* data, size_t length) {
+  if (length != kLength10Ms)
+    return -1;
+
+  if (index_ + length > kLength10Ms * kMaxNum10msFrames)
+    // Reset the buffer if it's full.
+    // TODO(ajm): Instead, consider just processing every 10 ms frame. Then we
+    // can forgo the buffering.
+    index_ = 0;
+
+  memcpy(&buffer_[index_], data, sizeof(int16_t) * length);
+  index_ += length;
+  return 0;
+}
+
+int StandaloneVad::GetActivity(double* p, size_t length_p) {
+  if (index_ == 0)
+    return -1;
+
+  const size_t num_frames = index_ / kLength10Ms;
+  if (num_frames > length_p)
+    return -1;
+  assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0);
+
+  int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
+  if (activity < 0)
+    return -1;
+  else if (activity == 0)
+    p[0] = 0.01;  // Arbitrary, but small and non-zero.
+  else
+    p[0] = 0.5;  // 0.5 is a neutral value when combined with other
+                 // probabilities.
+  for (size_t n = 1; n < num_frames; n++)
+    p[n] = p[0];
+  // Reset the buffer to start from the beginning.
+  index_ = 0;
+  return activity;
+}
+
+int StandaloneVad::set_mode(int mode) {
+  if (mode < 0 || mode > 3)
+    return -1;
+  if (WebRtcVad_set_mode(vad_, mode) != 0)
+    return -1;
+
+  mode_ = mode;
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/vad/standalone_vad.h b/webrtc/modules/audio_processing/vad/standalone_vad.h
new file mode 100644
index 0000000..6a25424
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/standalone_vad.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ // + // Note that due to a high false-positive (VAD decision is active while the + // processed audio is just background noise) rate, stand-alone VAD is used as + // a one-sided indicator. The activity probability is 0.5 if the frame is + // classified as active, and the probability is 0.01 if the audio is + // classified as passive. In this way, when probabilities are combined, the + // effect of the stand-alone VAD is neutral if the input is classified as + // active. + int GetActivity(double* p, size_t length_p); + + // Expecting 10 ms of 16 kHz audio to be pushed in. + int AddAudio(const int16_t* data, size_t length); + + // Set aggressiveness of VAD, 0 is the least aggressive and 3 is the most + // aggressive mode. Returns -1 if the input is less than 0 or larger than 3, + // otherwise 0 is returned. + int set_mode(int mode); + // Get the agressiveness of the current VAD. + int mode() const { return mode_; } + + private: + explicit StandaloneVad(VadInst* vad); + + static const size_t kMaxNum10msFrames = 3; + + // TODO(turajs): Is there a way to use scoped-pointer here? + VadInst* vad_; + int16_t buffer_[kMaxNum10msFrames * kLength10Ms]; + size_t index_; + int mode_; +}; + +} // namespace webrtc + +#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_ diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc new file mode 100644 index 0000000..8535d1f --- /dev/null +++ b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" + +#include +#include + +#include "webrtc/common_audio/fft4g.h" +#include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h" +#include "webrtc/modules/audio_processing/vad/pitch_internal.h" +#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h" +extern "C" { +#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h" +#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h" +#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" +#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h" +} +#include "webrtc/modules/interface/module_common_types.h" + +namespace webrtc { + +// The following structures are declared anonymous in iSAC's structs.h. To +// forward declare them, we use this derived class trick. +struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {}; +struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {}; + +static const float kFrequencyResolution = + kSampleRateHz / static_cast(VadAudioProc::kDftSize); +static const int kSilenceRms = 5; + +// TODO(turajs): Make a Create or Init for VadAudioProc. +VadAudioProc::VadAudioProc() + : audio_buffer_(), + num_buffer_samples_(kNumPastSignalSamples), + log_old_gain_(-2), + old_lag_(50), // Arbitrary but valid as pitch-lag (in samples). 
+      pitch_analysis_handle_(new PitchAnalysisStruct),
+      pre_filter_handle_(new PreFiltBankstr),
+      high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator,
+                                               kFilterOrder,
+                                               kCoeffDenominator,
+                                               kFilterOrder)) {
+  static_assert(kNumPastSignalSamples + kNumSubframeSamples ==
+                    sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]),
+                "lpc analysis window incorrect size");
+  static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]),
+                "correlation weight incorrect size");
+
+  // TODO(turajs): Are we doing too much in the constructor?
+  float data[kDftSize];
+  // Run one FFT to initialize the work tables |ip_| and |w_fft_|.
+  ip_[0] = 0;
+  WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
+  // TODO(turajs): Need to initialize high-pass filter.
+
+  // Initialize iSAC components.
+  WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get());
+  WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
+}
+
+VadAudioProc::~VadAudioProc() {
+}
+
+void VadAudioProc::ResetBuffer() {
+  memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
+         sizeof(audio_buffer_[0]) * kNumPastSignalSamples);
+  num_buffer_samples_ = kNumPastSignalSamples;
+}
+
+int VadAudioProc::ExtractFeatures(const int16_t* frame,
+                                  size_t length,
+                                  AudioFeatures* features) {
+  features->num_frames = 0;
+  if (length != kNumSubframeSamples) {
+    return -1;
+  }
+
+  // High-pass filtering removes the DC component and very low-frequency
+  // content. Experience shows that this filtering improves voiced/non-voiced
+  // classification.
+  if (high_pass_filter_->Filter(frame, kNumSubframeSamples,
+                                &audio_buffer_[num_buffer_samples_]) != 0) {
+    return -1;
+  }
+
+  num_buffer_samples_ += kNumSubframeSamples;
+  if (num_buffer_samples_ < kBufferLength) {
+    return 0;
+  }
+  assert(num_buffer_samples_ == kBufferLength);
+  features->num_frames = kNum10msSubframes;
+  features->silence = false;
+
+  Rms(features->rms, kMaxNumFrames);
+  for (size_t i = 0; i < kNum10msSubframes; ++i) {
+    if (features->rms[i] < kSilenceRms) {
+      // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence.
+      // Bail out here instead.
+      features->silence = true;
+      ResetBuffer();
+      return 0;
+    }
+  }
+
+  PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz,
+                kMaxNumFrames);
+  FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames);
+  ResetBuffer();
+  return 0;
+}
+
+// Computes |kLpcOrder + 1| correlation coefficients.
+void VadAudioProc::SubframeCorrelation(double* corr,
+                                       size_t length_corr,
+                                       size_t subframe_index) {
+  assert(length_corr >= kLpcOrder + 1);
+  double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
+  size_t buffer_index = subframe_index * kNumSubframeSamples;
+
+  for (size_t n = 0; n < kNumSubframeSamples + kNumPastSignalSamples; n++)
+    windowed_audio[n] = audio_buffer_[buffer_index++] * kLpcAnalWin[n];
+
+  WebRtcIsac_AutoCorr(corr, windowed_audio,
+                      kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
+}
+
+// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms of
+// input. The analysis window is 15 ms long and is centered on the first half
+// of each 10 ms subframe, which is equivalent to computing LPC coefficients
+// for the first half of each 10 ms subframe. (A stand-alone sketch of the
+// window-then-autocorrelate step used here appears in the editor's note
+// below.)
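
Editor's note (illustration only, not part of the patch): the window-then-autocorrelate step performed by SubframeCorrelation() above, in stand-alone form. `WindowedAutoCorrelation` is a hypothetical helper; the real code applies `kLpcAnalWin` and calls iSAC's `WebRtcIsac_AutoCorr()`.

```cpp
#include <cstddef>
#include <vector>

// Computes corr[k] = sum_n (w[n]x[n]) * (w[n+k]x[n+k]) for k = 0..order,
// i.e. the autocorrelation of the windowed signal, as needed for LPC.
std::vector<double> WindowedAutoCorrelation(const std::vector<double>& x,
                                            const std::vector<double>& window,
                                            size_t order) {
  std::vector<double> windowed(x.size());
  for (size_t n = 0; n < x.size(); ++n)
    windowed[n] = x[n] * window[n];

  std::vector<double> corr(order + 1, 0.0);
  for (size_t k = 0; k <= order; ++k)
    for (size_t n = 0; n + k < windowed.size(); ++n)
      corr[k] += windowed[n] * windowed[n + k];
  return corr;
}
```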
+void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
+  assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
+  double corr[kLpcOrder + 1];
+  double reflec_coeff[kLpcOrder];
+  for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
+       i++, offset_lpc += kLpcOrder + 1) {
+    SubframeCorrelation(corr, kLpcOrder + 1, i);
+    corr[0] *= 1.0001;  // This makes Levinson-Durbin a bit more stable.
+    for (size_t k = 0; k < kLpcOrder + 1; k++) {
+      corr[k] *= kCorrWeight[k];
+    }
+    WebRtcIsac_LevDurb(&lpc[offset_lpc], reflec_coeff, corr, kLpcOrder);
+  }
+}
+
+// Fit a second-order curve to these 3 points and find the location of the
+// extremum. The points are inverted before curve fitting.
+static float QuadraticInterpolation(float prev_val,
+                                    float curr_val,
+                                    float next_val) {
+  // Doing the interpolation in |1 / A(z)|^2.
+  float fractional_index = 0;
+  next_val = 1.0f / next_val;
+  prev_val = 1.0f / prev_val;
+  curr_val = 1.0f / curr_val;
+
+  fractional_index =
+      -(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
+  assert(fabs(fractional_index) < 1);
+  return fractional_index;
+}
+
+// 1 / A(z), where A(z) is defined by |lpc|, is a model of the spectral
+// envelope of the input signal. A local maximum of the spectral envelope
+// corresponds to a local minimum of A(z). Searching for minima of A(z)
+// instead of maxima of 1 / A(z) saves one inversion. Furthermore, we find the
+// first local extremum of the magnitude squared, to save one square root.
+void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
+                                          size_t length_f_peak) {
+  assert(length_f_peak >= kNum10msSubframes);
+  double lpc[kNum10msSubframes * (kLpcOrder + 1)];
+  // For all sub-frames.
+  GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
+
+  const size_t kNumDftCoefficients = kDftSize / 2 + 1;
+  float data[kDftSize];
+
+  for (size_t i = 0; i < kNum10msSubframes; i++) {
+    // Convert to float with zero pad.
+    memset(data, 0, sizeof(data));
+    for (size_t n = 0; n < kLpcOrder + 1; n++) {
+      data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]);
+    }
+    // Transform to frequency domain.
+    WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_);
+
+    size_t index_peak = 0;
+    float prev_magn_sqr = data[0] * data[0];
+    float curr_magn_sqr = data[2] * data[2] + data[3] * data[3];
+    float next_magn_sqr;
+    bool found_peak = false;
+    for (size_t n = 2; n < kNumDftCoefficients - 1; n++) {
+      next_magn_sqr =
+          data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1];
+      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
+        found_peak = true;
+        index_peak = n - 1;
+        break;
+      }
+      prev_magn_sqr = curr_magn_sqr;
+      curr_magn_sqr = next_magn_sqr;
+    }
+    float fractional_index = 0;
+    if (!found_peak) {
+      // Checking if |kNumDftCoefficients - 1| is the local minimum.
+      next_magn_sqr = data[1] * data[1];
+      if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) {
+        index_peak = kNumDftCoefficients - 1;
+      }
+    } else {
+      // A peak was found; do a simple quadratic interpolation to get a more
+      // accurate estimate of the peak location.
+      fractional_index =
+          QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr);
+    }
+    f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution;
+  }
+}
+
+// Using iSAC functions to estimate pitch gains & lags.
+void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
+                                 double* pitch_lags_hz,
+                                 size_t length) {
+  // TODO(turajs): This can be "imported" from iSAC, as can the next two
+  // constants.
+  assert(length >= kNum10msSubframes);
+  const int kNumPitchSubframes = 4;
+  double gains[kNumPitchSubframes];
+  double lags[kNumPitchSubframes];
+
+  const int kNumSubbandFrameSamples = 240;
+  const int kNumLookaheadSamples = 24;
+
+  float lower[kNumSubbandFrameSamples];
+  float upper[kNumSubbandFrameSamples];
+  double lower_lookahead[kNumSubbandFrameSamples];
+  double upper_lookahead[kNumSubbandFrameSamples];
+  double lower_lookahead_pre_filter[kNumSubbandFrameSamples +
+                                    kNumLookaheadSamples];
+
+  // Split the signal into lower and upper bands.
+  WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower,
+                                 upper, lower_lookahead, upper_lookahead,
+                                 pre_filter_handle_.get());
+  WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter,
+                           pitch_analysis_handle_.get(), lags, gains);
+
+  // Lags are computed on the lower-band signal, whose sampling rate is half
+  // that of the input signal.
+  GetSubframesPitchParameters(
+      kSampleRateHz / 2, gains, lags, kNumPitchSubframes, kNum10msSubframes,
+      &log_old_gain_, &old_lag_, log_pitch_gains, pitch_lags_hz);
+}
+
+void VadAudioProc::Rms(double* rms, size_t length_rms) {
+  assert(length_rms >= kNum10msSubframes);
+  size_t offset = kNumPastSignalSamples;
+  for (size_t i = 0; i < kNum10msSubframes; i++) {
+    rms[i] = 0;
+    for (size_t n = 0; n < kNumSubframeSamples; n++, offset++)
+      rms[i] += audio_buffer_[offset] * audio_buffer_[offset];
+    rms[i] = sqrt(rms[i] / kNumSubframeSamples);
+  }
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc.h b/webrtc/modules/audio_processing/vad/vad_audio_proc.h
new file mode 100644
index 0000000..85500ae
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/vad_audio_proc.h
@@ -0,0 +1,89 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/modules/audio_processing/vad/common.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+class PoleZeroFilter;
+
+class VadAudioProc {
+ public:
+  // Forward declare iSAC structs.
+  struct PitchAnalysisStruct;
+  struct PreFiltBankstr;
+
+  VadAudioProc();
+  ~VadAudioProc();
+
+  int ExtractFeatures(const int16_t* audio_frame,
+                      size_t length,
+                      AudioFeatures* audio_features);
+
+  static const size_t kDftSize = 512;
+
+ private:
+  void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
+  void SubframeCorrelation(double* corr,
+                           size_t length_corr,
+                           size_t subframe_index);
+  void GetLpcPolynomials(double* lpc, size_t length_lpc);
+  void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak);
+  void Rms(double* rms, size_t length_rms);
+  void ResetBuffer();
+
+  // To compute the spectral peaks we perform LPC analysis to get the spectral
+  // envelope. For every 30 ms we compute three spectral peaks, and therefore
+  // three LPC analyses. LPC is computed over 15 ms of windowed audio; for
+  // every 10 ms subframe we need 5 ms of past signal to create the input of
+  // the LPC analysis.
+  static const size_t kNumPastSignalSamples =
+      static_cast<size_t>(kSampleRateHz / 200);
+
+  // TODO(turajs): maybe define this at a higher level (maybe enum) so that
+  // all the code recognizes it as "no-error."
+  static const int kNoError = 0;
+
+  static const size_t kNum10msSubframes = 3;
+  static const size_t kNumSubframeSamples =
+      static_cast<size_t>(kSampleRateHz / 100);
+  static const size_t kNumSamplesToProcess =
+      kNum10msSubframes *
+      kNumSubframeSamples;  // Samples in 30 ms @ given sampling rate.
+  static const size_t kBufferLength =
+      kNumPastSignalSamples + kNumSamplesToProcess;
+  static const size_t kIpLength = kDftSize >> 1;
+  static const size_t kWLength = kDftSize >> 1;
+
+  static const size_t kLpcOrder = 16;
+
+  size_t ip_[kIpLength];
+  float w_fft_[kWLength];
+
+  // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame).
+  float audio_buffer_[kBufferLength];
+  size_t num_buffer_samples_;
+
+  double log_old_gain_;
+  double old_lag_;
+
+  rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
+  rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_;
+  rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
diff --git a/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h b/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
new file mode 100644
index 0000000..45586b9
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
@@ -0,0 +1,94 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
+
+namespace webrtc {
+
+// These values should match the MATLAB counterparts for the unit tests to pass.
+static const double kCorrWeight[] = {1.000000, + 0.985000, + 0.970225, + 0.955672, + 0.941337, + 0.927217, + 0.913308, + 0.899609, + 0.886115, + 0.872823, + 0.859730, + 0.846834, + 0.834132, + 0.821620, + 0.809296, + 0.797156, + 0.785199}; + +static const double kLpcAnalWin[] = { + 0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639, + 0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883, + 0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547, + 0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438, + 0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222, + 0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713, + 0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164, + 0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546, + 0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810, + 0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148, + 0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233, + 0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442, + 0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069, + 0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512, + 0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447, + 0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979, + 0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773, + 0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158, + 0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215, + 0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840, + 0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778, + 0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639, + 0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889, + 0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814, + 0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465, + 0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574, + 0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451, + 0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858, + 0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862, + 0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664, + 0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416, + 0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008, + 0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853, + 0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642, + 0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093, + 0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687, + 0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387, + 0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358, + 0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670, + 0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000}; + +static const size_t kFilterOrder = 2; +static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, + -1.949650f, + 0.974827f}; +static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, + -1.971999f, + 0.972457f}; + +static_assert(kFilterOrder + 1 == + sizeof(kCoeffNumerator) / 
sizeof(kCoeffNumerator[0]),
+              "numerator coefficients incorrect size");
+static_assert(kFilterOrder + 1 ==
+                  sizeof(kCoeffDenominator) / sizeof(kCoeffDenominator[0]),
+              "denominator coefficients incorrect size");
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
diff --git a/webrtc/modules/audio_processing/vad/vad_circular_buffer.cc b/webrtc/modules/audio_processing/vad/vad_circular_buffer.cc
new file mode 100644
index 0000000..d337893
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/vad_circular_buffer.cc
@@ -0,0 +1,138 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
+
+#include <assert.h>
+#include <stdlib.h>
+
+namespace webrtc {
+
+VadCircularBuffer::VadCircularBuffer(int buffer_size)
+    : buffer_(new double[buffer_size]),
+      is_full_(false),
+      index_(0),
+      buffer_size_(buffer_size),
+      sum_(0) {
+}
+
+VadCircularBuffer::~VadCircularBuffer() {
+}
+
+void VadCircularBuffer::Reset() {
+  is_full_ = false;
+  index_ = 0;
+  sum_ = 0;
+}
+
+VadCircularBuffer* VadCircularBuffer::Create(int buffer_size) {
+  if (buffer_size <= 0)
+    return NULL;
+  return new VadCircularBuffer(buffer_size);
+}
+
+double VadCircularBuffer::Oldest() const {
+  if (!is_full_)
+    return buffer_[0];
+  else
+    return buffer_[index_];
+}
+
+double VadCircularBuffer::Mean() {
+  double m;
+  if (is_full_) {
+    m = sum_ / buffer_size_;
+  } else {
+    if (index_ > 0)
+      m = sum_ / index_;
+    else
+      m = 0;
+  }
+  return m;
+}
+
+void VadCircularBuffer::Insert(double value) {
+  if (is_full_) {
+    sum_ -= buffer_[index_];
+  }
+  sum_ += value;
+  buffer_[index_] = value;
+  index_++;
+  if (index_ >= buffer_size_) {
+    is_full_ = true;
+    index_ = 0;
+  }
+}
+
+int VadCircularBuffer::BufferLevel() {
+  if (is_full_)
+    return buffer_size_;
+  return index_;
+}
+
+int VadCircularBuffer::Get(int index, double* value) const {
+  int err = ConvertToLinearIndex(&index);
+  if (err < 0)
+    return -1;
+  *value = buffer_[index];
+  return 0;
+}
+
+int VadCircularBuffer::Set(int index, double value) {
+  int err = ConvertToLinearIndex(&index);
+  if (err < 0)
+    return -1;
+
+  sum_ -= buffer_[index];
+  buffer_[index] = value;
+  sum_ += value;
+  return 0;
+}
+
+int VadCircularBuffer::ConvertToLinearIndex(int* index) const {
+  if (*index < 0 || *index >= buffer_size_)
+    return -1;
+
+  if (!is_full_ && *index >= index_)
+    return -1;
+
+  *index = index_ - 1 - *index;
+  if (*index < 0)
+    *index += buffer_size_;
+  return 0;
+}
+
+int VadCircularBuffer::RemoveTransient(int width_threshold,
+                                       double val_threshold) {
+  if (!is_full_ && index_ < width_threshold + 2)
+    return 0;
+
+  int index_1 = 0;
+  int index_2 = width_threshold + 1;
+  double v = 0;
+  if (Get(index_1, &v) < 0)
+    return -1;
+  if (v < val_threshold) {
+    Set(index_1, 0);
+    int index;
+    for (index = index_2; index > index_1; index--) {
+      if (Get(index, &v) < 0)
+        return -1;
+      if (v < val_threshold)
+        break;
+    }
+    for (; index > index_1; index--) {
+      if (Set(index, 0.0) < 0)
+        return -1;
+    }
+  }
+  return 0;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/modules/audio_processing/vad/vad_circular_buffer.h b/webrtc/modules/audio_processing/vad/vad_circular_buffer.h
new file mode 100644
index 0000000..5238f77
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/vad_circular_buffer.h
@@ -0,0 +1,69 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
+
+#include "webrtc/base/scoped_ptr.h"
+
+namespace webrtc {
+
+// A circular buffer tailored to the needs of this project. It stores the
+// last K samples of the input and keeps track of their mean.
+//
+// It is used in class "PitchBasedVad" to keep track of posterior
+// probabilities in the past few seconds. The posterior probabilities are used
+// to recursively update prior probabilities.
+class VadCircularBuffer {
+ public:
+  static VadCircularBuffer* Create(int buffer_size);
+  ~VadCircularBuffer();
+
+  // If the buffer has wrapped around.
+  bool is_full() const { return is_full_; }
+  // Get the oldest entry in the buffer.
+  double Oldest() const;
+  // Insert a new value into the buffer.
+  void Insert(double value);
+  // Reset the buffer, forget the past, start fresh.
+  void Reset();
+
+  // The mean value of the elements in the buffer. The return value is zero if
+  // the buffer is empty, i.e. no value has been inserted.
+  double Mean();
+  // Remove transients. If the values exceed |val_threshold| for a period
+  // shorter than or equal to |width_threshold|, then that period is considered
+  // transient and set to zero.
+  int RemoveTransient(int width_threshold, double val_threshold);
+
+ private:
+  explicit VadCircularBuffer(int buffer_size);
+  // Get previous values. |index = 0| corresponds to the most recent
+  // insertion. |index = 1| is the one before the most recent insertion, and
+  // so on.
+  int Get(int index, double* value) const;
+  // Set a given position to |value|. |index| is interpreted as above.
+  int Set(int index, double value);
+  // Return the number of valid elements in the buffer.
+  int BufferLevel();
+
+  // Convert an index with the above interpretation to the corresponding
+  // linear index in the underlying array.
+  int ConvertToLinearIndex(int* index) const;
+
+  rtc::scoped_ptr<double[]> buffer_;
+  bool is_full_;
+  int index_;
+  int buffer_size_;
+  double sum_;
+};
+
+}  // namespace webrtc
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
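As a behavioral sketch of RemoveTransient() (illustrative only; the Demo
function and the chosen thresholds are not part of the patch):

    #include "webrtc/base/scoped_ptr.h"
    #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"

    void Demo() {
      rtc::scoped_ptr<webrtc::VadCircularBuffer> buf(
          webrtc::VadCircularBuffer::Create(10));
      for (int i = 0; i < 8; ++i)
        buf->Insert(0.0);  // Mostly low posteriors...
      buf->Insert(5.0);    // ...then a lone spike...
      buf->Insert(0.0);    // ...bounded by low values on both sides.
      // Zero any excursion above 1.0 lasting at most 2 insertions.
      buf->RemoveTransient(2, 1.0);
      double mean = buf->Mean();  // Now 0.0; the spike no longer contributes.
      static_cast<void>(mean);
    }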
diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc
new file mode 100644
index 0000000..ef56a35
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc
@@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
+
+#include <algorithm>
+
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+namespace {
+
+const size_t kMaxLength = 320;
+const int kNumChannels = 1;
+
+const double kDefaultVoiceValue = 1.0;
+const double kNeutralProbability = 0.5;
+const double kLowProbability = 0.01;
+
+}  // namespace
+
+VoiceActivityDetector::VoiceActivityDetector()
+    : last_voice_probability_(kDefaultVoiceValue),
+      standalone_vad_(StandaloneVad::Create()) {
+}
+
+// Because ISAC has a different chunk length, it updates
+// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new
+// data. Otherwise it clears them.
+void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
+                                         size_t length,
+                                         int sample_rate_hz) {
+  RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100);
+  RTC_DCHECK_LE(length, kMaxLength);
+  // Resample to the required rate.
+  const int16_t* resampled_ptr = audio;
+  if (sample_rate_hz != kSampleRateHz) {
+    RTC_CHECK_EQ(
+        resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels),
+        0);
+    resampler_.Push(audio, length, resampled_, kLength10Ms, length);
+    resampled_ptr = resampled_;
+  }
+  RTC_DCHECK_EQ(length, kLength10Ms);
+
+  // Each chunk needs to be passed into |standalone_vad_|, because internally
+  // it buffers the audio and processes it all at once when GetActivity() is
+  // called.
+  RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
+
+  audio_processing_.ExtractFeatures(resampled_ptr, length, &features_);
+
+  chunkwise_voice_probabilities_.resize(features_.num_frames);
+  chunkwise_rms_.resize(features_.num_frames);
+  std::copy(features_.rms, features_.rms + chunkwise_rms_.size(),
+            chunkwise_rms_.begin());
+  if (features_.num_frames > 0) {
+    if (features_.silence) {
+      // The other features are invalid, so set the voice probabilities to an
+      // arbitrary low value.
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kLowProbability);
+    } else {
+      std::fill(chunkwise_voice_probabilities_.begin(),
+                chunkwise_voice_probabilities_.end(), kNeutralProbability);
+      RTC_CHECK_GE(
+          standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0],
+                                       chunkwise_voice_probabilities_.size()),
+          0);
+      RTC_CHECK_GE(pitch_based_vad_.VoicingProbability(
+                       features_, &chunkwise_voice_probabilities_[0]),
+                   0);
+    }
+    last_voice_probability_ = chunkwise_voice_probabilities_.back();
+  }
+}
+
+}  // namespace webrtc
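A usage sketch for the detector (AnalyzeStream and the 16 kHz input rate are
assumptions for illustration, not part of the patch):

    #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"

    // Feed 10 ms mono chunks; probabilities arrive in batches because the
    // pitch-based part operates on larger internal frames.
    void AnalyzeStream(const int16_t* pcm, size_t num_chunks) {
      webrtc::VoiceActivityDetector vad;
      const int kRateHz = 16000;                   // Up to 32 kHz is supported.
      const size_t kChunkSamples = kRateHz / 100;  // 160 samples per 10 ms.
      for (size_t i = 0; i < num_chunks; ++i) {
        vad.ProcessChunk(pcm + i * kChunkSamples, kChunkSamples, kRateHz);
        // May be empty for some chunks; values catch up in later batches.
        for (double p : vad.chunkwise_voice_probabilities()) {
          static_cast<void>(p);  // Consume the per-chunk voice probability.
        }
      }
    }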
diff --git a/webrtc/modules/audio_processing/vad/voice_activity_detector.h b/webrtc/modules/audio_processing/vad/voice_activity_detector.h
new file mode 100644
index 0000000..e2dcf02
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
+
+#include <vector>
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/common_audio/resampler/include/resampler.h"
+#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
+#include "webrtc/modules/audio_processing/vad/common.h"
+#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
+#include "webrtc/modules/audio_processing/vad/standalone_vad.h"
+
+namespace webrtc {
+
+// A Voice Activity Detector (VAD) that combines the voice probabilities from
+// the StandaloneVad and the PitchBasedVad to get a more robust estimation.
+class VoiceActivityDetector {
+ public:
+  VoiceActivityDetector();
+
+  // Processes each audio chunk and estimates the voice probability. The
+  // maximum supported sample rate is 32 kHz.
+  // TODO(aluebs): Change |length| to size_t.
+  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
+
+  // Returns a vector of voice probabilities for each chunk. It can be empty
+  // for some chunks, but it catches up afterwards, returning multiple values
+  // at once.
+  const std::vector<double>& chunkwise_voice_probabilities() const {
+    return chunkwise_voice_probabilities_;
+  }
+
+  // Returns a vector of RMS values for each chunk. It has the same length as
+  // chunkwise_voice_probabilities().
+  const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; }
+
+  // Returns the last voice probability, regardless of the internal
+  // implementation, although it has a few chunks of delay.
+  float last_voice_probability() const { return last_voice_probability_; }
+
+ private:
+  // TODO(aluebs): Change these to float.
+  std::vector<double> chunkwise_voice_probabilities_;
+  std::vector<double> chunkwise_rms_;
+
+  float last_voice_probability_;
+
+  Resampler resampler_;
+  VadAudioProc audio_processing_;
+
+  rtc::scoped_ptr<StandaloneVad> standalone_vad_;
+  PitchBasedVad pitch_based_vad_;
+
+  int16_t resampled_[kLength10Ms];
+  AudioFeatures features_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
diff --git a/webrtc/modules/audio_processing/vad/voice_gmm_tables.h b/webrtc/modules/audio_processing/vad/voice_gmm_tables.h
new file mode 100644
index 0000000..2f247c3
--- /dev/null
+++ b/webrtc/modules/audio_processing/vad/voice_gmm_tables.h
@@ -0,0 +1,85 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// GMM tables for active segments. Generated by MakeGmmTables.m.
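+// (The 12 mixtures (kVoiceGmmNumMixtures) model 3 features (kVoiceGmmDim) --
+// apparently the log pitch gain, pitch lag and spectral peak produced by
+// VadAudioProc -- with kVoiceGmmWeights seemingly stored in the log domain,
+// matching the log-likelihood evaluation in PitchBasedVad.)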
+ +#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ +#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ + +static const int kVoiceGmmNumMixtures = 12; +static const int kVoiceGmmDim = 3; + +static const double + kVoiceGmmCovarInverse[kVoiceGmmNumMixtures][kVoiceGmmDim][kVoiceGmmDim] = { + {{1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03}, + {-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04}, + {4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}}, + {{6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03}, + {-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05}, + {-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}}, + {{9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03}, + {-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05}, + {-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}}, + {{3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02}, + {-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05}, + {-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}}, + {{1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02}, + {-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05}, + {-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}}, + {{1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02}, + {-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06}, + {-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}}, + {{8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02}, + {-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06}, + {-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}}, + {{2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04}, + {-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06}, + {7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}}, + {{3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02}, + {1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05}, + {-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}}, + {{6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04}, + {-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06}, + {-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}}, + {{2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03}, + {-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05}, + {-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}}, + {{1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02}, + {-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05}, + {-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}}; + +static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = { + {-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02}, + {-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02}, + {-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02}, + {-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02}, + {-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02}, + {-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02}, + {-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02}, + {-2.06465957063057e+00, 6.33385049870607e+02, 
2.32758546796149e+02},
+    {-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02},
+    {-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02},
+    {-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02},
+    {-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
+
+static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
+    -1.39789694361035e+01,
+    -1.19527720202104e+01,
+    -1.32396317929055e+01,
+    -1.09436815209238e+01,
+    -1.13440027478149e+01,
+    -1.12200721834504e+01,
+    -1.02537324043693e+01,
+    -1.60789861938302e+01,
+    -1.03394494048344e+01,
+    -1.83207938586818e+01,
+    -1.31186044948288e+01,
+    -9.52479998673554e+00};
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
diff --git a/webrtc/modules/audio_processing/voice_detection_impl.cc b/webrtc/modules/audio_processing/voice_detection_impl.cc
index 49aac2e..710df42 100644
--- a/webrtc/modules/audio_processing/voice_detection_impl.cc
+++ b/webrtc/modules/audio_processing/voice_detection_impl.cc
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@@ -8,45 +8,40 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#include "voice_detection_impl.h"
+#include "webrtc/modules/audio_processing/voice_detection_impl.h"

-#include <cassert>
+#include <assert.h>

-#include "critical_section_wrapper.h"
-#include "webrtc_vad.h"
-
-#include "audio_processing_impl.h"
-#include "audio_buffer.h"
+#include "webrtc/common_audio/vad/include/webrtc_vad.h"
+#include "webrtc/modules/audio_processing/audio_buffer.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"

 namespace webrtc {

 typedef VadInst Handle;

 namespace {
-WebRtc_Word16 MapSetting(VoiceDetection::Likelihood likelihood) {
+int MapSetting(VoiceDetection::Likelihood likelihood) {
   switch (likelihood) {
     case VoiceDetection::kVeryLowLikelihood:
       return 3;
-      break;
     case VoiceDetection::kLowLikelihood:
       return 2;
-      break;
     case VoiceDetection::kModerateLikelihood:
       return 1;
-      break;
     case VoiceDetection::kHighLikelihood:
       return 0;
-      break;
-    default:
-      return -1;
   }
+  assert(false);
+  return -1;
 }
 }  // namespace

-
-VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessingImpl* apm)
-    : ProcessingComponent(apm),
+VoiceDetectionImpl::VoiceDetectionImpl(const AudioProcessing* apm,
+                                       CriticalSectionWrapper* crit)
+    : ProcessingComponent(),
+      apm_(apm),
+      crit_(crit),
       stream_has_voice_(false),
       using_external_vad_(false),
       likelihood_(kLowLikelihood),
@@ -64,19 +59,13 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
     using_external_vad_ = false;
     return apm_->kNoError;
   }
-  assert(audio->samples_per_split_channel() <= 160);
-
-  WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
-  if (audio->num_channels() > 1) {
-    audio->CopyAndMixLowPass(1);
-    mixed_data = audio->mixed_low_pass_data(0);
-  }
+  assert(audio->num_frames_per_band() <= 160);

   // TODO(ajm): concatenate data in frame buffer here.
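+  // The handle below is the plain C VAD from webrtc_vad.h, managed through
+  // the ProcessingComponent hooks at the bottom of this file. A rough sketch
+  // of the lifecycle (the Init/set_mode pairing is presumed, not shown here):
+  //   VadInst* vad = WebRtcVad_Create();                  // CreateHandle()
+  //   WebRtcVad_Init(vad);                                // InitializeHandle(), presumably
+  //   WebRtcVad_set_mode(vad, MapSetting(likelihood_));   // ConfigureHandle(), presumably
+  //   int r = WebRtcVad_Process(vad, rate_hz, frame, length);
+  //   WebRtcVad_Free(vad);                                // DestroyHandle()
+  // r is 1 for voice, 0 for no voice and -1 on error, matching the checks
+  // that follow.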
   int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
-                                  apm_->split_sample_rate_hz(),
-                                  mixed_data,
+                                  apm_->proc_split_sample_rate_hz(),
+                                  audio->mixed_low_pass_data(),
                                   frame_size_samples_);
   if (vad_ret == 0) {
     stream_has_voice_ = false;
@@ -92,7 +81,7 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
 }

 int VoiceDetectionImpl::Enable(bool enable) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   return EnableComponent(enable);
 }

@@ -113,7 +102,7 @@ bool VoiceDetectionImpl::stream_has_voice() const {
 }

 int VoiceDetectionImpl::set_likelihood(VoiceDetection::Likelihood likelihood) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   if (MapSetting(likelihood) == -1) {
     return apm_->kBadParameterError;
   }
@@ -127,7 +116,7 @@ VoiceDetection::Likelihood VoiceDetectionImpl::likelihood() const {
 }

 int VoiceDetectionImpl::set_frame_size_ms(int size) {
-  CriticalSectionScoped crit_scoped(*apm_->crit());
+  CriticalSectionScoped crit_scoped(crit_);
   assert(size == 10);  // TODO(ajm): remove when supported.
   if (size != 10 &&
       size != 20 &&
@@ -151,34 +140,19 @@ int VoiceDetectionImpl::Initialize() {
   }

   using_external_vad_ = false;
-  frame_size_samples_ = frame_size_ms_ * (apm_->split_sample_rate_hz() / 1000);
+  frame_size_samples_ = static_cast<size_t>(
+      frame_size_ms_ * apm_->proc_split_sample_rate_hz() / 1000);
   // TODO(ajm): initialize frame buffer here.

   return apm_->kNoError;
 }

-int VoiceDetectionImpl::get_version(char* version,
-                                    int version_len_bytes) const {
-  if (WebRtcVad_get_version(version, version_len_bytes) != 0) {
-    return apm_->kBadParameterError;
-  }
-
-  return apm_->kNoError;
-}
-
 void* VoiceDetectionImpl::CreateHandle() const {
-  Handle* handle = NULL;
-  if (WebRtcVad_Create(&handle) != apm_->kNoError) {
-    handle = NULL;
-  } else {
-    assert(handle != NULL);
-  }
-
-  return handle;
+  return WebRtcVad_Create();
 }

-int VoiceDetectionImpl::DestroyHandle(void* handle) const {
-  return WebRtcVad_Free(static_cast<Handle*>(handle));
+void VoiceDetectionImpl::DestroyHandle(void* handle) const {
+  WebRtcVad_Free(static_cast<Handle*>(handle));
 }

 int VoiceDetectionImpl::InitializeHandle(void* handle) const {
diff --git a/webrtc/modules/audio_processing/voice_detection_impl.h b/webrtc/modules/audio_processing/voice_detection_impl.h
index ef212d1..b188083 100644
--- a/webrtc/modules/audio_processing/voice_detection_impl.h
+++ b/webrtc/modules/audio_processing/voice_detection_impl.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@@ -8,56 +8,57 @@
 *  be found in the AUTHORS file in the root of the source tree.
 */

-#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_VOICE_DETECTION_IMPL_H_
-#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_VOICE_DETECTION_IMPL_H_
+#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
+#define WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_

-#include "audio_processing.h"
-#include "processing_component.h"
+#include "webrtc/modules/audio_processing/include/audio_processing.h"
+#include "webrtc/modules/audio_processing/processing_component.h"

 namespace webrtc {
-class AudioProcessingImpl;
+
 class AudioBuffer;
+class CriticalSectionWrapper;

 class VoiceDetectionImpl : public VoiceDetection,
                            public ProcessingComponent {
  public:
-  explicit VoiceDetectionImpl(const AudioProcessingImpl* apm);
+  VoiceDetectionImpl(const AudioProcessing* apm, CriticalSectionWrapper* crit);
   virtual ~VoiceDetectionImpl();

   int ProcessCaptureAudio(AudioBuffer* audio);

   // VoiceDetection implementation.
-  virtual bool is_enabled() const;
+  bool is_enabled() const override;

   // ProcessingComponent implementation.
-  virtual int Initialize();
-  virtual int get_version(char* version, int version_len_bytes) const;
+  int Initialize() override;

 private:
   // VoiceDetection implementation.
-  virtual int Enable(bool enable);
-  virtual int set_stream_has_voice(bool has_voice);
-  virtual bool stream_has_voice() const;
-  virtual int set_likelihood(Likelihood likelihood);
-  virtual Likelihood likelihood() const;
-  virtual int set_frame_size_ms(int size);
-  virtual int frame_size_ms() const;
+  int Enable(bool enable) override;
+  int set_stream_has_voice(bool has_voice) override;
+  bool stream_has_voice() const override;
+  int set_likelihood(Likelihood likelihood) override;
+  Likelihood likelihood() const override;
+  int set_frame_size_ms(int size) override;
+  int frame_size_ms() const override;

   // ProcessingComponent implementation.
-  virtual void* CreateHandle() const;
-  virtual int InitializeHandle(void* handle) const;
-  virtual int ConfigureHandle(void* handle) const;
-  virtual int DestroyHandle(void* handle) const;
-  virtual int num_handles_required() const;
-  virtual int GetHandleError(void* handle) const;
+  void* CreateHandle() const override;
+  int InitializeHandle(void* handle) const override;
+  int ConfigureHandle(void* handle) const override;
+  void DestroyHandle(void* handle) const override;
+  int num_handles_required() const override;
+  int GetHandleError(void* handle) const override;

-  const AudioProcessingImpl* apm_;
+  const AudioProcessing* apm_;
+  CriticalSectionWrapper* crit_;
   bool stream_has_voice_;
   bool using_external_vad_;
   Likelihood likelihood_;
   int frame_size_ms_;
-  int frame_size_samples_;
+  size_t frame_size_samples_;
 };

 }  // namespace webrtc

-#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_VOICE_DETECTION_IMPL_H_
+#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VOICE_DETECTION_IMPL_H_
diff --git a/webrtc/modules/interface/module.h b/webrtc/modules/interface/module.h
index aae9322..ffd3065 100644
--- a/webrtc/modules/interface/module.h
+++ b/webrtc/modules/interface/module.h
@@ -1,5 +1,5 @@
 /*
- *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
@@ -11,58 +11,69 @@
 #ifndef MODULES_INTERFACE_MODULE_H_
 #define MODULES_INTERFACE_MODULE_H_

-#include <assert.h>
-
-#include "typedefs.h"
+#include "webrtc/typedefs.h"

 namespace webrtc {

+class ProcessThread;
+
 class Module {
  public:
-  // Returns version of the module and its components.
-  virtual int32_t Version(char* version,
-                          uint32_t& remaining_buffer_in_bytes,
-                          uint32_t& position) const = 0;
-
-  // Change the unique identifier of this object.
-  virtual int32_t ChangeUniqueId(const int32_t id) = 0;
-
-  // Returns the number of milliseconds until the module want a worker
+  // Returns the number of milliseconds until the module wants a worker
   // thread to call Process.
-  virtual int32_t TimeUntilNextProcess() = 0;
+  // This method is called on the same worker thread as Process will
+  // be called on.
+  // TODO(tommi): Almost all implementations of this function need to know
+  // the current tick count.  Consider passing it as an argument.  It could
+  // also improve the accuracy of when the next callback occurs since the
+  // thread that calls Process() will also have its tick count reference
+  // which might not match with what the implementations use.
+  virtual int64_t TimeUntilNextProcess() = 0;

   // Process any pending tasks such as timeouts.
+  // Called on a worker thread.
   virtual int32_t Process() = 0;

+  // This method is called when the module is attached to a *running* process
+  // thread or detached from one. In the case of detaching, |process_thread|
+  // will be nullptr.
+  //
+  // This method will be called in the following cases:
+  //
+  // * Non-null process_thread:
+  //   * ProcessThread::RegisterModule() is called while the thread is running.
+  //   * ProcessThread::Start() is called and RegisterModule has previously
+  //     been called.  The thread will be started immediately after notifying
+  //     all modules.
+  //
+  // * Null process_thread:
+  //   * ProcessThread::DeRegisterModule() is called while the thread is
+  //     running.
+  //   * ProcessThread::Stop() was called and the thread has been stopped.
+  //
+  // NOTE: This method is not called from the worker thread itself, but from
+  //       the thread that registers/deregisters the module or calls Start/Stop.
+  virtual void ProcessThreadAttached(ProcessThread* process_thread) {}
+
  protected:
   virtual ~Module() {}
 };

-// Reference counted version of the module interface.
+// Reference counted version of the Module interface.
 class RefCountedModule : public Module {
  public:
   // Increase the reference count by one.
   // Returns the incremented reference count.
-  // TODO(perkj): Make this pure virtual when Chromium have implemented
-  // reference counting ADM and Video capture module.
-  virtual int32_t AddRef() {
-    assert(!"Not implemented.");
-    return 1;
-  }
+  virtual int32_t AddRef() const = 0;

   // Decrease the reference count by one.
   // Returns the decreased reference count.
   // Returns 0 if the last reference was just released.
-  // When the reference count reach 0 the object will self-destruct.
-  // TODO(perkj): Make this pure virtual when Chromium have implemented
-  // reference counting ADM and Video capture module.
-  virtual int32_t Release() {
-    assert(!"Not implemented.");
-    return 1;
-  }
+  // When the reference count reaches 0 the object will self-destruct.
+  virtual int32_t Release() const = 0;

  protected:
-  virtual ~RefCountedModule() {}
+  ~RefCountedModule() override = default;
 };

 }  // namespace webrtc
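A minimal sketch of a module written against the revised interface (the
PeriodicTask name and the 5 ms interval are invented for illustration):

    #include "webrtc/modules/interface/module.h"

    class PeriodicTask : public webrtc::Module {
     public:
      // Runs on the worker thread; ask to be called again in 5 ms.
      int64_t TimeUntilNextProcess() override { return 5; }
      int32_t Process() override {
        // ... periodic work goes here ...
        return 0;
      }
      // Runs on the registering/Start()/Stop() thread, not the worker thread.
      void ProcessThreadAttached(
          webrtc::ProcessThread* process_thread) override {
        process_thread_ = process_thread;  // nullptr means detached.
      }

     private:
      webrtc::ProcessThread* process_thread_ = nullptr;
    };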
diff --git a/webrtc/modules/interface/module_common_types.h b/webrtc/modules/interface/module_common_types.h
index 99e1b68..67019ca 100644
--- a/webrtc/modules/interface/module_common_types.h
+++ b/webrtc/modules/interface/module_common_types.h
@@ -1,1034 +1,810 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
 #ifndef MODULE_COMMON_TYPES_H
 #define MODULE_COMMON_TYPES_H

-#include <string.h>  // memcpy
 #include <assert.h>
+#include <string.h>  // memcpy

-#include "typedefs.h"
-#include "common_types.h"
+#include <algorithm>
+#include <limits>

-#ifdef _WIN32
-  #pragma warning(disable:4351)  // remove warning "new behavior: elements of array
-                                 // 'array' will be default initialized"
-#endif
+#include "webrtc/base/constructormagic.h"
+#include "webrtc/common_types.h"
+#include "webrtc/common_video/rotation.h"
+#include "webrtc/typedefs.h"

-namespace webrtc
-{
-struct RTPHeader
-{
-  bool markerBit;
-  WebRtc_UWord8 payloadType;
-  WebRtc_UWord16 sequenceNumber;
-  WebRtc_UWord32 timestamp;
-  WebRtc_UWord32 ssrc;
-  WebRtc_UWord8 numCSRCs;
-  WebRtc_UWord32 arrOfCSRCs[kRtpCsrcSize];
-  WebRtc_UWord8 paddingLength;
-  WebRtc_UWord16 headerLength;
+namespace webrtc {
+
+struct RTPAudioHeader {
+  uint8_t numEnergy;                  // number of valid entries in arrOfEnergy
+  uint8_t arrOfEnergy[kRtpCsrcSize];  // one energy byte (0-9) per channel
+  bool isCNG;                         // is this CNG
+  uint8_t channel;                    // number of channels 2 = stereo
 };

-struct RTPAudioHeader
-{
-  WebRtc_UWord8 numEnergy;                  // number of valid entries in arrOfEnergy
-  WebRtc_UWord8 arrOfEnergy[kRtpCsrcSize];  // one energy byte (0-9) per channel
-  bool isCNG;                               // is this CNG
-  WebRtc_UWord8 channel;                    // number of channels 2 = stereo
+const int16_t kNoPictureId = -1;
+const int16_t kMaxOneBytePictureId = 0x7F;    // 7 bits
+const int16_t kMaxTwoBytePictureId = 0x7FFF;  // 15 bits
+const int16_t kNoTl0PicIdx = -1;
+const uint8_t kNoTemporalIdx = 0xFF;
+const uint8_t kNoSpatialIdx = 0xFF;
+const uint8_t kNoGofIdx = 0xFF;
+const size_t kMaxVp9RefPics = 3;
+const size_t kMaxVp9FramesInGof = 16;
+const size_t kMaxVp9NumberOfSpatialLayers = 8;
+const int kNoKeyIdx = -1;
+
+struct RTPVideoHeaderVP8 {
+  void InitRTPVideoHeaderVP8() {
+    nonReference = false;
+    pictureId = kNoPictureId;
+    tl0PicIdx = kNoTl0PicIdx;
+    temporalIdx = kNoTemporalIdx;
+    layerSync = false;
+    keyIdx = kNoKeyIdx;
+    partitionId = 0;
+    beginningOfPartition = false;
+  }
+
+  bool nonReference;          // Frame is discardable.
+  int16_t pictureId;          // Picture ID index, 15 bits;
+                              // kNoPictureId if PictureID does not exist.
+  int16_t tl0PicIdx;          // TL0PIC_IDX, 8 bits;
+                              // kNoTl0PicIdx means no value provided.
+  uint8_t temporalIdx;        // Temporal layer index, or kNoTemporalIdx.
+  bool layerSync;             // This frame is a layer sync frame.
+                              // Disabled if temporalIdx == kNoTemporalIdx.
+  int keyIdx;                 // 5 bits; kNoKeyIdx means not used.
+  int partitionId;            // VP8 partition ID
+  bool beginningOfPartition;  // True if this packet is the first
+                              // in a VP8 partition.
Otherwise false }; -struct RTPVideoHeaderH263 -{ - void InitRTPVideoHeaderH263() {}; - bool independentlyDecodable; // H.263-1998 if no P bit it's not independently decodable - bool bits; // H.263 mode B, Xor the lasy byte of previus packet with the - // first byte of this packet +enum TemporalStructureMode { + kTemporalStructureMode1, // 1 temporal layer structure - i.e., IPPP... + kTemporalStructureMode2, // 2 temporal layers 0-1-0-1... + kTemporalStructureMode3 // 3 temporal layers 0-2-1-2-0-2-1-2... }; -enum {kNoPictureId = -1}; -enum {kNoTl0PicIdx = -1}; -enum {kNoTemporalIdx = -1}; -enum {kNoSimulcastIdx = 0}; +struct GofInfoVP9 { + void SetGofInfoVP9(TemporalStructureMode tm) { + switch (tm) { + case kTemporalStructureMode1: + num_frames_in_gof = 1; + temporal_idx[0] = 0; + temporal_up_switch[0] = false; + num_ref_pics[0] = 1; + pid_diff[0][0] = 1; + break; + case kTemporalStructureMode2: + num_frames_in_gof = 2; + temporal_idx[0] = 0; + temporal_up_switch[0] = false; + num_ref_pics[0] = 1; + pid_diff[0][0] = 2; -struct RTPVideoHeaderVP8 -{ - void InitRTPVideoHeaderVP8() - { - nonReference = false; - pictureId = kNoPictureId; - tl0PicIdx = kNoTl0PicIdx; - temporalIdx = kNoTemporalIdx; - partitionId = 0; - beginningOfPartition = false; + temporal_idx[1] = 1; + temporal_up_switch[1] = true; + num_ref_pics[1] = 1; + pid_diff[1][0] = 1; + break; + case kTemporalStructureMode3: + num_frames_in_gof = 4; + temporal_idx[0] = 0; + temporal_up_switch[0] = false; + num_ref_pics[0] = 1; + pid_diff[0][0] = 4; + + temporal_idx[1] = 2; + temporal_up_switch[1] = true; + num_ref_pics[1] = 1; + pid_diff[1][0] = 1; + + temporal_idx[2] = 1; + temporal_up_switch[2] = true; + num_ref_pics[2] = 1; + pid_diff[2][0] = 2; + + temporal_idx[3] = 2; + temporal_up_switch[3] = false; + num_ref_pics[3] = 2; + pid_diff[3][0] = 1; + pid_diff[3][1] = 2; + break; + default: + assert(false); } + } - bool nonReference; // Frame is discardable. - WebRtc_Word16 pictureId; // Picture ID index, 15 bits; - // kNoPictureId if PictureID does not exist. - WebRtc_Word16 tl0PicIdx; // TL0PIC_IDX, 8 bits; - // kNoTl0PicIdx means no value provided. - WebRtc_Word8 temporalIdx; // Temporal layer index, or kNoTemporalIdx. - int partitionId; // VP8 partition ID - bool beginningOfPartition; // True if this packet is the first - // in a VP8 partition. 
Otherwise false -}; -union RTPVideoTypeHeader -{ - RTPVideoHeaderH263 H263; - RTPVideoHeaderVP8 VP8; + void CopyGofInfoVP9(const GofInfoVP9& src) { + num_frames_in_gof = src.num_frames_in_gof; + for (size_t i = 0; i < num_frames_in_gof; ++i) { + temporal_idx[i] = src.temporal_idx[i]; + temporal_up_switch[i] = src.temporal_up_switch[i]; + num_ref_pics[i] = src.num_ref_pics[i]; + for (size_t r = 0; r < num_ref_pics[i]; ++r) { + pid_diff[i][r] = src.pid_diff[i][r]; + } + } + } + + size_t num_frames_in_gof; + uint8_t temporal_idx[kMaxVp9FramesInGof]; + bool temporal_up_switch[kMaxVp9FramesInGof]; + size_t num_ref_pics[kMaxVp9FramesInGof]; + int16_t pid_diff[kMaxVp9FramesInGof][kMaxVp9RefPics]; }; -enum RTPVideoCodecTypes -{ - kRTPVideoGeneric = 0, - kRTPVideoH263 = 1, - kRTPVideoMPEG4 = 5, - kRTPVideoVP8 = 8, - kRTPVideoNoVideo = 10, - kRTPVideoFEC = 11, - kRTPVideoI420 = 12 -}; -struct RTPVideoHeader -{ - WebRtc_UWord16 width; // size - WebRtc_UWord16 height; +struct RTPVideoHeaderVP9 { + void InitRTPVideoHeaderVP9() { + inter_pic_predicted = false; + flexible_mode = false; + beginning_of_frame = false; + end_of_frame = false; + ss_data_available = false; + picture_id = kNoPictureId; + max_picture_id = kMaxTwoBytePictureId; + tl0_pic_idx = kNoTl0PicIdx; + temporal_idx = kNoTemporalIdx; + spatial_idx = kNoSpatialIdx; + temporal_up_switch = false; + inter_layer_predicted = false; + gof_idx = kNoGofIdx; + num_ref_pics = 0; + num_spatial_layers = 1; + } - bool isFirstPacket; // first packet in frame - WebRtc_UWord8 simulcastIdx; // Index if the simulcast encoder creating - // this frame, 0 if not using simulcast. - RTPVideoCodecTypes codec; - RTPVideoTypeHeader codecHeader; -}; -union RTPTypeHeader -{ - RTPAudioHeader Audio; - RTPVideoHeader Video; + bool inter_pic_predicted; // This layer frame is dependent on previously + // coded frame(s). + bool flexible_mode; // This frame is in flexible mode. + bool beginning_of_frame; // True if this packet is the first in a VP9 layer + // frame. + bool end_of_frame; // True if this packet is the last in a VP9 layer frame. + bool ss_data_available; // True if SS data is available in this payload + // descriptor. + int16_t picture_id; // PictureID index, 15 bits; + // kNoPictureId if PictureID does not exist. + int16_t max_picture_id; // Maximum picture ID index; either 0x7F or 0x7FFF; + int16_t tl0_pic_idx; // TL0PIC_IDX, 8 bits; + // kNoTl0PicIdx means no value provided. + uint8_t temporal_idx; // Temporal layer index, or kNoTemporalIdx. + uint8_t spatial_idx; // Spatial layer index, or kNoSpatialIdx. + bool temporal_up_switch; // True if upswitch to higher frame rate is possible + // starting from this frame. + bool inter_layer_predicted; // Frame is dependent on directly lower spatial + // layer frame. + + uint8_t gof_idx; // Index to predefined temporal frame info in SS data. + + size_t num_ref_pics; // Number of reference pictures used by this layer + // frame. + int16_t pid_diff[kMaxVp9RefPics]; // P_DIFF signaled to derive the PictureID + // of the reference pictures. + int16_t ref_picture_id[kMaxVp9RefPics]; // PictureID of reference pictures. + + // SS data. + size_t num_spatial_layers; // Always populated. + bool spatial_layer_resolution_present; + uint16_t width[kMaxVp9NumberOfSpatialLayers]; + uint16_t height[kMaxVp9NumberOfSpatialLayers]; + GofInfoVP9 gof; }; -struct WebRtcRTPHeader -{ - RTPHeader header; - FrameType frameType; - RTPTypeHeader type; +// The packetization types that we support: single, aggregated, and fragmented. 
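+// (In RFC 6184 terms: a single NAL unit packet carries type 1-23 in the low
+// five bits of its first payload byte, STAP-A is type 24, and FU-A is type
+// 28, so a depacketizer can classify packets via payload[0] & 0x1F.)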
+enum H264PacketizationTypes { + kH264SingleNalu, // This packet contains a single NAL unit. + kH264StapA, // This packet contains STAP-A (single time + // aggregation) packets. If this packet has an + // associated NAL unit type, it'll be for the + // first such aggregated packet. + kH264FuA, // This packet contains a FU-A (fragmentation + // unit) packet, meaning it is a part of a frame + // that was too large to fit into a single packet. }; -class RTPFragmentationHeader -{ -public: - RTPFragmentationHeader() : - fragmentationVectorSize(0), +struct RTPVideoHeaderH264 { + uint8_t nalu_type; // The NAL unit type. If this is a header for a + // fragmented packet, it's the NAL unit type of + // the original data. If this is the header for an + // aggregated packet, it's the NAL unit type of + // the first NAL unit in the packet. + H264PacketizationTypes packetization_type; +}; + +union RTPVideoTypeHeader { + RTPVideoHeaderVP8 VP8; + RTPVideoHeaderVP9 VP9; + RTPVideoHeaderH264 H264; +}; + +enum RtpVideoCodecTypes { + kRtpVideoNone, + kRtpVideoGeneric, + kRtpVideoVp8, + kRtpVideoVp9, + kRtpVideoH264 +}; +// Since RTPVideoHeader is used as a member of a union, it can't have a +// non-trivial default constructor. +struct RTPVideoHeader { + uint16_t width; // size + uint16_t height; + VideoRotation rotation; + + bool isFirstPacket; // first packet in frame + uint8_t simulcastIdx; // Index if the simulcast encoder creating + // this frame, 0 if not using simulcast. + RtpVideoCodecTypes codec; + RTPVideoTypeHeader codecHeader; +}; +union RTPTypeHeader { + RTPAudioHeader Audio; + RTPVideoHeader Video; +}; + +struct WebRtcRTPHeader { + RTPHeader header; + FrameType frameType; + RTPTypeHeader type; + // NTP time of the capture time in local timebase in milliseconds. 
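+  // This is typically derived on the receive side from RTCP sender reports,
+  // which supply the RTP-timestamp-to-NTP-clock mapping, and is meaningful
+  // only once such a report has been seen.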
+ int64_t ntp_time_ms; +}; + +class RTPFragmentationHeader { + public: + RTPFragmentationHeader() + : fragmentationVectorSize(0), fragmentationOffset(NULL), fragmentationLength(NULL), fragmentationTimeDiff(NULL), - fragmentationPlType(NULL) - {}; + fragmentationPlType(NULL) {}; - ~RTPFragmentationHeader() - { - delete [] fragmentationOffset; - delete [] fragmentationLength; - delete [] fragmentationTimeDiff; - delete [] fragmentationPlType; + ~RTPFragmentationHeader() { + delete[] fragmentationOffset; + delete[] fragmentationLength; + delete[] fragmentationTimeDiff; + delete[] fragmentationPlType; + } + + void CopyFrom(const RTPFragmentationHeader& src) { + if (this == &src) { + return; } - RTPFragmentationHeader& operator=(const RTPFragmentationHeader& header) - { - if(this == &header) - { - return *this; + if (src.fragmentationVectorSize != fragmentationVectorSize) { + // new size of vectors + + // delete old + delete[] fragmentationOffset; + fragmentationOffset = NULL; + delete[] fragmentationLength; + fragmentationLength = NULL; + delete[] fragmentationTimeDiff; + fragmentationTimeDiff = NULL; + delete[] fragmentationPlType; + fragmentationPlType = NULL; + + if (src.fragmentationVectorSize > 0) { + // allocate new + if (src.fragmentationOffset) { + fragmentationOffset = new size_t[src.fragmentationVectorSize]; } - - if(header.fragmentationVectorSize != fragmentationVectorSize) - { - // new size of vectors - - // delete old - delete [] fragmentationOffset; - fragmentationOffset = NULL; - delete [] fragmentationLength; - fragmentationLength = NULL; - delete [] fragmentationTimeDiff; - fragmentationTimeDiff = NULL; - delete [] fragmentationPlType; - fragmentationPlType = NULL; - - if(header.fragmentationVectorSize > 0) - { - // allocate new - if(header.fragmentationOffset) - { - fragmentationOffset = new WebRtc_UWord32[header.fragmentationVectorSize]; - } - if(header.fragmentationLength) - { - fragmentationLength = new WebRtc_UWord32[header.fragmentationVectorSize]; - } - if(header.fragmentationTimeDiff) - { - fragmentationTimeDiff = new WebRtc_UWord16[header.fragmentationVectorSize]; - } - if(header.fragmentationPlType) - { - fragmentationPlType = new WebRtc_UWord8[header.fragmentationVectorSize]; - } - } - // set new size - fragmentationVectorSize = header.fragmentationVectorSize; + if (src.fragmentationLength) { + fragmentationLength = new size_t[src.fragmentationVectorSize]; } - - if(header.fragmentationVectorSize > 0) - { - // copy values - if(header.fragmentationOffset) - { - memcpy(fragmentationOffset, header.fragmentationOffset, - header.fragmentationVectorSize * sizeof(WebRtc_UWord32)); - } - if(header.fragmentationLength) - { - memcpy(fragmentationLength, header.fragmentationLength, - header.fragmentationVectorSize * sizeof(WebRtc_UWord32)); - } - if(header.fragmentationTimeDiff) - { - memcpy(fragmentationTimeDiff, header.fragmentationTimeDiff, - header.fragmentationVectorSize * sizeof(WebRtc_UWord16)); - } - if(header.fragmentationPlType) - { - memcpy(fragmentationPlType, header.fragmentationPlType, - header.fragmentationVectorSize * sizeof(WebRtc_UWord8)); - } + if (src.fragmentationTimeDiff) { + fragmentationTimeDiff = new uint16_t[src.fragmentationVectorSize]; } - return *this; - } - void VerifyAndAllocateFragmentationHeader( const WebRtc_UWord16 size) - { - if( fragmentationVectorSize < size) - { - WebRtc_UWord16 oldVectorSize = fragmentationVectorSize; - { - // offset - WebRtc_UWord32* oldOffsets = fragmentationOffset; - fragmentationOffset = new WebRtc_UWord32[size]; - 
memset(fragmentationOffset+oldVectorSize, 0, - sizeof(WebRtc_UWord32)*(size-oldVectorSize)); - // copy old values - memcpy(fragmentationOffset,oldOffsets, sizeof(WebRtc_UWord32) * oldVectorSize); - delete[] oldOffsets; - } - // length - { - WebRtc_UWord32* oldLengths = fragmentationLength; - fragmentationLength = new WebRtc_UWord32[size]; - memset(fragmentationLength+oldVectorSize, 0, - sizeof(WebRtc_UWord32) * (size- oldVectorSize)); - memcpy(fragmentationLength, oldLengths, - sizeof(WebRtc_UWord32) * oldVectorSize); - delete[] oldLengths; - } - // time diff - { - WebRtc_UWord16* oldTimeDiffs = fragmentationTimeDiff; - fragmentationTimeDiff = new WebRtc_UWord16[size]; - memset(fragmentationTimeDiff+oldVectorSize, 0, - sizeof(WebRtc_UWord16) * (size- oldVectorSize)); - memcpy(fragmentationTimeDiff, oldTimeDiffs, - sizeof(WebRtc_UWord16) * oldVectorSize); - delete[] oldTimeDiffs; - } - // payload type - { - WebRtc_UWord8* oldTimePlTypes = fragmentationPlType; - fragmentationPlType = new WebRtc_UWord8[size]; - memset(fragmentationPlType+oldVectorSize, 0, - sizeof(WebRtc_UWord8) * (size- oldVectorSize)); - memcpy(fragmentationPlType, oldTimePlTypes, - sizeof(WebRtc_UWord8) * oldVectorSize); - delete[] oldTimePlTypes; - } - fragmentationVectorSize = size; + if (src.fragmentationPlType) { + fragmentationPlType = new uint8_t[src.fragmentationVectorSize]; } + } + // set new size + fragmentationVectorSize = src.fragmentationVectorSize; } - WebRtc_UWord16 fragmentationVectorSize; // Number of fragmentations - WebRtc_UWord32* fragmentationOffset; // Offset of pointer to data for each fragm. - WebRtc_UWord32* fragmentationLength; // Data size for each fragmentation - WebRtc_UWord16* fragmentationTimeDiff; // Timestamp difference relative "now" for - // each fragmentation - WebRtc_UWord8* fragmentationPlType; // Payload type of each fragmentation -}; - -struct RTCPVoIPMetric -{ - // RFC 3611 4.7 - WebRtc_UWord8 lossRate; - WebRtc_UWord8 discardRate; - WebRtc_UWord8 burstDensity; - WebRtc_UWord8 gapDensity; - WebRtc_UWord16 burstDuration; - WebRtc_UWord16 gapDuration; - WebRtc_UWord16 roundTripDelay; - WebRtc_UWord16 endSystemDelay; - WebRtc_UWord8 signalLevel; - WebRtc_UWord8 noiseLevel; - WebRtc_UWord8 RERL; - WebRtc_UWord8 Gmin; - WebRtc_UWord8 Rfactor; - WebRtc_UWord8 extRfactor; - WebRtc_UWord8 MOSLQ; - WebRtc_UWord8 MOSCQ; - WebRtc_UWord8 RXconfig; - WebRtc_UWord16 JBnominal; - WebRtc_UWord16 JBmax; - WebRtc_UWord16 JBabsMax; -}; - -// class describing a complete, or parts of an encoded frame. 
-class EncodedVideoData -{ -public: - EncodedVideoData() : - completeFrame(false), - missingFrame(false), - payloadData(NULL), - payloadSize(0), - bufferSize(0) - {}; - - EncodedVideoData(const EncodedVideoData& data) - { - payloadType = data.payloadType; - timeStamp = data.timeStamp; - renderTimeMs = data.renderTimeMs; - encodedWidth = data.encodedWidth; - encodedHeight = data.encodedHeight; - completeFrame = data.completeFrame; - missingFrame = data.missingFrame; - payloadSize = data.payloadSize; - fragmentationHeader = data.fragmentationHeader; - frameType = data.frameType; - codec = data.codec; - if (data.payloadSize > 0) - { - payloadData = new WebRtc_UWord8[data.payloadSize]; - memcpy(payloadData, data.payloadData, data.payloadSize); - } - else - { - payloadData = NULL; - } - } - - - ~EncodedVideoData() - { - delete [] payloadData; - }; - - EncodedVideoData& operator=(const EncodedVideoData& data) - { - if (this == &data) - { - return *this; - } - payloadType = data.payloadType; - timeStamp = data.timeStamp; - renderTimeMs = data.renderTimeMs; - encodedWidth = data.encodedWidth; - encodedHeight = data.encodedHeight; - completeFrame = data.completeFrame; - missingFrame = data.missingFrame; - payloadSize = data.payloadSize; - fragmentationHeader = data.fragmentationHeader; - frameType = data.frameType; - codec = data.codec; - if (data.payloadSize > 0) - { - delete [] payloadData; - payloadData = new WebRtc_UWord8[data.payloadSize]; - memcpy(payloadData, data.payloadData, data.payloadSize); - bufferSize = data.payloadSize; - } - return *this; - }; - void VerifyAndAllocate( const WebRtc_UWord32 size) - { - if (bufferSize < size) - { - WebRtc_UWord8* oldPayload = payloadData; - payloadData = new WebRtc_UWord8[size]; - memcpy(payloadData, oldPayload, sizeof(WebRtc_UWord8) * payloadSize); - - bufferSize = size; - delete[] oldPayload; - } - } - - WebRtc_UWord8 payloadType; - WebRtc_UWord32 timeStamp; - WebRtc_Word64 renderTimeMs; - WebRtc_UWord32 encodedWidth; - WebRtc_UWord32 encodedHeight; - bool completeFrame; - bool missingFrame; - WebRtc_UWord8* payloadData; - WebRtc_UWord32 payloadSize; - WebRtc_UWord32 bufferSize; - RTPFragmentationHeader fragmentationHeader; - FrameType frameType; - VideoCodecType codec; -}; - -// Video Content Metrics -struct VideoContentMetrics -{ - VideoContentMetrics(): motionMagnitudeNZ(0), sizeZeroMotion(0), spatialPredErr(0), - spatialPredErrH(0), spatialPredErrV(0), motionPredErr(0), - motionHorizontalness(0), motionClusterDistortion(0), - nativeWidth(0), nativeHeight(0), contentChange(false) { } - void Reset(){ motionMagnitudeNZ = 0; sizeZeroMotion = 0; spatialPredErr = 0; - spatialPredErrH = 0; spatialPredErrV = 0; motionPredErr = 0; - motionHorizontalness = 0; motionClusterDistortion = 0; - nativeWidth = 0; nativeHeight = 0; contentChange = false; } - - float motionMagnitudeNZ; - float sizeZeroMotion; - float spatialPredErr; - float spatialPredErrH; - float spatialPredErrV; - float motionPredErr; - float motionHorizontalness; - float motionClusterDistortion; - WebRtc_UWord32 nativeWidth; - WebRtc_UWord32 nativeHeight; - WebRtc_UWord32 nativeFrameRate; - bool contentChange; -}; - -/************************************************* - * - * VideoFrame class - * - * The VideoFrame class allows storing and - * handling of video frames. - * - * - *************************************************/ -class VideoFrame -{ -public: - VideoFrame(); - ~VideoFrame(); - /** - * Verifies that current allocated buffer size is larger than or equal to the input size. 
- * If the current buffer size is smaller, a new allocation is made and the old buffer data - * is copied to the new buffer. - * Buffer size is updated to minimumSize. - */ - WebRtc_Word32 VerifyAndAllocate(const WebRtc_UWord32 minimumSize); - /** - * Update length of data buffer in frame. Function verifies that new length is less or - * equal to allocated size. - */ - WebRtc_Word32 SetLength(const WebRtc_UWord32 newLength); - /* - * Swap buffer and size data - */ - WebRtc_Word32 Swap(WebRtc_UWord8*& newMemory, - WebRtc_UWord32& newLength, - WebRtc_UWord32& newSize); - /* - * Swap buffer and size data - */ - WebRtc_Word32 SwapFrame(VideoFrame& videoFrame); - /** - * Copy buffer: If newLength is bigger than allocated size, a new buffer of size length - * is allocated. - */ - WebRtc_Word32 CopyFrame(const VideoFrame& videoFrame); - /** - * Copy buffer: If newLength is bigger than allocated size, a new buffer of size length - * is allocated. - */ - WebRtc_Word32 CopyFrame(WebRtc_UWord32 length, const WebRtc_UWord8* sourceBuffer); - /** - * Delete VideoFrame and resets members to zero - */ - void Free(); - /** - * Set frame timestamp (90kHz) - */ - void SetTimeStamp(const WebRtc_UWord32 timeStamp) {_timeStamp = timeStamp;} - /** - * Get pointer to frame buffer - */ - WebRtc_UWord8* Buffer() const {return _buffer;} - - WebRtc_UWord8*& Buffer() {return _buffer;} - - /** - * Get allocated buffer size - */ - WebRtc_UWord32 Size() const {return _bufferSize;} - /** - * Get frame length - */ - WebRtc_UWord32 Length() const {return _bufferLength;} - /** - * Get frame timestamp (90kHz) - */ - WebRtc_UWord32 TimeStamp() const {return _timeStamp;} - /** - * Get frame width - */ - WebRtc_UWord32 Width() const {return _width;} - /** - * Get frame height - */ - WebRtc_UWord32 Height() const {return _height;} - /** - * Set frame width - */ - void SetWidth(const WebRtc_UWord32 width) {_width = width;} - /** - * Set frame height - */ - void SetHeight(const WebRtc_UWord32 height) {_height = height;} - /** - * Set render time in miliseconds - */ - void SetRenderTime(const WebRtc_Word64 renderTimeMs) {_renderTimeMs = renderTimeMs;} - /** - * Get render time in miliseconds - */ - WebRtc_Word64 RenderTimeMs() const {return _renderTimeMs;} - -private: - void Set(WebRtc_UWord8* buffer, - WebRtc_UWord32 size, - WebRtc_UWord32 length, - WebRtc_UWord32 timeStamp); - - WebRtc_UWord8* _buffer; // Pointer to frame buffer - WebRtc_UWord32 _bufferSize; // Allocated buffer size - WebRtc_UWord32 _bufferLength; // Length (in bytes) of buffer - WebRtc_UWord32 _timeStamp; // Timestamp of frame (90kHz) - WebRtc_UWord32 _width; - WebRtc_UWord32 _height; - WebRtc_Word64 _renderTimeMs; -}; // end of VideoFrame class declaration - -// inline implementation of VideoFrame class: -inline -VideoFrame::VideoFrame(): - _buffer(0), - _bufferSize(0), - _bufferLength(0), - _timeStamp(0), - _width(0), - _height(0), - _renderTimeMs(0) -{ - // -} -inline -VideoFrame::~VideoFrame() -{ - if(_buffer) - { - delete [] _buffer; - _buffer = NULL; - } -} - - -inline -WebRtc_Word32 -VideoFrame::VerifyAndAllocate(const WebRtc_UWord32 minimumSize) -{ - if (minimumSize < 1) - { - return -1; - } - if(minimumSize > _bufferSize) - { - // create buffer of sufficient size - WebRtc_UWord8* newBufferBuffer = new WebRtc_UWord8[minimumSize]; - if(_buffer) - { - // copy old data - memcpy(newBufferBuffer, _buffer, _bufferSize); - delete [] _buffer; - } - _buffer = newBufferBuffer; - _bufferSize = minimumSize; - } - return 0; -} - -inline -WebRtc_Word32 
-VideoFrame::SetLength(const WebRtc_UWord32 newLength) -{ - if (newLength >_bufferSize ) - { // can't accomodate new value - return -1; - } - _bufferLength = newLength; - return 0; -} - -inline -WebRtc_Word32 -VideoFrame::SwapFrame(VideoFrame& videoFrame) -{ - WebRtc_UWord32 tmpTimeStamp = _timeStamp; - WebRtc_UWord32 tmpWidth = _width; - WebRtc_UWord32 tmpHeight = _height; - WebRtc_Word64 tmpRenderTime = _renderTimeMs; - - _timeStamp = videoFrame._timeStamp; - _width = videoFrame._width; - _height = videoFrame._height; - _renderTimeMs = videoFrame._renderTimeMs; - - videoFrame._timeStamp = tmpTimeStamp; - videoFrame._width = tmpWidth; - videoFrame._height = tmpHeight; - videoFrame._renderTimeMs = tmpRenderTime; - - return Swap(videoFrame._buffer, videoFrame._bufferLength, videoFrame._bufferSize); -} - -inline -WebRtc_Word32 -VideoFrame::Swap(WebRtc_UWord8*& newMemory, WebRtc_UWord32& newLength, WebRtc_UWord32& newSize) -{ - WebRtc_UWord8* tmpBuffer = _buffer; - WebRtc_UWord32 tmpLength = _bufferLength; - WebRtc_UWord32 tmpSize = _bufferSize; - _buffer = newMemory; - _bufferLength = newLength; - _bufferSize = newSize; - newMemory = tmpBuffer; - newLength = tmpLength; - newSize = tmpSize; - return 0; -} - -inline -WebRtc_Word32 -VideoFrame::CopyFrame(WebRtc_UWord32 length, const WebRtc_UWord8* sourceBuffer) -{ - if (length > _bufferSize) - { - WebRtc_Word32 ret = VerifyAndAllocate(length); - if (ret < 0) - { - return ret; - } - } - memcpy(_buffer, sourceBuffer, length); - _bufferLength = length; - return 0; -} - -inline -WebRtc_Word32 -VideoFrame::CopyFrame(const VideoFrame& videoFrame) -{ - if(CopyFrame(videoFrame.Length(), videoFrame.Buffer()) != 0) - { - return -1; - } - _timeStamp = videoFrame._timeStamp; - _width = videoFrame._width; - _height = videoFrame._height; - _renderTimeMs = videoFrame._renderTimeMs; - return 0; -} - -inline -void -VideoFrame::Free() -{ - _timeStamp = 0; - _bufferLength = 0; - _bufferSize = 0; - _height = 0; - _width = 0; - _renderTimeMs = 0; - - if(_buffer) - { - delete [] _buffer; - _buffer = NULL; - } -} - - -/************************************************* - * - * AudioFrame class - * - * The AudioFrame class holds up to 60 ms wideband - * audio. It allows for adding and subtracting frames - * while keeping track of the resulting states. - * - * Note - * - The +operator assume that you would never add - * exact opposite frames when deciding the resulting - * state. To do this use the -operator. - * - * - _audioChannel of 1 indicated mono, and 2 - * indicates stereo. - * - * - _payloadDataLengthInSamples is the number of - * samples per channel. Therefore, the total - * number of samples in _payloadData is - * (_payloadDataLengthInSamples * _audioChannel). - * - * - Stereo data is stored in interleaved fashion - * starting with the left channel. 
- * - *************************************************/ -class AudioFrame -{ -public: - enum{kMaxAudioFrameSizeSamples = 3840}; // stereo 32KHz 60ms 2*32*60 - - enum VADActivity - { - kVadActive = 0, - kVadPassive = 1, - kVadUnknown = 2 - }; - enum SpeechType - { - kNormalSpeech = 0, - kPLC = 1, - kCNG = 2, - kPLCCNG = 3, - kUndefined = 4 - }; - - AudioFrame(); - virtual ~AudioFrame(); - - WebRtc_Word32 UpdateFrame( - const WebRtc_Word32 id, - const WebRtc_UWord32 timeStamp, - const WebRtc_Word16* payloadData, - const WebRtc_UWord16 payloadDataLengthInSamples, - const int frequencyInHz, - const SpeechType speechType, - const VADActivity vadActivity, - const WebRtc_UWord8 audioChannel = 1, - const WebRtc_Word32 volume = -1, - const WebRtc_Word32 energy = -1); - - AudioFrame& Append(const AudioFrame& rhs); - - void Mute() const; - - AudioFrame& operator=(const AudioFrame& rhs); - AudioFrame& operator>>=(const WebRtc_Word32 rhs); - AudioFrame& operator+=(const AudioFrame& rhs); - AudioFrame& operator-=(const AudioFrame& rhs); - - WebRtc_Word32 _id; - WebRtc_UWord32 _timeStamp; - - // Supporting Stereo, stereo samples are interleaved - mutable WebRtc_Word16 _payloadData[kMaxAudioFrameSizeSamples]; - WebRtc_UWord16 _payloadDataLengthInSamples; - int _frequencyInHz; - WebRtc_UWord8 _audioChannel; - SpeechType _speechType; - VADActivity _vadActivity; - - WebRtc_UWord32 _energy; - WebRtc_Word32 _volume; -}; - -inline -AudioFrame::AudioFrame() - : - _id(-1), - _timeStamp(0), - _payloadData(), - _payloadDataLengthInSamples(0), - _frequencyInHz(0), - _audioChannel(1), - _speechType(kUndefined), - _vadActivity(kVadUnknown), - _energy(0xffffffff), - _volume(0xffffffff) -{ -} - -inline -AudioFrame::~AudioFrame() -{ -} - -inline -WebRtc_Word32 -AudioFrame::UpdateFrame( - const WebRtc_Word32 id, - const WebRtc_UWord32 timeStamp, - const WebRtc_Word16* payloadData, - const WebRtc_UWord16 payloadDataLengthInSamples, - const int frequencyInHz, - const SpeechType speechType, - const VADActivity vadActivity, - const WebRtc_UWord8 audioChannel, - const WebRtc_Word32 volume, - const WebRtc_Word32 energy) -{ - _id = id; - _timeStamp = timeStamp; - _frequencyInHz = frequencyInHz; - _speechType = speechType; - _vadActivity = vadActivity; - _volume = volume; - _audioChannel = audioChannel; - _energy = energy; - - if((payloadDataLengthInSamples > kMaxAudioFrameSizeSamples) || - (audioChannel > 2) || (audioChannel < 1)) - { - _payloadDataLengthInSamples = 0; - return -1; - } - _payloadDataLengthInSamples = payloadDataLengthInSamples; - if(payloadData != NULL) - { - memcpy(_payloadData, payloadData, sizeof(WebRtc_Word16) * - payloadDataLengthInSamples * _audioChannel); - } - else - { - memset(_payloadData,0,sizeof(WebRtc_Word16) * - payloadDataLengthInSamples * _audioChannel); - } - return 0; -} - -inline -void -AudioFrame::Mute() const -{ - memset(_payloadData, 0, _payloadDataLengthInSamples * sizeof(WebRtc_Word16)); -} - -inline -AudioFrame& -AudioFrame::operator=(const AudioFrame& rhs) -{ - // Sanity Check - if((rhs._payloadDataLengthInSamples > kMaxAudioFrameSizeSamples) || - (rhs._audioChannel > 2) || - (rhs._audioChannel < 1)) - { - return *this; - } - if(this == &rhs) - { - return *this; - } - _id = rhs._id; - _timeStamp = rhs._timeStamp; - _frequencyInHz = rhs._frequencyInHz; - _speechType = rhs._speechType; - _vadActivity = rhs._vadActivity; - _volume = rhs._volume; - _audioChannel = rhs._audioChannel; - _energy = rhs._energy; - - _payloadDataLengthInSamples = rhs._payloadDataLengthInSamples; - 
memcpy(_payloadData, rhs._payloadData, - sizeof(WebRtc_Word16) * rhs._payloadDataLengthInSamples * _audioChannel); - - return *this; -} - -inline -AudioFrame& -AudioFrame::operator>>=(const WebRtc_Word32 rhs) -{ - assert((_audioChannel > 0) && (_audioChannel < 3)); - if((_audioChannel > 2) || - (_audioChannel < 1)) - { - return *this; - } - for(WebRtc_UWord16 i = 0; i < _payloadDataLengthInSamples * _audioChannel; i++) - { - _payloadData[i] = WebRtc_Word16(_payloadData[i] >> rhs); - } - return *this; -} - -inline -AudioFrame& -AudioFrame::Append(const AudioFrame& rhs) -{ - // Sanity check - assert((_audioChannel > 0) && (_audioChannel < 3)); - if((_audioChannel > 2) || - (_audioChannel < 1)) - { - return *this; - } - if(_audioChannel != rhs._audioChannel) - { - return *this; - } - if((_vadActivity == kVadActive) || - rhs._vadActivity == kVadActive) - { - _vadActivity = kVadActive; - } - else if((_vadActivity == kVadUnknown) || - rhs._vadActivity == kVadUnknown) - { - _vadActivity = kVadUnknown; - } - if(_speechType != rhs._speechType) - { - _speechType = kUndefined; - } - - WebRtc_UWord16 offset = _payloadDataLengthInSamples * _audioChannel; - for(WebRtc_UWord16 i = 0; - i < rhs._payloadDataLengthInSamples * rhs._audioChannel; - i++) - { - _payloadData[offset+i] = rhs._payloadData[i]; - } - _payloadDataLengthInSamples += rhs._payloadDataLengthInSamples; - return *this; -} - -// merge vectors -inline -AudioFrame& -AudioFrame::operator+=(const AudioFrame& rhs) -{ - // Sanity check - assert((_audioChannel > 0) && (_audioChannel < 3)); - if((_audioChannel > 2) || - (_audioChannel < 1)) - { - return *this; - } - if(_audioChannel != rhs._audioChannel) - { - return *this; - } - bool noPrevData = false; - if(_payloadDataLengthInSamples != rhs._payloadDataLengthInSamples) - { - if(_payloadDataLengthInSamples == 0) - { - // special case we have no data to start with - _payloadDataLengthInSamples = rhs._payloadDataLengthInSamples; - noPrevData = true; - } else - { - return *this; - } - } - - if((_vadActivity == kVadActive) || - rhs._vadActivity == kVadActive) - { - _vadActivity = kVadActive; - } - else if((_vadActivity == kVadUnknown) || - rhs._vadActivity == kVadUnknown) - { - _vadActivity = kVadUnknown; - } - - if(_speechType != rhs._speechType) - { - _speechType = kUndefined; - } - - if(noPrevData) - { - memcpy(_payloadData, rhs._payloadData, - sizeof(WebRtc_Word16) * rhs._payloadDataLengthInSamples * _audioChannel); - } else - { - // IMPROVEMENT this can be done very fast in assembly - for(WebRtc_UWord16 i = 0; i < _payloadDataLengthInSamples * _audioChannel; i++) - { - WebRtc_Word32 wrapGuard = (WebRtc_Word32)_payloadData[i] + - (WebRtc_Word32)rhs._payloadData[i]; - if(wrapGuard < -32768) - { - _payloadData[i] = -32768; - }else if(wrapGuard > 32767) - { - _payloadData[i] = 32767; - }else - { - _payloadData[i] = (WebRtc_Word16)wrapGuard; - } + if (src.fragmentationVectorSize > 0) { + // copy values + if (src.fragmentationOffset) { + memcpy(fragmentationOffset, src.fragmentationOffset, + src.fragmentationVectorSize * sizeof(size_t)); + } + if (src.fragmentationLength) { + memcpy(fragmentationLength, src.fragmentationLength, + src.fragmentationVectorSize * sizeof(size_t)); + } + if (src.fragmentationTimeDiff) { + memcpy(fragmentationTimeDiff, src.fragmentationTimeDiff, + src.fragmentationVectorSize * sizeof(uint16_t)); + } + if (src.fragmentationPlType) { + memcpy(fragmentationPlType, src.fragmentationPlType, + src.fragmentationVectorSize * sizeof(uint8_t)); } } - _energy = 0xffffffff; - _volume = 
0xffffffff;
-    return *this;
+  }
+
+  void VerifyAndAllocateFragmentationHeader(const size_t size) {
+    assert(size <= std::numeric_limits<uint16_t>::max());
+    const uint16_t size16 = static_cast<uint16_t>(size);
+    if (fragmentationVectorSize < size16) {
+      uint16_t oldVectorSize = fragmentationVectorSize;
+      {
+        // offset
+        size_t* oldOffsets = fragmentationOffset;
+        fragmentationOffset = new size_t[size16];
+        memset(fragmentationOffset + oldVectorSize, 0,
+               sizeof(size_t) * (size16 - oldVectorSize));
+        // copy old values
+        memcpy(fragmentationOffset, oldOffsets,
+               sizeof(size_t) * oldVectorSize);
+        delete[] oldOffsets;
+      }
+      // length
+      {
+        size_t* oldLengths = fragmentationLength;
+        fragmentationLength = new size_t[size16];
+        memset(fragmentationLength + oldVectorSize, 0,
+               sizeof(size_t) * (size16 - oldVectorSize));
+        memcpy(fragmentationLength, oldLengths,
+               sizeof(size_t) * oldVectorSize);
+        delete[] oldLengths;
+      }
+      // time diff
+      {
+        uint16_t* oldTimeDiffs = fragmentationTimeDiff;
+        fragmentationTimeDiff = new uint16_t[size16];
+        memset(fragmentationTimeDiff + oldVectorSize, 0,
+               sizeof(uint16_t) * (size16 - oldVectorSize));
+        memcpy(fragmentationTimeDiff, oldTimeDiffs,
+               sizeof(uint16_t) * oldVectorSize);
+        delete[] oldTimeDiffs;
+      }
+      // payload type
+      {
+        uint8_t* oldTimePlTypes = fragmentationPlType;
+        fragmentationPlType = new uint8_t[size16];
+        memset(fragmentationPlType + oldVectorSize, 0,
+               sizeof(uint8_t) * (size16 - oldVectorSize));
+        memcpy(fragmentationPlType, oldTimePlTypes,
+               sizeof(uint8_t) * oldVectorSize);
+        delete[] oldTimePlTypes;
+      }
+      fragmentationVectorSize = size16;
+    }
+  }
+
+  uint16_t fragmentationVectorSize;  // Number of fragmentations
+  size_t* fragmentationOffset;       // Offset of pointer to data for each
+                                     // fragmentation
+  size_t* fragmentationLength;       // Data size for each fragmentation
+  uint16_t* fragmentationTimeDiff;   // Timestamp difference relative "now" for
+                                     // each fragmentation
+  uint8_t* fragmentationPlType;      // Payload type of each fragmentation
+
+ private:
+  RTC_DISALLOW_COPY_AND_ASSIGN(RTPFragmentationHeader);
+};
+
+struct RTCPVoIPMetric {
+  // RFC 3611 4.7
+  uint8_t lossRate;
+  uint8_t discardRate;
+  uint8_t burstDensity;
+  uint8_t gapDensity;
+  uint16_t burstDuration;
+  uint16_t gapDuration;
+  uint16_t roundTripDelay;
+  uint16_t endSystemDelay;
+  uint8_t signalLevel;
+  uint8_t noiseLevel;
+  uint8_t RERL;
+  uint8_t Gmin;
+  uint8_t Rfactor;
+  uint8_t extRfactor;
+  uint8_t MOSLQ;
+  uint8_t MOSCQ;
+  uint8_t RXconfig;
+  uint16_t JBnominal;
+  uint16_t JBmax;
+  uint16_t JBabsMax;
+};
+
+// Types for the FEC packet masks. The type |kFecMaskRandom| is based on a
+// random loss model. The type |kFecMaskBursty| is based on a bursty/consecutive
+// loss model. The packet masks are defined in
+// modules/rtp_rtcp/fec_private_tables_random(bursty).h
+enum FecMaskType {
+  kFecMaskRandom,
+  kFecMaskBursty,
+};
+
+// Struct containing forward error correction settings.
+struct FecProtectionParams {
+  int fec_rate;
+  bool use_uep_protection;
+  int max_fec_frames;
+  FecMaskType fec_mask_type;
+};
+
+// Interface used by the CallStats class to distribute call statistics.
+// Callbacks will be triggered as soon as the class has been registered to a
+// CallStats object using RegisterStatsObserver.
+class CallStatsObserver {
+ public:
+  virtual void OnRttUpdate(int64_t avg_rtt_ms, int64_t max_rtt_ms) = 0;
+
+  virtual ~CallStatsObserver() {}
+};
+
+struct VideoContentMetrics {
+  VideoContentMetrics()
+      : motion_magnitude(0.0f),
+        spatial_pred_err(0.0f),
+        spatial_pred_err_h(0.0f),
+        spatial_pred_err_v(0.0f) {}
+
+  void Reset() {
+    motion_magnitude = 0.0f;
+    spatial_pred_err = 0.0f;
+    spatial_pred_err_h = 0.0f;
+    spatial_pred_err_v = 0.0f;
+  }
+  float motion_magnitude;
+  float spatial_pred_err;
+  float spatial_pred_err_h;
+  float spatial_pred_err_v;
+};
+
+/* This class holds up to 60 ms of super-wideband (32 kHz) stereo audio. It
+ * allows for adding and subtracting frames while keeping track of the
+ * resulting states.
+ *
+ * Notes
+ * - The total number of samples in |data_| is
+ *   samples_per_channel_ * num_channels_
+ *
+ * - Stereo data is interleaved starting with the left channel.
+ *
+ * - The +operator assumes that you would never add exactly opposite frames
+ *   when deciding the resulting state. To do this use the -operator.
+ */
+class AudioFrame {
+ public:
+  // Stereo, 32 kHz, 60 ms (2 * 32 * 60)
+  static const size_t kMaxDataSizeSamples = 3840;
+
+  enum VADActivity {
+    kVadActive = 0,
+    kVadPassive = 1,
+    kVadUnknown = 2
+  };
+  enum SpeechType {
+    kNormalSpeech = 0,
+    kPLC = 1,
+    kCNG = 2,
+    kPLCCNG = 3,
+    kUndefined = 4
+  };
+
+  AudioFrame();
+  virtual ~AudioFrame() {}
+
+  // Resets all members to their default state (except does not modify the
+  // contents of |data_|).
+  void Reset();
+
+  // |interleaved_| is not changed by this method.
+  void UpdateFrame(int id, uint32_t timestamp, const int16_t* data,
+                   size_t samples_per_channel, int sample_rate_hz,
+                   SpeechType speech_type, VADActivity vad_activity,
+                   int num_channels = 1, uint32_t energy = -1);
+
+  AudioFrame& Append(const AudioFrame& rhs);
+
+  void CopyFrom(const AudioFrame& src);
+
+  void Mute();
+
+  AudioFrame& operator>>=(const int rhs);
+  AudioFrame& operator+=(const AudioFrame& rhs);
+  AudioFrame& operator-=(const AudioFrame& rhs);
+
+  int id_;
+  // RTP timestamp of the first sample in the AudioFrame.
+  uint32_t timestamp_;
+  // Time since the first frame in milliseconds.
+  // -1 represents an uninitialized value.
+  int64_t elapsed_time_ms_;
+  // NTP time of the estimated capture time in local timebase in milliseconds.
+  // -1 represents an uninitialized value.
+  int64_t ntp_time_ms_;
+  int16_t data_[kMaxDataSizeSamples];
+  size_t samples_per_channel_;
+  int sample_rate_hz_;
+  int num_channels_;
+  SpeechType speech_type_;
+  VADActivity vad_activity_;
+  // Note that there is no guarantee that |energy_| is correct. Any user of
+  // this member must verify that the value is correct.
+  // TODO(henrike) Remove |energy_|.
+  // See https://code.google.com/p/webrtc/issues/detail?id=3315.
+  uint32_t energy_;
+  bool interleaved_;
+
+ private:
+  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
+};
+
+inline AudioFrame::AudioFrame()
+    : data_() {
+  Reset();
}
-inline
-AudioFrame&
-AudioFrame::operator-=(const AudioFrame& rhs)
-{
-    // Sanity check
-    assert((_audioChannel > 0) && (_audioChannel < 3));
-    if((_audioChannel > 2)||
-        (_audioChannel < 1))
-    {
-        return *this;
-    }
-    if((_payloadDataLengthInSamples != rhs._payloadDataLengthInSamples) ||
-        (_audioChannel != rhs._audioChannel))
-    {
-        return *this;
-    }
-    if((_vadActivity != kVadPassive) ||
-        rhs._vadActivity != kVadPassive)
-    {
-        _vadActivity = kVadUnknown;
-    }
-    _speechType = kUndefined;
-
-    for(WebRtc_UWord16 i = 0; i < _payloadDataLengthInSamples * _audioChannel; i++)
-    {
-        WebRtc_Word32 wrapGuard = (WebRtc_Word32)_payloadData[i] -
-            (WebRtc_Word32)rhs._payloadData[i];
-        if(wrapGuard < -32768)
-        {
-            _payloadData[i] = -32768;
-        }
-        else if(wrapGuard > 32767)
-        {
-            _payloadData[i] = 32767;
-        }
-        else
-        {
-            _payloadData[i] = (WebRtc_Word16)wrapGuard;
-        }
-    }
-    _energy = 0xffffffff;
-    _volume = 0xffffffff;
-    return *this;
+inline void AudioFrame::Reset() {
+  id_ = -1;
+  // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
+  // to an invalid value, or add a new member to indicate invalidity.
+  timestamp_ = 0;
+  elapsed_time_ms_ = -1;
+  ntp_time_ms_ = -1;
+  samples_per_channel_ = 0;
+  sample_rate_hz_ = 0;
+  num_channels_ = 0;
+  speech_type_ = kUndefined;
+  vad_activity_ = kVadUnknown;
+  energy_ = 0xffffffff;
+  interleaved_ = true;
}
-} // namespace webrtc
+inline void AudioFrame::UpdateFrame(int id,
+                                    uint32_t timestamp,
+                                    const int16_t* data,
+                                    size_t samples_per_channel,
+                                    int sample_rate_hz,
+                                    SpeechType speech_type,
+                                    VADActivity vad_activity,
+                                    int num_channels,
+                                    uint32_t energy) {
+  id_ = id;
+  timestamp_ = timestamp;
+  samples_per_channel_ = samples_per_channel;
+  sample_rate_hz_ = sample_rate_hz;
+  speech_type_ = speech_type;
+  vad_activity_ = vad_activity;
+  num_channels_ = num_channels;
+  energy_ = energy;
-#endif // MODULE_COMMON_TYPES_H
+  assert(num_channels >= 0);
+  const size_t length = samples_per_channel * num_channels;
+  assert(length <= kMaxDataSizeSamples);
+  if (data != NULL) {
+    memcpy(data_, data, sizeof(int16_t) * length);
+  } else {
+    memset(data_, 0, sizeof(int16_t) * length);
+  }
+}
+
+inline void AudioFrame::CopyFrom(const AudioFrame& src) {
+  if (this == &src) return;
+
+  id_ = src.id_;
+  timestamp_ = src.timestamp_;
+  elapsed_time_ms_ = src.elapsed_time_ms_;
+  ntp_time_ms_ = src.ntp_time_ms_;
+  samples_per_channel_ = src.samples_per_channel_;
+  sample_rate_hz_ = src.sample_rate_hz_;
+  speech_type_ = src.speech_type_;
+  vad_activity_ = src.vad_activity_;
+  num_channels_ = src.num_channels_;
+  energy_ = src.energy_;
+  interleaved_ = src.interleaved_;
+
+  assert(num_channels_ >= 0);
+  const size_t length = samples_per_channel_ * num_channels_;
+  assert(length <= kMaxDataSizeSamples);
+  memcpy(data_, src.data_, sizeof(int16_t) * length);
+}
+
+inline void AudioFrame::Mute() {
+  memset(data_, 0, samples_per_channel_ * num_channels_ * sizeof(int16_t));
+}
+
+inline AudioFrame& AudioFrame::operator>>=(const int rhs) {
+  assert((num_channels_ > 0) && (num_channels_ < 3));
+  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
+
+  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
+    data_[i] = static_cast<int16_t>(data_[i] >> rhs);
+  }
+  return *this;
+}
+
+inline AudioFrame& AudioFrame::Append(const AudioFrame& rhs) {
+  // Sanity check
+  assert((num_channels_ > 0) && (num_channels_ < 3));
+  assert(interleaved_ == rhs.interleaved_);
+  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
+  if (num_channels_ != rhs.num_channels_) return *this;
+
+  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
+    vad_activity_ = kVadActive;
+  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
+    vad_activity_ = kVadUnknown;
+  }
+  if (speech_type_ != rhs.speech_type_) {
+    speech_type_ = kUndefined;
+  }
+
+  size_t offset = samples_per_channel_ * num_channels_;
+  for (size_t i = 0; i < rhs.samples_per_channel_ * rhs.num_channels_; i++) {
+    data_[offset + i] = rhs.data_[i];
+  }
+  samples_per_channel_ += rhs.samples_per_channel_;
+  return *this;
+}
+
+namespace {
+inline int16_t ClampToInt16(int32_t input) {
+  if (input < -0x00008000) {
+    return -0x8000;
+  } else if (input > 0x00007FFF) {
+    return 0x7FFF;
+  } else {
+    return static_cast<int16_t>(input);
+  }
+}
+}
+
+inline AudioFrame& AudioFrame::operator+=(const AudioFrame& rhs) {
+  // Sanity check
+  assert((num_channels_ > 0) && (num_channels_ < 3));
+  assert(interleaved_ == rhs.interleaved_);
+  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
+  if (num_channels_ != rhs.num_channels_) return *this;
+
+  bool noPrevData = false;
+  if (samples_per_channel_ != rhs.samples_per_channel_) {
+    if (samples_per_channel_ == 0) {
+      // special case we have no data to start with
+      samples_per_channel_ = rhs.samples_per_channel_;
+      noPrevData = true;
+    } else {
+      return *this;
+    }
+  }
+
+  if ((vad_activity_ == kVadActive) || rhs.vad_activity_ == kVadActive) {
+    vad_activity_ = kVadActive;
+  } else if (vad_activity_ == kVadUnknown || rhs.vad_activity_ == kVadUnknown) {
+    vad_activity_ = kVadUnknown;
+  }
+
+  if (speech_type_ != rhs.speech_type_) speech_type_ = kUndefined;
+
+  if (noPrevData) {
+    memcpy(data_, rhs.data_,
+           sizeof(int16_t) * rhs.samples_per_channel_ * num_channels_);
+  } else {
+    // IMPROVEMENT this can be done very fast in assembly
+    for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
+      int32_t wrap_guard =
+          static_cast<int32_t>(data_[i]) + static_cast<int32_t>(rhs.data_[i]);
+      data_[i] = ClampToInt16(wrap_guard);
+    }
+  }
+  energy_ = 0xffffffff;
+  return *this;
+}
+
+inline AudioFrame& AudioFrame::operator-=(const AudioFrame& rhs) {
+  // Sanity check
+  assert((num_channels_ > 0) && (num_channels_ < 3));
+  assert(interleaved_ == rhs.interleaved_);
+  if ((num_channels_ > 2) || (num_channels_ < 1)) return *this;
+
+  if ((samples_per_channel_ != rhs.samples_per_channel_) ||
+      (num_channels_ != rhs.num_channels_)) {
+    return *this;
+  }
+  if ((vad_activity_ != kVadPassive) || rhs.vad_activity_ != kVadPassive) {
+    vad_activity_ = kVadUnknown;
+  }
+  speech_type_ = kUndefined;
+
+  for (size_t i = 0; i < samples_per_channel_ * num_channels_; i++) {
+    int32_t wrap_guard =
+        static_cast<int32_t>(data_[i]) - static_cast<int32_t>(rhs.data_[i]);
+    data_[i] = ClampToInt16(wrap_guard);
+  }
+  energy_ = 0xffffffff;
+  return *this;
+}
+inline bool IsNewerSequenceNumber(uint16_t sequence_number,
+                                  uint16_t prev_sequence_number) {
+  // Distinguish between elements that are exactly 0x8000 apart.
+  // If s1>s2 and |s1-s2| = 0x8000: IsNewer(s1,s2)=true, IsNewer(s2,s1)=false
+  // rather than having IsNewer(s1,s2) = IsNewer(s2,s1) = false.
+  if (static_cast<uint16_t>(sequence_number - prev_sequence_number) == 0x8000) {
+    return sequence_number > prev_sequence_number;
+  }
+  return sequence_number != prev_sequence_number &&
+         static_cast<uint16_t>(sequence_number - prev_sequence_number) < 0x8000;
+}
+
+inline bool IsNewerTimestamp(uint32_t timestamp, uint32_t prev_timestamp) {
+  // Distinguish between elements that are exactly 0x80000000 apart.
+  // If t1>t2 and |t1-t2| = 0x80000000: IsNewer(t1,t2)=true,
+  // IsNewer(t2,t1)=false
+  // rather than having IsNewer(t1,t2) = IsNewer(t2,t1) = false.
+  if (static_cast<uint32_t>(timestamp - prev_timestamp) == 0x80000000) {
+    return timestamp > prev_timestamp;
+  }
+  return timestamp != prev_timestamp &&
+         static_cast<uint32_t>(timestamp - prev_timestamp) < 0x80000000;
+}
+
+inline uint16_t LatestSequenceNumber(uint16_t sequence_number1,
+                                     uint16_t sequence_number2) {
+  return IsNewerSequenceNumber(sequence_number1, sequence_number2)
+             ? sequence_number1
+             : sequence_number2;
+}
+
+inline uint32_t LatestTimestamp(uint32_t timestamp1, uint32_t timestamp2) {
+  return IsNewerTimestamp(timestamp1, timestamp2) ? timestamp1 : timestamp2;
+}
+
+// Utility class to unwrap a sequence number to a larger type, for easier
+// handling large ranges. Note that sequence numbers will never be unwrapped
+// to a negative value.
+class SequenceNumberUnwrapper {
+ public:
+  SequenceNumberUnwrapper() : last_seq_(-1) {}
+
+  // Get the unwrapped sequence, but don't update the internal state.
+  int64_t UnwrapWithoutUpdate(uint16_t sequence_number) {
+    if (last_seq_ == -1)
+      return sequence_number;
+
+    uint16_t cropped_last = static_cast<uint16_t>(last_seq_);
+    int64_t delta = sequence_number - cropped_last;
+    if (IsNewerSequenceNumber(sequence_number, cropped_last)) {
+      if (delta < 0)
+        delta += (1 << 16);  // Wrap forwards.
+    } else if (delta > 0 && (last_seq_ + delta - (1 << 16)) >= 0) {
+      // If sequence_number is older but delta is positive, this is a backwards
+      // wrap-around. However, don't wrap backwards past 0 (unwrapped).
+      delta -= (1 << 16);
+    }
+
+    return last_seq_ + delta;
+  }
+
+  // Only update the internal state to the specified last (unwrapped) sequence.
+  void UpdateLast(int64_t last_sequence) { last_seq_ = last_sequence; }
+
+  // Unwrap the sequence number and update the internal state.
+  int64_t Unwrap(uint16_t sequence_number) {
+    int64_t unwrapped = UnwrapWithoutUpdate(sequence_number);
+    UpdateLast(unwrapped);
+    return unwrapped;
+  }
+
+ private:
+  int64_t last_seq_;
+};
+
+}  // namespace webrtc
+
+#endif  // MODULE_COMMON_TYPES_H
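A brief usage sketch of the SequenceNumberUnwrapper declared above (illustrative only, not part of the patch), showing how a 16-bit wrap-around is turned into a monotonically increasing 64-bit sequence:

    // Illustrative values; derived directly from the header above.
    webrtc::SequenceNumberUnwrapper unwrapper;
    int64_t s1 = unwrapper.Unwrap(65534);  // 65534
    int64_t s2 = unwrapper.Unwrap(65535);  // 65535
    int64_t s3 = unwrapper.Unwrap(0);      // 65536: forward wrap detected
    int64_t s4 = unwrapper.Unwrap(1);      // 65537
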
diff --git a/webrtc/modules/utility/interface/audio_frame_operations.h b/webrtc/modules/utility/interface/audio_frame_operations.h
new file mode 100644
index 0000000..c2af68a
--- /dev/null
+++ b/webrtc/modules/utility/interface/audio_frame_operations.h
@@ -0,0 +1,58 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_VOICE_ENGINE_AUDIO_FRAME_OPERATIONS_H_
+#define WEBRTC_VOICE_ENGINE_AUDIO_FRAME_OPERATIONS_H_
+
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+class AudioFrame;
+
+// TODO(andrew): consolidate this with utility.h and audio_frame_manipulator.h.
+// Change reference parameters to pointers. Consider using a namespace rather
+// than a class.
+class AudioFrameOperations {
+ public:
+  // Upmixes mono |src_audio| to stereo |dst_audio|. This is an out-of-place
+  // operation, meaning src_audio and dst_audio must point to different
+  // buffers. It is the caller's responsibility to ensure that |dst_audio| is
+  // sufficiently large.
+  static void MonoToStereo(const int16_t* src_audio,
+                           size_t samples_per_channel,
+                           int16_t* dst_audio);
+  // |frame.num_channels_| will be updated. This version checks for sufficient
+  // buffer size and that |num_channels_| is mono.
+  static int MonoToStereo(AudioFrame* frame);
+
+  // Downmixes stereo |src_audio| to mono |dst_audio|. This is an in-place
+  // operation, meaning |src_audio| and |dst_audio| may point to the same
+  // buffer.
+  static void StereoToMono(const int16_t* src_audio,
+                           size_t samples_per_channel,
+                           int16_t* dst_audio);
+  // |frame.num_channels_| will be updated. This version checks that
+  // |num_channels_| is stereo.
+  static int StereoToMono(AudioFrame* frame);
+
+  // Swap the left and right channels of |frame|. Fails silently if |frame| is
+  // not stereo.
+  static void SwapStereoChannels(AudioFrame* frame);
+
+  // Zeros out the audio and sets |frame.energy| to zero.
+  static void Mute(AudioFrame& frame);
+
+  static int Scale(float left, float right, AudioFrame& frame);
+
+  static int ScaleWithSat(float scale, AudioFrame& frame);
+};
+
+}  // namespace webrtc
+
+#endif  // #ifndef WEBRTC_VOICE_ENGINE_AUDIO_FRAME_OPERATIONS_H_
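A usage sketch for the two MonoToStereo overloads declared above (illustrative, not part of the patch; the buffer sizes are hypothetical, and for the raw-pointer overload sufficient |dst_audio| space is the caller's responsibility):

    int16_t mono[160];         // 10 ms of mono audio at 16 kHz, assumed filled
    int16_t stereo[2 * 160];   // must be a distinct, large-enough buffer
    webrtc::AudioFrameOperations::MonoToStereo(mono, 160, stereo);

    webrtc::AudioFrame frame;  // assumed already populated with mono audio
    if (webrtc::AudioFrameOperations::MonoToStereo(&frame) != 0) {
      // Frame was not mono, or upmixing would exceed kMaxDataSizeSamples.
    }
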
diff --git a/webrtc/system_wrappers/Makefile.am b/webrtc/system_wrappers/Makefile.am
index e7cad51..2f999d7 100644
--- a/webrtc/system_wrappers/Makefile.am
+++ b/webrtc/system_wrappers/Makefile.am
@@ -1,13 +1,53 @@
 noinst_LTLIBRARIES = libsystem_wrappers.la
-libsystem_wrappers_la_SOURCES = interface/cpu_features_wrapper.h \
+noinst_HEADERS = interface/aligned_array.h \
+                 interface/compile_assert_c.h \
+                 interface/scoped_vector.h \
+                 interface/stl_util.h
+
+libsystem_wrappers_la_SOURCES = interface/aligned_malloc.h \
+                                interface/cpu_features_wrapper.h \
                                 interface/critical_section_wrapper.h \
+                                interface/file_wrapper.h \
+                                interface/logging.h \
+                                interface/metrics.h \
+                                interface/sleep.h \
+                                interface/trace.h \
+                                source/aligned_malloc.cc \
                                 source/cpu_features.cc \
+                                source/event.cc \
+                                source/event_timer_posix.h \
+                                source/event_timer_win.h \
+                                source/file_impl.cc \
+                                source/file_impl.h \
                                 source/critical_section.cc \
                                 source/critical_section_posix.h \
-                                source/critical_section_windows.h
+                                source/critical_section_win.h \
+                                source/logging.cc \
+                                source/metrics_default.cc \
+                                source/rw_lock_generic.h \
+                                source/rw_lock_posix.h \
+                                source/rw_lock_win.h \
+                                source/sleep.cc \
+                                source/thread.cc \
+                                source/thread_posix.h \
+                                source/thread_win.h \
+                                source/trace_impl.cc \
+                                source/trace_impl.h \
+                                source/trace_posix.h \
+                                source/trace_win.h
 
 # This assumes that we want the POSIX implementation -- should eventually be
 # converted to a conditional to include Windows support
-libsystem_wrappers_la_SOURCES += source/critical_section_posix.cc
-libsystem_wrappers_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS) \
-                                 -I$(srcdir)/interface
+libsystem_wrappers_la_SOURCES += source/critical_section_posix.cc \
+                                 source/event_timer_posix.cc \
+                                 source/rw_lock_posix.cc \
+                                 source/thread_posix.cc \
+                                 source/trace_posix.cc
+libsystem_wrappers_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS)
+
+EXTRA_DIST = source/critical_section_win.cc \
+             source/event_timer_win.cc \
+             source/rw_lock_generic.cc \
+             source/rw_lock_win.cc \
+             source/thread_win.cc \
+             source/trace_win.cc
diff --git a/webrtc/system_wrappers/interface/aligned_array.h b/webrtc/system_wrappers/interface/aligned_array.h
new file mode 100644
index 0000000..6d6c81b
--- /dev/null
+++ b/webrtc/system_wrappers/interface/aligned_array.h
@@ -0,0 +1,88 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_ALIGNED_ARRAY_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_ALIGNED_ARRAY_
+
+#include "webrtc/base/checks.h"
+#include "webrtc/system_wrappers/interface/aligned_malloc.h"
+
+namespace webrtc {
+
+// Wrapper class for aligned arrays. Every row (and the first dimension) are
+// aligned to the given byte alignment.
+template <typename T> class AlignedArray {
+ public:
+  AlignedArray(int rows, size_t cols, int alignment)
+      : rows_(rows),
+        cols_(cols),
+        alignment_(alignment) {
+    RTC_CHECK_GT(alignment_, 0);
+    head_row_ = static_cast<T**>(AlignedMalloc(rows_ * sizeof(*head_row_),
+                                               alignment_));
+    for (int i = 0; i < rows_; ++i) {
+      head_row_[i] = static_cast<T*>(AlignedMalloc(cols_ * sizeof(**head_row_),
+                                                   alignment_));
+    }
+  }
+
+  ~AlignedArray() {
+    for (int i = 0; i < rows_; ++i) {
+      AlignedFree(head_row_[i]);
+    }
+    AlignedFree(head_row_);
+  }
+
+  T* const* Array() {
+    return head_row_;
+  }
+
+  const T* const* Array() const {
+    return head_row_;
+  }
+
+  T* Row(int row) {
+    RTC_CHECK_LE(row, rows_);
+    return head_row_[row];
+  }
+
+  const T* Row(int row) const {
+    RTC_CHECK_LE(row, rows_);
+    return head_row_[row];
+  }
+
+  T& At(int row, size_t col) {
+    RTC_CHECK_LE(col, cols_);
+    return Row(row)[col];
+  }
+
+  const T& At(int row, size_t col) const {
+    RTC_CHECK_LE(col, cols_);
+    return Row(row)[col];
+  }
+
+  int rows() const {
+    return rows_;
+  }
+
+  size_t cols() const {
+    return cols_;
+  }
+
+ private:
+  int rows_;
+  size_t cols_;
+  int alignment_;
+  T** head_row_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_ALIGNED_ARRAY_
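A short sketch of AlignedArray (illustrative, not part of the patch): every row gets an aligned base address, which matters for SIMD loads. The sizes and the 32-byte alignment below are hypothetical:

    // 2 rows of 256 floats, each row aligned to 32 bytes (e.g. for AVX).
    webrtc::AlignedArray<float> array(2, 256, 32);
    array.At(0, 0) = 1.0f;                 // bounds-checked element access
    float* row1 = array.Row(1);            // raw pointer to an aligned row
    float* const* rows = array.Array();    // jagged-array view for C-style APIs
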
diff --git a/webrtc/system_wrappers/interface/aligned_malloc.h b/webrtc/system_wrappers/interface/aligned_malloc.h
new file mode 100644
index 0000000..5d343cd
--- /dev/null
+++ b/webrtc/system_wrappers/interface/aligned_malloc.h
@@ -0,0 +1,59 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_ALIGNED_MALLOC_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_ALIGNED_MALLOC_H_
+
+// The functions declared here
+// 1) Allocates block of aligned memory.
+// 2) Re-calculates a pointer such that it is aligned to a higher or equal
+//    address.
+// Note: alignment must be a power of two. The alignment is in bytes.
+
+#include <stddef.h>
+
+namespace webrtc {
+
+// Returns a pointer to the first boundary of |alignment| bytes following the
+// address of |ptr|.
+// Note that there is no guarantee that the memory in question is available.
+// |ptr| has no requirements other than it can't be NULL.
+void* GetRightAlign(const void* ptr, size_t alignment);
+
+// Allocates memory of |size| bytes aligned on an |alignment| boundary.
+// The return value is a pointer to the memory. Note that the memory must
+// be de-allocated using AlignedFree.
+void* AlignedMalloc(size_t size, size_t alignment);
+// De-allocates memory created using the AlignedMalloc() API.
+void AlignedFree(void* mem_block);
+
+// Templated versions to facilitate usage of aligned malloc without casting
+// to and from void*.
+template <typename T>
+T* GetRightAlign(const T* ptr, size_t alignment) {
+  return reinterpret_cast<T*>(
+      GetRightAlign(reinterpret_cast<const void*>(ptr), alignment));
+}
+template <typename T>
+T* AlignedMalloc(size_t size, size_t alignment) {
+  return reinterpret_cast<T*>(AlignedMalloc(size, alignment));
+}
+
+// Deleter for use with scoped_ptr. E.g., use as
+//   scoped_ptr<int16_t, AlignedFreeDeleter> foo;
+struct AlignedFreeDeleter {
+  inline void operator()(void* ptr) const {
+    AlignedFree(ptr);
+  }
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_ALIGNED_MALLOC_H_
diff --git a/webrtc/system_wrappers/interface/compile_assert_c.h b/webrtc/system_wrappers/interface/compile_assert_c.h
new file mode 100644
index 0000000..dbb5292
--- /dev/null
+++ b/webrtc/system_wrappers/interface/compile_assert_c.h
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_
+
+#ifdef __cplusplus
+#error "Only use this for C files. For C++, use static_assert."
+#endif
+
+// Use this macro to verify at compile time that certain restrictions are met.
+// The argument is the boolean expression to evaluate.
+// Example:
+//   COMPILE_ASSERT(sizeof(foo) < 128);
+#define COMPILE_ASSERT(expression) switch (0) {case 0: case expression:;}
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_COMPILE_ASSERT_H_
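The AlignedMalloc/AlignedFreeDeleter pair from aligned_malloc.h above also composes with std::unique_ptr, which is equivalent to the scoped_ptr usage mentioned in that header (a sketch, not part of the patch):

    #include <memory>

    // 64-byte aligned buffer of 320 samples, released via AlignedFree().
    std::unique_ptr<int16_t, webrtc::AlignedFreeDeleter> buf(
        webrtc::AlignedMalloc<int16_t>(320 * sizeof(int16_t), 64));
    buf.get()[0] = 0;
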
diff --git a/webrtc/system_wrappers/interface/event_wrapper.h b/webrtc/system_wrappers/interface/event_wrapper.h
new file mode 100644
index 0000000..bd12eef
--- /dev/null
+++ b/webrtc/system_wrappers/interface/event_wrapper.h
@@ -0,0 +1,70 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_EVENT_WRAPPER_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_EVENT_WRAPPER_H_
+
+namespace webrtc {
+enum EventTypeWrapper {
+  kEventSignaled = 1,
+  kEventError = 2,
+  kEventTimeout = 3
+};
+
+#define WEBRTC_EVENT_INFINITE 0xffffffff
+
+class EventTimerWrapper;
+
+class EventWrapper {
+ public:
+  // Factory method. Constructor disabled.
+  static EventWrapper* Create();
+
+  virtual ~EventWrapper() {}
+
+  // Releases threads that are calling Wait() and have started waiting. Please
+  // note that a thread calling Wait() will not start waiting immediately;
+  // assuming otherwise is a very common source of issues in multithreaded
+  // programming.
+  // Set is sticky in the sense that it will release at least one thread
+  // either immediately or some time in the future.
+  virtual bool Set() = 0;
+
+  // Puts the calling thread into a wait state. The thread may be released
+  // by a Set() call depending on if other threads are waiting and if so on
+  // timing. The thread that was released will reset the event before leaving
+  // preventing more threads from being released. If multiple threads
+  // are waiting for the same Set(), only one (random) thread is guaranteed to
+  // be released. It is possible that multiple (random) threads are released,
+  // depending on timing.
+  //
+  // |max_time| is the maximum time to wait in milliseconds or
+  // WEBRTC_EVENT_INFINITE to wait infinitely.
+  virtual EventTypeWrapper Wait(unsigned long max_time) = 0;
+};
+
+class EventTimerWrapper : public EventWrapper {
+ public:
+  static EventTimerWrapper* Create();
+
+  // Starts a timer that will call a non-sticky version of Set() either once
+  // or periodically. If the timer is periodic it ensures that there is no
+  // drift over time relative to the system clock.
+  //
+  // |time| is in milliseconds.
+  virtual bool StartTimer(bool periodic, unsigned long time) = 0;
+
+  virtual bool StopTimer() = 0;
+
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_EVENT_WRAPPER_H_
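A pacing sketch for EventTimerWrapper above (illustrative, not part of the patch; the 10 ms period and iteration count are hypothetical):

    webrtc::EventTimerWrapper* timer = webrtc::EventTimerWrapper::Create();
    timer->StartTimer(true, 10);  // periodic, fires every 10 ms without drift
    for (int tick = 0; tick < 100; ++tick) {
      timer->Wait(WEBRTC_EVENT_INFINITE);  // kEventSignaled once per period
      // ... do one 10 ms slice of work, paced by the timer ...
    }
    timer->StopTimer();
    delete timer;
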
diff --git a/webrtc/system_wrappers/interface/file_wrapper.h b/webrtc/system_wrappers/interface/file_wrapper.h
new file mode 100644
index 0000000..8f4e09f
--- /dev/null
+++ b/webrtc/system_wrappers/interface/file_wrapper.h
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_FILE_WRAPPER_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_FILE_WRAPPER_H_
+
+#include <stddef.h>
+#include <stdio.h>
+
+#include "webrtc/common_types.h"
+#include "webrtc/typedefs.h"
+
+// Implementation of an InStream and OutStream that can read (exclusive) or
+// write from/to a file.
+
+namespace webrtc {
+
+class FileWrapper : public InStream, public OutStream {
+ public:
+  static const size_t kMaxFileNameSize = 1024;
+
+  // Factory method. Constructor disabled.
+  static FileWrapper* Create();
+
+  // Returns true if a file has been opened.
+  virtual bool Open() const = 0;
+
+  // Opens a file in read or write mode, decided by the read_only parameter.
+  virtual int OpenFile(const char* file_name_utf8,
+                       bool read_only,
+                       bool loop = false,
+                       bool text = false) = 0;
+
+  // Initializes the wrapper from an existing handle. |read_only| must match in
+  // the mode the file was opened in. If |manage_file| is true, the wrapper
+  // takes ownership of |handle| and closes it in CloseFile().
+  virtual int OpenFromFileHandle(FILE* handle,
+                                 bool manage_file,
+                                 bool read_only,
+                                 bool loop = false) = 0;
+
+  virtual int CloseFile() = 0;
+
+  // Limits the file size to |bytes|. Writing will fail after the cap
+  // is hit. Pass zero to use an unlimited size.
+  virtual int SetMaxFileSize(size_t bytes) = 0;
+
+  // Flush any pending writes.
+  virtual int Flush() = 0;
+
+  // Returns the opened file's name in |file_name_utf8|. Provide the size of
+  // the buffer in bytes in |size|. The name will be truncated if |size| is
+  // too small.
+  virtual int FileName(char* file_name_utf8,
+                       size_t size) const = 0;
+
+  // Write |format| to the opened file. Arguments are taken in the same manner
+  // as printf. That is, supply a format string containing text and
+  // specifiers. Returns the number of characters written or -1 on error.
+  virtual int WriteText(const char* format, ...) = 0;
+
+  // Inherited from both Instream and OutStream.
+  // Rewinds the file to the start. Only available when OpenFile() has been
+  // called with |loop| == true or |readOnly| == true.
+  // virtual int Rewind() = 0;
+  int Rewind() override;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_FILE_WRAPPER_H_
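A sketch of FileWrapper used as an output stream (illustrative, not part of the patch; the path is hypothetical and a return value of 0 from OpenFile() is assumed to indicate success):

    webrtc::FileWrapper* file = webrtc::FileWrapper::Create();
    if (file->OpenFile("/tmp/apm_debug.txt", false) == 0) {  // read_only=false
      file->WriteText("frame %d: gain %d\n", 1, -3);  // printf-style output
      file->CloseFile();
    }
    delete file;
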
diff --git a/webrtc/system_wrappers/interface/logging.h b/webrtc/system_wrappers/interface/logging.h
new file mode 100644
index 0000000..41c436b
--- /dev/null
+++ b/webrtc/system_wrappers/interface/logging.h
@@ -0,0 +1,161 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// This is a highly stripped-down version of libjingle's talk/base/logging.h.
+// It is a thin wrapper around WEBRTC_TRACE, maintaining the libjingle log
+// semantics to ease a transition to that format.
+
+// NOTE: LS_INFO maps to a new trace level which should be reserved for
+// infrequent, non-verbose logs. The other levels below kTraceWarning have been
+// rendered essentially useless due to their verbosity. Carefully consider the
+// impact of adding a new LS_INFO log. If it will be logged at anything
+// approaching a frame or packet frequency, use LS_VERBOSE if necessary, or
+// preferably, do not log at all.
+
+// LOG(...) is an ostream target that can be used to send formatted
+// output to a variety of logging targets, such as debugger console, stderr,
+// file, or any StreamInterface.
+// The severity level passed as the first argument to the LOGging
+// functions is used as a filter, to limit the verbosity of the logging.
+// Static members of LogMessage documented below are used to control the
+// verbosity and target of the output.
+// There are several variations on the LOG macro which facilitate logging
+// of common error conditions, detailed below.
+
+// LOG(sev) logs the given stream at severity "sev", which must be a
+// compile-time constant of the LoggingSeverity type, without the namespace
+// prefix.
+// LOG_V(sev) Like LOG(), but sev is a run-time variable of the LoggingSeverity
+// type (basically, it just doesn't prepend the namespace).
+// LOG_F(sev) Like LOG(), but includes the name of the current function.
+
+// Additional helper macros added by WebRTC:
+// LOG_API is a shortcut for API call logging. Pass in the input parameters of
+// the method. For example:
+//   Foo(int bar, int baz) {
+//     LOG_API2(bar, baz);
+//   }
+//
+// LOG_FERR is a shortcut for logging a failed function call. For example:
+//   if (!Foo(bar)) {
+//     LOG_FERR1(LS_WARNING, Foo, bar);
+//   }
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_LOGGING_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_LOGGING_H_
+
+#include <sstream>
+
+namespace webrtc {
+
+//////////////////////////////////////////////////////////////////////
+
+// Note that the non-standard LoggingSeverity aliases exist because they are
+// still in broad use. The meanings of the levels are:
+//  LS_SENSITIVE: Information which should only be logged with the consent
+//   of the user, due to privacy concerns.
+//  LS_VERBOSE: This level is for data which we do not want to appear in the
+//   normal debug log, but should appear in diagnostic logs.
+//  LS_INFO: Chatty level used in debugging for all sorts of things, the
+//   default in debug builds.
+//  LS_WARNING: Something that may warrant investigation.
+//  LS_ERROR: Something that should not have occurred.
+enum LoggingSeverity {
+  LS_SENSITIVE, LS_VERBOSE, LS_INFO, LS_WARNING, LS_ERROR
+};
+
+class LogMessage {
+ public:
+  LogMessage(const char* file, int line, LoggingSeverity sev);
+  ~LogMessage();
+
+  static bool Loggable(LoggingSeverity sev);
+  std::ostream& stream() { return print_stream_; }
+
+ private:
+  // The ostream that buffers the formatted message before output
+  std::ostringstream print_stream_;
+
+  // The severity level of this message
+  LoggingSeverity severity_;
+};
+
+//////////////////////////////////////////////////////////////////////
+// Macros which automatically disable logging when WEBRTC_LOGGING == 0
+//////////////////////////////////////////////////////////////////////
+
+#ifndef LOG
+// The following non-obvious technique for implementation of a
+// conditional log stream was stolen from google3/base/logging.h.
+
+// This class is used to explicitly ignore values in the conditional
+// logging macros. This avoids compiler warnings like "value computed
+// is not used" and "statement has no effect".
+
+class LogMessageVoidify {
+ public:
+  LogMessageVoidify() { }
+  // This has to be an operator with a precedence lower than << but
+  // higher than ?:
+  void operator&(std::ostream&) { }
+};
+
+#if defined(WEBRTC_RESTRICT_LOGGING)
+// This should compile away logs matching the following condition.
+#define RESTRICT_LOGGING_PRECONDITION(sev)  \
+  sev < webrtc::LS_INFO ? (void) 0 :
+#else
+#define RESTRICT_LOGGING_PRECONDITION(sev)
+#endif
+
+#define LOG_SEVERITY_PRECONDITION(sev) \
+  RESTRICT_LOGGING_PRECONDITION(sev) !(webrtc::LogMessage::Loggable(sev)) \
+    ? (void) 0 \
+    : webrtc::LogMessageVoidify() &
+
+#define LOG(sev) \
+  LOG_SEVERITY_PRECONDITION(webrtc::sev) \
+    webrtc::LogMessage(__FILE__, __LINE__, webrtc::sev).stream()
+
+// The _V version is for when a variable is passed in. It doesn't do the
+// namespace concatenation.
+#define LOG_V(sev) \
+  LOG_SEVERITY_PRECONDITION(sev) \
+    webrtc::LogMessage(__FILE__, __LINE__, sev).stream()
+
+// The _F version prefixes the message with the current function name.
+#if (defined(__GNUC__) && defined(_DEBUG)) || defined(WANT_PRETTY_LOG_F)
+#define LOG_F(sev) LOG(sev) << __PRETTY_FUNCTION__ << ": "
+#else
+#define LOG_F(sev) LOG(sev) << __FUNCTION__ << ": "
+#endif
+
+#define LOG_API0() LOG_F(LS_VERBOSE)
+#define LOG_API1(v1) LOG_API0() << #v1 << "=" << v1
+#define LOG_API2(v1, v2) LOG_API1(v1) \
+    << ", " << #v2 << "=" << v2
+#define LOG_API3(v1, v2, v3) LOG_API2(v1, v2) \
+    << ", " << #v3 << "=" << v3
+
+#define LOG_FERR0(sev, func) LOG(sev) << #func << " failed"
+#define LOG_FERR1(sev, func, v1) LOG_FERR0(sev, func) \
+    << ": " << #v1 << "=" << v1
+#define LOG_FERR2(sev, func, v1, v2) LOG_FERR1(sev, func, v1) \
+    << ", " << #v2 << "=" << v2
+#define LOG_FERR3(sev, func, v1, v2, v3) LOG_FERR2(sev, func, v1, v2) \
+    << ", " << #v3 << "=" << v3
+#define LOG_FERR4(sev, func, v1, v2, v3, v4) LOG_FERR3(sev, func, v1, v2, v3) \
+    << ", " << #v4 << "=" << v4
+
+#endif  // LOG
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_LOGGING_H_
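A usage sketch for the macro family above (illustrative, not part of the patch; Foo() and |bar| are hypothetical):

    int bar = 42;
    LOG(LS_WARNING) << "bar is " << bar;  // dropped unless Loggable(LS_WARNING)
    LOG_F(LS_ERROR) << "giving up";       // prefixed with the function name
    if (!Foo(bar)) {
      LOG_FERR1(LS_WARNING, Foo, bar);    // logs "Foo failed: bar=42"
    }
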
diff --git a/webrtc/system_wrappers/interface/metrics.h b/webrtc/system_wrappers/interface/metrics.h
new file mode 100644
index 0000000..cb641c0
--- /dev/null
+++ b/webrtc/system_wrappers/interface/metrics.h
@@ -0,0 +1,136 @@
+//
+// Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+//
+// Use of this source code is governed by a BSD-style license
+// that can be found in the LICENSE file in the root of the source
+// tree. An additional intellectual property rights grant can be found
+// in the file PATENTS.  All contributing project authors may
+// be found in the AUTHORS file in the root of the source tree.
+//
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_METRICS_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_METRICS_H_
+
+#include <string>
+
+#include "webrtc/common_types.h"
+
+// Macros for allowing WebRTC clients (e.g. Chrome) to gather and aggregate
+// statistics.
+//
+// Histogram for counters.
+// RTC_HISTOGRAM_COUNTS(name, sample, min, max, bucket_count);
+//
+// Histogram for enumerators.
+// The boundary should be above the max enumerator sample.
+// RTC_HISTOGRAM_ENUMERATION(name, sample, boundary);
+//
+//
+// The macros use the methods HistogramFactoryGetCounts,
+// HistogramFactoryGetEnumeration and HistogramAdd.
+//
+// Therefore, WebRTC clients must either:
+//
+// - provide implementations of
+//   Histogram* webrtc::metrics::HistogramFactoryGetCounts(
+//       const std::string& name, int sample, int min, int max,
+//       int bucket_count);
+//   Histogram* webrtc::metrics::HistogramFactoryGetEnumeration(
+//       const std::string& name, int sample, int boundary);
+//   void webrtc::metrics::HistogramAdd(
+//       Histogram* histogram_pointer, const std::string& name, int sample);
+//
+// - or link with the default implementations (i.e.
+//   system_wrappers/system_wrappers.gyp:metrics_default).
+//
+//
+// Example usage:
+//
+// RTC_HISTOGRAM_COUNTS("WebRTC.Video.NacksSent", nacks_sent, 1, 100000, 100);
+//
+// enum Types {
+//   kTypeX,
+//   kTypeY,
+//   kBoundary,
+// };
+//
+// RTC_HISTOGRAM_ENUMERATION("WebRTC.Types", kTypeX, kBoundary);
+
+
+// Macros for adding samples to a named histogram.
+//
+// NOTE: this is a temporary solution.
+// The aim is to mimic the behaviour in Chromium's
+// src/base/metrics/histograms.h. However as atomics are not supported in
+// webrtc, this is for now a modified and temporary solution. Note that the
+// histogram is constructed/found for each call. Therefore, for now only use
+// this implementation for metrics that do not need to be updated frequently.
+// TODO(asapersson): Change implementation when atomics are supported.
+// Also consider changing string to const char* when switching to atomics.
+
+// Histogram for counters.
+#define RTC_HISTOGRAM_COUNTS_100(name, sample) RTC_HISTOGRAM_COUNTS( \
+    name, sample, 1, 100, 50)
+
+#define RTC_HISTOGRAM_COUNTS_1000(name, sample) RTC_HISTOGRAM_COUNTS( \
+    name, sample, 1, 1000, 50)
+
+#define RTC_HISTOGRAM_COUNTS_10000(name, sample) RTC_HISTOGRAM_COUNTS( \
+    name, sample, 1, 10000, 50)
+
+#define RTC_HISTOGRAM_COUNTS_100000(name, sample) RTC_HISTOGRAM_COUNTS( \
+    name, sample, 1, 100000, 50)
+
+#define RTC_HISTOGRAM_COUNTS(name, sample, min, max, bucket_count) \
+    RTC_HISTOGRAM_COMMON_BLOCK(name, sample, \
+        webrtc::metrics::HistogramFactoryGetCounts( \
+            name, min, max, bucket_count))
+
+// Histogram for percentage.
+#define RTC_HISTOGRAM_PERCENTAGE(name, sample) \
+    RTC_HISTOGRAM_ENUMERATION(name, sample, 101)
+
+// Histogram for enumerators.
+// |boundary| should be above the max enumerator sample.
+#define RTC_HISTOGRAM_ENUMERATION(name, sample, boundary) \
+    RTC_HISTOGRAM_COMMON_BLOCK(name, sample, \
+        webrtc::metrics::HistogramFactoryGetEnumeration(name, boundary))
+
+#define RTC_HISTOGRAM_COMMON_BLOCK(constant_name, sample, \
+                                   factory_get_invocation) \
+  do { \
+    webrtc::metrics::Histogram* histogram_pointer = factory_get_invocation; \
+    webrtc::metrics::HistogramAdd(histogram_pointer, constant_name, sample); \
+  } while (0)
+
+
+namespace webrtc {
+namespace metrics {
+
+// Time that should have elapsed for stats that are gathered once per call.
+enum { kMinRunTimeInSeconds = 10 };
+
+class Histogram;
+
+// Functions for getting pointer to histogram (constructs or finds the named
+// histogram).
+
+// Get histogram for counters.
+Histogram* HistogramFactoryGetCounts(
+    const std::string& name, int min, int max, int bucket_count);
+
+// Get histogram for enumerators.
+// |boundary| should be above the max enumerator sample.
+Histogram* HistogramFactoryGetEnumeration(
+    const std::string& name, int boundary);
+
+// Function for adding a |sample| to a histogram.
+// |name| can be used to verify that it matches the histogram name.
+void HistogramAdd(
+    Histogram* histogram_pointer, const std::string& name, int sample);
+
+}  // namespace metrics
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_METRICS_H_
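A sketch of the two macro families above (illustrative, not part of the patch; the histogram name and enum are hypothetical, and either a client implementation or the metrics_default.cc built by this Makefile must be linked in):

    // One sample into a 50-bucket counts histogram over [1, 100000].
    RTC_HISTOGRAM_COUNTS_100000("WebRTC.Audio.ExampleCount", 1234);

    enum ExampleType { kTypeA, kTypeB, kExampleBoundary };
    // |kExampleBoundary| must be above the largest sample.
    RTC_HISTOGRAM_ENUMERATION("WebRTC.Audio.ExampleType", kTypeA,
                              kExampleBoundary);
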
diff --git a/webrtc/system_wrappers/interface/rw_lock_wrapper.h b/webrtc/system_wrappers/interface/rw_lock_wrapper.h
new file mode 100644
index 0000000..dbe6d6c
--- /dev/null
+++ b/webrtc/system_wrappers/interface/rw_lock_wrapper.h
@@ -0,0 +1,68 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_RW_LOCK_WRAPPER_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_RW_LOCK_WRAPPER_H_
+
+#include "webrtc/base/thread_annotations.h"
+
+// Note: Windows pre-Vista versions of RW locks are not supported natively. For
+// these OSs, regular critical sections have been used to approximate RW lock
+// functionality and will therefore have worse performance.
+
+namespace webrtc {
+
+class LOCKABLE RWLockWrapper {
+ public:
+  static RWLockWrapper* CreateRWLock();
+  virtual ~RWLockWrapper() {}
+
+  virtual void AcquireLockExclusive() EXCLUSIVE_LOCK_FUNCTION() = 0;
+  virtual void ReleaseLockExclusive() UNLOCK_FUNCTION() = 0;
+
+  virtual void AcquireLockShared() SHARED_LOCK_FUNCTION() = 0;
+  virtual void ReleaseLockShared() UNLOCK_FUNCTION() = 0;
+};
+
+// RAII extensions of the RW lock. Prevents Acquire/Release mismatches and
+// provides more compact locking syntax.
+class SCOPED_LOCKABLE ReadLockScoped {
+ public:
+  ReadLockScoped(RWLockWrapper& rw_lock) SHARED_LOCK_FUNCTION(rw_lock)
+      : rw_lock_(rw_lock) {
+    rw_lock_.AcquireLockShared();
+  }
+
+  ~ReadLockScoped() UNLOCK_FUNCTION() {
+    rw_lock_.ReleaseLockShared();
+  }
+
+ private:
+  RWLockWrapper& rw_lock_;
+};
+
+class SCOPED_LOCKABLE WriteLockScoped {
+ public:
+  WriteLockScoped(RWLockWrapper& rw_lock) EXCLUSIVE_LOCK_FUNCTION(rw_lock)
+      : rw_lock_(rw_lock) {
+    rw_lock_.AcquireLockExclusive();
+  }
+
+  ~WriteLockScoped() UNLOCK_FUNCTION() {
+    rw_lock_.ReleaseLockExclusive();
+  }
+
+ private:
+  RWLockWrapper& rw_lock_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_RW_LOCK_WRAPPER_H_
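A sketch of the RAII helpers above guarding hypothetical shared state (illustrative, not part of the patch):

    webrtc::RWLockWrapper* lock = webrtc::RWLockWrapper::CreateRWLock();
    {
      webrtc::ReadLockScoped read(*lock);    // many readers may hold this
      // ... read shared state ...
    }
    {
      webrtc::WriteLockScoped write(*lock);  // exclusive access
      // ... mutate shared state ...
    }
    delete lock;
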
diff --git a/webrtc/system_wrappers/interface/scoped_vector.h b/webrtc/system_wrappers/interface/scoped_vector.h
new file mode 100644
index 0000000..1a70a2c
--- /dev/null
+++ b/webrtc/system_wrappers/interface/scoped_vector.h
@@ -0,0 +1,157 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Borrowed from Chromium's src/base/memory/scoped_vector.h.
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_SCOPED_VECTOR_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_SCOPED_VECTOR_H_
+
+#include <vector>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/system_wrappers/interface/stl_util.h"
+
+namespace webrtc {
+
+// ScopedVector wraps a vector deleting the elements from its
+// destructor.
+template <class T>
+class ScopedVector {
+ public:
+  typedef typename std::vector<T*>::allocator_type allocator_type;
+  typedef typename std::vector<T*>::size_type size_type;
+  typedef typename std::vector<T*>::difference_type difference_type;
+  typedef typename std::vector<T*>::pointer pointer;
+  typedef typename std::vector<T*>::const_pointer const_pointer;
+  typedef typename std::vector<T*>::reference reference;
+  typedef typename std::vector<T*>::const_reference const_reference;
+  typedef typename std::vector<T*>::value_type value_type;
+  typedef typename std::vector<T*>::iterator iterator;
+  typedef typename std::vector<T*>::const_iterator const_iterator;
+  typedef typename std::vector<T*>::reverse_iterator reverse_iterator;
+  typedef typename std::vector<T*>::const_reverse_iterator
+      const_reverse_iterator;
+
+  ScopedVector() {}
+  ~ScopedVector() { clear(); }
+
+  // Move construction and assignment.
+  ScopedVector(ScopedVector&& other) {
+    *this = static_cast<ScopedVector&&>(other);
+  }
+  ScopedVector& operator=(ScopedVector&& other) {
+    std::swap(v_, other.v_);  // The arguments are std::vectors, so std::swap
+                              // is the one that we want.
+    other.clear();
+    return *this;
+  }
+
+  // Deleted copy constructor and copy assignment, to make the type move-only.
+  ScopedVector(const ScopedVector& other) = delete;
+  ScopedVector& operator=(const ScopedVector& other) = delete;
+
+  // Get an rvalue reference. (sv.Pass() does the same thing as std::move(sv).)
+  ScopedVector&& Pass() { return static_cast<ScopedVector&&>(*this); }
+
+  reference operator[](size_t index) { return v_[index]; }
+  const_reference operator[](size_t index) const { return v_[index]; }
+
+  bool empty() const { return v_.empty(); }
+  size_t size() const { return v_.size(); }
+
+  reverse_iterator rbegin() { return v_.rbegin(); }
+  const_reverse_iterator rbegin() const { return v_.rbegin(); }
+  reverse_iterator rend() { return v_.rend(); }
+  const_reverse_iterator rend() const { return v_.rend(); }
+
+  iterator begin() { return v_.begin(); }
+  const_iterator begin() const { return v_.begin(); }
+  iterator end() { return v_.end(); }
+  const_iterator end() const { return v_.end(); }
+
+  const_reference front() const { return v_.front(); }
+  reference front() { return v_.front(); }
+  const_reference back() const { return v_.back(); }
+  reference back() { return v_.back(); }
+
+  void push_back(T* elem) { v_.push_back(elem); }
+
+  void pop_back() {
+    RTC_DCHECK(!empty());
+    delete v_.back();
+    v_.pop_back();
+  }
+
+  std::vector<T*>& get() { return v_; }
+  const std::vector<T*>& get() const { return v_; }
+  void swap(std::vector<T*>& other) { v_.swap(other); }
+  void swap(ScopedVector& other) { v_.swap(other.v_); }
+  void release(std::vector<T*>* out) {
+    out->swap(v_);
+    v_.clear();
+  }
+
+  void reserve(size_t capacity) { v_.reserve(capacity); }
+
+  // Resize, deleting elements in the disappearing range if we are shrinking.
+  void resize(size_t new_size) {
+    if (v_.size() > new_size)
+      STLDeleteContainerPointers(v_.begin() + new_size, v_.end());
+    v_.resize(new_size);
+  }
+
+  template <typename InputIterator>
+  void assign(InputIterator begin, InputIterator end) {
+    v_.assign(begin, end);
+  }
+
+  void clear() { STLDeleteElements(&v_); }
+
+  // Like |clear()|, but doesn't delete any elements.
+  void weak_clear() { v_.clear(); }
+
+  // Lets the ScopedVector take ownership of |x|.
+  iterator insert(iterator position, T* x) {
+    return v_.insert(position, x);
+  }
+
+  // Lets the ScopedVector take ownership of elements in [first,last).
+  template <typename InputIterator>
+  void insert(iterator position, InputIterator first, InputIterator last) {
+    v_.insert(position, first, last);
+  }
+
+  iterator erase(iterator position) {
+    delete *position;
+    return v_.erase(position);
+  }
+
+  iterator erase(iterator first, iterator last) {
+    STLDeleteContainerPointers(first, last);
+    return v_.erase(first, last);
+  }
+
+  // Like |erase()|, but doesn't delete the element at |position|.
+  iterator weak_erase(iterator position) {
+    return v_.erase(position);
+  }
+
+  // Like |erase()|, but doesn't delete the elements in [first, last).
+  iterator weak_erase(iterator first, iterator last) {
+    return v_.erase(first, last);
+  }
+
+ private:
+  std::vector<T*> v_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_SCOPED_VECTOR_H_
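A sketch of the ownership semantics above (illustrative, not part of the patch): erase() and clear() delete the pointed-to elements, while the weak_ variants only remove the pointers:

    webrtc::ScopedVector<int> v;
    v.push_back(new int(1));    // v takes ownership
    v.push_back(new int(2));
    v.erase(v.begin());         // deletes the first int
    int* survivor = v.back();
    v.weak_erase(v.end() - 1);  // removes without deleting
    delete survivor;            // caller owns it again
    // ~ScopedVector (via clear()) deletes any remaining elements.
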
diff --git a/webrtc/system_wrappers/interface/sleep.h b/webrtc/system_wrappers/interface/sleep.h
new file mode 100644
index 0000000..c0205bf
--- /dev/null
+++ b/webrtc/system_wrappers/interface/sleep.h
@@ -0,0 +1,24 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+// An OS-independent sleep function.
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_SLEEP_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_SLEEP_H_
+
+namespace webrtc {
+
+// This function sleeps for the specified number of milliseconds.
+// It may return early if the thread is woken by some other event,
+// such as the delivery of a signal on Unix.
+void SleepMs(int msecs);
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_SLEEP_H_
diff --git a/webrtc/system_wrappers/interface/static_instance.h b/webrtc/system_wrappers/interface/static_instance.h
new file mode 100644
index 0000000..dad9c52
--- /dev/null
+++ b/webrtc/system_wrappers/interface/static_instance.h
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STATIC_INSTANCE_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STATIC_INSTANCE_H_
+
+#include <assert.h>
+
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#ifdef _WIN32
+#include "webrtc/system_wrappers/interface/fix_interlocked_exchange_pointer_win.h"
+#endif
+
+namespace webrtc {
+
+enum CountOperation {
+  kRelease,
+  kAddRef,
+  kAddRefNoCreate
+};
+enum CreateOperation {
+  kInstanceExists,
+  kCreate,
+  kDestroy
+};
+
+template <class T>
+// Construct On First Use idiom. Avoids
+// "static initialization order fiasco".
+static T* GetStaticInstance(CountOperation count_operation) {
+  // TODO (hellner): use atomic wrapper instead.
+  static volatile long instance_count = 0;
+  static T* volatile instance = NULL;
+  CreateOperation state = kInstanceExists;
+#ifndef _WIN32
+  // This memory is statically allocated once. The application does not try to
+  // free this memory. This approach is taken to avoid issues with
+  // destruction order for statically allocated memory. The memory will be
+  // reclaimed by the OS and memory leak tools will not recognize memory
+  // reachable from statics leaked so no noise is added by doing this.
+  static CriticalSectionWrapper* crit_sect(
+      CriticalSectionWrapper::CreateCriticalSection());
+  CriticalSectionScoped lock(crit_sect);
+
+  if (count_operation ==
+      kAddRefNoCreate && instance_count == 0) {
+    return NULL;
+  }
+  if (count_operation ==
+      kAddRef ||
+      count_operation == kAddRefNoCreate) {
+    instance_count++;
+    if (instance_count == 1) {
+      state = kCreate;
+    }
+  } else {
+    instance_count--;
+    if (instance_count == 0) {
+      state = kDestroy;
+    }
+  }
+  if (state == kCreate) {
+    instance = T::CreateInstance();
+  } else if (state == kDestroy) {
+    T* old_instance = instance;
+    instance = NULL;
+    // The state will not change past this point. Release the critical
+    // section while deleting the object in case it would be blocking on
+    // access back to this object. (This is the case for the tracing class
+    // since the thread owned by the tracing class also traces).
+    // TODO(hellner): this is a bit out of place but here goes, de-couple
+    // thread implementation with trace implementation.
diff --git a/webrtc/system_wrappers/interface/static_instance.h b/webrtc/system_wrappers/interface/static_instance.h
new file mode 100644
index 0000000..dad9c52
--- /dev/null
+++ b/webrtc/system_wrappers/interface/static_instance.h
@@ -0,0 +1,153 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STATIC_INSTANCE_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STATIC_INSTANCE_H_
+
+#include <assert.h>
+
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#ifdef _WIN32
+#include "webrtc/system_wrappers/interface/fix_interlocked_exchange_pointer_win.h"
+#endif
+
+namespace webrtc {
+
+enum CountOperation {
+  kRelease,
+  kAddRef,
+  kAddRefNoCreate
+};
+enum CreateOperation {
+  kInstanceExists,
+  kCreate,
+  kDestroy
+};
+
+template <class T>
+// Construct On First Use idiom. Avoids
+// "static initialization order fiasco".
+static T* GetStaticInstance(CountOperation count_operation) {
+  // TODO(hellner): use an atomic wrapper instead.
+  static volatile long instance_count = 0;
+  static T* volatile instance = NULL;
+  CreateOperation state = kInstanceExists;
+#ifndef _WIN32
+  // This memory is statically allocated once. The application does not try
+  // to free this memory. This approach is taken to avoid issues with
+  // destruction order for statically allocated memory. The memory will be
+  // reclaimed by the OS and memory leak tools will not recognize memory
+  // reachable from statics leaked so no noise is added by doing this.
+  static CriticalSectionWrapper* crit_sect(
+      CriticalSectionWrapper::CreateCriticalSection());
+  CriticalSectionScoped lock(crit_sect);
+
+  if (count_operation == kAddRefNoCreate && instance_count == 0) {
+    return NULL;
+  }
+  if (count_operation == kAddRef ||
+      count_operation == kAddRefNoCreate) {
+    instance_count++;
+    if (instance_count == 1) {
+      state = kCreate;
+    }
+  } else {
+    instance_count--;
+    if (instance_count == 0) {
+      state = kDestroy;
+    }
+  }
+  if (state == kCreate) {
+    instance = T::CreateInstance();
+  } else if (state == kDestroy) {
+    T* old_instance = instance;
+    instance = NULL;
+    // The state will not change past this point. Release the critical
+    // section while deleting the object in case it would be blocking on
+    // access back to this object. (This is the case for the tracing class
+    // since the thread owned by the tracing class also traces).
+    // TODO(hellner): this is a bit out of place but here goes, de-couple
+    // thread implementation with trace implementation.
+    crit_sect->Leave();
+    if (old_instance) {
+      delete old_instance;
+    }
+    // Re-acquire the lock since the scoped critical section will release it.
+    crit_sect->Enter();
+    return NULL;
+  }
+#else  // _WIN32
+  if (count_operation == kAddRefNoCreate && instance_count == 0) {
+    return NULL;
+  }
+  if (count_operation == kAddRefNoCreate) {
+    if (1 == InterlockedIncrement(&instance_count)) {
+      // The instance has been destroyed by some other thread. Rollback.
+      InterlockedDecrement(&instance_count);
+      assert(false);
+      return NULL;
+    }
+    // Sanity to catch corrupt state.
+    if (instance == NULL) {
+      assert(false);
+      InterlockedDecrement(&instance_count);
+      return NULL;
+    }
+  } else if (count_operation == kAddRef) {
+    if (instance_count == 0) {
+      state = kCreate;
+    } else {
+      if (1 == InterlockedIncrement(&instance_count)) {
+        // InterlockedDecrement because reference count should not be
+        // updated just yet (that's done when the instance is created).
+        InterlockedDecrement(&instance_count);
+        state = kCreate;
+      }
+    }
+  } else {
+    int new_value = InterlockedDecrement(&instance_count);
+    if (new_value == 0) {
+      state = kDestroy;
+    }
+  }
+
+  if (state == kCreate) {
+    // Create instance and let whichever thread finishes first assign its
+    // local copy to the global instance. All other threads reclaim their
+    // local copy.
+    T* new_instance = T::CreateInstance();
+    if (1 == InterlockedIncrement(&instance_count)) {
+      InterlockedExchangePointer(
+          reinterpret_cast<void* volatile*>(&instance), new_instance);
+    } else {
+      InterlockedDecrement(&instance_count);
+      if (new_instance) {
+        delete static_cast<T*>(new_instance);
+      }
+    }
+  } else if (state == kDestroy) {
+    T* old_value = static_cast<T*>(InterlockedExchangePointer(
+        reinterpret_cast<void* volatile*>(&instance), NULL));
+    if (old_value) {
+      delete static_cast<T*>(old_value);
+    }
+    return NULL;
+  }
+#endif  // #ifndef _WIN32
+  return instance;
+}
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STATIC_INSTANCE_H_
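The contract implied above: GetStaticInstance<T>() requires T to expose a static T* CreateInstance(), and callers pair kAddRef with kRelease. A hedged sketch of how a client class would typically wrap it (the class name here is illustrative only; the Trace class later in this patch follows the same pattern):

    class SingletonLikeThing {
     public:
      static SingletonLikeThing* CreateInstance() {
        return new SingletonLikeThing();
      }
      // Reference-counted accessors.
      static SingletonLikeThing* AddRef() {
        return GetStaticInstance<SingletonLikeThing>(kAddRef);
      }
      static void Release() {
        GetStaticInstance<SingletonLikeThing>(kRelease);
      }

     private:
      SingletonLikeThing() {}
    };

The instance is created on the first kAddRef and deleted when the count returns to zero; kAddRefNoCreate lets a caller observe an existing instance without resurrecting a destroyed one.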
diff --git a/webrtc/system_wrappers/interface/stl_util.h b/webrtc/system_wrappers/interface/stl_util.h
new file mode 100644
index 0000000..ebe855f
--- /dev/null
+++ b/webrtc/system_wrappers/interface/stl_util.h
@@ -0,0 +1,265 @@
+/*
+ *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// Borrowed from Chromium's src/base/stl_util.h.
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STL_UTIL_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STL_UTIL_H_
+
+#include <assert.h>
+#include <algorithm>
+#include <functional>
+#include <iterator>
+#include <string>
+#include <vector>
+
+namespace webrtc {
+
+// Clears internal memory of an STL object.
+// STL clear()/reserve(0) does not always free internal memory allocated.
+// This function uses swap/destructor to ensure the internal memory is freed.
+template <class T>
+void STLClearObject(T* obj) {
+  T tmp;
+  tmp.swap(*obj);
+  // Sometimes "T tmp" allocates objects with memory (arena implementation?).
+  // Hence using additional reserve(0) even if it doesn't always work.
+  obj->reserve(0);
+}
+
+// For a range within a container of pointers, calls delete (non-array version)
+// on these pointers.
+// NOTE: for these three functions, we could just implement a DeleteObject
+// functor and then call for_each() on the range and functor, but this
+// requires us to pull in all of algorithm.h, which seems expensive.
+// For hash_[multi]set, it is important that this deletes behind the iterator
+// because the hash_set may call the hash function on the iterator when it is
+// advanced, which could result in the hash function trying to dereference a
+// stale pointer.
+template <class ForwardIterator>
+void STLDeleteContainerPointers(ForwardIterator begin, ForwardIterator end) {
+  while (begin != end) {
+    ForwardIterator temp = begin;
+    ++begin;
+    delete *temp;
+  }
+}
+
+// For a range within a container of pairs, calls delete (non-array version) on
+// BOTH items in the pairs.
+// NOTE: Like STLDeleteContainerPointers, it is important that this deletes
+// behind the iterator because if both the key and value are deleted, the
+// container may call the hash function on the iterator when it is advanced,
+// which could result in the hash function trying to dereference a stale
+// pointer.
+template <class ForwardIterator>
+void STLDeleteContainerPairPointers(ForwardIterator begin,
+                                    ForwardIterator end) {
+  while (begin != end) {
+    ForwardIterator temp = begin;
+    ++begin;
+    delete temp->first;
+    delete temp->second;
+  }
+}
+
+// For a range within a container of pairs, calls delete (non-array version) on
+// the FIRST item in the pairs.
+// NOTE: Like STLDeleteContainerPointers, deleting behind the iterator.
+template <class ForwardIterator>
+void STLDeleteContainerPairFirstPointers(ForwardIterator begin,
+                                         ForwardIterator end) {
+  while (begin != end) {
+    ForwardIterator temp = begin;
+    ++begin;
+    delete temp->first;
+  }
+}
+
+// For a range within a container of pairs, calls delete.
+// NOTE: Like STLDeleteContainerPointers, deleting behind the iterator.
+// Deleting the value does not always invalidate the iterator, but it may
+// do so if the key is a pointer into the value object.
+template <class ForwardIterator>
+void STLDeleteContainerPairSecondPointers(ForwardIterator begin,
+                                          ForwardIterator end) {
+  while (begin != end) {
+    ForwardIterator temp = begin;
+    ++begin;
+    delete temp->second;
+  }
+}
+
+// To treat a possibly-empty vector as an array, use these functions.
+// If you know the array will never be empty, you can use &*v.begin()
+// directly, but that is undefined behaviour if |v| is empty.
+template <typename T>
+inline T* vector_as_array(std::vector<T>* v) {
+  return v->empty() ? NULL : &*v->begin();
+}
+
+template <typename T>
+inline const T* vector_as_array(const std::vector<T>* v) {
+  return v->empty() ? NULL : &*v->begin();
+}
+
+// Return a mutable char* pointing to a string's internal buffer,
+// which may not be null-terminated. Writing through this pointer will
+// modify the string.
+//
+// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
+// next call to a string method that invalidates iterators.
+//
+// As of 2006-04, there is no standard-blessed way of getting a
+// mutable reference to a string's internal buffer. However, issue 530
+// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#530)
+// proposes this as the method. According to Matt Austern, this should
+// already work on all current implementations.
+inline char* string_as_array(std::string* str) {
+  // DO NOT USE const_cast<char*>(str->data())
+  return str->empty() ? NULL : &*str->begin();
+}
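A short sketch of why vector_as_array() exists: handing a possibly-empty vector to a C-style API without risking undefined behaviour. (memset here merely stands in for any function that takes a raw pointer and a length.)

    #include <string.h>

    #include <vector>

    #include "webrtc/system_wrappers/interface/stl_util.h"

    void ZeroFill(std::vector<int>* v) {
      int* data = webrtc::vector_as_array(v);  // NULL when |v| is empty.
      if (data)
        memset(data, 0, v->size() * sizeof(int));
    }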
+
+// The following functions are useful for cleaning up STL containers whose
+// elements point to allocated memory.
+
+// STLDeleteElements() deletes all the elements in an STL container and clears
+// the container. This function is suitable for use with a vector, set,
+// hash_set, or any other STL container which defines sensible begin(), end(),
+// and clear() methods.
+//
+// If container is NULL, this function is a no-op.
+//
+// As an alternative to calling STLDeleteElements() directly, consider
+// STLElementDeleter (defined below), which ensures that your container's
+// elements are deleted when the STLElementDeleter goes out of scope.
+template <class T>
+void STLDeleteElements(T* container) {
+  if (!container)
+    return;
+  STLDeleteContainerPointers(container->begin(), container->end());
+  container->clear();
+}
+
+// Given an STL container consisting of (key, value) pairs, STLDeleteValues
+// deletes all the "value" components and clears the container. Does nothing
+// in the case it's given a NULL pointer.
+template <class T>
+void STLDeleteValues(T* container) {
+  if (!container)
+    return;
+  for (typename T::iterator i(container->begin()); i != container->end(); ++i)
+    delete i->second;
+  container->clear();
+}
+
+
+// The following classes provide a convenient way to delete all elements or
+// values from STL containers when they go out of scope. This greatly
+// simplifies code that creates temporary objects and has multiple return
+// statements. Example:
+//
+//   std::vector<MyProto*> tmp_proto;
+//   STLElementDeleter<std::vector<MyProto*> > d(&tmp_proto);
+//   if (...) return false;
+//   ...
+//   return success;
+
+// Given a pointer to an STL container this class will delete all the element
+// pointers when it goes out of scope.
+template <class T>
+class STLElementDeleter {
+ public:
+  STLElementDeleter(T* container) : container_(container) {}
+  ~STLElementDeleter() { STLDeleteElements(container_); }
+
+ private:
+  T* container_;
+};
+
+// Given a pointer to an STL container this class will delete all the value
+// pointers when it goes out of scope.
+template <class T>
+class STLValueDeleter {
+ public:
+  STLValueDeleter(T* container) : container_(container) {}
+  ~STLValueDeleter() { STLDeleteValues(container_); }
+
+ private:
+  T* container_;
+};
+
+// Test to see if a set, map, hash_set or hash_map contains a particular key.
+// Returns true if the key is in the collection.
+template <typename Collection, typename Key>
+bool ContainsKey(const Collection& collection, const Key& key) {
+  return collection.find(key) != collection.end();
+}
+
+// Returns true if the container is sorted.
+template <typename Container>
+bool STLIsSorted(const Container& cont) {
+  // Note: Use reverse iterator on container to ensure we only require
+  // value_type to implement operator<.
+  return std::adjacent_find(cont.rbegin(), cont.rend(),
+                            std::less<typename Container::value_type>())
+      == cont.rend();
+}
+
+// Returns a new ResultType containing the difference of two sorted containers.
+template <typename ResultType, typename Arg1, typename Arg2>
+ResultType STLSetDifference(const Arg1& a1, const Arg2& a2) {
+  assert(STLIsSorted(a1));
+  assert(STLIsSorted(a2));
+  ResultType difference;
+  std::set_difference(a1.begin(), a1.end(),
+                      a2.begin(), a2.end(),
+                      std::inserter(difference, difference.end()));
+  return difference;
+}
+
+// Returns a new ResultType containing the union of two sorted containers.
+template <typename ResultType, typename Arg1, typename Arg2>
+ResultType STLSetUnion(const Arg1& a1, const Arg2& a2) {
+  assert(STLIsSorted(a1));
+  assert(STLIsSorted(a2));
+  ResultType result;
+  std::set_union(a1.begin(), a1.end(),
+                 a2.begin(), a2.end(),
+                 std::inserter(result, result.end()));
+  return result;
+}
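A quick illustration of the sorted-container helpers above and below (the values are arbitrary; note that the result type is passed explicitly, and both inputs must already be sorted, which the assert()s check in debug builds):

    #include <vector>

    #include "webrtc/system_wrappers/interface/stl_util.h"

    void SetOpsDemo() {
      std::vector<int> a = {1, 2, 3, 5};
      std::vector<int> b = {2, 5};
      std::vector<int> diff =
          webrtc::STLSetDifference<std::vector<int> >(a, b);  // {1, 3}
      bool includes = webrtc::STLIncludes(a, b);              // true
      (void)diff;
      (void)includes;
    }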
+// Returns a new ResultType containing the intersection of two sorted
+// containers.
+template <typename ResultType, typename Arg1, typename Arg2>
+ResultType STLSetIntersection(const Arg1& a1, const Arg2& a2) {
+  assert(STLIsSorted(a1));
+  assert(STLIsSorted(a2));
+  ResultType result;
+  std::set_intersection(a1.begin(), a1.end(),
+                        a2.begin(), a2.end(),
+                        std::inserter(result, result.end()));
+  return result;
+}
+
+// Returns true if the sorted container |a1| contains all elements of the
+// sorted container |a2|.
+template <typename Arg1, typename Arg2>
+bool STLIncludes(const Arg1& a1, const Arg2& a2) {
+  assert(STLIsSorted(a1));
+  assert(STLIsSorted(a2));
+  return std::includes(a1.begin(), a1.end(),
+                       a2.begin(), a2.end());
+}
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_STL_UTIL_H_
diff --git a/webrtc/system_wrappers/interface/thread_wrapper.h b/webrtc/system_wrappers/interface/thread_wrapper.h
new file mode 100644
index 0000000..7420561
--- /dev/null
+++ b/webrtc/system_wrappers/interface/thread_wrapper.h
@@ -0,0 +1,95 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+// System independent wrapper for spawning threads.
+// Note: the spawned thread will loop over the callback function until stopped.
+// Note: The callback function is expected to return every 2 seconds or more
+// often.
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_THREAD_WRAPPER_H_
+#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_THREAD_WRAPPER_H_
+
+#if defined(WEBRTC_WIN)
+#include <windows.h>
+#endif
+
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/common_types.h"
+#include "webrtc/typedefs.h"
+
+namespace webrtc {
+
+// Callback function that the spawned thread will enter once spawned.
+// A return value of false is interpreted as that the function has no
+// more work to do and that the thread can be released.
+typedef bool (*ThreadRunFunction)(void*);
+
+enum ThreadPriority {
+#ifdef WEBRTC_WIN
+  kLowPriority = THREAD_PRIORITY_BELOW_NORMAL,
+  kNormalPriority = THREAD_PRIORITY_NORMAL,
+  kHighPriority = THREAD_PRIORITY_ABOVE_NORMAL,
+  kHighestPriority = THREAD_PRIORITY_HIGHEST,
+  kRealtimePriority = THREAD_PRIORITY_TIME_CRITICAL
+#else
+  kLowPriority = 1,
+  kNormalPriority = 2,
+  kHighPriority = 3,
+  kHighestPriority = 4,
+  kRealtimePriority = 5
+#endif
+};
+
+// Represents a simple worker thread. The implementation must be assumed
+// to be single threaded, meaning that all methods of the class must be
+// called from the same thread, including instantiation.
+// TODO(tommi): There's no need for this to be a virtual interface since
+// there's only ever a single implementation of it.
+class ThreadWrapper {
+ public:
+  virtual ~ThreadWrapper() {}
+
+  // Factory method. Constructor disabled.
+  //
+  // func         Pointer to a user-specified callback function.
+  // obj          Object associated with the thread. Passed in the callback
+  //              function.
+  // prio         Thread priority. May require root/admin rights.
+  // thread_name  NULL terminated thread name, will be visible in the Windows
+  //              debugger.
+  static rtc::scoped_ptr<ThreadWrapper> CreateThread(ThreadRunFunction func,
+      void* obj, const char* thread_name);
+
+  // Get the current thread's thread ID.
+  // NOTE: This is a static method. It returns the id of the calling thread,
+  // *not* the id of the worker thread that a ThreadWrapper instance represents.
+ // TODO(tommi): Move outside of the ThreadWrapper class to avoid confusion. + static uint32_t GetThreadId(); + + // Tries to spawns a thread and returns true if that was successful. + // Additionally, it tries to set thread priority according to the priority + // from when CreateThread was called. However, failure to set priority will + // not result in a false return value. + virtual bool Start() = 0; + + // Stops the spawned thread and waits for it to be reclaimed with a timeout + // of two seconds. Will return false if the thread was not reclaimed. + // Multiple tries to Stop are allowed (e.g. to wait longer than 2 seconds). + // It's ok to call Stop() even if the spawned thread has been reclaimed. + virtual bool Stop() = 0; + + // Set the priority of the worker thread. Must be called when thread + // is running. + virtual bool SetPriority(ThreadPriority priority) = 0; +}; + +} // namespace webrtc + +#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_THREAD_WRAPPER_H_ diff --git a/webrtc/system_wrappers/interface/trace.h b/webrtc/system_wrappers/interface/trace.h new file mode 100644 index 0000000..e63b603 --- /dev/null +++ b/webrtc/system_wrappers/interface/trace.h @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + * + * System independent wrapper for logging runtime information to file. + * Note: All log messages will be written to the same trace file. + * Note: If too many messages are written to file there will be a build up of + * messages. Apply filtering to avoid that. + */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_INTERFACE_TRACE_H_ +#define WEBRTC_SYSTEM_WRAPPERS_INTERFACE_TRACE_H_ + +#include "webrtc/common_types.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +#if defined(WEBRTC_RESTRICT_LOGGING) +// Disable all TRACE macros. The LOG macro is still functional. +#define WEBRTC_TRACE true ? (void) 0 : Trace::Add +#else +#define WEBRTC_TRACE Trace::Add +#endif + +class Trace { + public: + // The length of the trace text preceeding the log message. + static const int kBoilerplateLength; + // The position of the timestamp text within a trace. + static const int kTimestampPosition; + // The length of the timestamp (without "delta" field). + static const int kTimestampLength; + + // Increments the reference count to the trace. + static void CreateTrace(); + // Decrements the reference count to the trace. + static void ReturnTrace(); + // Note: any instance that writes to the trace file should increment and + // decrement the reference count on construction and destruction, + // respectively. + + // Specifies what type of messages should be written to the trace file. The + // filter parameter is a bitmask where each message type is enumerated by the + // TraceLevel enumerator. TODO(hellner): why is the TraceLevel enumerator not + // defined in this file? + static void set_level_filter(int filter); + + // Returns what type of messages are written to the trace file. + static int level_filter(); + + // Sets the file name. If add_file_counter is false the same file will be + // reused when it fills up. If it's true a new file with incremented name + // will be used. 
+ static int32_t SetTraceFile(const char* file_name, + const bool add_file_counter = false); + + // Returns the name of the file that the trace is currently writing to. + static int32_t TraceFile(char file_name[1024]); + + // Registers callback to receive trace messages. + // TODO(hellner): Why not use OutStream instead? Why is TraceCallback not + // defined in this file? + static int32_t SetTraceCallback(TraceCallback* callback); + + // Adds a trace message for writing to file. The message is put in a queue + // for writing to file whenever possible for performance reasons. I.e. there + // is a crash it is possible that the last, vital logs are not logged yet. + // level is the type of message to log. If that type of messages is + // filtered it will not be written to file. module is an identifier for what + // part of the code the message is coming. + // id is an identifier that should be unique for that set of classes that + // are associated (e.g. all instances owned by an engine). + // msg and the ellipsis are the same as e.g. sprintf. + // TODO(hellner) Why is TraceModule not defined in this file? + static void Add(const TraceLevel level, + const TraceModule module, + const int32_t id, + const char* msg, ...); + + private: + static volatile int level_filter_; +}; + +} // namespace webrtc + +#endif // WEBRTC_SYSTEM_WRAPPERS_INTERFACE_TRACE_H_ diff --git a/webrtc/system_wrappers/source/aligned_malloc.cc b/webrtc/system_wrappers/source/aligned_malloc.cc new file mode 100644 index 0000000..258b6be --- /dev/null +++ b/webrtc/system_wrappers/source/aligned_malloc.cc @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/system_wrappers/interface/aligned_malloc.h" + +#include +#include + +#if _WIN32 +#include +#else +#include +#endif + +#include "webrtc/typedefs.h" + +// Reference on memory alignment: +// http://stackoverflow.com/questions/227897/solve-the-memory-alignment-in-c-interview-question-that-stumped-me +namespace webrtc { + +uintptr_t GetRightAlign(uintptr_t start_pos, size_t alignment) { + // The pointer should be aligned with |alignment| bytes. The - 1 guarantees + // that it is aligned towards the closest higher (right) address. + return (start_pos + alignment - 1) & ~(alignment - 1); +} + +// Alignment must be an integer power of two. +bool ValidAlignment(size_t alignment) { + if (!alignment) { + return false; + } + return (alignment & (alignment - 1)) == 0; +} + +void* GetRightAlign(const void* pointer, size_t alignment) { + if (!pointer) { + return NULL; + } + if (!ValidAlignment(alignment)) { + return NULL; + } + uintptr_t start_pos = reinterpret_cast(pointer); + return reinterpret_cast(GetRightAlign(start_pos, alignment)); +} + +void* AlignedMalloc(size_t size, size_t alignment) { + if (size == 0) { + return NULL; + } + if (!ValidAlignment(alignment)) { + return NULL; + } + + // The memory is aligned towards the lowest address that so only + // alignment - 1 bytes needs to be allocated. + // A pointer to the start of the memory must be stored so that it can be + // retreived for deletion, ergo the sizeof(uintptr_t). 
+ void* memory_pointer = malloc(size + sizeof(uintptr_t) + alignment - 1); + if (memory_pointer == NULL) { + return NULL; + } + + // Aligning after the sizeof(uintptr_t) bytes will leave room for the header + // in the same memory block. + uintptr_t align_start_pos = reinterpret_cast(memory_pointer); + align_start_pos += sizeof(uintptr_t); + uintptr_t aligned_pos = GetRightAlign(align_start_pos, alignment); + void* aligned_pointer = reinterpret_cast(aligned_pos); + + // Store the address to the beginning of the memory just before the aligned + // memory. + uintptr_t header_pos = aligned_pos - sizeof(uintptr_t); + void* header_pointer = reinterpret_cast(header_pos); + uintptr_t memory_start = reinterpret_cast(memory_pointer); + memcpy(header_pointer, &memory_start, sizeof(uintptr_t)); + + return aligned_pointer; +} + +void AlignedFree(void* mem_block) { + if (mem_block == NULL) { + return; + } + uintptr_t aligned_pos = reinterpret_cast(mem_block); + uintptr_t header_pos = aligned_pos - sizeof(uintptr_t); + + // Read out the address of the AlignedMemory struct from the header. + uintptr_t memory_start_pos = *reinterpret_cast(header_pos); + void* memory_start = reinterpret_cast(memory_start_pos); + free(memory_start); +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/event.cc b/webrtc/system_wrappers/source/event.cc new file mode 100644 index 0000000..7f4f055 --- /dev/null +++ b/webrtc/system_wrappers/source/event.cc @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/system_wrappers/interface/event_wrapper.h" + +#if defined(_WIN32) +#include +#include "webrtc/system_wrappers/source/event_timer_win.h" +#elif defined(WEBRTC_MAC) && !defined(WEBRTC_IOS) +#include +#include +#include "webrtc/system_wrappers/source/event_timer_posix.h" +#else +#include +#include "webrtc/system_wrappers/source/event_timer_posix.h" +#endif + +#include "webrtc/base/event.h" + +namespace webrtc { + +class EventWrapperImpl : public EventWrapper { + public: + EventWrapperImpl() : event_(false, false) {} + ~EventWrapperImpl() override {} + + bool Set() override { + event_.Set(); + return true; + } + + EventTypeWrapper Wait(unsigned long max_time) override { + int to_wait = max_time == WEBRTC_EVENT_INFINITE ? + rtc::Event::kForever : static_cast(max_time); + return event_.Wait(to_wait) ? kEventSignaled : kEventTimeout; + } + + private: + rtc::Event event_; +}; + +// static +EventWrapper* EventWrapper::Create() { + return new EventWrapperImpl(); +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/event_timer_posix.cc b/webrtc/system_wrappers/source/event_timer_posix.cc new file mode 100644 index 0000000..99eebcb --- /dev/null +++ b/webrtc/system_wrappers/source/event_timer_posix.cc @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. 
All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/system_wrappers/source/event_timer_posix.h" + +#include +#include +#include +#include +#include +#include +#include + +#include "webrtc/base/checks.h" + +namespace webrtc { + +// static +EventTimerWrapper* EventTimerWrapper::Create() { + return new EventTimerPosix(); +} + +const long int E6 = 1000000; +const long int E9 = 1000 * E6; + +EventTimerPosix::EventTimerPosix() + : event_set_(false), + timer_thread_(nullptr), + created_at_(), + periodic_(false), + time_(0), + count_(0) { + pthread_mutexattr_t attr; + pthread_mutexattr_init(&attr); + pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE); + pthread_mutex_init(&mutex_, &attr); +#ifdef WEBRTC_CLOCK_TYPE_REALTIME + pthread_cond_init(&cond_, 0); +#else + pthread_condattr_t cond_attr; + pthread_condattr_init(&cond_attr); + pthread_condattr_setclock(&cond_attr, CLOCK_MONOTONIC); + pthread_cond_init(&cond_, &cond_attr); + pthread_condattr_destroy(&cond_attr); +#endif +} + +EventTimerPosix::~EventTimerPosix() { + StopTimer(); + pthread_cond_destroy(&cond_); + pthread_mutex_destroy(&mutex_); +} + +// TODO(pbos): Make this void. +bool EventTimerPosix::Set() { + RTC_CHECK_EQ(0, pthread_mutex_lock(&mutex_)); + event_set_ = true; + pthread_cond_signal(&cond_); + pthread_mutex_unlock(&mutex_); + return true; +} + +EventTypeWrapper EventTimerPosix::Wait(unsigned long timeout) { + int ret_val = 0; + RTC_CHECK_EQ(0, pthread_mutex_lock(&mutex_)); + + if (!event_set_) { + if (WEBRTC_EVENT_INFINITE != timeout) { + timespec end_at; +#ifndef WEBRTC_MAC +#ifdef WEBRTC_CLOCK_TYPE_REALTIME + clock_gettime(CLOCK_REALTIME, &end_at); +#else + clock_gettime(CLOCK_MONOTONIC, &end_at); +#endif +#else + timeval value; + struct timezone time_zone; + time_zone.tz_minuteswest = 0; + time_zone.tz_dsttime = 0; + gettimeofday(&value, &time_zone); + TIMEVAL_TO_TIMESPEC(&value, &end_at); +#endif + end_at.tv_sec += timeout / 1000; + end_at.tv_nsec += (timeout - (timeout / 1000) * 1000) * E6; + + if (end_at.tv_nsec >= E9) { + end_at.tv_sec++; + end_at.tv_nsec -= E9; + } + while (ret_val == 0 && !event_set_) + ret_val = pthread_cond_timedwait(&cond_, &mutex_, &end_at); + } else { + while (ret_val == 0 && !event_set_) + ret_val = pthread_cond_wait(&cond_, &mutex_); + } + } + + RTC_DCHECK(ret_val == 0 || ret_val == ETIMEDOUT); + + // Reset and signal if set, regardless of why the thread woke up. + if (event_set_) { + ret_val = 0; + event_set_ = false; + } + pthread_mutex_unlock(&mutex_); + + return ret_val == 0 ? kEventSignaled : kEventTimeout; +} + +EventTypeWrapper EventTimerPosix::Wait(timespec* end_at) { + int ret_val = 0; + RTC_CHECK_EQ(0, pthread_mutex_lock(&mutex_)); + + while (ret_val == 0 && !event_set_) + ret_val = pthread_cond_timedwait(&cond_, &mutex_, end_at); + + RTC_DCHECK(ret_val == 0 || ret_val == ETIMEDOUT); + + // Reset and signal if set, regardless of why the thread woke up. + if (event_set_) { + ret_val = 0; + event_set_ = false; + } + pthread_mutex_unlock(&mutex_); + + return ret_val == 0 ? kEventSignaled : kEventTimeout; +} + +bool EventTimerPosix::StartTimer(bool periodic, unsigned long time) { + pthread_mutex_lock(&mutex_); + if (timer_thread_) { + if (periodic_) { + // Timer already started. 
+      pthread_mutex_unlock(&mutex_);
+      return false;
+    } else {
+      // New one-shot timer.
+      time_ = time;
+      created_at_.tv_sec = 0;
+      timer_event_->Set();
+      pthread_mutex_unlock(&mutex_);
+      return true;
+    }
+  }
+
+  // Start the timer thread.
+  timer_event_.reset(new EventTimerPosix());
+  const char* thread_name = "WebRtc_event_timer_thread";
+  timer_thread_ = ThreadWrapper::CreateThread(Run, this, thread_name);
+  periodic_ = periodic;
+  time_ = time;
+  bool started = timer_thread_->Start();
+  timer_thread_->SetPriority(kRealtimePriority);
+  pthread_mutex_unlock(&mutex_);
+
+  return started;
+}
+
+bool EventTimerPosix::Run(void* obj) {
+  return static_cast<EventTimerPosix*>(obj)->Process();
+}
+
+bool EventTimerPosix::Process() {
+  pthread_mutex_lock(&mutex_);
+  if (created_at_.tv_sec == 0) {
+#ifndef WEBRTC_MAC
+#ifdef WEBRTC_CLOCK_TYPE_REALTIME
+    clock_gettime(CLOCK_REALTIME, &created_at_);
+#else
+    clock_gettime(CLOCK_MONOTONIC, &created_at_);
+#endif
+#else
+    timeval value;
+    struct timezone time_zone;
+    time_zone.tz_minuteswest = 0;
+    time_zone.tz_dsttime = 0;
+    gettimeofday(&value, &time_zone);
+    TIMEVAL_TO_TIMESPEC(&value, &created_at_);
+#endif
+    count_ = 0;
+  }
+
+  timespec end_at;
+  unsigned long long time = time_ * ++count_;
+  end_at.tv_sec = created_at_.tv_sec + time / 1000;
+  end_at.tv_nsec = created_at_.tv_nsec + (time - (time / 1000) * 1000) * E6;
+
+  if (end_at.tv_nsec >= E9) {
+    end_at.tv_sec++;
+    end_at.tv_nsec -= E9;
+  }
+
+  pthread_mutex_unlock(&mutex_);
+  if (timer_event_->Wait(&end_at) == kEventSignaled)
+    return true;
+
+  pthread_mutex_lock(&mutex_);
+  if (periodic_ || count_ == 1)
+    Set();
+  pthread_mutex_unlock(&mutex_);
+
+  return true;
+}
+
+bool EventTimerPosix::StopTimer() {
+  if (timer_event_) {
+    timer_event_->Set();
+  }
+  if (timer_thread_) {
+    if (!timer_thread_->Stop()) {
+      return false;
+    }
+    timer_thread_.reset();
+  }
+  timer_event_.reset();
+
+  // Set time to zero to force new reference time for the timer.
+  memset(&created_at_, 0, sizeof(created_at_));
+  count_ = 0;
+  return true;
+}
+
+}  // namespace webrtc
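For orientation, this is roughly how the timer side of the wrapper is driven: StartTimer() spawns the helper thread above, and each expiry signals the event so Wait() returns kEventSignaled. A hedged sketch (error paths and kEventError handling omitted):

    #include "webrtc/system_wrappers/interface/event_wrapper.h"

    void TickFiveTimes() {
      webrtc::EventTimerWrapper* timer = webrtc::EventTimerWrapper::Create();
      timer->StartTimer(true, 10);  // Periodic, every 10 ms.
      for (int i = 0; i < 5; ++i)
        timer->Wait(WEBRTC_EVENT_INFINITE);  // Returns on each tick.
      timer->StopTimer();
      delete timer;
    }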
diff --git a/webrtc/system_wrappers/source/event_timer_posix.h b/webrtc/system_wrappers/source/event_timer_posix.h
new file mode 100644
index 0000000..593e8a4
--- /dev/null
+++ b/webrtc/system_wrappers/source/event_timer_posix.h
@@ -0,0 +1,60 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_EVENT_POSIX_H_
+#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_EVENT_POSIX_H_
+
+#include "webrtc/system_wrappers/interface/event_wrapper.h"
+
+#include <pthread.h>
+#include <time.h>
+
+#include "webrtc/system_wrappers/interface/thread_wrapper.h"
+
+namespace webrtc {
+
+enum State {
+  kUp = 1,
+  kDown = 2
+};
+
+class EventTimerPosix : public EventTimerWrapper {
+ public:
+  EventTimerPosix();
+  ~EventTimerPosix() override;
+
+  EventTypeWrapper Wait(unsigned long max_time) override;
+  bool Set() override;
+
+  bool StartTimer(bool periodic, unsigned long time) override;
+  bool StopTimer() override;
+
+ private:
+  static bool Run(void* obj);
+  bool Process();
+  EventTypeWrapper Wait(timespec* end_at);
+
+ private:
+  pthread_cond_t cond_;
+  pthread_mutex_t mutex_;
+  bool event_set_;
+
+  rtc::scoped_ptr<ThreadWrapper> timer_thread_;
+  rtc::scoped_ptr<EventTimerPosix> timer_event_;
+  timespec created_at_;
+
+  bool periodic_;
+  unsigned long time_;  // In ms.
+  unsigned long count_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_SOURCE_EVENT_POSIX_H_
diff --git a/webrtc/system_wrappers/source/event_timer_win.cc b/webrtc/system_wrappers/source/event_timer_win.cc
new file mode 100644
index 0000000..4c58698
--- /dev/null
+++ b/webrtc/system_wrappers/source/event_timer_win.cc
@@ -0,0 +1,78 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/system_wrappers/source/event_timer_win.h"
+
+#include "Mmsystem.h"
+
+namespace webrtc {
+
+// static
+EventTimerWrapper* EventTimerWrapper::Create() {
+  return new EventTimerWin();
+}
+
+EventTimerWin::EventTimerWin()
+    : event_(::CreateEvent(NULL,    // security attributes
+                           FALSE,   // manual reset
+                           FALSE,   // initial state
+                           NULL)),  // name of event
+      timerID_(NULL) {
+}
+
+EventTimerWin::~EventTimerWin() {
+  StopTimer();
+  CloseHandle(event_);
+}
+
+bool EventTimerWin::Set() {
+  // Note: setting an event that is already set has no effect.
+  return SetEvent(event_) == 1;
+}
+
+EventTypeWrapper EventTimerWin::Wait(unsigned long max_time) {
+  unsigned long res = WaitForSingleObject(event_, max_time);
+  switch (res) {
+    case WAIT_OBJECT_0:
+      return kEventSignaled;
+    case WAIT_TIMEOUT:
+      return kEventTimeout;
+    default:
+      return kEventError;
+  }
+}
+
+bool EventTimerWin::StartTimer(bool periodic, unsigned long time) {
+  if (timerID_ != NULL) {
+    timeKillEvent(timerID_);
+    timerID_ = NULL;
+  }
+
+  if (periodic) {
+    timerID_ = timeSetEvent(time, 0, (LPTIMECALLBACK)HANDLE(event_), 0,
+                            TIME_PERIODIC | TIME_CALLBACK_EVENT_PULSE);
+  } else {
+    timerID_ = timeSetEvent(time, 0, (LPTIMECALLBACK)HANDLE(event_), 0,
+                            TIME_ONESHOT | TIME_CALLBACK_EVENT_SET);
+  }
+
+  return timerID_ != NULL;
+}
+
+bool EventTimerWin::StopTimer() {
+  if (timerID_ != NULL) {
+    timeKillEvent(timerID_);
+    timerID_ = NULL;
+  }
+
+  return true;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/system_wrappers/source/event_timer_win.h b/webrtc/system_wrappers/source/event_timer_win.h
new file mode 100644
index 0000000..d5bcd2c
--- /dev/null
+++ b/webrtc/system_wrappers/source/event_timer_win.h
@@ -0,0 +1,40 @@
+/*
+ *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_EVENT_WIN_H_ +#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_EVENT_WIN_H_ + +#include + +#include "webrtc/system_wrappers/interface/event_wrapper.h" + +#include "webrtc/typedefs.h" + +namespace webrtc { + +class EventTimerWin : public EventTimerWrapper { + public: + EventTimerWin(); + virtual ~EventTimerWin(); + + virtual EventTypeWrapper Wait(unsigned long max_time); + virtual bool Set(); + + virtual bool StartTimer(bool periodic, unsigned long time); + virtual bool StopTimer(); + + private: + HANDLE event_; + uint32_t timerID_; +}; + +} // namespace webrtc + +#endif // WEBRTC_SYSTEM_WRAPPERS_SOURCE_EVENT_WIN_H_ diff --git a/webrtc/system_wrappers/source/file_impl.cc b/webrtc/system_wrappers/source/file_impl.cc new file mode 100644 index 0000000..89a9185 --- /dev/null +++ b/webrtc/system_wrappers/source/file_impl.cc @@ -0,0 +1,278 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/system_wrappers/source/file_impl.h" + +#include + +#ifdef _WIN32 +#include +#else +#include +#include +#endif + +#include "webrtc/base/checks.h" +#include "webrtc/system_wrappers/interface/rw_lock_wrapper.h" + +namespace webrtc { + +FileWrapper* FileWrapper::Create() { + return new FileWrapperImpl(); +} + +FileWrapperImpl::FileWrapperImpl() + : rw_lock_(RWLockWrapper::CreateRWLock()), + id_(NULL), + managed_file_handle_(true), + open_(false), + looping_(false), + read_only_(false), + max_size_in_bytes_(0), + size_in_bytes_(0) { + memset(file_name_utf8_, 0, kMaxFileNameSize); +} + +FileWrapperImpl::~FileWrapperImpl() { + if (id_ != NULL && managed_file_handle_) { + fclose(id_); + } +} + +int FileWrapperImpl::CloseFile() { + WriteLockScoped write(*rw_lock_); + return CloseFileImpl(); +} + +int FileWrapperImpl::Rewind() { + WriteLockScoped write(*rw_lock_); + if (looping_ || !read_only_) { + if (id_ != NULL) { + size_in_bytes_ = 0; + return fseek(id_, 0, SEEK_SET); + } + } + return -1; +} + +int FileWrapperImpl::SetMaxFileSize(size_t bytes) { + WriteLockScoped write(*rw_lock_); + max_size_in_bytes_ = bytes; + return 0; +} + +int FileWrapperImpl::Flush() { + WriteLockScoped write(*rw_lock_); + return FlushImpl(); +} + +int FileWrapperImpl::FileName(char* file_name_utf8, size_t size) const { + ReadLockScoped read(*rw_lock_); + size_t length = strlen(file_name_utf8_); + if (length > kMaxFileNameSize) { + assert(false); + return -1; + } + if (length < 1) { + return -1; + } + + // Make sure to NULL terminate + if (size < length) { + length = size - 1; + } + memcpy(file_name_utf8, file_name_utf8_, length); + file_name_utf8[length] = 0; + return 0; +} + +bool FileWrapperImpl::Open() const { + ReadLockScoped read(*rw_lock_); + return open_; +} + +int FileWrapperImpl::OpenFile(const char* file_name_utf8, bool read_only, + bool loop, bool text) { + WriteLockScoped write(*rw_lock_); 
+ if (id_ != NULL && !managed_file_handle_) + return -1; + size_t length = strlen(file_name_utf8); + if (length > kMaxFileNameSize - 1) { + return -1; + } + + read_only_ = read_only; + + FILE* tmp_id = NULL; +#if defined _WIN32 + wchar_t wide_file_name[kMaxFileNameSize]; + wide_file_name[0] = 0; + + MultiByteToWideChar(CP_UTF8, + 0, // UTF8 flag + file_name_utf8, + -1, // Null terminated string + wide_file_name, + kMaxFileNameSize); + if (text) { + if (read_only) { + tmp_id = _wfopen(wide_file_name, L"rt"); + } else { + tmp_id = _wfopen(wide_file_name, L"wt"); + } + } else { + if (read_only) { + tmp_id = _wfopen(wide_file_name, L"rb"); + } else { + tmp_id = _wfopen(wide_file_name, L"wb"); + } + } +#else + if (text) { + if (read_only) { + tmp_id = fopen(file_name_utf8, "rt"); + } else { + tmp_id = fopen(file_name_utf8, "wt"); + } + } else { + if (read_only) { + tmp_id = fopen(file_name_utf8, "rb"); + } else { + tmp_id = fopen(file_name_utf8, "wb"); + } + } +#endif + + if (tmp_id != NULL) { + // +1 comes from copying the NULL termination character. + memcpy(file_name_utf8_, file_name_utf8, length + 1); + if (id_ != NULL) { + fclose(id_); + } + id_ = tmp_id; + managed_file_handle_ = true; + looping_ = loop; + open_ = true; + return 0; + } + return -1; +} + +int FileWrapperImpl::OpenFromFileHandle(FILE* handle, + bool manage_file, + bool read_only, + bool loop) { + WriteLockScoped write(*rw_lock_); + if (!handle) + return -1; + + if (id_ != NULL) { + if (managed_file_handle_) + fclose(id_); + else + return -1; + } + + id_ = handle; + managed_file_handle_ = manage_file; + read_only_ = read_only; + looping_ = loop; + open_ = true; + return 0; +} + +int FileWrapperImpl::Read(void* buf, size_t length) { + WriteLockScoped write(*rw_lock_); + if (id_ == NULL) + return -1; + + size_t bytes_read = fread(buf, 1, length, id_); + if (bytes_read != length && !looping_) { + CloseFileImpl(); + } + return static_cast(bytes_read); +} + +int FileWrapperImpl::WriteText(const char* format, ...) { + WriteLockScoped write(*rw_lock_); + if (format == NULL) + return -1; + + if (read_only_) + return -1; + + if (id_ == NULL) + return -1; + + va_list args; + va_start(args, format); + int num_chars = vfprintf(id_, format, args); + va_end(args); + + if (num_chars >= 0) { + return num_chars; + } else { + CloseFileImpl(); + return -1; + } +} + +bool FileWrapperImpl::Write(const void* buf, size_t length) { + WriteLockScoped write(*rw_lock_); + if (buf == NULL) + return false; + + if (read_only_) + return false; + + if (id_ == NULL) + return false; + + // Check if it's time to stop writing. 
+ if (max_size_in_bytes_ > 0 && + (size_in_bytes_ + length) > max_size_in_bytes_) { + FlushImpl(); + return false; + } + + size_t num_bytes = fwrite(buf, 1, length, id_); + if (num_bytes > 0) { + size_in_bytes_ += num_bytes; + return true; + } + + CloseFileImpl(); + return false; +} + +int FileWrapperImpl::CloseFileImpl() { + if (id_ != NULL) { + if (managed_file_handle_) + fclose(id_); + id_ = NULL; + } + memset(file_name_utf8_, 0, kMaxFileNameSize); + open_ = false; + return 0; +} + +int FileWrapperImpl::FlushImpl() { + if (id_ != NULL) { + return fflush(id_); + } + return -1; +} + +int FileWrapper::Rewind() { + RTC_DCHECK(false); + return -1; +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/file_impl.h b/webrtc/system_wrappers/source/file_impl.h new file mode 100644 index 0000000..e6679aa --- /dev/null +++ b/webrtc/system_wrappers/source/file_impl.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_FILE_IMPL_H_ +#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_FILE_IMPL_H_ + +#include + +#include "webrtc/base/scoped_ptr.h" +#include "webrtc/system_wrappers/interface/file_wrapper.h" + +namespace webrtc { + +class RWLockWrapper; + +class FileWrapperImpl : public FileWrapper { + public: + FileWrapperImpl(); + ~FileWrapperImpl() override; + + int FileName(char* file_name_utf8, size_t size) const override; + + bool Open() const override; + + int OpenFile(const char* file_name_utf8, + bool read_only, + bool loop = false, + bool text = false) override; + + int OpenFromFileHandle(FILE* handle, + bool manage_file, + bool read_only, + bool loop = false) override; + + int CloseFile() override; + int SetMaxFileSize(size_t bytes) override; + int Flush() override; + + int Read(void* buf, size_t length) override; + bool Write(const void* buf, size_t length) override; + int WriteText(const char* format, ...) override; + int Rewind() override; + + private: + int CloseFileImpl(); + int FlushImpl(); + + rtc::scoped_ptr rw_lock_; + + FILE* id_; + bool managed_file_handle_; + bool open_; + bool looping_; + bool read_only_; + size_t max_size_in_bytes_; // -1 indicates file size limitation is off + size_t size_in_bytes_; + char file_name_utf8_[kMaxFileNameSize]; +}; + +} // namespace webrtc + +#endif // WEBRTC_SYSTEM_WRAPPERS_SOURCE_FILE_IMPL_H_ diff --git a/webrtc/system_wrappers/source/logging.cc b/webrtc/system_wrappers/source/logging.cc new file mode 100644 index 0000000..45a0985 --- /dev/null +++ b/webrtc/system_wrappers/source/logging.cc @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/system_wrappers/interface/logging.h" + +#include + +#include + +#include "webrtc/common_types.h" +#include "webrtc/system_wrappers/interface/trace.h" + +namespace webrtc { +namespace { + +TraceLevel WebRtcSeverity(LoggingSeverity sev) { + switch (sev) { + // TODO(ajm): SENSITIVE doesn't have a corresponding webrtc level. + case LS_SENSITIVE: return kTraceInfo; + case LS_VERBOSE: return kTraceInfo; + case LS_INFO: return kTraceTerseInfo; + case LS_WARNING: return kTraceWarning; + case LS_ERROR: return kTraceError; + default: return kTraceNone; + } +} + +// Return the filename portion of the string (that following the last slash). +const char* FilenameFromPath(const char* file) { + const char* end1 = ::strrchr(file, '/'); + const char* end2 = ::strrchr(file, '\\'); + if (!end1 && !end2) + return file; + else + return (end1 > end2) ? end1 + 1 : end2 + 1; +} + +} // namespace + +LogMessage::LogMessage(const char* file, int line, LoggingSeverity sev) + : severity_(sev) { + print_stream_ << "(" << FilenameFromPath(file) << ":" << line << "): "; +} + +bool LogMessage::Loggable(LoggingSeverity sev) { + // |level_filter| is a bitmask, unlike libjingle's minimum severity value. + return WebRtcSeverity(sev) & Trace::level_filter() ? true : false; +} + +LogMessage::~LogMessage() { + const std::string& str = print_stream_.str(); + Trace::Add(WebRtcSeverity(severity_), kTraceUndefined, 0, "%s", str.c_str()); +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/metrics_default.cc b/webrtc/system_wrappers/source/metrics_default.cc new file mode 100644 index 0000000..af950b4 --- /dev/null +++ b/webrtc/system_wrappers/source/metrics_default.cc @@ -0,0 +1,29 @@ +// Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. +// +// Use of this source code is governed by a BSD-style license +// that can be found in the LICENSE file in the root of the source +// tree. An additional intellectual property rights grant can be found +// in the file PATENTS. All contributing project authors may +// be found in the AUTHORS file in the root of the source tree. +// + +#include "webrtc/system_wrappers/interface/metrics.h" + +// Default implementation of histogram methods for WebRTC clients that do not +// want to provide their own implementation. + +namespace webrtc { +namespace metrics { + +Histogram* HistogramFactoryGetCounts(const std::string& name, int min, int max, + int bucket_count) { return NULL; } + +Histogram* HistogramFactoryGetEnumeration(const std::string& name, + int boundary) { return NULL; } + +void HistogramAdd( + Histogram* histogram_pointer, const std::string& name, int sample) {} + +} // namespace metrics +} // namespace webrtc + diff --git a/webrtc/system_wrappers/source/rw_lock_generic.cc b/webrtc/system_wrappers/source/rw_lock_generic.cc new file mode 100644 index 0000000..0ca9518 --- /dev/null +++ b/webrtc/system_wrappers/source/rw_lock_generic.cc @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#include "webrtc/system_wrappers/source/rw_lock_generic.h" + +#include "webrtc/system_wrappers/interface/condition_variable_wrapper.h" +#include "webrtc/system_wrappers/interface/critical_section_wrapper.h" + +namespace webrtc { + +RWLockGeneric::RWLockGeneric() + : readers_active_(0), + writer_active_(false), + readers_waiting_(0), + writers_waiting_(0) { + critical_section_ = CriticalSectionWrapper::CreateCriticalSection(); + read_condition_ = ConditionVariableWrapper::CreateConditionVariable(); + write_condition_ = ConditionVariableWrapper::CreateConditionVariable(); +} + +RWLockGeneric::~RWLockGeneric() { + delete write_condition_; + delete read_condition_; + delete critical_section_; +} + +void RWLockGeneric::AcquireLockExclusive() { + CriticalSectionScoped cs(critical_section_); + if (writer_active_ || readers_active_ > 0) { + ++writers_waiting_; + while (writer_active_ || readers_active_ > 0) { + write_condition_->SleepCS(*critical_section_); + } + --writers_waiting_; + } + writer_active_ = true; +} + +void RWLockGeneric::ReleaseLockExclusive() { + CriticalSectionScoped cs(critical_section_); + writer_active_ = false; + if (writers_waiting_ > 0) { + write_condition_->Wake(); + } else if (readers_waiting_ > 0) { + read_condition_->WakeAll(); + } +} + +void RWLockGeneric::AcquireLockShared() { + CriticalSectionScoped cs(critical_section_); + if (writer_active_ || writers_waiting_ > 0) { + ++readers_waiting_; + + while (writer_active_ || writers_waiting_ > 0) { + read_condition_->SleepCS(*critical_section_); + } + --readers_waiting_; + } + ++readers_active_; +} + +void RWLockGeneric::ReleaseLockShared() { + CriticalSectionScoped cs(critical_section_); + --readers_active_; + if (readers_active_ == 0 && writers_waiting_ > 0) { + write_condition_->Wake(); + } +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/rw_lock_generic.h b/webrtc/system_wrappers/source/rw_lock_generic.h new file mode 100644 index 0000000..653564c --- /dev/null +++ b/webrtc/system_wrappers/source/rw_lock_generic.h @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_GENERIC_H_ +#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_GENERIC_H_ + +#include "webrtc/system_wrappers/interface/rw_lock_wrapper.h" +#include "webrtc/typedefs.h" + +namespace webrtc { + +class CriticalSectionWrapper; +class ConditionVariableWrapper; + +class RWLockGeneric : public RWLockWrapper { + public: + RWLockGeneric(); + ~RWLockGeneric() override; + + void AcquireLockExclusive() override; + void ReleaseLockExclusive() override; + + void AcquireLockShared() override; + void ReleaseLockShared() override; + + private: + CriticalSectionWrapper* critical_section_; + ConditionVariableWrapper* read_condition_; + ConditionVariableWrapper* write_condition_; + + int readers_active_; + bool writer_active_; + int readers_waiting_; + int writers_waiting_; +}; + +} // namespace webrtc + +#endif // WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_GENERIC_H_ diff --git a/webrtc/system_wrappers/source/rw_lock_posix.cc b/webrtc/system_wrappers/source/rw_lock_posix.cc new file mode 100644 index 0000000..cdcb7fb --- /dev/null +++ b/webrtc/system_wrappers/source/rw_lock_posix.cc @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/system_wrappers/source/rw_lock_posix.h" + +namespace webrtc { + +RWLockPosix::RWLockPosix() : lock_() { +} + +RWLockPosix::~RWLockPosix() { + pthread_rwlock_destroy(&lock_); +} + +RWLockPosix* RWLockPosix::Create() { + RWLockPosix* ret_val = new RWLockPosix(); + if (!ret_val->Init()) { + delete ret_val; + return NULL; + } + return ret_val; +} + +bool RWLockPosix::Init() { + return pthread_rwlock_init(&lock_, 0) == 0; +} + +void RWLockPosix::AcquireLockExclusive() { + pthread_rwlock_wrlock(&lock_); +} + +void RWLockPosix::ReleaseLockExclusive() { + pthread_rwlock_unlock(&lock_); +} + +void RWLockPosix::AcquireLockShared() { + pthread_rwlock_rdlock(&lock_); +} + +void RWLockPosix::ReleaseLockShared() { + pthread_rwlock_unlock(&lock_); +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/rw_lock_posix.h b/webrtc/system_wrappers/source/rw_lock_posix.h new file mode 100644 index 0000000..bec3c2d --- /dev/null +++ b/webrtc/system_wrappers/source/rw_lock_posix.h @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
+ */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_POSIX_H_ +#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_POSIX_H_ + +#include "webrtc/system_wrappers/interface/rw_lock_wrapper.h" +#include "webrtc/typedefs.h" + +#include + +namespace webrtc { + +class RWLockPosix : public RWLockWrapper { + public: + static RWLockPosix* Create(); + ~RWLockPosix() override; + + void AcquireLockExclusive() override; + void ReleaseLockExclusive() override; + + void AcquireLockShared() override; + void ReleaseLockShared() override; + + private: + RWLockPosix(); + bool Init(); + + pthread_rwlock_t lock_; +}; + +} // namespace webrtc + +#endif // WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_POSIX_H_ diff --git a/webrtc/system_wrappers/source/rw_lock_win.cc b/webrtc/system_wrappers/source/rw_lock_win.cc new file mode 100644 index 0000000..aea74fa --- /dev/null +++ b/webrtc/system_wrappers/source/rw_lock_win.cc @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#include "webrtc/system_wrappers/source/rw_lock_win.h" + +#include "webrtc/system_wrappers/interface/trace.h" + +namespace webrtc { + +static bool native_rw_locks_supported = false; +static bool module_load_attempted = false; +static HMODULE library = NULL; + +typedef void (WINAPI* InitializeSRWLock)(PSRWLOCK); + +typedef void (WINAPI* AcquireSRWLockExclusive)(PSRWLOCK); +typedef void (WINAPI* ReleaseSRWLockExclusive)(PSRWLOCK); + +typedef void (WINAPI* AcquireSRWLockShared)(PSRWLOCK); +typedef void (WINAPI* ReleaseSRWLockShared)(PSRWLOCK); + +InitializeSRWLock initialize_srw_lock; +AcquireSRWLockExclusive acquire_srw_lock_exclusive; +AcquireSRWLockShared acquire_srw_lock_shared; +ReleaseSRWLockShared release_srw_lock_shared; +ReleaseSRWLockExclusive release_srw_lock_exclusive; + +RWLockWin::RWLockWin() { + initialize_srw_lock(&lock_); +} + +RWLockWin* RWLockWin::Create() { + if (!LoadModule()) { + return NULL; + } + return new RWLockWin(); +} + +void RWLockWin::AcquireLockExclusive() { + acquire_srw_lock_exclusive(&lock_); +} + +void RWLockWin::ReleaseLockExclusive() { + release_srw_lock_exclusive(&lock_); +} + +void RWLockWin::AcquireLockShared() { + acquire_srw_lock_shared(&lock_); +} + +void RWLockWin::ReleaseLockShared() { + release_srw_lock_shared(&lock_); +} + +bool RWLockWin::LoadModule() { + if (module_load_attempted) { + return native_rw_locks_supported; + } + module_load_attempted = true; + // Use native implementation if supported (i.e Vista+) + library = LoadLibrary(TEXT("Kernel32.dll")); + if (!library) { + return false; + } + WEBRTC_TRACE(kTraceStateInfo, kTraceUtility, -1, "Loaded Kernel.dll"); + + initialize_srw_lock = + (InitializeSRWLock)GetProcAddress(library, "InitializeSRWLock"); + + acquire_srw_lock_exclusive = + (AcquireSRWLockExclusive)GetProcAddress(library, + "AcquireSRWLockExclusive"); + release_srw_lock_exclusive = + (ReleaseSRWLockExclusive)GetProcAddress(library, + "ReleaseSRWLockExclusive"); + acquire_srw_lock_shared = + (AcquireSRWLockShared)GetProcAddress(library, "AcquireSRWLockShared"); + release_srw_lock_shared = + (ReleaseSRWLockShared)GetProcAddress(library, "ReleaseSRWLockShared"); + + if (initialize_srw_lock && acquire_srw_lock_exclusive && + 
release_srw_lock_exclusive && acquire_srw_lock_shared && + release_srw_lock_shared) { + WEBRTC_TRACE(kTraceStateInfo, kTraceUtility, -1, "Loaded Native RW Lock"); + native_rw_locks_supported = true; + } + return native_rw_locks_supported; +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/rw_lock_win.h b/webrtc/system_wrappers/source/rw_lock_win.h new file mode 100644 index 0000000..6f7cd33 --- /dev/null +++ b/webrtc/system_wrappers/source/rw_lock_win.h @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ + +#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_WIN_H_ +#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_WIN_H_ + +#include "webrtc/system_wrappers/interface/rw_lock_wrapper.h" + +#include + +namespace webrtc { + +class RWLockWin : public RWLockWrapper { + public: + static RWLockWin* Create(); + ~RWLockWin() {} + + virtual void AcquireLockExclusive(); + virtual void ReleaseLockExclusive(); + + virtual void AcquireLockShared(); + virtual void ReleaseLockShared(); + + private: + RWLockWin(); + static bool LoadModule(); + + SRWLOCK lock_; +}; + +} // namespace webrtc + +#endif // WEBRTC_SYSTEM_WRAPPERS_SOURCE_RW_LOCK_WIN_H_ diff --git a/webrtc/system_wrappers/source/sleep.cc b/webrtc/system_wrappers/source/sleep.cc new file mode 100644 index 0000000..a916477 --- /dev/null +++ b/webrtc/system_wrappers/source/sleep.cc @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. + */ +// An OS-independent sleep function. + +#include "webrtc/system_wrappers/interface/sleep.h" + +#ifdef _WIN32 +// For Sleep() +#include +#else +// For nanosleep() +#include +#endif + +namespace webrtc { + +void SleepMs(int msecs) { +#ifdef _WIN32 + Sleep(msecs); +#else + struct timespec short_wait; + struct timespec remainder; + short_wait.tv_sec = msecs / 1000; + short_wait.tv_nsec = (msecs % 1000) * 1000 * 1000; + nanosleep(&short_wait, &remainder); +#endif +} + +} // namespace webrtc diff --git a/webrtc/system_wrappers/source/thread.cc b/webrtc/system_wrappers/source/thread.cc new file mode 100644 index 0000000..b469344 --- /dev/null +++ b/webrtc/system_wrappers/source/thread.cc @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. + * + * Use of this source code is governed by a BSD-style license + * that can be found in the LICENSE file in the root of the source + * tree. An additional intellectual property rights grant can be found + * in the file PATENTS. All contributing project authors may + * be found in the AUTHORS file in the root of the source tree. 
diff --git a/webrtc/system_wrappers/source/sleep.cc b/webrtc/system_wrappers/source/sleep.cc
new file mode 100644
index 0000000..a916477
--- /dev/null
+++ b/webrtc/system_wrappers/source/sleep.cc
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+// An OS-independent sleep function.
+
+#include "webrtc/system_wrappers/interface/sleep.h"
+
+#ifdef _WIN32
+// For Sleep()
+#include <windows.h>
+#else
+// For nanosleep()
+#include <time.h>
+#endif
+
+namespace webrtc {
+
+void SleepMs(int msecs) {
+#ifdef _WIN32
+  Sleep(msecs);
+#else
+  struct timespec short_wait;
+  struct timespec remainder;
+  short_wait.tv_sec = msecs / 1000;
+  short_wait.tv_nsec = (msecs % 1000) * 1000 * 1000;
+  nanosleep(&short_wait, &remainder);
+#endif
+}
+
+}  // namespace webrtc
diff --git a/webrtc/system_wrappers/source/thread.cc b/webrtc/system_wrappers/source/thread.cc
new file mode 100644
index 0000000..b469344
--- /dev/null
+++ b/webrtc/system_wrappers/source/thread.cc
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/system_wrappers/interface/thread_wrapper.h"
+
+#if defined(_WIN32)
+#include "webrtc/system_wrappers/source/thread_win.h"
+#else
+#include "webrtc/system_wrappers/source/thread_posix.h"
+#endif
+
+namespace webrtc {
+
+#if defined(_WIN32)
+typedef ThreadWindows ThreadType;
+#else
+typedef ThreadPosix ThreadType;
+#endif
+
+rtc::scoped_ptr<ThreadWrapper> ThreadWrapper::CreateThread(
+    ThreadRunFunction func, void* obj, const char* thread_name) {
+  return rtc::scoped_ptr<ThreadWrapper>(
+      new ThreadType(func, obj, thread_name)).Pass();
+}
+
+}  // namespace webrtc
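The factory above is the only way platform threads are created in this tree; the run function's bool return is the keep-running contract. A usage sketch, with the function and thread names hypothetical:

    #include "webrtc/system_wrappers/interface/thread_wrapper.h"

    // Run function: return true to be called again, false to end the loop.
    bool PollOnce(void* ctx) {
      // ... one unit of work ...
      return true;
    }

    void RunPoller() {
      rtc::scoped_ptr<webrtc::ThreadWrapper> thread =
          webrtc::ThreadWrapper::CreateThread(&PollOnce, nullptr, "poller");
      thread->Start();
      // ...
      thread->Stop();  // blocks until the run function has returned
    }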
diff --git a/webrtc/system_wrappers/source/thread_posix.cc b/webrtc/system_wrappers/source/thread_posix.cc
new file mode 100644
index 0000000..fdfbf80
--- /dev/null
+++ b/webrtc/system_wrappers/source/thread_posix.cc
@@ -0,0 +1,166 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/system_wrappers/source/thread_posix.h"
+
+#include <algorithm>
+
+#include <errno.h>
+#include <unistd.h>
+#ifdef WEBRTC_LINUX
+#include <linux/unistd.h>
+#include <sched.h>
+#include <sys/types.h>
+#endif
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/platform_thread.h"
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#include "webrtc/system_wrappers/interface/event_wrapper.h"
+#include "webrtc/system_wrappers/interface/sleep.h"
+#include "webrtc/system_wrappers/interface/trace.h"
+
+namespace webrtc {
+namespace {
+struct ThreadAttributes {
+  ThreadAttributes() { pthread_attr_init(&attr); }
+  ~ThreadAttributes() { pthread_attr_destroy(&attr); }
+  pthread_attr_t* operator&() { return &attr; }
+  pthread_attr_t attr;
+};
+}  // namespace
+
+int ConvertToSystemPriority(ThreadPriority priority, int min_prio,
+                            int max_prio) {
+  RTC_DCHECK(max_prio - min_prio > 2);
+  const int top_prio = max_prio - 1;
+  const int low_prio = min_prio + 1;
+
+  switch (priority) {
+    case kLowPriority:
+      return low_prio;
+    case kNormalPriority:
+      // The -1 ensures that the kHighPriority is always greater or equal to
+      // kNormalPriority.
+      return (low_prio + top_prio - 1) / 2;
+    case kHighPriority:
+      return std::max(top_prio - 2, low_prio);
+    case kHighestPriority:
+      return std::max(top_prio - 1, low_prio);
+    case kRealtimePriority:
+      return top_prio;
+  }
+  RTC_DCHECK(false);
+  return low_prio;
+}
+
+// static
+void* ThreadPosix::StartThread(void* param) {
+  static_cast<ThreadPosix*>(param)->Run();
+  return 0;
+}
+
+ThreadPosix::ThreadPosix(ThreadRunFunction func, void* obj,
+                         const char* thread_name)
+    : run_function_(func),
+      obj_(obj),
+      stop_event_(false, false),
+      name_(thread_name ? thread_name : "webrtc"),
+      thread_(0) {
+  RTC_DCHECK(name_.length() < 64);
+}
+
+uint32_t ThreadWrapper::GetThreadId() {
+  return rtc::CurrentThreadId();
+}
+
+ThreadPosix::~ThreadPosix() {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+}
+
+// TODO(pbos): Make Start void, calling code really doesn't support failures
+// here.
+bool ThreadPosix::Start() {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  RTC_DCHECK(!thread_) << "Thread already started?";
+
+  ThreadAttributes attr;
+  // Set the stack size to 1M.
+  pthread_attr_setstacksize(&attr, 1024 * 1024);
+  RTC_CHECK_EQ(0, pthread_create(&thread_, &attr, &StartThread, this));
+  return true;
+}
+
+bool ThreadPosix::Stop() {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  if (!thread_)
+    return true;
+
+  stop_event_.Set();
+  RTC_CHECK_EQ(0, pthread_join(thread_, nullptr));
+  thread_ = 0;
+
+  return true;
+}
+
+bool ThreadPosix::SetPriority(ThreadPriority priority) {
+  RTC_DCHECK(thread_checker_.CalledOnValidThread());
+  if (!thread_)
+    return false;
+#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_LINUX)
+  // TODO(tommi): Switch to the same mechanism as Chromium uses for
+  // changing thread priorities.
+  return true;
+#else
+#ifdef WEBRTC_THREAD_RR
+  const int policy = SCHED_RR;
+#else
+  const int policy = SCHED_FIFO;
+#endif
+  const int min_prio = sched_get_priority_min(policy);
+  const int max_prio = sched_get_priority_max(policy);
+  if (min_prio == -1 || max_prio == -1) {
+    WEBRTC_TRACE(kTraceError, kTraceUtility, -1,
+                 "unable to retrieve min or max priority for threads");
+    return false;
+  }
+
+  if (max_prio - min_prio <= 2)
+    return false;
+
+  sched_param param;
+  param.sched_priority = ConvertToSystemPriority(priority, min_prio, max_prio);
+  if (pthread_setschedparam(thread_, policy, &param) != 0) {
+    WEBRTC_TRACE(
+        kTraceError, kTraceUtility, -1, "unable to set thread priority");
+    return false;
+  }
+
+  return true;
+#endif  // defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_LINUX)
+}
+
+void ThreadPosix::Run() {
+  if (!name_.empty()) {
+    // Setting the thread name may fail (harmlessly) if running inside a
+    // sandbox. Ignore failures if they happen.
+    rtc::SetCurrentThreadName(name_.substr(0, 63).c_str());
+  }
+
+  // It's a requirement for successful thread creation that the run
+  // function be called at least once (see RunFunctionIsCalled unit test),
+  // so to fulfill that requirement, we use a |do| loop and not |while|.
+  do {
+    if (!run_function_(obj_))
+      break;
+  } while (!stop_event_.Wait(0));
+}
+
+}  // namespace webrtc
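To make the mapping in ConvertToSystemPriority concrete: with SCHED_FIFO on a typical Linux system, sched_get_priority_min/max report 1 and 99, so top_prio is 98 and low_prio is 2. A standalone check of the resulting values (the 1..99 range is an assumption about the target system):

    #include <cassert>

    int main() {
      const int min_prio = 1, max_prio = 99;  // typical SCHED_FIFO range
      const int top_prio = max_prio - 1;      // 98
      const int low_prio = min_prio + 1;      // 2
      assert(low_prio == 2);                        // kLowPriority
      assert((low_prio + top_prio - 1) / 2 == 49);  // kNormalPriority
      assert(top_prio - 2 == 96);                   // kHighPriority
      assert(top_prio - 1 == 97);                   // kHighestPriority
      assert(top_prio == 98);                       // kRealtimePriority
      return 0;
    }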
diff --git a/webrtc/system_wrappers/source/thread_posix.h b/webrtc/system_wrappers/source/thread_posix.h
new file mode 100644
index 0000000..c726e48
--- /dev/null
+++ b/webrtc/system_wrappers/source/thread_posix.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_THREAD_POSIX_H_
+#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_THREAD_POSIX_H_
+
+#include "webrtc/base/event.h"
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/base/thread_checker.h"
+#include "webrtc/system_wrappers/interface/thread_wrapper.h"
+
+#include <pthread.h>
+
+namespace webrtc {
+
+int ConvertToSystemPriority(ThreadPriority priority, int min_prio,
+                            int max_prio);
+
+class ThreadPosix : public ThreadWrapper {
+ public:
+  ThreadPosix(ThreadRunFunction func, void* obj, const char* thread_name);
+  ~ThreadPosix() override;
+
+  // From ThreadWrapper.
+  bool Start() override;
+  bool Stop() override;
+
+  bool SetPriority(ThreadPriority priority) override;
+
+ private:
+  static void* StartThread(void* param);
+
+  void Run();
+
+  rtc::ThreadChecker thread_checker_;
+  ThreadRunFunction const run_function_;
+  void* const obj_;
+  rtc::Event stop_event_;
+  const std::string name_;
+
+  pthread_t thread_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_SOURCE_THREAD_POSIX_H_
diff --git a/webrtc/system_wrappers/source/thread_win.cc b/webrtc/system_wrappers/source/thread_win.cc
new file mode 100644
index 0000000..2773f7e
--- /dev/null
+++ b/webrtc/system_wrappers/source/thread_win.cc
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/system_wrappers/source/thread_win.h"
+
+#include <process.h>
+#include <stdio.h>
+#include <windows.h>
+
+#include "webrtc/base/checks.h"
+#include "webrtc/base/platform_thread.h"
+#include "webrtc/system_wrappers/interface/trace.h"
+
+namespace webrtc {
+namespace {
+void CALLBACK RaiseFlag(ULONG_PTR param) {
+  *reinterpret_cast<bool*>(param) = true;
+}
+}
+
+ThreadWindows::ThreadWindows(ThreadRunFunction func, void* obj,
+                             const char* thread_name)
+    : run_function_(func),
+      obj_(obj),
+      stop_(false),
+      thread_(NULL),
+      name_(thread_name ? thread_name : "webrtc") {
+  RTC_DCHECK(func);
+}
+
+ThreadWindows::~ThreadWindows() {
+  RTC_DCHECK(main_thread_.CalledOnValidThread());
+  RTC_DCHECK(!thread_);
+}
+
+// static
+uint32_t ThreadWrapper::GetThreadId() {
+  return GetCurrentThreadId();
+}
+
+// static
+DWORD WINAPI ThreadWindows::StartThread(void* param) {
+  static_cast<ThreadWindows*>(param)->Run();
+  return 0;
+}
+
+bool ThreadWindows::Start() {
+  RTC_DCHECK(main_thread_.CalledOnValidThread());
+  RTC_DCHECK(!thread_);
+
+  stop_ = false;
+
+  // See bug 2902 for background on STACK_SIZE_PARAM_IS_A_RESERVATION.
+  // Set the reserved stack size to 1M, which is the default on Windows
+  // and Linux.
+  DWORD thread_id;
+  thread_ = ::CreateThread(NULL, 1024 * 1024, &StartThread, this,
+                           STACK_SIZE_PARAM_IS_A_RESERVATION, &thread_id);
+  if (!thread_) {
+    RTC_DCHECK(false) << "CreateThread failed";
+    return false;
+  }
+
+  return true;
+}
+
+bool ThreadWindows::Stop() {
+  RTC_DCHECK(main_thread_.CalledOnValidThread());
+  if (thread_) {
+    // Set stop_ to |true| on the worker thread.
+    QueueUserAPC(&RaiseFlag, thread_, reinterpret_cast<ULONG_PTR>(&stop_));
+    WaitForSingleObject(thread_, INFINITE);
+    CloseHandle(thread_);
+    thread_ = nullptr;
+  }
+
+  return true;
+}
+
+bool ThreadWindows::SetPriority(ThreadPriority priority) {
+  RTC_DCHECK(main_thread_.CalledOnValidThread());
+  return thread_ && SetThreadPriority(thread_, priority);
+}
+
+void ThreadWindows::Run() {
+  if (!name_.empty())
+    rtc::SetCurrentThreadName(name_.c_str());
+
+  do {
+    // The interface contract of Start/Stop is that for a successful call to
+    // Start, there should be at least one call to the run function. So we
+    // call the function before checking |stop_|.
+    if (!run_function_(obj_))
+      break;
+    // Alertable sleep to permit RaiseFlag to run and update |stop_|.
+    SleepEx(0, true);
+  } while (!stop_);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/system_wrappers/source/thread_win.h b/webrtc/system_wrappers/source/thread_win.h
new file mode 100644
index 0000000..741ae1e
--- /dev/null
+++ b/webrtc/system_wrappers/source/thread_win.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_THREAD_WIN_H_
+#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_THREAD_WIN_H_
+
+#include "webrtc/system_wrappers/interface/thread_wrapper.h"
+
+#include <windows.h>
+
+#include "webrtc/base/thread_checker.h"
+
+namespace webrtc {
+
+class ThreadWindows : public ThreadWrapper {
+ public:
+  ThreadWindows(ThreadRunFunction func, void* obj, const char* thread_name);
+  ~ThreadWindows() override;
+
+  bool Start() override;
+  bool Stop() override;
+
+  bool SetPriority(ThreadPriority priority) override;
+
+ protected:
+  void Run();
+
+ private:
+  static DWORD WINAPI StartThread(void* param);
+
+  ThreadRunFunction const run_function_;
+  void* const obj_;
+  bool stop_;
+  HANDLE thread_;
+  const std::string name_;
+  rtc::ThreadChecker main_thread_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_SOURCE_THREAD_WIN_H_
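The Stop/Run handshake in thread_win.cc is worth spelling out: Stop() queues RaiseFlag as an APC on the worker thread, and the worker's alertable SleepEx(0, true) is the only point where that APC can execute and flip stop_. The same pattern in isolation, using only documented Win32 calls (names here are illustrative):

    #include <windows.h>

    static bool stop_flag = false;

    static void CALLBACK RaiseStopFlag(ULONG_PTR param) {
      *reinterpret_cast<bool*>(param) = true;  // executes on the worker thread
    }

    static DWORD WINAPI Worker(void*) {
      do {
        // ... one unit of work ...
        SleepEx(0, TRUE);  // alertable wait: queued APCs run here
      } while (!stop_flag);
      return 0;
    }

    void StopWorker(HANDLE worker) {
      QueueUserAPC(&RaiseStopFlag, worker,
                   reinterpret_cast<ULONG_PTR>(&stop_flag));
      WaitForSingleObject(worker, INFINITE);  // join
      CloseHandle(worker);
    }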
diff --git a/webrtc/system_wrappers/source/trace_impl.cc b/webrtc/system_wrappers/source/trace_impl.cc
new file mode 100644
index 0000000..ffe79b9
--- /dev/null
+++ b/webrtc/system_wrappers/source/trace_impl.cc
@@ -0,0 +1,604 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/system_wrappers/source/trace_impl.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "webrtc/base/atomicops.h"
+#ifdef _WIN32
+#include "webrtc/system_wrappers/source/trace_win.h"
+#else
+#include "webrtc/system_wrappers/source/trace_posix.h"
+#endif  // _WIN32
+
+#define KEY_LEN_CHARS 31
+
+#ifdef _WIN32
+#pragma warning(disable:4355)
+#endif  // _WIN32
+
+namespace webrtc {
+
+const int Trace::kBoilerplateLength = 71;
+const int Trace::kTimestampPosition = 13;
+const int Trace::kTimestampLength = 12;
+volatile int Trace::level_filter_ = kTraceDefault;
+
+// Construct On First Use idiom. Avoids "static initialization order fiasco".
+TraceImpl* TraceImpl::StaticInstance(CountOperation count_operation,
+                                     const TraceLevel level) {
+  // Sanity checks to avoid taking the lock unless absolutely necessary (for
+  // performance reasons). count_operation == kAddRefNoCreate implies that a
+  // message will be written to file.
+  if ((level != kTraceAll) && (count_operation == kAddRefNoCreate)) {
+    if (!(level & level_filter())) {
+      return NULL;
+    }
+  }
+  TraceImpl* impl =
+      GetStaticInstance<TraceImpl>(count_operation);
+  return impl;
+}
+
+TraceImpl* TraceImpl::GetTrace(const TraceLevel level) {
+  return StaticInstance(kAddRefNoCreate, level);
+}
+
+TraceImpl* TraceImpl::CreateInstance() {
+#if defined(_WIN32)
+  return new TraceWindows();
+#else
+  return new TracePosix();
+#endif
+}
+
+TraceImpl::TraceImpl()
+    : callback_(NULL),
+      row_count_text_(0),
+      file_count_text_(0),
+      trace_file_(FileWrapper::Create()) {
+}
+
+TraceImpl::~TraceImpl() {
+  trace_file_->Flush();
+  trace_file_->CloseFile();
+}
+
+int32_t TraceImpl::AddThreadId(char* trace_message) const {
+  uint32_t thread_id = ThreadWrapper::GetThreadId();
+  // Message is 12 characters.
+  return sprintf(trace_message, "%10u; ", thread_id);
+}
+
+int32_t TraceImpl::AddLevel(char* sz_message, const TraceLevel level) const {
+  const int kMessageLength = 12;
+  switch (level) {
+    case kTraceTerseInfo:
+      // Add the appropriate amount of whitespace.
+      memset(sz_message, ' ', kMessageLength);
+      sz_message[kMessageLength] = '\0';
+      break;
+    case kTraceStateInfo:
+      sprintf(sz_message, "STATEINFO ; ");
+      break;
+    case kTraceWarning:
+      sprintf(sz_message, "WARNING   ; ");
+      break;
+    case kTraceError:
+      sprintf(sz_message, "ERROR     ; ");
+      break;
+    case kTraceCritical:
+      sprintf(sz_message, "CRITICAL  ; ");
+      break;
+    case kTraceInfo:
+      sprintf(sz_message, "DEBUGINFO ; ");
+      break;
+    case kTraceModuleCall:
+      sprintf(sz_message, "MODULECALL; ");
+      break;
+    case kTraceMemory:
+      sprintf(sz_message, "MEMORY    ; ");
+      break;
+    case kTraceTimer:
+      sprintf(sz_message, "TIMER     ; ");
+      break;
+    case kTraceStream:
+      sprintf(sz_message, "STREAM    ; ");
+      break;
+    case kTraceApiCall:
+      sprintf(sz_message, "APICALL   ; ");
+      break;
+    case kTraceDebug:
+      sprintf(sz_message, "DEBUG     ; ");
+      break;
+    default:
+      assert(false);
+      return 0;
+  }
+  // All messages are 12 characters.
+  return kMessageLength;
+}
+
+int32_t TraceImpl::AddModuleAndId(char* trace_message,
+                                  const TraceModule module,
+                                  const int32_t id) const {
+  // Use long int to prevent problems with different definitions of
+  // int32_t.
+  // TODO(hellner): is this actually a problem? If so, it would be better to
+  //                clean up int32_t
+  const long int idl = id;
+  const int kMessageLength = 25;
+  if (idl != -1) {
+    const unsigned long int id_engine = id >> 16;
+    const unsigned long int id_channel = id & 0xffff;
+
+    switch (module) {
+      case kTraceUndefined:
+        // Add the appropriate amount of whitespace.
+        memset(trace_message, ' ', kMessageLength);
+        trace_message[kMessageLength] = '\0';
+        break;
+      case kTraceVoice:
+        sprintf(trace_message, "       VOICE:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceVideo:
+        sprintf(trace_message, "       VIDEO:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceUtility:
+        sprintf(trace_message, "     UTILITY:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceRtpRtcp:
+        sprintf(trace_message, "    RTP/RTCP:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceTransport:
+        sprintf(trace_message, "   TRANSPORT:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceAudioCoding:
+        sprintf(trace_message, "AUDIO CODING:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceSrtp:
+        sprintf(trace_message, "        SRTP:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceAudioMixerServer:
+        sprintf(trace_message, " AUDIO MIX/S:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceAudioMixerClient:
+        sprintf(trace_message, " AUDIO MIX/C:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceVideoCoding:
+        sprintf(trace_message, "VIDEO CODING:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceVideoMixer:
+        // Print sleep time and API call
+        sprintf(trace_message, "   VIDEO MIX:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceFile:
+        sprintf(trace_message, "        FILE:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceAudioProcessing:
+        sprintf(trace_message, "  AUDIO PROC:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceAudioDevice:
+        sprintf(trace_message, "AUDIO DEVICE:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceVideoRenderer:
+        sprintf(trace_message, "VIDEO RENDER:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceVideoCapture:
+        sprintf(trace_message, "VIDEO CAPTUR:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+      case kTraceRemoteBitrateEstimator:
+        sprintf(trace_message, "     BWE RBE:%5ld %5ld;", id_engine,
+                id_channel);
+        break;
+    }
+  } else {
+    switch (module) {
+      case kTraceUndefined:
+        // Add the appropriate amount of whitespace.
+        memset(trace_message, ' ', kMessageLength);
+        trace_message[kMessageLength] = '\0';
+        break;
+      case kTraceVoice:
+        sprintf(trace_message, "       VOICE:%11ld;", idl);
+        break;
+      case kTraceVideo:
+        sprintf(trace_message, "       VIDEO:%11ld;", idl);
+        break;
+      case kTraceUtility:
+        sprintf(trace_message, "     UTILITY:%11ld;", idl);
+        break;
+      case kTraceRtpRtcp:
+        sprintf(trace_message, "    RTP/RTCP:%11ld;", idl);
+        break;
+      case kTraceTransport:
+        sprintf(trace_message, "   TRANSPORT:%11ld;", idl);
+        break;
+      case kTraceAudioCoding:
+        sprintf(trace_message, "AUDIO CODING:%11ld;", idl);
+        break;
+      case kTraceSrtp:
+        sprintf(trace_message, "        SRTP:%11ld;", idl);
+        break;
+      case kTraceAudioMixerServer:
+        sprintf(trace_message, " AUDIO MIX/S:%11ld;", idl);
+        break;
+      case kTraceAudioMixerClient:
+        sprintf(trace_message, " AUDIO MIX/C:%11ld;", idl);
+        break;
+      case kTraceVideoCoding:
+        sprintf(trace_message, "VIDEO CODING:%11ld;", idl);
+        break;
+      case kTraceVideoMixer:
+        sprintf(trace_message, "   VIDEO MIX:%11ld;", idl);
+        break;
+      case kTraceFile:
+        sprintf(trace_message, "        FILE:%11ld;", idl);
+        break;
+      case kTraceAudioProcessing:
+        sprintf(trace_message, "  AUDIO PROC:%11ld;", idl);
+        break;
+      case kTraceAudioDevice:
+        sprintf(trace_message, "AUDIO DEVICE:%11ld;", idl);
+        break;
+      case kTraceVideoRenderer:
+        sprintf(trace_message, "VIDEO RENDER:%11ld;", idl);
+        break;
+      case kTraceVideoCapture:
+        sprintf(trace_message, "VIDEO CAPTUR:%11ld;", idl);
+        break;
+      case kTraceRemoteBitrateEstimator:
+        sprintf(trace_message, "     BWE RBE:%11ld;", idl);
+        break;
+    }
+  }
+  return kMessageLength;
+}
+
+int32_t TraceImpl::SetTraceFileImpl(const char* file_name_utf8,
+                                    const bool add_file_counter) {
+  rtc::CritScope lock(&crit_);
+
+  trace_file_->Flush();
+  trace_file_->CloseFile();
+
+  if (file_name_utf8) {
+    if (add_file_counter) {
+      file_count_text_ = 1;
+
+      char file_name_with_counter_utf8[FileWrapper::kMaxFileNameSize];
+      CreateFileName(file_name_utf8, file_name_with_counter_utf8,
+                     file_count_text_);
+      if (trace_file_->OpenFile(file_name_with_counter_utf8, false, false,
+                                true) == -1) {
+        return -1;
+      }
+    } else {
+      file_count_text_ = 0;
+      if (trace_file_->OpenFile(file_name_utf8, false, false, true) == -1) {
+        return -1;
+      }
+    }
+  }
+  row_count_text_ = 0;
+  return 0;
+}
+
+int32_t TraceImpl::TraceFileImpl(
+    char file_name_utf8[FileWrapper::kMaxFileNameSize]) {
+  rtc::CritScope lock(&crit_);
+  return trace_file_->FileName(file_name_utf8, FileWrapper::kMaxFileNameSize);
+}
+
+int32_t TraceImpl::SetTraceCallbackImpl(TraceCallback* callback) {
+  rtc::CritScope lock(&crit_);
+  callback_ = callback;
+  return 0;
+}
+
+int32_t TraceImpl::AddMessage(
+    char* trace_message,
+    const char msg[WEBRTC_TRACE_MAX_MESSAGE_SIZE],
+    const uint16_t written_so_far) const {
+  int length = 0;
+  if (written_so_far >= WEBRTC_TRACE_MAX_MESSAGE_SIZE) {
+    return -1;
+  }
+  // - 2 to leave room for newline and NULL termination.
+#ifdef _WIN32
+  length = _snprintf(trace_message,
+                     WEBRTC_TRACE_MAX_MESSAGE_SIZE - written_so_far - 2,
+                     "%s", msg);
+  if (length < 0) {
+    length = WEBRTC_TRACE_MAX_MESSAGE_SIZE - written_so_far - 2;
+    trace_message[length] = 0;
+  }
+#else
+  length = snprintf(trace_message,
+                    WEBRTC_TRACE_MAX_MESSAGE_SIZE - written_so_far - 2,
+                    "%s", msg);
+  if (length < 0 ||
+      length > WEBRTC_TRACE_MAX_MESSAGE_SIZE - written_so_far - 2) {
+    length = WEBRTC_TRACE_MAX_MESSAGE_SIZE - written_so_far - 2;
+    trace_message[length] = 0;
+  }
+#endif
+  // Length with NULL termination.
+  return length + 1;
+}
+
+void TraceImpl::AddMessageToList(
+    const char trace_message[WEBRTC_TRACE_MAX_MESSAGE_SIZE],
+    const uint16_t length,
+    const TraceLevel level) {
+  rtc::CritScope lock(&crit_);
+  if (callback_)
+    callback_->Print(level, trace_message, length);
+  WriteToFile(trace_message, length);
+}
+
+void TraceImpl::WriteToFile(const char* msg, uint16_t length) {
+  if (!trace_file_->Open())
+    return;
+
+  if (row_count_text_ > WEBRTC_TRACE_MAX_FILE_SIZE) {
+    // wrap file
+    row_count_text_ = 0;
+    trace_file_->Flush();
+
+    if (file_count_text_ == 0) {
+      trace_file_->Rewind();
+    } else {
+      char old_file_name[FileWrapper::kMaxFileNameSize];
+      char new_file_name[FileWrapper::kMaxFileNameSize];
+
+      // get current name
+      trace_file_->FileName(old_file_name, FileWrapper::kMaxFileNameSize);
+      trace_file_->CloseFile();
+
+      file_count_text_++;
+
+      UpdateFileName(old_file_name, new_file_name, file_count_text_);
+
+      if (trace_file_->OpenFile(new_file_name, false, false, true) == -1) {
+        return;
+      }
+    }
+  }
+  if (row_count_text_ == 0) {
+    char message[WEBRTC_TRACE_MAX_MESSAGE_SIZE + 1];
+    int32_t length = AddDateTimeInfo(message);
+    if (length != -1) {
+      message[length] = 0;
+      message[length - 1] = '\n';
+      trace_file_->Write(message, length);
+      row_count_text_++;
+    }
+  }
+
+  char trace_message[WEBRTC_TRACE_MAX_MESSAGE_SIZE];
+  memcpy(trace_message, msg, length);
+  trace_message[length] = 0;
+  trace_message[length - 1] = '\n';
+  trace_file_->Write(trace_message, length);
+  row_count_text_++;
+}
+
+void TraceImpl::AddImpl(const TraceLevel level,
+                        const TraceModule module,
+                        const int32_t id,
+                        const char msg[WEBRTC_TRACE_MAX_MESSAGE_SIZE]) {
+  if (!TraceCheck(level))
+    return;
+
+  char trace_message[WEBRTC_TRACE_MAX_MESSAGE_SIZE];
+  char* message_ptr = &trace_message[0];
+  int32_t len = AddLevel(message_ptr, level);
+  if (len == -1)
+    return;
+
+  message_ptr += len;
+  int32_t ack_len = len;
+
+  len = AddTime(message_ptr, level);
+  if (len == -1)
+    return;
+
+  message_ptr += len;
+  ack_len += len;
+
+  len = AddModuleAndId(message_ptr, module, id);
+  if (len == -1)
+    return;
+
+  message_ptr += len;
+  ack_len += len;
+
+  len = AddThreadId(message_ptr);
+  if (len < 0)
+    return;
+
+  message_ptr += len;
+  ack_len += len;
+
+  len = AddMessage(message_ptr, msg, static_cast<uint16_t>(ack_len));
+  if (len == -1)
+    return;
+
+  ack_len += len;
+  AddMessageToList(trace_message, static_cast<uint16_t>(ack_len), level);
+}
+
+bool TraceImpl::TraceCheck(const TraceLevel level) const {
+  return (level & level_filter()) ? true : false;
+}
+
+bool TraceImpl::UpdateFileName(
+    const char file_name_utf8[FileWrapper::kMaxFileNameSize],
+    char file_name_with_counter_utf8[FileWrapper::kMaxFileNameSize],
+    const uint32_t new_count) const {
+  int32_t length = (int32_t)strlen(file_name_utf8);
+  if (length < 0) {
+    return false;
+  }
+
+  int32_t length_without_file_ending = length - 1;
+  while (length_without_file_ending > 0) {
+    if (file_name_utf8[length_without_file_ending] == '.') {
+      break;
+    } else {
+      length_without_file_ending--;
+    }
+  }
+  if (length_without_file_ending == 0) {
+    length_without_file_ending = length;
+  }
+  int32_t length_to_ = length_without_file_ending - 1;
+  while (length_to_ > 0) {
+    if (file_name_utf8[length_to_] == '_') {
+      break;
+    } else {
+      length_to_--;
+    }
+  }
+
+  memcpy(file_name_with_counter_utf8, file_name_utf8, length_to_);
+  sprintf(file_name_with_counter_utf8 + length_to_, "_%lu%s",
+          static_cast<unsigned long>(new_count),
+          file_name_utf8 + length_without_file_ending);
+  return true;
+}
+
+bool TraceImpl::CreateFileName(
+    const char file_name_utf8[FileWrapper::kMaxFileNameSize],
+    char file_name_with_counter_utf8[FileWrapper::kMaxFileNameSize],
+    const uint32_t new_count) const {
+  int32_t length = (int32_t)strlen(file_name_utf8);
+  if (length < 0) {
+    return false;
+  }
+
+  int32_t length_without_file_ending = length - 1;
+  while (length_without_file_ending > 0) {
+    if (file_name_utf8[length_without_file_ending] == '.') {
+      break;
+    } else {
+      length_without_file_ending--;
+    }
+  }
+  if (length_without_file_ending == 0) {
+    length_without_file_ending = length;
+  }
+  memcpy(file_name_with_counter_utf8, file_name_utf8,
+         length_without_file_ending);
+  sprintf(file_name_with_counter_utf8 + length_without_file_ending, "_%lu%s",
+          static_cast<unsigned long>(new_count),
+          file_name_utf8 + length_without_file_ending);
+  return true;
+}
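Concretely, CreateFileName turns a base name into its first counted file ("trace.txt" becomes "trace_1.txt"), and UpdateFileName additionally strips the previous _N suffix when WriteToFile rotates ("trace_1.txt" becomes "trace_2.txt"). Since both helpers are private to TraceImpl, the scheme is easiest to show standalone; an illustrative re-implementation of CreateFileName's transformation:

    #include <cstdio>
    #include <cstring>

    void MakeCounterName(const char* base, unsigned long count, char* out) {
      const char* dot = strrchr(base, '.');           // extension, if any
      size_t stem = dot ? (size_t)(dot - base) : strlen(base);
      memcpy(out, base, stem);                        // copy name up to '.'
      sprintf(out + stem, "_%lu%s", count, dot ? dot : "");
    }
    // MakeCounterName("trace.txt", 1, out) writes "trace_1.txt".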
+
+// static
+void Trace::CreateTrace() {
+  TraceImpl::StaticInstance(kAddRef);
+}
+
+// static
+void Trace::ReturnTrace() {
+  TraceImpl::StaticInstance(kRelease);
+}
+
+// static
+int32_t Trace::TraceFile(char file_name[FileWrapper::kMaxFileNameSize]) {
+  TraceImpl* trace = TraceImpl::GetTrace();
+  if (trace) {
+    int ret_val = trace->TraceFileImpl(file_name);
+    ReturnTrace();
+    return ret_val;
+  }
+  return -1;
+}
+
+// static
+void Trace::set_level_filter(int filter) {
+  rtc::AtomicOps::ReleaseStore(&level_filter_, filter);
+}
+
+// static
+int Trace::level_filter() {
+  return rtc::AtomicOps::AcquireLoad(&level_filter_);
+}
+
+// static
+int32_t Trace::SetTraceFile(const char* file_name,
+                            const bool add_file_counter) {
+  TraceImpl* trace = TraceImpl::GetTrace();
+  if (trace) {
+    int ret_val = trace->SetTraceFileImpl(file_name, add_file_counter);
+    ReturnTrace();
+    return ret_val;
+  }
+  return -1;
+}
+
+int32_t Trace::SetTraceCallback(TraceCallback* callback) {
+  TraceImpl* trace = TraceImpl::GetTrace();
+  if (trace) {
+    int ret_val = trace->SetTraceCallbackImpl(callback);
+    ReturnTrace();
+    return ret_val;
+  }
+  return -1;
+}
+
+void Trace::Add(const TraceLevel level, const TraceModule module,
+                const int32_t id, const char* msg, ...) {
+  TraceImpl* trace = TraceImpl::GetTrace(level);
+  if (trace) {
+    if (trace->TraceCheck(level)) {
+      char temp_buff[WEBRTC_TRACE_MAX_MESSAGE_SIZE];
+      char* buff = 0;
+      if (msg) {
+        va_list args;
+        va_start(args, msg);
+#ifdef _WIN32
+        _vsnprintf(temp_buff, WEBRTC_TRACE_MAX_MESSAGE_SIZE - 1, msg, args);
+#else
+        vsnprintf(temp_buff, WEBRTC_TRACE_MAX_MESSAGE_SIZE - 1, msg, args);
+#endif
+        va_end(args);
+        buff = temp_buff;
+      }
+      trace->AddImpl(level, module, id, buff);
+    }
+    ReturnTrace();
+  }
+}
+
+}  // namespace webrtc
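End to end, the static Trace API above is typically driven as follows; a sketch in which the file name and filter choice are illustrative:

    #include "webrtc/system_wrappers/interface/trace.h"

    void EnableTracing() {
      webrtc::Trace::CreateTrace();  // add a reference to the singleton
      webrtc::Trace::set_level_filter(webrtc::kTraceWarning |
                                      webrtc::kTraceError);
      webrtc::Trace::SetTraceFile("webrtc_trace.txt", true);  // counted files
      WEBRTC_TRACE(webrtc::kTraceWarning, webrtc::kTraceUtility, -1,
                   "tracing enabled");
    }

    void DisableTracing() {
      webrtc::Trace::ReturnTrace();  // drop the reference taken above
    }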
diff --git a/webrtc/system_wrappers/source/trace_impl.h b/webrtc/system_wrappers/source/trace_impl.h
new file mode 100644
index 0000000..da5af72
--- /dev/null
+++ b/webrtc/system_wrappers/source/trace_impl.h
@@ -0,0 +1,106 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_IMPL_H_
+#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_IMPL_H_
+
+#include "webrtc/base/criticalsection.h"
+#include "webrtc/base/scoped_ptr.h"
+#include "webrtc/system_wrappers/interface/event_wrapper.h"
+#include "webrtc/system_wrappers/interface/file_wrapper.h"
+#include "webrtc/system_wrappers/interface/static_instance.h"
+#include "webrtc/system_wrappers/interface/thread_wrapper.h"
+#include "webrtc/system_wrappers/interface/trace.h"
+
+namespace webrtc {
+
+#define WEBRTC_TRACE_MAX_MESSAGE_SIZE 1024
+// Total buffer size is WEBRTC_TRACE_NUM_ARRAY (number of buffer partitions) *
+// WEBRTC_TRACE_MAX_QUEUE (number of lines per buffer partition) *
+// WEBRTC_TRACE_MAX_MESSAGE_SIZE (number of 1 byte characters per line) =
+// 1 or 4 Mbyte.
+
+#define WEBRTC_TRACE_MAX_FILE_SIZE 100*1000
+// Number of rows that may be written to file. On average 110 bytes per row
+// (max 256 bytes per row). So on average 110*100*1000 = 11 Mbyte, max
+// 256*100*1000 = 25.6 Mbyte.
+
+class TraceImpl : public Trace {
+ public:
+  virtual ~TraceImpl();
+
+  static TraceImpl* CreateInstance();
+  static TraceImpl* GetTrace(const TraceLevel level = kTraceAll);
+
+  int32_t SetTraceFileImpl(const char* file_name, const bool add_file_counter);
+  int32_t TraceFileImpl(char file_name[FileWrapper::kMaxFileNameSize]);
+
+  int32_t SetTraceCallbackImpl(TraceCallback* callback);
+
+  void AddImpl(const TraceLevel level, const TraceModule module,
+               const int32_t id, const char* msg);
+
+  bool TraceCheck(const TraceLevel level) const;
+
+ protected:
+  TraceImpl();
+
+  static TraceImpl* StaticInstance(CountOperation count_operation,
+                                   const TraceLevel level = kTraceAll);
+
+  int32_t AddThreadId(char* trace_message) const;
+
+  // OS specific implementations.
+  virtual int32_t AddTime(char* trace_message,
+                          const TraceLevel level) const = 0;
+
+  virtual int32_t AddDateTimeInfo(char* trace_message) const = 0;
+
+ private:
+  friend class Trace;
+
+  int32_t AddLevel(char* sz_message, const TraceLevel level) const;
+
+  int32_t AddModuleAndId(char* trace_message, const TraceModule module,
+                         const int32_t id) const;
+
+  int32_t AddMessage(char* trace_message,
+                     const char msg[WEBRTC_TRACE_MAX_MESSAGE_SIZE],
+                     const uint16_t written_so_far) const;
+
+  void AddMessageToList(
+      const char trace_message[WEBRTC_TRACE_MAX_MESSAGE_SIZE],
+      const uint16_t length,
+      const TraceLevel level);
+
+  bool UpdateFileName(
+      const char file_name_utf8[FileWrapper::kMaxFileNameSize],
+      char file_name_with_counter_utf8[FileWrapper::kMaxFileNameSize],
+      const uint32_t new_count) const;
+
+  bool CreateFileName(
+      const char file_name_utf8[FileWrapper::kMaxFileNameSize],
+      char file_name_with_counter_utf8[FileWrapper::kMaxFileNameSize],
+      const uint32_t new_count) const;
+
+  void WriteToFile(const char* msg, uint16_t length)
+      EXCLUSIVE_LOCKS_REQUIRED(crit_);
+
+  TraceCallback* callback_ GUARDED_BY(crit_);
+  uint32_t row_count_text_ GUARDED_BY(crit_);
+  uint32_t file_count_text_ GUARDED_BY(crit_);
+
+  const rtc::scoped_ptr<FileWrapper> trace_file_ GUARDED_BY(crit_);
+  rtc::CriticalSection crit_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_IMPL_H_
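A note on the concurrency model: level_filter_ is published through rtc::AtomicOps, while the prev_*_tick_count_ members in the platform backends below rely on volatile plus the "wraparound or data race" clamps on implausible deltas. In modern C++ the same intent is usually expressed with std::atomic; a sketch under that assumption, not part of this patch:

    #include <atomic>
    #include <cstdint>

    std::atomic<int> level_filter{0};
    std::atomic<uint32_t> prev_tick_count{0};

    void SetFilter(int filter) {
      level_filter.store(filter, std::memory_order_release);
    }

    int Filter() {
      return level_filter.load(std::memory_order_acquire);
    }

    uint32_t ExchangeTick(uint32_t now_ms) {
      // Publish the new tick and fetch the previous one in one atomic step,
      // removing the racy read-then-write on the volatile member.
      return prev_tick_count.exchange(now_ms, std::memory_order_acq_rel);
    }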
diff --git a/webrtc/system_wrappers/source/trace_posix.cc b/webrtc/system_wrappers/source/trace_posix.cc
new file mode 100644
index 0000000..cb702d8
--- /dev/null
+++ b/webrtc/system_wrappers/source/trace_posix.cc
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/system_wrappers/source/trace_posix.h"
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/time.h>
+#include <time.h>
+
+namespace webrtc {
+
+TracePosix::TracePosix()
+    : crit_sect_(*CriticalSectionWrapper::CreateCriticalSection()) {
+  struct timeval system_time_high_res;
+  gettimeofday(&system_time_high_res, 0);
+  prev_api_tick_count_ = prev_tick_count_ = system_time_high_res.tv_sec;
+}
+
+TracePosix::~TracePosix() {
+  delete &crit_sect_;
+}
+
+int32_t TracePosix::AddTime(char* trace_message, const TraceLevel level) const {
+  struct timeval system_time_high_res;
+  if (gettimeofday(&system_time_high_res, 0) == -1) {
+    return -1;
+  }
+  struct tm buffer;
+  const struct tm* system_time =
+      localtime_r(&system_time_high_res.tv_sec, &buffer);
+
+  const uint32_t ms_time = system_time_high_res.tv_usec / 1000;
+  uint32_t prev_tickCount = 0;
+  {
+    CriticalSectionScoped lock(&crit_sect_);
+    if (level == kTraceApiCall) {
+      prev_tickCount = prev_tick_count_;
+      prev_tick_count_ = ms_time;
+    } else {
+      prev_tickCount = prev_api_tick_count_;
+      prev_api_tick_count_ = ms_time;
+    }
+  }
+
+  uint32_t dw_delta_time = ms_time - prev_tickCount;
+  if (prev_tickCount == 0) {
+    dw_delta_time = 0;
+  }
+  if (dw_delta_time > 0x0fffffff) {
+    // Either wraparound or data race.
+    dw_delta_time = 0;
+  }
+  if (dw_delta_time > 99999) {
+    dw_delta_time = 99999;
+  }
+
+  sprintf(trace_message, "(%2u:%2u:%2u:%3u |%5lu) ", system_time->tm_hour,
+          system_time->tm_min, system_time->tm_sec, ms_time,
+          static_cast<unsigned long>(dw_delta_time));
+  // Messages are 22 characters.
+  return 22;
+}
+
+int32_t TracePosix::AddDateTimeInfo(char* trace_message) const {
+  time_t t;
+  time(&t);
+  char buffer[26];  // man ctime says buffer should have room for >=26 bytes.
+  sprintf(trace_message, "Local Date: %s", ctime_r(&t, buffer));
+  int32_t len = static_cast<int32_t>(strlen(trace_message));
+
+  if ('\n' == trace_message[len - 1]) {
+    trace_message[len - 1] = '\0';
+    --len;
+  }
+
+  // Include NULL termination (hence + 1).
+  return len + 1;
+}
+
+}  // namespace webrtc
diff --git a/webrtc/system_wrappers/source/trace_posix.h b/webrtc/system_wrappers/source/trace_posix.h
new file mode 100644
index 0000000..89420c6
--- /dev/null
+++ b/webrtc/system_wrappers/source/trace_posix.h
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_POSIX_H_
+#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_POSIX_H_
+
+#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
+#include "webrtc/system_wrappers/source/trace_impl.h"
+
+namespace webrtc {
+
+class TracePosix : public TraceImpl {
+ public:
+  TracePosix();
+  ~TracePosix() override;
+
+  // This method can be called on several different threads, not only the
+  // creating thread.
+  int32_t AddTime(char* trace_message, const TraceLevel level) const override;
+
+  int32_t AddDateTimeInfo(char* trace_message) const override;
+
+ private:
+  volatile mutable uint32_t prev_api_tick_count_;
+  volatile mutable uint32_t prev_tick_count_;
+
+  CriticalSectionWrapper& crit_sect_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_POSIX_H_
diff --git a/webrtc/system_wrappers/source/trace_win.cc b/webrtc/system_wrappers/source/trace_win.cc
new file mode 100644
index 0000000..4caedfc
--- /dev/null
+++ b/webrtc/system_wrappers/source/trace_win.cc
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/system_wrappers/source/trace_win.h"
+
+#include <assert.h>
+#include <stdarg.h>
+
+#include "Mmsystem.h"
+
+namespace webrtc {
+TraceWindows::TraceWindows()
+    : prev_api_tick_count_(0),
+      prev_tick_count_(0) {
+}
+
+TraceWindows::~TraceWindows() {
+}
+
+int32_t TraceWindows::AddTime(char* trace_message,
+                              const TraceLevel level) const {
+  uint32_t dw_current_time = timeGetTime();
+  SYSTEMTIME system_time;
+  GetSystemTime(&system_time);
+
+  if (level == kTraceApiCall) {
+    uint32_t dw_delta_time = dw_current_time - prev_tick_count_;
+    prev_tick_count_ = dw_current_time;
+
+    if (prev_tick_count_ == 0) {
+      dw_delta_time = 0;
+    }
+    if (dw_delta_time > 0x0fffffff) {
+      // Either wrap-around or data race.
+      dw_delta_time = 0;
+    }
+    if (dw_delta_time > 99999) {
+      dw_delta_time = 99999;
+    }
+
+    sprintf(trace_message, "(%2u:%2u:%2u:%3u |%5u) ", system_time.wHour,
+            system_time.wMinute, system_time.wSecond,
+            system_time.wMilliseconds, dw_delta_time);
+  } else {
+    uint32_t dw_delta_time = dw_current_time - prev_api_tick_count_;
+    prev_api_tick_count_ = dw_current_time;
+
+    if (prev_api_tick_count_ == 0) {
+      dw_delta_time = 0;
+    }
+    if (dw_delta_time > 0x0fffffff) {
+      // Either wraparound or data race.
+      dw_delta_time = 0;
+    }
+    if (dw_delta_time > 99999) {
+      dw_delta_time = 99999;
+    }
+    sprintf(trace_message, "(%2u:%2u:%2u:%3u |%5u) ", system_time.wHour,
+            system_time.wMinute, system_time.wSecond,
+            system_time.wMilliseconds, dw_delta_time);
+  }
+  return 22;
+}
+
+int32_t TraceWindows::AddDateTimeInfo(char* trace_message) const {
+  prev_api_tick_count_ = timeGetTime();
+  prev_tick_count_ = prev_api_tick_count_;
+
+  SYSTEMTIME sys_time;
+  GetLocalTime(&sys_time);
+
+  TCHAR sz_date_str[20];
+  TCHAR sz_time_str[20];
+
+  // Create date string (e.g. Apr 04 2002)
+  GetDateFormat(LOCALE_SYSTEM_DEFAULT, 0, &sys_time, TEXT("MMM dd yyyy"),
+                sz_date_str, 20);
+
+  // Create time string (e.g. 15:32:08)
+  GetTimeFormat(LOCALE_SYSTEM_DEFAULT, 0, &sys_time, TEXT("HH':'mm':'ss"),
+                sz_time_str, 20);
+
+  sprintf(trace_message, "Local Date: %ls Local Time: %ls", sz_date_str,
+          sz_time_str);
+
+  // Include NULL termination (hence + 1).
+  return static_cast<int32_t>(strlen(trace_message) + 1);
+}
+
+}  // namespace webrtc
diff --git a/webrtc/system_wrappers/source/trace_win.h b/webrtc/system_wrappers/source/trace_win.h
new file mode 100644
index 0000000..1311b23
--- /dev/null
+++ b/webrtc/system_wrappers/source/trace_win.h
@@ -0,0 +1,36 @@
+/*
+ * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
+ *
+ * Use of this source code is governed by a BSD-style license
+ * that can be found in the LICENSE file in the root of the source
+ * tree. An additional intellectual property rights grant can be found
+ * in the file PATENTS. All contributing project authors may
+ * be found in the AUTHORS file in the root of the source tree.
+ */
+
+#ifndef WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_WIN_H_
+#define WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_WIN_H_
+
+#include <stdio.h>
+#include <windows.h>
+
+#include "webrtc/system_wrappers/source/trace_impl.h"
+
+namespace webrtc {
+
+class TraceWindows : public TraceImpl {
+ public:
+  TraceWindows();
+  virtual ~TraceWindows();
+
+  virtual int32_t AddTime(char* trace_message, const TraceLevel level) const;
+
+  virtual int32_t AddDateTimeInfo(char* trace_message) const;
+ private:
+  volatile mutable uint32_t prev_api_tick_count_;
+  volatile mutable uint32_t prev_tick_count_;
+};
+
+}  // namespace webrtc
+
+#endif  // WEBRTC_SYSTEM_WRAPPERS_SOURCE_TRACE_WIN_H_