Update audio_processing module

Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1

Update notes:

 * Pull in third party license file

 * Replace .gypi files with BUILD.gn to keep track of what changes
   upstream

 * Bunch of new files pulled in as dependencies

 * Won't build yet due to changes needed on top of these

Author: Arun Raghavan
Date:   2015-10-13 17:25:22 +05:30
Parent: 5ae7a5d6cd
Commit: 753eada3aa

324 changed files with 52533 additions and 16117 deletions


@@ -0,0 +1,284 @@
# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("//build/config/arm.gni")
import("//third_party/protobuf/proto_library.gni")
import("../../build/webrtc.gni")
declare_args() {
# Outputs some low-level debug files.
aec_debug_dump = false
# Disables the usual mode where we trust the reported system delay
# values the AEC receives. The corresponding define is set appropriately
# in the code, but it can be force-enabled here for testing.
aec_untrusted_delay_for_testing = false
}
source_set("audio_processing") {
sources = [
"aec/aec_core.c",
"aec/aec_core.h",
"aec/aec_core_internal.h",
"aec/aec_rdft.c",
"aec/aec_rdft.h",
"aec/aec_resampler.c",
"aec/aec_resampler.h",
"aec/echo_cancellation.c",
"aec/echo_cancellation_internal.h",
"aec/include/echo_cancellation.h",
"aecm/aecm_core.c",
"aecm/aecm_core.h",
"aecm/echo_control_mobile.c",
"aecm/include/echo_control_mobile.h",
"agc/agc.cc",
"agc/agc.h",
"agc/agc_manager_direct.cc",
"agc/agc_manager_direct.h",
"agc/gain_map_internal.h",
"agc/histogram.cc",
"agc/histogram.h",
"agc/legacy/analog_agc.c",
"agc/legacy/analog_agc.h",
"agc/legacy/digital_agc.c",
"agc/legacy/digital_agc.h",
"agc/legacy/gain_control.h",
"agc/utility.cc",
"agc/utility.h",
"audio_buffer.cc",
"audio_buffer.h",
"audio_processing_impl.cc",
"audio_processing_impl.h",
"beamformer/beamformer.h",
"beamformer/complex_matrix.h",
"beamformer/covariance_matrix_generator.cc",
"beamformer/covariance_matrix_generator.h",
"beamformer/matrix.h",
"beamformer/nonlinear_beamformer.cc",
"beamformer/nonlinear_beamformer.h",
"common.h",
"echo_cancellation_impl.cc",
"echo_cancellation_impl.h",
"echo_control_mobile_impl.cc",
"echo_control_mobile_impl.h",
"gain_control_impl.cc",
"gain_control_impl.h",
"high_pass_filter_impl.cc",
"high_pass_filter_impl.h",
"include/audio_processing.h",
"intelligibility/intelligibility_enhancer.cc",
"intelligibility/intelligibility_enhancer.h",
"intelligibility/intelligibility_utils.cc",
"intelligibility/intelligibility_utils.h",
"level_estimator_impl.cc",
"level_estimator_impl.h",
"logging/aec_logging.h",
"logging/aec_logging_file_handling.cc",
"logging/aec_logging_file_handling.h",
"noise_suppression_impl.cc",
"noise_suppression_impl.h",
"processing_component.cc",
"processing_component.h",
"rms_level.cc",
"rms_level.h",
"splitting_filter.cc",
"splitting_filter.h",
"three_band_filter_bank.cc",
"three_band_filter_bank.h",
"transient/common.h",
"transient/daubechies_8_wavelet_coeffs.h",
"transient/dyadic_decimator.h",
"transient/moving_moments.cc",
"transient/moving_moments.h",
"transient/transient_detector.cc",
"transient/transient_detector.h",
"transient/transient_suppressor.cc",
"transient/transient_suppressor.h",
"transient/wpd_node.cc",
"transient/wpd_node.h",
"transient/wpd_tree.cc",
"transient/wpd_tree.h",
"typing_detection.cc",
"typing_detection.h",
"utility/delay_estimator.c",
"utility/delay_estimator.h",
"utility/delay_estimator_internal.h",
"utility/delay_estimator_wrapper.c",
"utility/delay_estimator_wrapper.h",
"vad/common.h",
"vad/gmm.cc",
"vad/gmm.h",
"vad/noise_gmm_tables.h",
"vad/pitch_based_vad.cc",
"vad/pitch_based_vad.h",
"vad/pitch_internal.cc",
"vad/pitch_internal.h",
"vad/pole_zero_filter.cc",
"vad/pole_zero_filter.h",
"vad/standalone_vad.cc",
"vad/standalone_vad.h",
"vad/vad_audio_proc.cc",
"vad/vad_audio_proc.h",
"vad/vad_audio_proc_internal.h",
"vad/vad_circular_buffer.cc",
"vad/vad_circular_buffer.h",
"vad/voice_activity_detector.cc",
"vad/voice_activity_detector.h",
"vad/voice_gmm_tables.h",
"voice_detection_impl.cc",
"voice_detection_impl.h",
]
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
defines = []
deps = [
"../..:webrtc_common",
"../audio_coding:isac",
]
if (aec_debug_dump) {
defines += [ "WEBRTC_AEC_DEBUG_DUMP" ]
}
if (aec_untrusted_delay_for_testing) {
defines += [ "WEBRTC_UNTRUSTED_DELAY" ]
}
if (rtc_enable_protobuf) {
defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ]
deps += [ ":audioproc_debug_proto" ]
}
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_NS_FIXED" ]
sources += [
"ns/include/noise_suppression_x.h",
"ns/noise_suppression_x.c",
"ns/nsx_core.c",
"ns/nsx_core.h",
"ns/nsx_defines.h",
]
if (current_cpu == "mipsel") {
sources += [ "ns/nsx_core_mips.c" ]
} else {
sources += [ "ns/nsx_core_c.c" ]
}
} else {
defines += [ "WEBRTC_NS_FLOAT" ]
sources += [
"ns/defines.h",
"ns/include/noise_suppression.h",
"ns/noise_suppression.c",
"ns/ns_core.c",
"ns/ns_core.h",
"ns/windows_private.h",
]
}
if (current_cpu == "x86" || current_cpu == "x64") {
deps += [ ":audio_processing_sse2" ]
}
if (rtc_build_with_neon) {
deps += [ ":audio_processing_neon" ]
}
if (current_cpu == "mipsel") {
sources += [ "aecm/aecm_core_mips.c" ]
if (mips_float_abi == "hard") {
sources += [
"aec/aec_core_mips.c",
"aec/aec_rdft_mips.c",
]
}
} else {
sources += [ "aecm/aecm_core_c.c" ]
}
if (is_win) {
cflags = [
# TODO(jschuh): Bug 1348: fix this warning.
"/wd4267", # size_t to int truncations
]
}
if (is_clang) {
# Suppress warnings from Chrome's Clang plugins.
# See http://code.google.com/p/webrtc/issues/detail?id=163 for details.
configs -= [ "//build/config/clang:find_bad_constructs" ]
}
deps += [
"../../base:rtc_base_approved",
"../../common_audio",
"../../system_wrappers",
]
}
if (rtc_enable_protobuf) {
proto_library("audioproc_debug_proto") {
sources = [
"debug.proto",
]
proto_out_dir = "webrtc/audio_processing"
}
}
if (current_cpu == "x86" || current_cpu == "x64") {
source_set("audio_processing_sse2") {
sources = [
"aec/aec_core_sse2.c",
"aec/aec_rdft_sse2.c",
]
if (is_posix) {
cflags = [ "-msse2" ]
}
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
}
}
if (rtc_build_with_neon) {
source_set("audio_processing_neon") {
sources = [
"aec/aec_core_neon.c",
"aec/aec_rdft_neon.c",
"aecm/aecm_core_neon.c",
"ns/nsx_core_neon.c",
]
if (current_cpu != "arm64") {
# Enable compilation for the NEON instruction set. This is needed
# since //build/config/arm.gni only enables NEON for iOS, not Android.
# This provides the same functionality as webrtc/build/arm_neon.gypi.
configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
cflags = [ "-mfpu=neon" ]
}
# Disable LTO on NEON targets due to compiler bug.
# TODO(fdegans): Enable this. See crbug.com/408997.
if (rtc_use_lto) {
cflags -= [
"-flto",
"-ffat-lto-objects",
]
}
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
deps = [
"../../common_audio",
]
}
}


@@ -1,26 +1,104 @@
SUBDIRS = utility ns aec aecm agc
lib_LTLIBRARIES = libwebrtc_audio_processing.la
if NS_FIXED
COMMON_CXXFLAGS += -DWEBRTC_NS_FIXED=1
NS_LIB = libns_fix
else
COMMON_CXXFLAGS += -DWEBRTC_NS_FLOAT=1
NS_LIB = libns
endif
webrtcincludedir = $(includedir)/webrtc_audio_processing
webrtcinclude_HEADERS = $(top_srcdir)/src/typedefs.h \
$(top_srcdir)/src/modules/interface/module.h \
interface/audio_processing.h \
$(top_srcdir)/src/common_types.h \
$(top_srcdir)/src/modules/interface/module_common_types.h
webrtcinclude_HEADERS = $(top_srcdir)/webrtc/base/arraysize.h \
$(top_srcdir)/webrtc/base/platform_file.h \
$(top_srcdir)/webrtc/common.h \
$(top_srcdir)/webrtc/typedefs.h \
$(top_srcdir)/webrtc/modules/audio_processing/beamformer/array_util.h \
include/audio_processing.h
libwebrtc_audio_processing_la_SOURCES = interface/audio_processing.h \
libwebrtc_audio_processing_la_SOURCES = include/audio_processing.h \
aec/include/echo_cancellation.h \
aec/aec_common.h \
aec/aec_core.c \
aec/aec_core.h \
aec/aec_core_internal.h \
aec/aec_core_sse2.c \
aec/aec_rdft.c \
aec/aec_rdft.h \
aec/aec_rdft_sse2.c \
aec/aec_resampler.c \
aec/aec_resampler.h \
aec/echo_cancellation.c \
aec/echo_cancellation_internal.h \
aecm/include/echo_control_mobile.h \
aecm/echo_control_mobile.c \
aecm/aecm_core.c \
aecm/aecm_core.h \
aecm/aecm_core_c.c \
agc/legacy/analog_agc.c \
agc/legacy/analog_agc.h \
agc/legacy/gain_control.h \
agc/legacy/digital_agc.c \
agc/legacy/digital_agc.h \
agc/agc.cc \
agc/agc.h \
agc/agc_manager_direct.cc \
agc/agc_manager_direct.h \
agc/gain_map_internal.h \
agc/histogram.cc \
agc/histogram.h \
agc/utility.cc \
agc/utility.h \
beamformer/array_util.h \
beamformer/beamformer.h \
beamformer/complex_matrix.h \
beamformer/covariance_matrix_generator.h \
beamformer/matrix.h \
beamformer/matrix_test_helpers.h \
beamformer/nonlinear_beamformer.h \
beamformer/covariance_matrix_generator.cc \
beamformer/nonlinear_beamformer.cc \
logging/aec_logging.h \
logging/aec_logging_file_handling.h \
logging/aec_logging_file_handling.cc \
transient/common.h \
transient/daubechies_8_wavelet_coeffs.h \
transient/dyadic_decimator.h \
transient/file_utils.h \
transient/moving_moments.h \
transient/transient_detector.h \
transient/transient_suppressor.h \
transient/wpd_node.h \
transient/wpd_tree.h \
transient/click_annotate.cc \
transient/file_utils.cc \
transient/moving_moments.cc \
transient/transient_detector.cc \
transient/transient_suppressor.cc \
transient/wpd_node.cc \
transient/wpd_tree.cc \
utility/delay_estimator.c \
utility/delay_estimator.h \
utility/delay_estimator_internal.h \
utility/delay_estimator_wrapper.c \
utility/delay_estimator_wrapper.h \
vad/common.h \
vad/gmm.h \
vad/noise_gmm_tables.h \
vad/pitch_based_vad.h \
vad/pitch_internal.h \
vad/pole_zero_filter.h \
vad/standalone_vad.h \
vad/vad_audio_proc.h \
vad/vad_audio_proc_internal.h \
vad/vad_circular_buffer.h \
vad/voice_activity_detector.h \
vad/voice_gmm_tables.h \
vad/gmm.cc \
vad/pitch_based_vad.cc \
vad/pitch_internal.cc \
vad/pole_zero_filter.cc \
vad/standalone_vad.cc \
vad/vad_audio_proc.cc \
vad/vad_circular_buffer.cc \
vad/voice_activity_detector.cc \
audio_buffer.cc \
audio_buffer.h \
audio_processing_impl.cc \
audio_processing_impl.h \
common.h \
echo_cancellation_impl.cc \
echo_cancellation_impl.h \
echo_control_mobile_impl.cc \
@@ -33,27 +111,56 @@ libwebrtc_audio_processing_la_SOURCES = interface/audio_processing.h \
level_estimator_impl.h \
noise_suppression_impl.cc \
noise_suppression_impl.h \
rms_level.cc \
rms_level.h \
splitting_filter.cc \
splitting_filter.h \
processing_component.cc \
processing_component.h \
three_band_filter_bank.cc \
three_band_filter_bank.h \
typing_detection.cc \
typing_detection.h \
voice_detection_impl.cc \
voice_detection_impl.h
libwebrtc_audio_processing_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/common_audio/vad/main/interface \
-I$(top_srcdir)/src/system_wrappers/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility \
-I$(top_srcdir)/src/modules/audio_processing/ns/interface \
-I$(top_srcdir)/src/modules/audio_processing/aec/interface \
-I$(top_srcdir)/src/modules/audio_processing/aecm/interface \
-I$(top_srcdir)/src/modules/audio_processing/agc/interface
libwebrtc_audio_processing_la_LIBADD = $(top_builddir)/src/system_wrappers/libsystem_wrappers.la \
$(top_builddir)/src/common_audio/signal_processing_library/libspl.la \
$(top_builddir)/src/common_audio/vad/libvad.la \
$(top_builddir)/src/modules/audio_processing/utility/libapm_util.la \
$(top_builddir)/src/modules/audio_processing/ns/$(NS_LIB).la \
$(top_builddir)/src/modules/audio_processing/aec/libaec.la \
$(top_builddir)/src/modules/audio_processing/aecm/libaecm.la \
$(top_builddir)/src/modules/audio_processing/agc/libagc.la
if NS_FIXED
COMMON_CXXFLAGS += -DWEBRTC_NS_FIXED=1
libwebrtc_audio_processing_la_SOURCES += \
ns/include/noise_suppression_x.h \
ns/noise_suppression_x.c \
ns/nsx_defines.h \
ns/nsx_core.c \
ns/nsx_core.h \
ns/nsx_core_c.c
else
COMMON_CXXFLAGS += -DWEBRTC_NS_FLOAT=1
libwebrtc_audio_processing_la_SOURCES += \
ns/include/noise_suppression.h \
ns/noise_suppression.c \
ns/defines.h \
ns/ns_core.c \
ns/ns_core.h \
ns/windows_private.h
endif
libwebrtc_audio_processing_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS)
libwebrtc_audio_processing_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS)
libwebrtc_audio_processing_la_LIBADD = $(top_builddir)/webrtc/base/libbase.la \
$(top_builddir)/webrtc/system_wrappers/libsystem_wrappers.la \
$(top_builddir)/webrtc/common_audio/libcommon_audio.la \
$(top_builddir)/webrtc/modules/audio_coding/libaudio_coding.la
libwebrtc_audio_processing_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(LIBWEBRTC_AUDIO_PROCESSING_VERSION_INFO)
# FIXME:
# x86: aec/aec_core_sse2.c
# aec/aec_rdft_sse2.c
# NEON: aec/aec_core_neon.c
# aec/aec_rdft_neon.c
# aecm/aecm_core_neon.c
# ns/nsx_core_neon.c
# MIPS: aec/aec_core_mips.c
# aec/aec_rdft_mips.c
# aecm/aecm_core_mips.c
# ns/nsx_core_mips.c


@@ -1,2 +0,0 @@
andrew@webrtc.org
bjornv@webrtc.org


@@ -1,16 +0,0 @@
noinst_LTLIBRARIES = libaec.la
libaec_la_SOURCES = interface/echo_cancellation.h \
echo_cancellation.c \
aec_core.h \
aec_core.c \
aec_core_sse2.c \
aec_rdft.h \
aec_rdft.c \
aec_rdft_sse2.c \
resampler.h \
resampler.c
libaec_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/system_wrappers/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility


@@ -1,40 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'aec',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'apm_util'
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/echo_cancellation.h',
'echo_cancellation.c',
'aec_core.h',
'aec_core.c',
'aec_core_sse2.c',
'aec_rdft.h',
'aec_rdft.c',
'aec_rdft_sse2.c',
'resampler.h',
'resampler.c',
],
},
],
}


@@ -0,0 +1,32 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
#include "webrtc/typedefs.h"
#ifdef _MSC_VER /* visual c++ */
#define ALIGN16_BEG __declspec(align(16))
#define ALIGN16_END
#else /* gcc or icc */
#define ALIGN16_BEG
#define ALIGN16_END __attribute__((aligned(16)))
#endif
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];
extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
extern const float WebRtcAec_kMinFarendPSD;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
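
The two-macro split above exists because MSVC's alignment attribute must
precede a declaration while GCC's must follow it. A hypothetical aligned
table declared with these macros would look like:

    /* 16-byte alignment permits aligned SIMD loads (e.g. _mm_load_ps or
       vld1q_f32); kTable is an illustrative name, not part of this header. */
    ALIGN16_BEG const float ALIGN16_END kTable[65] = {0.0f};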

File diff suppressed because it is too large


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -12,29 +12,18 @@
* Specifies the interface for the AEC core.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
#include <stdio.h>
#include <stddef.h>
#include "signal_processing_library.h"
#include "typedefs.h"
//#define AEC_DEBUG // for recording files
#include "webrtc/typedefs.h"
#define FRAME_LEN 80
#define PART_LEN 64 // Length of partition
#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
#define NR_PART 12 // Number of partitions
#define FILT_LEN (PART_LEN * NR_PART) // Filter length
#define FILT_LEN2 (FILT_LEN * 2) // Double filter length
#define FAR_BUF_LEN (FILT_LEN2 * 2)
#define PREF_BAND_SIZE 24
#define BLOCKL_MAX FRAME_LEN
// Maximum delay in fixed point delay estimator, used for logging
enum {kMaxDelay = 100};
#define PART_LEN 64 // Length of partition
#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
#define NUM_HIGH_BANDS_MAX 2 // Max number of high bands
typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
@@ -46,136 +35,95 @@ typedef float complex_t[2];
// compile time.
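// As an illustration of that layout: a length-N complex buffer is stored as
// float buf[2][N], with buf[0] holding all real parts and buf[1] all
// imaginary parts, so a per-bin complex multiply becomes
//   re = aRe[i] * bRe[i] - aIm[i] * bIm[i];
//   im = aRe[i] * bIm[i] + aIm[i] * bRe[i];
// with no interleaving, which maps directly onto 4-wide SSE2/NEON operations.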
// Metrics
enum {offsetLevel = -100};
enum {
kOffsetLevel = -100
};
typedef struct {
float sfrsum;
int sfrcounter;
float framelevel;
float frsum;
int frcounter;
float minlevel;
float averagelevel;
} power_level_t;
typedef struct Stats {
float instant;
float average;
float min;
float max;
float sum;
float hisum;
float himean;
int counter;
int hicounter;
} Stats;
typedef struct {
float instant;
float average;
float min;
float max;
float sum;
float hisum;
float himean;
int counter;
int hicounter;
} stats_t;
typedef struct AecCore AecCore;
typedef struct {
int farBufWritePos, farBufReadPos;
int knownDelay;
int inSamples, outSamples;
int delayEstCtr;
void *farFrBuf, *nearFrBuf, *outFrBuf;
void *nearFrBufH;
void *outFrBufH;
float xBuf[PART_LEN2]; // farend
float dBuf[PART_LEN2]; // nearend
float eBuf[PART_LEN2]; // error
float dBufH[PART_LEN2]; // nearend
float xPow[PART_LEN1];
float dPow[PART_LEN1];
float dMinPow[PART_LEN1];
float dInitMinPow[PART_LEN1];
float *noisePow;
float xfBuf[2][NR_PART * PART_LEN1]; // farend fft buffer
float wfBuf[2][NR_PART * PART_LEN1]; // filter fft
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
complex_t xfwBuf[NR_PART * PART_LEN1]; // farend windowed fft buffer
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near and error psd
float hNs[PART_LEN1];
float hNlFbMin, hNlFbLocalMin;
float hNlXdAvgMin;
int hNlNewMin, hNlMinCtr;
float overDrive, overDriveSm;
float targetSupp, minOverDrive;
float outBuf[PART_LEN];
int delayIdx;
short stNearState, echoState;
short divergeState;
int xfBufBlockPos;
short farBuf[FILT_LEN2 * 2];
short mult; // sampling frequency multiple
int sampFreq;
WebRtc_UWord32 seed;
float mu; // stepsize
float errThresh; // error threshold
int noiseEstCtr;
power_level_t farlevel;
power_level_t nearlevel;
power_level_t linoutlevel;
power_level_t nlpoutlevel;
int metricsMode;
int stateCounter;
stats_t erl;
stats_t erle;
stats_t aNlp;
stats_t rerl;
// Quantities to control H band scaling for SWB input
int freq_avg_ic; //initial bin for averaging nlp gain
int flag_Hband_cn; //for comfort noise
float cn_scale_Hband; //scale for comfort noise in H band
int delay_histogram[kMaxDelay];
int delay_logging_enabled;
void* delay_estimator;
#ifdef AEC_DEBUG
FILE *farFile;
FILE *nearFile;
FILE *outFile;
FILE *outLpFile;
#endif
} aec_t;
typedef void (*WebRtcAec_FilterFar_t)(aec_t *aec, float yf[2][PART_LEN1]);
extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
typedef void (*WebRtcAec_ScaleErrorSignal_t)(aec_t *aec, float ef[2][PART_LEN1]);
extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
typedef void (*WebRtcAec_FilterAdaptation_t)
(aec_t *aec, float *fft, float ef[2][PART_LEN1]);
extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
typedef void (*WebRtcAec_OverdriveAndSuppress_t)
(aec_t *aec, float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]);
extern WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
int WebRtcAec_CreateAec(aec_t **aec);
int WebRtcAec_FreeAec(aec_t *aec);
int WebRtcAec_InitAec(aec_t *aec, int sampFreq);
AecCore* WebRtcAec_CreateAec(); // Returns NULL on error.
void WebRtcAec_FreeAec(AecCore* aec);
int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
void WebRtcAec_InitAec_SSE2(void);
#if defined(MIPS_FPU_LE)
void WebRtcAec_InitAec_mips(void);
#endif
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void WebRtcAec_InitAec_neon(void);
#endif
void WebRtcAec_InitMetrics(aec_t *aec);
void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend,
const short *nearend, const short *nearendH,
short *out, short *outH,
int knownDelay);
void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
void WebRtcAec_ProcessFrames(AecCore* aec,
const float* const* nearend,
size_t num_bands,
size_t num_samples,
int knownDelay,
float* const* out);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
// Returns the number of elements moved, and adjusts |system_delay| by the
// corresponding amount in ms.
int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
// Calculates the median, standard deviation and amount of poor values among the
// delay estimates aggregated up to the first call to the function. After that
// first call the metrics are aggregated and updated every second. With poor
// values we mean values that most likely will cause the AEC to perform poorly.
// TODO(bjornv): Consider changing tests and tools to handle a constant
// aggregation window throughout the session instead.
int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
float* fraction_poor_delays);
// Returns the echo state (1: echo, 0: no echo).
int WebRtcAec_echo_state(AecCore* self);
// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
void WebRtcAec_GetEchoStats(AecCore* self,
Stats* erl,
Stats* erle,
Stats* a_nlp);
#ifdef WEBRTC_AEC_DEBUG_DUMP
void* WebRtcAec_far_time_buf(AecCore* self);
#endif
// Sets local configuration modes.
void WebRtcAec_SetConfigCore(AecCore* self,
int nlp_mode,
int metrics_mode,
int delay_logging);
// Non-zero enables, zero disables.
void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
// enabled and zero if disabled.
int WebRtcAec_delay_agnostic_enabled(AecCore* self);
// Enables or disables extended filter mode. Non-zero enables, zero disables.
void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
// Returns non-zero if extended filter mode is enabled and zero if disabled.
int WebRtcAec_extended_filter_enabled(AecCore* self);
// Returns the current |system_delay|, i.e., the buffered difference between
// far-end and near-end.
int WebRtcAec_system_delay(AecCore* self);
// Sets the |system_delay| to |value|. Note that if the value is changed
// improperly, there can be a performance regression. So it should be used with
// care.
void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
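
Taken together, the rewritten header replaces the old aec_t-based API with an
opaque AecCore handle and float-based processing. A minimal usage sketch based
only on the declarations above (far-end buffering and error handling are
simplified; the real caller is the echo_cancellation.c wrapper):

    #include "webrtc/modules/audio_processing/aec/aec_core.h"

    static int RunAecSketch(const float* const* nearend, float* const* out) {
      AecCore* aec = WebRtcAec_CreateAec();  /* Returns NULL on error. */
      if (aec == NULL) return -1;
      if (WebRtcAec_InitAec(aec, 16000) != 0) {  /* 16 kHz, one band. */
        WebRtcAec_FreeAec(aec);
        return -1;
      }
      /* Far-end audio must be fed in first, one PART_LEN-sample partition
         at a time, via WebRtcAec_BufferFarendPartition(aec, far_block). */
      WebRtcAec_ProcessFrames(aec, nearend, 1 /* num_bands */,
                              FRAME_LEN /* num_samples */,
                              0 /* knownDelay */, out);
      WebRtcAec_FreeAec(aec);
      return 0;
    }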


@@ -0,0 +1,202 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core.h"
#include "webrtc/typedefs.h"
// Number of partitions for the extended filter mode. The first one is an enum
// to be used in array declarations, as it represents the maximum filter length.
enum {
kExtendedNumPartitions = 32
};
static const int kNormalNumPartitions = 12;
// Delay estimator constants, used for logging and delay compensation if
// reported delays are disabled.
enum {
kLookaheadBlocks = 15
};
enum {
// 500 ms for 16 kHz (125 blocks * 64 samples / 16000 Hz = 0.5 s), which
// matches the limit of reported delays.
kHistorySizeBlocks = 125
};
// Extended filter adaptation parameters.
// TODO(ajm): No narrowband tuning yet.
static const float kExtendedMu = 0.4f;
static const float kExtendedErrorThreshold = 1.0e-6f;
typedef struct PowerLevel {
float sfrsum;
int sfrcounter;
float framelevel;
float frsum;
int frcounter;
float minlevel;
float averagelevel;
} PowerLevel;
struct AecCore {
int farBufWritePos, farBufReadPos;
int knownDelay;
int inSamples, outSamples;
int delayEstCtr;
RingBuffer* nearFrBuf;
RingBuffer* outFrBuf;
RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
float dBuf[PART_LEN2]; // nearend
float eBuf[PART_LEN2]; // error
float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend
float xPow[PART_LEN1];
float dPow[PART_LEN1];
float dMinPow[PART_LEN1];
float dInitMinPow[PART_LEN1];
float* noisePow;
float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
// Farend windowed fft buffer.
complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd
float hNs[PART_LEN1];
float hNlFbMin, hNlFbLocalMin;
float hNlXdAvgMin;
int hNlNewMin, hNlMinCtr;
float overDrive, overDriveSm;
int nlp_mode;
float outBuf[PART_LEN];
int delayIdx;
short stNearState, echoState;
short divergeState;
int xfBufBlockPos;
RingBuffer* far_buf;
RingBuffer* far_buf_windowed;
int system_delay; // Current system delay buffered in AEC.
int mult; // sampling frequency multiple
int sampFreq;
size_t num_bands;
uint32_t seed;
float normal_mu; // stepsize
float normal_error_threshold; // error threshold
int noiseEstCtr;
PowerLevel farlevel;
PowerLevel nearlevel;
PowerLevel linoutlevel;
PowerLevel nlpoutlevel;
int metricsMode;
int stateCounter;
Stats erl;
Stats erle;
Stats aNlp;
Stats rerl;
// Quantities to control H band scaling for SWB input
int freq_avg_ic; // initial bin for averaging nlp gain
int flag_Hband_cn; // for comfort noise
float cn_scale_Hband; // scale for comfort noise in H band
int delay_metrics_delivered;
int delay_histogram[kHistorySizeBlocks];
int num_delay_values;
int delay_median;
int delay_std;
float fraction_poor_delays;
int delay_logging_enabled;
void* delay_estimator_farend;
void* delay_estimator;
// Variables associated with delay correction through signal based delay
// estimation feedback.
int signal_delay_correction;
int previous_delay;
int delay_correction_count;
int shift_offset;
float delay_quality_threshold;
int frame_count;
// 0 = delay agnostic mode (signal based delay correction) disabled.
// Otherwise enabled.
int delay_agnostic_enabled;
// 1 = extended filter mode enabled, 0 = disabled.
int extended_filter_enabled;
// Runtime selection of number of filter partitions.
int num_partitions;
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Sequence number of this AEC instance, so that different instances can
// choose different dump file names.
int instance_index;
// Number of times we've restarted dumping; used to pick new dump file names
// each time.
int debug_dump_count;
RingBuffer* far_time_buf;
rtc_WavWriter* farFile;
rtc_WavWriter* nearFile;
rtc_WavWriter* outFile;
rtc_WavWriter* outLinearFile;
FILE* e_fft_file;
#endif
};
typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
extern WebRtcAecFilterFar WebRtcAec_FilterFar;
typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]);
extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]);
extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
float efw[2][PART_LEN1],
complex_t* comfortNoiseHband,
const float* noisePow,
const float* lambda);
extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
float efw[2][PART_LEN1],
float xfw[2][PART_LEN1],
float* fft,
float* cohde,
float* cohxd);
extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
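
The typedefs and extern declarations above form a runtime dispatch layer: each
speed-critical kernel is reached through a global function pointer, which the
generic code points at the portable C implementation and which the
per-architecture init functions rebind (exactly as WebRtcAec_InitAec_mips()
does in the MIPS file below). A sketch of the pattern, with a hypothetical
generic kernel name; the actual wiring lives in aec_core.c and the per-arch
files:

    /* In aec_core.c: define the pointer once, installing the portable
       kernel (FilterFarGeneric is an illustrative stand-in). */
    WebRtcAecFilterFar WebRtcAec_FilterFar = FilterFarGeneric;
    /* In a per-arch init function: rebind to the optimized kernel. */
    WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
    /* At every call site: dispatch through the pointer. */
    WebRtcAec_FilterFar(aec, yf);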


@@ -0,0 +1,774 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The core AEC algorithm, which is presented with time-aligned signals.
*/
#include "webrtc/modules/audio_processing/aec/aec_core.h"
#include <math.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
static const int flagHbandCn = 1; // flag for adding comfort noise in H band
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];
void WebRtcAec_ComfortNoise_mips(AecCore* aec,
float efw[2][PART_LEN1],
complex_t* comfortNoiseHband,
const float* noisePow,
const float* lambda) {
int i, num;
float rand[PART_LEN];
float noise, noiseAvg, tmp, tmpAvg;
int16_t randW16[PART_LEN];
complex_t u[PART_LEN1];
const float pi2 = 6.28318530717959f;
const float pi2t = pi2 / 32768;
// Generate a uniform random array on [0 1]
WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
int16_t* randWptr = randW16;
float randTemp, randTemp2, randTemp3, randTemp4;
int32_t tmp1s, tmp2s, tmp3s, tmp4s;
for (i = 0; i < PART_LEN; i+=4) {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lh %[tmp1s], 0(%[randWptr]) \n\t"
"lh %[tmp2s], 2(%[randWptr]) \n\t"
"lh %[tmp3s], 4(%[randWptr]) \n\t"
"lh %[tmp4s], 6(%[randWptr]) \n\t"
"mtc1 %[tmp1s], %[randTemp] \n\t"
"mtc1 %[tmp2s], %[randTemp2] \n\t"
"mtc1 %[tmp3s], %[randTemp3] \n\t"
"mtc1 %[tmp4s], %[randTemp4] \n\t"
"cvt.s.w %[randTemp], %[randTemp] \n\t"
"cvt.s.w %[randTemp2], %[randTemp2] \n\t"
"cvt.s.w %[randTemp3], %[randTemp3] \n\t"
"cvt.s.w %[randTemp4], %[randTemp4] \n\t"
"addiu %[randWptr], %[randWptr], 8 \n\t"
"mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
"mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
"mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
"mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
".set pop \n\t"
: [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
[randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
[randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
[tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
[tmp4s] "=&r" (tmp4s)
: [pi2t] "f" (pi2t)
: "memory"
);
u[i+1][0] = cosf(randTemp);
u[i+1][1] = sinf(randTemp);
u[i+2][0] = cosf(randTemp2);
u[i+2][1] = sinf(randTemp2);
u[i+3][0] = cosf(randTemp3);
u[i+3][1] = sinf(randTemp3);
u[i+4][0] = cosf(randTemp4);
u[i+4][1] = sinf(randTemp4);
}
// Reject LF noise
float* u_ptr = &u[1][0];
float noise2, noise3, noise4;
float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
u[0][0] = 0;
u[0][1] = 0;
for (i = 1; i < PART_LEN1; i+=4) {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lwc1 %[noise], 4(%[noisePow]) \n\t"
"lwc1 %[noise2], 8(%[noisePow]) \n\t"
"lwc1 %[noise3], 12(%[noisePow]) \n\t"
"lwc1 %[noise4], 16(%[noisePow]) \n\t"
"sqrt.s %[noise], %[noise] \n\t"
"sqrt.s %[noise2], %[noise2] \n\t"
"sqrt.s %[noise3], %[noise3] \n\t"
"sqrt.s %[noise4], %[noise4] \n\t"
"lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
"lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
"lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
"lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
"lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
"lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
"addiu %[noisePow], %[noisePow], 16 \n\t"
"mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
"mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
"mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
"mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
"mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
"mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
"swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
"swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
"mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
"mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
"neg.s %[tmp2f] \n\t"
"neg.s %[tmp4f] \n\t"
"neg.s %[tmp6f] \n\t"
"neg.s %[tmp8f] \n\t"
"swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
"swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
"swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
"swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
"swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
"swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
"addiu %[u_ptr], %[u_ptr], 32 \n\t"
".set pop \n\t"
: [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
[noise] "=&f" (noise), [noise2] "=&f" (noise2),
[noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
[tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
[tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
[tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
:
: "memory"
);
}
u[PART_LEN][1] = 0;
noisePow -= PART_LEN;
u_ptr = &u[0][0];
float* u_ptr_end = &u[PART_LEN][0];
float* efw_ptr_0 = &efw[0][0];
float* efw_ptr_1 = &efw[1][0];
float tmp9f, tmp10f;
const float tmp1c = 1.0;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[tmp1f], 0(%[lambda]) \n\t"
"lwc1 %[tmp6f], 4(%[lambda]) \n\t"
"addiu %[lambda], %[lambda], 8 \n\t"
"c.lt.s %[tmp1f], %[tmp1c] \n\t"
"bc1f 4f \n\t"
" nop \n\t"
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
"bc1f 3f \n\t"
" nop \n\t"
"2: \n\t"
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"b 5f \n\t"
" swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"3: \n\t"
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"b 5f \n\t"
" swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"4: \n\t"
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
"bc1f 5f \n\t"
" nop \n\t"
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"5: \n\t"
"addiu %[u_ptr], %[u_ptr], 16 \n\t"
"addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
"bne %[u_ptr], %[u_ptr_end], 1b \n\t"
" addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
".set pop \n\t"
: [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
[efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
[tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
[tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
[tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
: [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
: "memory"
);
lambda -= PART_LEN;
tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
//tmp = 1 - lambda[i];
efw[0][PART_LEN] += tmp * u[PART_LEN][0];
efw[1][PART_LEN] += tmp * u[PART_LEN][1];
// For H band comfort noise
// TODO: don't compute noise and "tmp" twice. Use the previous results.
noiseAvg = 0.0;
tmpAvg = 0.0;
num = 0;
if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
for (i = 0; i < PART_LEN; i++) {
rand[i] = ((float)randW16[i]) / 32768;
}
// average noise scale
// average over second half of freq spectrum (i.e., 4->8khz)
// TODO: we shouldn't need num. We know how many elements we're summing.
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
num++;
noiseAvg += sqrtf(noisePow[i]);
}
noiseAvg /= (float)num;
// average nlp scale
// average over second half of freq spectrum (i.e., 4->8khz)
// TODO: we shouldn't need num. We know how many elements we're summing.
num = 0;
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
num++;
tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
}
tmpAvg /= (float)num;
// Use average noise for H band
// TODO: we should probably have a new random vector here.
// Reject LF noise
u[0][0] = 0;
u[0][1] = 0;
for (i = 1; i < PART_LEN1; i++) {
tmp = pi2 * rand[i - 1];
// Use average noise for H band
u[i][0] = noiseAvg * (float)cos(tmp);
u[i][1] = -noiseAvg * (float)sin(tmp);
}
u[PART_LEN][1] = 0;
for (i = 0; i < PART_LEN1; i++) {
// Use average NLP weight for H band
comfortNoiseHband[i][0] = tmpAvg * u[i][0];
comfortNoiseHband[i][1] = tmpAvg * u[i][1];
}
}
}
void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
int i;
for (i = 0; i < aec->num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= aec->num_partitions) {
xPos -= aec->num_partitions * (PART_LEN1);
}
float* yf0 = yf[0];
float* yf1 = yf[1];
float* aRe = aec->xfBuf[0] + xPos;
float* aIm = aec->xfBuf[1] + xPos;
float* bRe = aec->wfBuf[0] + pos;
float* bIm = aec->wfBuf[1] + pos;
float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
int len = PART_LEN1 >> 1;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 4(%[bRe]) \n\t"
"lwc1 %[f6], 4(%[bIm]) \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
"mul.s %[f0], %[f0], %[f2] \n\t"
"mul.s %[f9], %[f4], %[f5] \n\t"
"mul.s %[f4], %[f4], %[f6] \n\t"
"lwc1 %[f7], 4(%[aIm]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f12], %[f2], %[f3] \n\t"
"mul.s %[f1], %[f3], %[f1] \n\t"
"mul.s %[f11], %[f6], %[f7] \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"sub.s %[f8], %[f8], %[f12] \n\t"
"mul.s %[f12], %[f7], %[f5] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"add.s %[f1], %[f0], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
"sub.s %[f9], %[f9], %[f11] \n\t"
"lwc1 %[f6], 4(%[yf0]) \n\t"
"add.s %[f4], %[f4], %[f12] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"addiu %[aRe], %[aRe], 8 \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
"nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t"
"lwc1 %[f6], 4(%[yf0]) \n\t"
"madd.s %[f4], %[f4], %[f7], %[f5] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"lwc1 %[f5], 4(%[yf1]) \n\t"
"add.s %[f2], %[f2], %[f8] \n\t"
"addiu %[bRe], %[bRe], 8 \n\t"
"addiu %[bIm], %[bIm], 8 \n\t"
"add.s %[f3], %[f3], %[f1] \n\t"
"add.s %[f6], %[f6], %[f9] \n\t"
"add.s %[f5], %[f5], %[f4] \n\t"
"swc1 %[f2], 0(%[yf0]) \n\t"
"swc1 %[f3], 0(%[yf1]) \n\t"
"swc1 %[f6], 4(%[yf0]) \n\t"
"swc1 %[f5], 4(%[yf1]) \n\t"
"addiu %[yf0], %[yf0], 8 \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[yf1], %[yf1], 8 \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
"mul.s %[f0], %[f0], %[f2] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f12], %[f2], %[f3] \n\t"
"mul.s %[f1], %[f3], %[f1] \n\t"
"sub.s %[f8], %[f8], %[f12] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"add.s %[f1], %[f0], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
#else // #if !defined(MIPS32_R2_LE)
"nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"add.s %[f2], %[f2], %[f8] \n\t"
"add.s %[f3], %[f3], %[f1] \n\t"
"swc1 %[f2], 0(%[yf0]) \n\t"
"swc1 %[f3], 0(%[yf1]) \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
[f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
[f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
[aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
[yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
:
: "memory"
);
}
}
void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]) {
int i;
for (i = 0; i < aec->num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
int pos;
// Check for wrap
if (i + aec->xfBufBlockPos >= aec->num_partitions) {
xPos -= aec->num_partitions * PART_LEN1;
}
pos = i * PART_LEN1;
float* aRe = aec->xfBuf[0] + xPos;
float* aIm = aec->xfBuf[1] + xPos;
float* bRe = ef[0];
float* bIm = ef[1];
float* fft_tmp;
float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
int len = PART_LEN >> 1;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[fft_tmp], %[fft], 0 \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 4(%[bRe]) \n\t"
"lwc1 %[f6], 4(%[bIm]) \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"addiu %[bRe], %[bRe], 8 \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
"mul.s %[f0], %[f0], %[f2] \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"mul.s %[f9], %[f4], %[f5] \n\t"
"lwc1 %[f7], 4(%[aIm]) \n\t"
"mul.s %[f4], %[f4], %[f6] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f10], %[f3], %[f2] \n\t"
"mul.s %[f1], %[f3], %[f1] \n\t"
"mul.s %[f11], %[f7], %[f6] \n\t"
"mul.s %[f5], %[f7], %[f5] \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[bIm], %[bIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"add.s %[f8], %[f8], %[f10] \n\t"
"sub.s %[f1], %[f0], %[f1] \n\t"
"add.s %[f9], %[f9], %[f11] \n\t"
"sub.s %[f5], %[f4], %[f5] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[bIm], %[bIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
"nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t"
"madd.s %[f9], %[f9], %[f7], %[f6] \n\t"
"nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[f8], 0(%[fft_tmp]) \n\t"
"swc1 %[f1], 4(%[fft_tmp]) \n\t"
"swc1 %[f9], 8(%[fft_tmp]) \n\t"
"swc1 %[f5], 12(%[fft_tmp]) \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f10], %[f3], %[f2] \n\t"
"add.s %[f8], %[f8], %[f10] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[f8], 4(%[fft]) \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
[f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
[f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
[bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
[len] "+r" (len)
: [fft] "r" (fft)
: "memory"
);
aec_rdft_inverse_128(fft);
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
// fft scaling
{
float scale = 2.0f / PART_LEN2;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[fft_tmp], %[fft], 0 \n\t"
"addiu %[len], $zero, 8 \n\t"
"1: \n\t"
"addiu %[len], %[len], -1 \n\t"
"lwc1 %[f0], 0(%[fft_tmp]) \n\t"
"lwc1 %[f1], 4(%[fft_tmp]) \n\t"
"lwc1 %[f2], 8(%[fft_tmp]) \n\t"
"lwc1 %[f3], 12(%[fft_tmp]) \n\t"
"mul.s %[f0], %[f0], %[scale] \n\t"
"mul.s %[f1], %[f1], %[scale] \n\t"
"mul.s %[f2], %[f2], %[scale] \n\t"
"mul.s %[f3], %[f3], %[scale] \n\t"
"lwc1 %[f4], 16(%[fft_tmp]) \n\t"
"lwc1 %[f5], 20(%[fft_tmp]) \n\t"
"lwc1 %[f6], 24(%[fft_tmp]) \n\t"
"lwc1 %[f7], 28(%[fft_tmp]) \n\t"
"mul.s %[f4], %[f4], %[scale] \n\t"
"mul.s %[f5], %[f5], %[scale] \n\t"
"mul.s %[f6], %[f6], %[scale] \n\t"
"mul.s %[f7], %[f7], %[scale] \n\t"
"swc1 %[f0], 0(%[fft_tmp]) \n\t"
"swc1 %[f1], 4(%[fft_tmp]) \n\t"
"swc1 %[f2], 8(%[fft_tmp]) \n\t"
"swc1 %[f3], 12(%[fft_tmp]) \n\t"
"swc1 %[f4], 16(%[fft_tmp]) \n\t"
"swc1 %[f5], 20(%[fft_tmp]) \n\t"
"swc1 %[f6], 24(%[fft_tmp]) \n\t"
"swc1 %[f7], 28(%[fft_tmp]) \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[fft_tmp], %[fft_tmp], 32 \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
[fft_tmp] "=&r" (fft_tmp)
: [scale] "f" (scale), [fft] "r" (fft)
: "memory"
);
}
aec_rdft_forward_128(fft);
aRe = aec->wfBuf[0] + pos;
aIm = aec->wfBuf[1] + pos;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[fft_tmp], %[fft], 0 \n\t"
"addiu %[len], $zero, 31 \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[fft_tmp]) \n\t"
"lwc1 %[f2], 256(%[aRe]) \n\t"
"lwc1 %[f3], 4(%[fft_tmp]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 8(%[fft_tmp]) \n\t"
"lwc1 %[f6], 4(%[aIm]) \n\t"
"lwc1 %[f7], 12(%[fft_tmp]) \n\t"
"add.s %[f0], %[f0], %[f1] \n\t"
"add.s %[f2], %[f2], %[f3] \n\t"
"add.s %[f4], %[f4], %[f5] \n\t"
"add.s %[f6], %[f6], %[f7] \n\t"
"addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
"swc1 %[f0], 0(%[aRe]) \n\t"
"swc1 %[f2], 256(%[aRe]) \n\t"
"swc1 %[f4], 4(%[aRe]) \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"swc1 %[f6], 4(%[aIm]) \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[fft_tmp]) \n\t"
"lwc1 %[f2], 0(%[aIm]) \n\t"
"lwc1 %[f3], 4(%[fft_tmp]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 8(%[fft_tmp]) \n\t"
"lwc1 %[f6], 4(%[aIm]) \n\t"
"lwc1 %[f7], 12(%[fft_tmp]) \n\t"
"add.s %[f0], %[f0], %[f1] \n\t"
"add.s %[f2], %[f2], %[f3] \n\t"
"add.s %[f4], %[f4], %[f5] \n\t"
"add.s %[f6], %[f6], %[f7] \n\t"
"addiu %[len], %[len], -1 \n\t"
"addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
"swc1 %[f0], 0(%[aRe]) \n\t"
"swc1 %[f2], 0(%[aIm]) \n\t"
"swc1 %[f4], 4(%[aRe]) \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"swc1 %[f6], 4(%[aIm]) \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[aIm], %[aIm], 8 \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
[fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
: [fft] "r" (fft)
: "memory"
);
}
}
void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
const float one = 1.0;
float* p_hNl;
float* p_efw0;
float* p_efw1;
float* p_WebRtcAec_wC;
float temp1, temp2, temp3, temp4;
p_hNl = &hNl[0];
p_efw0 = &efw[0][0];
p_efw1 = &efw[1][0];
p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
for (i = 0; i < PART_LEN1; i++) {
// Weight subbands
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lwc1 %[temp1], 0(%[p_hNl]) \n\t"
"lwc1 %[temp2], 0(%[p_wC]) \n\t"
"c.lt.s %[hNlFb], %[temp1] \n\t"
"bc1f 1f \n\t"
" mul.s %[temp3], %[temp2], %[hNlFb] \n\t"
"sub.s %[temp4], %[one], %[temp2] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[temp1], %[temp1], %[temp4] \n\t"
"add.s %[temp1], %[temp3], %[temp1] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[temp1], 0(%[p_hNl]) \n\t"
"1: \n\t"
"addiu %[p_wC], %[p_wC], 4 \n\t"
".set pop \n\t"
: [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
[temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
: [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
: "memory"
);
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
__asm __volatile (
"lwc1 %[temp1], 0(%[p_hNl]) \n\t"
"lwc1 %[temp3], 0(%[p_efw1]) \n\t"
"lwc1 %[temp2], 0(%[p_efw0]) \n\t"
"addiu %[p_hNl], %[p_hNl], 4 \n\t"
"mul.s %[temp3], %[temp3], %[temp1] \n\t"
"mul.s %[temp2], %[temp2], %[temp1] \n\t"
"addiu %[p_efw0], %[p_efw0], 4 \n\t"
"addiu %[p_efw1], %[p_efw1], 4 \n\t"
"neg.s %[temp4], %[temp3] \n\t"
"swc1 %[temp2], -4(%[p_efw0]) \n\t"
"swc1 %[temp4], -4(%[p_efw1]) \n\t"
: [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
[temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
[p_hNl] "+r" (p_hNl)
:
: "memory"
);
}
}
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled
? kExtendedErrorThreshold
: aec->normal_error_threshold;
int len = (PART_LEN1);
float* ef0 = ef[0];
float* ef1 = ef[1];
float* xPow = aec->xPow;
float fac1 = 1e-10f;
float err_th2 = error_threshold * error_threshold;
float f0, f1, f2;
#if !defined(MIPS32_R2_LE)
float f3;
#endif
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[xPow]) \n\t"
"lwc1 %[f1], 0(%[ef0]) \n\t"
"lwc1 %[f2], 0(%[ef1]) \n\t"
"add.s %[f0], %[f0], %[fac1] \n\t"
"div.s %[f1], %[f1], %[f0] \n\t"
"div.s %[f2], %[f2], %[f0] \n\t"
"mul.s %[f0], %[f1], %[f1] \n\t"
#if defined(MIPS32_R2_LE)
"madd.s %[f0], %[f0], %[f2], %[f2] \n\t"
#else
"mul.s %[f3], %[f2], %[f2] \n\t"
"add.s %[f0], %[f0], %[f3] \n\t"
#endif
"c.le.s %[f0], %[err_th2] \n\t"
"nop \n\t"
"bc1t 2f \n\t"
" nop \n\t"
"sqrt.s %[f0], %[f0] \n\t"
"add.s %[f0], %[f0], %[fac1] \n\t"
"div.s %[f0], %[err_th], %[f0] \n\t"
"mul.s %[f1], %[f1], %[f0] \n\t"
"mul.s %[f2], %[f2], %[f0] \n\t"
"2: \n\t"
"mul.s %[f1], %[f1], %[mu] \n\t"
"mul.s %[f2], %[f2], %[mu] \n\t"
"swc1 %[f1], 0(%[ef0]) \n\t"
"swc1 %[f2], 0(%[ef1]) \n\t"
"addiu %[len], %[len], -1 \n\t"
"addiu %[xPow], %[xPow], 4 \n\t"
"addiu %[ef0], %[ef0], 4 \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[ef1], %[ef1], 4 \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
#if !defined(MIPS32_R2_LE)
[f3] "=&f" (f3),
#endif
[xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
[len] "+r" (len)
: [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
[err_th] "f" (error_threshold)
: "memory"
);
}
void WebRtcAec_InitAec_mips(void) {
WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
}
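
For readers of the assembly, the per-bin computation carried out by
WebRtcAec_OverdriveAndSuppress_mips above is equivalent to the following
scalar C (a sketch reconstructed from the asm; the canonical portable version
lives in aec_core.c):

    for (i = 0; i < PART_LEN1; i++) {
      /* Weight subbands: blend hNl toward the feedback value hNlFb. */
      if (hNl[i] > hNlFb) {
        hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
                 (1 - WebRtcAec_weightCurve[i]) * hNl[i];
      }
      hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
      /* Suppress the error signal; the imaginary part is negated
         (the neg.s in the asm). */
      efw[0][i] *= hNl[i];
      efw[1][i] *= hNl[i];
      efw[1][i] = -efw[1][i];
    }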


@@ -0,0 +1,736 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The core AEC algorithm, neon version of speed-critical functions.
*
* Based on aec_core_sse2.c.
*/
#include <arm_neon.h>
#include <math.h>
#include <string.h> // memset
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
enum { kShiftExponentIntoTopMantissa = 8 };
enum { kFloatExponentShift = 23 };
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
return aRe * bRe - aIm * bIm;
}
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
return aRe * bIm + aIm * bRe;
}
static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
int i;
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int j;
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * PART_LEN1;
}
// vectorized code (four at once)
for (j = 0; j + 3 < PART_LEN1; j += 4) {
const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
const float32x4_t g = vaddq_f32(yf_re, e);
const float32x4_t h = vaddq_f32(yf_im, f);
vst1q_f32(&yf[0][j], g);
vst1q_f32(&yf[1][j], h);
}
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
}
}
}
// ARM64's arm_neon.h has already defined vdivq_f32 and vsqrtq_f32.
#if !defined (WEBRTC_ARCH_ARM64)
static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
int i;
float32x4_t x = vrecpeq_f32(b);
// from arm documentation
// The Newton-Raphson iteration:
// x[n+1] = x[n] * (2 - d * x[n])
// converges to (1/d) if x0 is the result of VRECPE applied to d.
//
// Note: The precision did not improve after 2 iterations.
for (i = 0; i < 2; i++) {
x = vmulq_f32(vrecpsq_f32(b, x), x);
}
// a/b = a*(1/b)
return vmulq_f32(a, x);
}
static float32x4_t vsqrtq_f32(float32x4_t s) {
int i;
float32x4_t x = vrsqrteq_f32(s);
// Code to handle sqrt(0).
// If the input to sqrtf() is zero, a zero will be returned.
// If the input to vrsqrteq_f32() is zero, positive infinity is returned.
const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
// check for divide by zero
const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
// zero out the positive infinity results
x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
vreinterpretq_u32_f32(x)));
// from arm documentation
// The Newton-Raphson iteration:
// x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2)
// converges to (1/√d) if x0 is the result of VRSQRTE applied to d.
//
// Note: The precision did not improve after 2 iterations.
for (i = 0; i < 2; i++) {
x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
}
// sqrt(s) = s * 1/sqrt(s)
return vmulq_f32(s, x);
}
#endif // WEBRTC_ARCH_ARM64
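// Similarly, a scalar sketch of the sqrt(s) = s * rsqrt(s) strategy used by
// vsqrtq_f32 above, with an explicit zero check in place of the infinity
// masking (illustration only, not part of the upstream file):
//
// #include <stdio.h>
//
// static float sqrt_nr(float s, float seed, int steps) {
//   float x = seed;  /* rough 1/sqrt(s) estimate, as vrsqrteq_f32 provides */
//   int i;
//   if (s == 0.0f) {
//     return 0.0f;  /* vrsqrteq_f32(0) yields +infinity, hence the masking */
//   }
//   for (i = 0; i < steps; i++) {
//     /* x[n+1] = x[n] * (3 - s * x[n] * x[n]) / 2, as vrsqrtsq_f32 computes */
//     x = x * (3.0f - s * x * x) * 0.5f;
//   }
//   return s * x;  /* sqrt(s) = s * (1/sqrt(s)) */
// }
//
// int main(void) {
//   printf("%.6f\n", sqrt_nr(2.0f, 0.7f, 4));  /* ~1.414214 */
//   return 0;
// }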
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled ?
kExtendedErrorThreshold : aec->normal_error_threshold;
const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
const float32x4_t kMu = vmovq_n_f32(mu);
const float32x4_t kThresh = vmovq_n_f32(error_threshold);
int i;
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
const float32x4_t absEf = vsqrtq_f32(ef_sum2);
const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
vreinterpretq_u32_f32(ef_re));
uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
vreinterpretq_u32_f32(ef_im));
ef_re_if = vandq_u32(bigger, ef_re_if);
ef_im_if = vandq_u32(bigger, ef_im_if);
ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
vst1q_f32(&ef[0][i], ef_re);
vst1q_f32(&ef[1][i], ef_im);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
float abs_ef;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
if (abs_ef > error_threshold) {
abs_ef = error_threshold / (abs_ef + 1e-10f);
ef[0][i] *= abs_ef;
ef[1][i] *= abs_ef;
}
// Stepsize factor
ef[0][i] *= mu;
ef[1][i] *= mu;
}
}
static void FilterAdaptationNEON(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]) {
int i;
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
int j;
// Check for wrap
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * PART_LEN1;
}
// Process the whole array...
for (j = 0; j < PART_LEN; j += 4) {
// Load xfBuf and ef.
const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
// Calculate the product of conjugate(xfBuf) by ef.
// re(conjugate(a) * b) = aRe * bRe + aIm * bIm
// im(conjugate(a) * b) = aRe * bIm - aIm * bRe
const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
// Interleave real and imaginary parts.
const float32x4x2_t g_n_h = vzipq_f32(e, f);
// Store
vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
}
// ... and fixup the first imaginary entry.
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-aec->xfBuf[1][xPos + PART_LEN],
ef[0][PART_LEN],
ef[1][PART_LEN]);
aec_rdft_inverse_128(fft);
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
// fft scaling
{
const float scale = 2.0f / PART_LEN2;
const float32x4_t scale_ps = vmovq_n_f32(scale);
for (j = 0; j < PART_LEN; j += 4) {
const float32x4_t fft_ps = vld1q_f32(&fft[j]);
const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
vst1q_f32(&fft[j], fft_scale);
}
}
aec_rdft_forward_128(fft);
{
const float wt1 = aec->wfBuf[1][pos];
aec->wfBuf[0][pos + PART_LEN] += fft[1];
for (j = 0; j < PART_LEN; j += 4) {
float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
}
aec->wfBuf[1][pos] = wt1;
}
}
}
static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
// a^b = exp2(b * log2(a))
// exp2(x) and log2(x) are calculated using polynomial approximations.
float32x4_t log2_a, b_log2_a, a_exp_b;
// Calculate log2(x), x = a.
{
// To calculate log2(x), we decompose x like this:
// x = y * 2^n
// n is an integer
// y is in the [1.0, 2.0) range
//
// log2(x) = log2(y) + n
// n can be evaluated by playing with float representation.
// log2(y) in a small range can be approximated, this code uses an order
// five polynomial approximation. The coefficients have been
// estimated with the Remez algorithm and the resulting
// polynomial has a maximum relative error of 0.00086%.
// Compute n.
// This is done by masking the exponent, shifting it into the top bit of
// the mantissa, putting eight into the biased exponent (to compensate
// for the fact that the exponent has been shifted into the top/
// fractional part), and finally getting rid of the implicit leading one
// from the mantissa by subtracting it out.
const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a),
vec_float_exponent_mask);
const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
const float32x4_t n =
vsubq_f32(vreinterpretq_f32_u32(n_0),
vreinterpretq_f32_u32(vec_implicit_leading_one));
// Compute y.
const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF);
const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000);
const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a),
vec_mantissa_mask);
const float32x4_t y =
vreinterpretq_f32_u32(vorrq_u32(mantissa,
vec_zero_biased_exponent_is_one));
// Approximate log2(y) ~= (y - 1) * pol5(y).
// pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f);
const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f);
const float32x4_t C3 = vdupq_n_f32(-1.2315303f);
const float32x4_t C2 = vdupq_n_f32(2.5988452f);
const float32x4_t C1 = vdupq_n_f32(-3.3241990f);
const float32x4_t C0 = vdupq_n_f32(3.1157899f);
float32x4_t pol5_y = C5;
pol5_y = vmlaq_f32(C4, y, pol5_y);
pol5_y = vmlaq_f32(C3, y, pol5_y);
pol5_y = vmlaq_f32(C2, y, pol5_y);
pol5_y = vmlaq_f32(C1, y, pol5_y);
pol5_y = vmlaq_f32(C0, y, pol5_y);
const float32x4_t y_minus_one =
vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one));
const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y);
// Combine parts.
log2_a = vaddq_f32(n, log2_y);
}
// b * log2(a)
b_log2_a = vmulq_f32(b, log2_a);
// Calculate exp2(x), x = b * log2(a).
{
// To calculate 2^x, we decompose x like this:
// x = n + y
// n is an integer, the value of x - 0.5 rounded down, therefore
// y is in the [0.5, 1.5) range
//
// 2^x = 2^n * 2^y
// 2^n can be evaluated by playing with float representation.
// 2^y in a small range can be approximated, this code uses an order two
// polynomial approximation. The coefficients have been estimated
// with the Remez algorithm and the resulting polynomial has a
// maximum relative error of 0.17%.
// To avoid over/underflow, we reduce the range of input to ]-127, 129].
const float32x4_t max_input = vdupq_n_f32(129.f);
const float32x4_t min_input = vdupq_n_f32(-126.99999f);
const float32x4_t x_min = vminq_f32(b_log2_a, max_input);
const float32x4_t x_max = vmaxq_f32(x_min, min_input);
// Compute n.
const float32x4_t half = vdupq_n_f32(0.5f);
const float32x4_t x_minus_half = vsubq_f32(x_max, half);
const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half);
// Compute 2^n.
const int32x4_t float_exponent_bias = vdupq_n_s32(127);
const int32x4_t two_n_exponent =
vaddq_s32(x_minus_half_floor, float_exponent_bias);
const float32x4_t two_n =
vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift));
// Compute y.
const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor));
// Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f);
const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f);
const float32x4_t C0 = vdupq_n_f32(1.0017247f);
float32x4_t exp2_y = C2;
exp2_y = vmlaq_f32(C1, y, exp2_y);
exp2_y = vmlaq_f32(C0, y, exp2_y);
// Combine parts.
a_exp_b = vmulq_f32(exp2_y, two_n);
}
return a_exp_b;
}
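// For reference, the same decomposition in scalar C with the identical Remez
// coefficients (a sketch for illustration only; it truncates rather than
// floors when computing the exponent split, exactly as vcvtq_s32_f32 does,
// and omits the ]-127, 129] range clamp):
//
// #include <stdio.h>
// #include <string.h>
//
// static float pow_approx(float a, float b) {
//   /* log2(a): pull n from the exponent bits and y in [1.0, 2.0) from the
//    * mantissa, so that a = y * 2^n and log2(a) = n + log2(y). */
//   unsigned int bits;
//   memcpy(&bits, &a, sizeof(bits));
//   const int n = (int)((bits >> 23) & 0xFF) - 127;  /* unbiased exponent */
//   bits = (bits & 0x007FFFFF) | 0x3F800000;         /* force exponent to 0 */
//   float y;
//   memcpy(&y, &bits, sizeof(y));
//   /* log2(y) ~= (y - 1) * pol5(y), Horner form. */
//   float p = -3.4436006e-2f;
//   p = p * y + 3.1821337e-1f;
//   p = p * y - 1.2315303f;
//   p = p * y + 2.5988452f;
//   p = p * y - 3.3241990f;
//   p = p * y + 3.1157899f;
//   const float log2_a = (float)n + (y - 1.0f) * p;
//   /* exp2(x): split x = m + z, build 2^m from the exponent bits, and
//    * approximate 2^z with the order-two polynomial. */
//   const float x = b * log2_a;
//   const int m = (int)(x - 0.5f);  /* truncation, like vcvtq_s32_f32 */
//   const float z = x - (float)m;
//   float q = 3.3718944e-1f;
//   q = q * z + 6.5763628e-1f;
//   q = q * z + 1.0017247f;
//   unsigned int ebits = (unsigned int)(m + 127) << 23;  /* 2^m */
//   float two_m;
//   memcpy(&two_m, &ebits, sizeof(two_m));
//   return q * two_m;
// }
//
// int main(void) {
//   printf("%.2f\n", pow_approx(2.0f, 10.0f));  /* ~1024 within ~0.2% */
//   return 0;
// }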
static void OverdriveAndSuppressNEON(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
const float32x4_t vec_one = vdupq_n_f32(1.0f);
const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
// Weight subbands
float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve,
vec_hNlFb);
const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve,
vec_hNl);
const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger),
vreinterpretq_u32_f32(vec_hNl));
const float32x4_t vec_one_weightCurve_add =
vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
const uint32x4_t vec_if1 =
vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));
vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
{
const float32x4_t vec_overDriveCurve =
vld1q_f32(&WebRtcAec_overDriveCurve[i]);
const float32x4_t vec_overDriveSm_overDriveCurve =
vmulq_f32(vec_overDriveSm, vec_overDriveCurve);
vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
vst1q_f32(&hNl[i], vec_hNl);
}
// Suppress error signal
{
float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
vst1q_f32(&efw[0][i], vec_efw_re);
vst1q_f32(&efw[1][i], vec_efw_im);
}
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
// Weight subbands
if (hNl[i] > hNlFb) {
hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
}
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
// Suppress error signal
efw[0][i] *= hNl[i];
efw[1][i] *= hNl[i];
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
efw[1][i] *= -1;
}
}
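// A worked instance of the overdrive step above, with hypothetical values
// (illustration only):
//
// #include <math.h>
// #include <stdio.h>
//
// int main(void) {
//   const float hNl = 0.5f;  /* hypothetical subband suppression gain */
//   /* Exponents above one (strong overdrive) push the gain toward zero
//    * and deepen suppression; exponents below one soften it. */
//   printf("%.4f %.4f\n", powf(hNl, 2.0f), powf(hNl, 0.5f));
//   /* prints 0.2500 0.7071 */
//   return 0;
// }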
static int PartitionDelay(const AecCore* aec) {
// Measures the energy in each filter partition and returns the partition
// with the highest energy.
// TODO(bjornv): Spread computational cost by computing one partition per
// block?
float wfEnMax = 0;
int i;
int delay = 0;
for (i = 0; i < aec->num_partitions; i++) {
int j;
int pos = i * PART_LEN1;
float wfEn = 0;
float32x4_t vec_wfEn = vdupq_n_f32(0.0f);
// vectorized code (four at once)
for (j = 0; j + 3 < PART_LEN1; j += 4) {
const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
}
{
float32x2_t vec_total;
// A B C D
vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn));
// A+B C+D
vec_total = vpadd_f32(vec_total, vec_total);
// A+B+C+D A+B+C+D
wfEn = vget_lane_f32(vec_total, 0);
}
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
}
if (wfEn > wfEnMax) {
wfEnMax = wfEn;
delay = i;
}
}
return delay;
}
// Updates the following smoothed Power Spectral Densities (PSD):
// - sd : near-end
// - se : residual echo
// - sx : far-end
// - sde : cross-PSD of near-end and residual echo
// - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined, upon which actions are taken.
static void SmoothedPSD(AecCore* aec,
float efw[2][PART_LEN1],
float dfw[2][PART_LEN1],
float xfw[2][PART_LEN1]) {
// Power estimate smoothing coefficients.
const float* ptrGCoh = aec->extended_filter_enabled
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
int i;
float sdSum = 0, seSum = 0;
const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
float32x4_t vec_seSum = vdupq_n_f32(0.0f);
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
vst1q_f32(&aec->sd[i], vec_sd);
vst1q_f32(&aec->se[i], vec_se);
vst1q_f32(&aec->sx[i], vec_sx);
{
float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
vst2q_f32(&aec->sde[i][0], vec_sde);
}
{
float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
vst2q_f32(&aec->sxd[i][0], vec_sxd);
}
vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
vec_seSum = vaddq_f32(vec_seSum, vec_se);
}
{
float32x2_t vec_sdSum_total;
float32x2_t vec_seSum_total;
// A B C D
vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum),
vget_high_f32(vec_sdSum));
vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum),
vget_high_f32(vec_seSum));
// A+B C+D
vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
// A+B+C+D A+B+C+D
sdSum = vget_lane_f32(vec_sdSum_total, 0);
seSum = vget_lane_f32(vec_seSum_total, 0);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
aec->se[i] = ptrGCoh[0] * aec->se[i] +
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
// We threshold here to protect against the ill-effects of a zero farend.
// The threshold is not arbitrarily chosen, but balances protection and
// adverse interaction with the algorithm's tuning.
// TODO(bjornv): investigate further why this is so sensitive.
aec->sx[i] =
ptrGCoh[0] * aec->sx[i] +
ptrGCoh[1] * WEBRTC_SPL_MAX(
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
WebRtcAec_kMinFarendPSD);
aec->sde[i][0] =
ptrGCoh[0] * aec->sde[i][0] +
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
aec->sde[i][1] =
ptrGCoh[0] * aec->sde[i][1] +
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
aec->sxd[i][0] =
ptrGCoh[0] * aec->sxd[i][0] +
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
aec->sxd[i][1] =
ptrGCoh[0] * aec->sxd[i][1] +
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
sdSum += aec->sd[i];
seSum += aec->se[i];
}
// Divergent filter safeguard.
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
if (aec->divergeState)
memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
// Reset if error is significantly larger than nearend (13 dB).
if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
}
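// Every per-bin update above is the same first-order recursion; a minimal
// scalar sketch follows (the coefficient values here are placeholders, not
// the tuned WebRTC smoothing tables):
//
// #include <stdio.h>
//
// static float smooth_psd(float s, float re, float im, float g0, float g1) {
//   /* s <- g0 * s + g1 * |v|^2; with g0 + g1 = 1 this is an exponential
//    * moving average of the power in one frequency bin. */
//   return g0 * s + g1 * (re * re + im * im);
// }
//
// int main(void) {
//   float s = 0.0f;
//   int k;
//   for (k = 0; k < 5; k++) {
//     s = smooth_psd(s, 1.0f, 1.0f, 0.9f, 0.1f);  /* placeholder g0, g1 */
//   }
//   printf("%.4f\n", s);  /* 0.8190; converges toward |v|^2 = 2.0 */
//   return 0;
// }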
// Window time domain data to be used by the fft.
__inline static void WindowData(float* x_windowed, const float* x) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]);
const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]);
// A B C D
float32x4_t vec_sqrtHanning_rev =
vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
// B A D C
vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev);
// D C B A
vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev),
vget_low_f32(vec_sqrtHanning_rev));
vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning));
vst1q_f32(&x_windowed[PART_LEN + i],
vmulq_f32(vec_Buf2, vec_sqrtHanning_rev));
}
}
// Puts fft output data into a complex valued array.
__inline static void StoreAsComplex(const float* data,
float data_complex[2][PART_LEN1]) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
vst1q_f32(&data_complex[0][i], vec_data.val[0]);
vst1q_f32(&data_complex[1][i], vec_data.val[1]);
}
// fix beginning/end values
data_complex[1][0] = 0;
data_complex[1][PART_LEN] = 0;
data_complex[0][0] = data[0];
data_complex[0][PART_LEN] = data[1];
}
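// A note on the fixups above: Ooura's real FFT packs the two purely real
// bins into the first pair, data[0] holding the DC value and data[1] the
// Nyquist value, so after de-interleaving the complex pairs the code zeroes
// both imaginary endpoints and places the two reals at bins 0 and PART_LEN.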
static void SubbandCoherenceNEON(AecCore* aec,
float efw[2][PART_LEN1],
float xfw[2][PART_LEN1],
float* fft,
float* cohde,
float* cohxd) {
float dfw[2][PART_LEN1];
int i;
if (aec->delayEstCtr == 0)
aec->delayIdx = PartitionDelay(aec);
// Use delayed far.
memcpy(xfw,
aec->xfwBuf + aec->delayIdx * PART_LEN1,
sizeof(xfw[0][0]) * 2 * PART_LEN1);
// Windowed near fft
WindowData(fft, aec->dBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, dfw);
// Windowed error fft
WindowData(fft, aec->eBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, efw);
SmoothedPSD(aec, efw, dfw, xfw);
{
const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);
// Subband coherence
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);
vst1q_f32(&cohde[i], vec_cohde);
vst1q_f32(&cohxd[i], vec_cohxd);
}
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
cohde[i] =
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
(aec->sd[i] * aec->se[i] + 1e-10f);
cohxd[i] =
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
(aec->sx[i] * aec->sd[i] + 1e-10f);
}
}
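// The per-bin quantity computed above is the magnitude-squared coherence,
// restated below as a scalar sketch (illustration only); values of cohxd
// near 1 suggest a bin dominated by far-end echo, while high cohde suggests
// the error signal still tracks the near end:
//
// #include <stdio.h>
//
// static float msc(float cross_re, float cross_im, float auto_a, float auto_b) {
//   /* |S_ab|^2 / (S_aa * S_bb + eps), in [0, 1]; 1 means the two signals
//    * are fully linearly related in that subband. */
//   return (cross_re * cross_re + cross_im * cross_im) /
//          (auto_a * auto_b + 1e-10f);
// }
//
// int main(void) {
//   /* Hypothetical smoothed PSD values for one bin. */
//   printf("%.4f\n", msc(0.9f, 0.0f, 1.0f, 1.0f));  /* 0.8100 */
//   return 0;
// }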
void WebRtcAec_InitAec_neon(void) {
WebRtcAec_FilterFar = FilterFarNEON;
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
}

View File

@ -12,35 +12,33 @@
* The core AEC algorithm, SSE2 version of speed-critical functions.
*/
#include "typedefs.h"
#if defined(WEBRTC_USE_SSE2)
#include <emmintrin.h>
#include <math.h>
#include <string.h> // memset
#include "aec_core.h"
#include "aec_rdft.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
{
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
return aRe * bRe - aIm * bIm;
}
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
{
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
return aRe * bIm + aIm * bRe;
}
static void FilterFarSSE2(aec_t *aec, float yf[2][PART_LEN1])
{
static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
int i;
for (i = 0; i < NR_PART; i++) {
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int j;
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= NR_PART) {
xPos -= NR_PART*(PART_LEN1);
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * (PART_LEN1);
}
// vectorized code (four at once)
@ -64,19 +62,25 @@ static void FilterFarSSE2(aec_t *aec, float yf[2][PART_LEN1])
}
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
}
}
}
static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1])
{
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
const __m128 k1e_10f = _mm_set1_ps(1e-10f);
const __m128 kThresh = _mm_set1_ps(aec->errThresh);
const __m128 kMu = _mm_set1_ps(aec->mu);
const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
: _mm_set1_ps(aec->normal_mu);
const __m128 kThresh = aec->extended_filter_enabled
? _mm_set1_ps(kExtendedErrorThreshold)
: _mm_set1_ps(aec->normal_error_threshold);
int i;
// vectorized code (four at once)
@ -110,36 +114,46 @@ static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1])
_mm_storeu_ps(&ef[1][i], ef_im);
}
// scalar code for the remaining items.
for (; i < (PART_LEN1); i++) {
float absEf;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
{
const float mu =
aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled
? kExtendedErrorThreshold
: aec->normal_error_threshold;
for (; i < (PART_LEN1); i++) {
float abs_ef;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
if (absEf > aec->errThresh) {
absEf = aec->errThresh / (absEf + 1e-10f);
ef[0][i] *= absEf;
ef[1][i] *= absEf;
if (abs_ef > error_threshold) {
abs_ef = error_threshold / (abs_ef + 1e-10f);
ef[0][i] *= abs_ef;
ef[1][i] *= abs_ef;
}
// Stepsize factor
ef[0][i] *= mu;
ef[1][i] *= mu;
}
// Stepsize factor
ef[0][i] *= aec->mu;
ef[1][i] *= aec->mu;
}
}
static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
static void FilterAdaptationSSE2(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]) {
int i, j;
for (i = 0; i < NR_PART; i++) {
int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= NR_PART) {
xPos -= NR_PART * PART_LEN1;
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * PART_LEN1;
}
// Process the whole array...
for (j = 0; j < PART_LEN; j+= 4) {
for (j = 0; j < PART_LEN; j += 4) {
// Load xfBuf and ef.
const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
@ -158,22 +172,23 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
const __m128 g = _mm_unpacklo_ps(e, f);
const __m128 h = _mm_unpackhi_ps(e, f);
// Store
_mm_storeu_ps(&fft[2*j + 0], g);
_mm_storeu_ps(&fft[2*j + 4], h);
_mm_storeu_ps(&fft[2 * j + 0], g);
_mm_storeu_ps(&fft[2 * j + 4], h);
}
// ... and fixup the first imaginary entry.
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-aec->xfBuf[1][xPos + PART_LEN],
ef[0][PART_LEN], ef[1][PART_LEN]);
ef[0][PART_LEN],
ef[1][PART_LEN]);
aec_rdft_inverse_128(fft);
memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN);
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
// fft scaling
{
float scale = 2.0f / PART_LEN2;
const __m128 scale_ps = _mm_load_ps1(&scale);
for (j = 0; j < PART_LEN; j+=4) {
for (j = 0; j < PART_LEN; j += 4) {
const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
_mm_storeu_ps(&fft[j], fft_scale);
@ -184,13 +199,15 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
{
float wt1 = aec->wfBuf[1][pos];
aec->wfBuf[0][pos + PART_LEN] += fft[1];
for (j = 0; j < PART_LEN; j+= 4) {
for (j = 0; j < PART_LEN; j += 4) {
__m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
__m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
const __m128 fft_re = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2 ,0));
const __m128 fft_im = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3 ,1));
const __m128 fft_re =
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
const __m128 fft_im =
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
_mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
@ -201,8 +218,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
}
}
static __m128 mm_pow_ps(__m128 a, __m128 b)
{
static __m128 mm_pow_ps(__m128 a, __m128 b) {
// a^b = exp2(b * log2(a))
// exp2(x) and log2(x) are calculated using polynomial approximations.
__m128 log2_a, b_log2_a, a_exp_b;
@ -227,55 +243,55 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
// compensate for the fact that the exponent has been shifted into the top/
// fractional part, and finally getting rid of the implicit leading one
// from the mantissa by subtracting it out.
static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END =
{0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END =
{0x43800000, 0x43800000, 0x43800000, 0x43800000};
static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END =
{0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {
0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {
0x43800000, 0x43800000, 0x43800000, 0x43800000};
static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {
0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
static const int shift_exponent_into_top_mantissa = 8;
const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask));
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(two_n),
shift_exponent_into_top_mantissa));
const __m128 n_0 = _mm_or_ps(n_1, *((__m128 *)eight_biased_exponent));
const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one));
const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask));
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(
_mm_castps_si128(two_n), shift_exponent_into_top_mantissa));
const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent));
const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one));
// Compute y.
static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END =
{0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END =
{0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
const __m128 mantissa = _mm_and_ps(a, *((__m128 *)mantissa_mask));
const __m128 y = _mm_or_ps(
mantissa, *((__m128 *)zero_biased_exponent_is_one));
static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {
0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {
0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask));
const __m128 y =
_mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one));
// Approximate log2(y) ~= (y - 1) * pol5(y).
// pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
static const ALIGN16_BEG float ALIGN16_END C5[4] =
{-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
static const ALIGN16_BEG float ALIGN16_END C4[4] =
{3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
static const ALIGN16_BEG float ALIGN16_END C3[4] =
{-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
static const ALIGN16_BEG float ALIGN16_END C2[4] =
{2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
static const ALIGN16_BEG float ALIGN16_END C1[4] =
{-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
static const ALIGN16_BEG float ALIGN16_END C0[4] =
{3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128 *)C5));
const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128 *)C4));
static const ALIGN16_BEG float ALIGN16_END C5[4] = {
-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
static const ALIGN16_BEG float ALIGN16_END
C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
static const ALIGN16_BEG float ALIGN16_END
C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
static const ALIGN16_BEG float ALIGN16_END
C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
static const ALIGN16_BEG float ALIGN16_END
C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
static const ALIGN16_BEG float ALIGN16_END
C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5));
const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4));
const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);
const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128 *)C3));
const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3));
const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);
const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128 *)C2));
const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2));
const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);
const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128 *)C1));
const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1));
const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);
const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128 *)C0));
const __m128 y_minus_one = _mm_sub_ps(
y, *((__m128 *)zero_biased_exponent_is_one));
const __m128 log2_y = _mm_mul_ps(y_minus_one , pol5_y);
const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0));
const __m128 y_minus_one =
_mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one));
const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);
// Combine parts.
log2_a = _mm_add_ps(n, log2_y);
@ -299,38 +315,38 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
// maximum relative error of 0.17%.
// To avoid over/underflow, we reduce the range of input to ]-127, 129].
static const ALIGN16_BEG float max_input[4] ALIGN16_END =
{129.f, 129.f, 129.f, 129.f};
static const ALIGN16_BEG float min_input[4] ALIGN16_END =
{-126.99999f, -126.99999f, -126.99999f, -126.99999f};
const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128 *)max_input));
const __m128 x_max = _mm_max_ps(x_min, *((__m128 *)min_input));
static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,
129.f, 129.f};
static const ALIGN16_BEG float min_input[4] ALIGN16_END = {
-126.99999f, -126.99999f, -126.99999f, -126.99999f};
const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input));
const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input));
// Compute n.
static const ALIGN16_BEG float half[4] ALIGN16_END =
{0.5f, 0.5f, 0.5f, 0.5f};
const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128 *)half));
static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f,
0.5f, 0.5f};
const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half));
const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);
// Compute 2^n.
static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END =
{127, 127, 127, 127};
static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {
127, 127, 127, 127};
static const int float_exponent_shift = 23;
const __m128i two_n_exponent = _mm_add_epi32(
x_minus_half_floor, *((__m128i *)float_exponent_bias));
const __m128 two_n = _mm_castsi128_ps(_mm_slli_epi32(
two_n_exponent, float_exponent_shift));
const __m128i two_n_exponent =
_mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias));
const __m128 two_n =
_mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));
// Compute y.
const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
// Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
static const ALIGN16_BEG float C2[4] ALIGN16_END =
{3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
static const ALIGN16_BEG float C1[4] ALIGN16_END =
{6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
static const ALIGN16_BEG float C0[4] ALIGN16_END =
{1.0017247f, 1.0017247f, 1.0017247f, 1.0017247f};
const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128 *)C2));
const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128 *)C1));
static const ALIGN16_BEG float C2[4] ALIGN16_END = {
3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
static const ALIGN16_BEG float C1[4] ALIGN16_END = {
6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,
1.0017247f, 1.0017247f};
const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2));
const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1));
const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128 *)C0));
const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0));
// Combine parts.
a_exp_b = _mm_mul_ps(exp2_y, two_n);
@ -338,10 +354,8 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
return a_exp_b;
}
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];
static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
static void OverdriveAndSuppressSSE2(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
@ -350,26 +364,25 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i+=4) {
for (i = 0; i + 3 < PART_LEN1; i += 4) {
// Weight subbands
__m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(
vec_weightCurve, vec_hNlFb);
const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
const __m128 vec_one_weightCurve_hNl = _mm_mul_ps(
vec_one_weightCurve, vec_hNl);
const __m128 vec_one_weightCurve_hNl =
_mm_mul_ps(vec_one_weightCurve, vec_hNl);
const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
const __m128 vec_if1 = _mm_and_ps(
bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
vec_hNl = _mm_or_ps(vec_if0, vec_if1);
{
const __m128 vec_overDriveCurve = _mm_loadu_ps(
&WebRtcAec_overDriveCurve[i]);
const __m128 vec_overDriveSm_overDriveCurve = _mm_mul_ps(
vec_overDriveSm, vec_overDriveCurve);
const __m128 vec_overDriveCurve =
_mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
const __m128 vec_overDriveSm_overDriveCurve =
_mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
_mm_storeu_ps(&hNl[i], vec_hNl);
}
@ -393,7 +406,7 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
// Weight subbands
if (hNl[i] > hNlFb) {
hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
}
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
@ -407,11 +420,312 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
}
}
__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
// A+B C+D
sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2)));
// A+B+C+D A+B+C+D
sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
_mm_store_ss(dst, sum);
}
static int PartitionDelay(const AecCore* aec) {
// Measures the energy in each filter partition and returns the partition
// with the highest energy.
// TODO(bjornv): Spread computational cost by computing one partition per
// block?
float wfEnMax = 0;
int i;
int delay = 0;
for (i = 0; i < aec->num_partitions; i++) {
int j;
int pos = i * PART_LEN1;
float wfEn = 0;
__m128 vec_wfEn = _mm_set1_ps(0.0f);
// vectorized code (four at once)
for (j = 0; j + 3 < PART_LEN1; j += 4) {
const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
}
_mm_add_ps_4x1(vec_wfEn, &wfEn);
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
}
if (wfEn > wfEnMax) {
wfEnMax = wfEn;
delay = i;
}
}
return delay;
}
// Updates the following smoothed Power Spectral Densities (PSD):
// - sd : near-end
// - se : residual echo
// - sx : far-end
// - sde : cross-PSD of near-end and residual echo
// - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined, upon which actions are taken.
static void SmoothedPSD(AecCore* aec,
float efw[2][PART_LEN1],
float dfw[2][PART_LEN1],
float xfw[2][PART_LEN1]) {
// Power estimate smoothing coefficients.
const float* ptrGCoh = aec->extended_filter_enabled
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
int i;
float sdSum = 0, seSum = 0;
const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
__m128 vec_sdSum = _mm_set1_ps(0.0f);
__m128 vec_seSum = _mm_set1_ps(0.0f);
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
__m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
__m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
__m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
__m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
__m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
__m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
_mm_storeu_ps(&aec->sd[i], vec_sd);
_mm_storeu_ps(&aec->se[i], vec_se);
_mm_storeu_ps(&aec->sx[i], vec_sx);
{
const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
__m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(2, 0, 2, 0));
__m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(3, 1, 3, 1));
__m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
__m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1);
vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011,
_mm_mul_ps(vec_dfw1, vec_efw1));
vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110,
_mm_mul_ps(vec_dfw1, vec_efw0));
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
_mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
_mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
}
{
const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
__m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(2, 0, 2, 0));
__m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(3, 1, 3, 1));
__m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0);
__m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1);
vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011,
_mm_mul_ps(vec_dfw1, vec_xfw1));
vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110,
_mm_mul_ps(vec_dfw1, vec_xfw0));
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
_mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
_mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
}
vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
vec_seSum = _mm_add_ps(vec_seSum, vec_se);
}
_mm_add_ps_4x1(vec_sdSum, &sdSum);
_mm_add_ps_4x1(vec_seSum, &seSum);
for (; i < PART_LEN1; i++) {
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
aec->se[i] = ptrGCoh[0] * aec->se[i] +
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
// We threshold here to protect against the ill-effects of a zero farend.
// The threshold is not arbitrarily chosen, but balances protection and
// adverse interaction with the algorithm's tuning.
// TODO(bjornv): investigate further why this is so sensitive.
aec->sx[i] =
ptrGCoh[0] * aec->sx[i] +
ptrGCoh[1] * WEBRTC_SPL_MAX(
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
WebRtcAec_kMinFarendPSD);
aec->sde[i][0] =
ptrGCoh[0] * aec->sde[i][0] +
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
aec->sde[i][1] =
ptrGCoh[0] * aec->sde[i][1] +
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
aec->sxd[i][0] =
ptrGCoh[0] * aec->sxd[i][0] +
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
aec->sxd[i][1] =
ptrGCoh[0] * aec->sxd[i][1] +
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
sdSum += aec->sd[i];
seSum += aec->se[i];
}
// Divergent filter safeguard.
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
if (aec->divergeState)
memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
// Reset if error is significantly larger than nearend (13 dB).
if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
}
// Window time domain data to be used by the fft.
__inline static void WindowData(float* x_windowed, const float* x) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]);
const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]);
// A B C D
__m128 vec_sqrtHanning_rev =
_mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
// D C B A
vec_sqrtHanning_rev =
_mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev,
_MM_SHUFFLE(0, 1, 2, 3));
_mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning));
_mm_storeu_ps(&x_windowed[PART_LEN + i],
_mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev));
}
}
// Puts fft output data into a complex valued array.
__inline static void StoreAsComplex(const float* data,
float data_complex[2][PART_LEN1]) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]);
const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]);
const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4,
_MM_SHUFFLE(2, 0, 2, 0));
const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4,
_MM_SHUFFLE(3, 1, 3, 1));
_mm_storeu_ps(&data_complex[0][i], vec_a);
_mm_storeu_ps(&data_complex[1][i], vec_b);
}
// fix beginning/end values
data_complex[1][0] = 0;
data_complex[1][PART_LEN] = 0;
data_complex[0][0] = data[0];
data_complex[0][PART_LEN] = data[1];
}
static void SubbandCoherenceSSE2(AecCore* aec,
float efw[2][PART_LEN1],
float xfw[2][PART_LEN1],
float* fft,
float* cohde,
float* cohxd) {
float dfw[2][PART_LEN1];
int i;
if (aec->delayEstCtr == 0)
aec->delayIdx = PartitionDelay(aec);
// Use delayed far.
memcpy(xfw,
aec->xfwBuf + aec->delayIdx * PART_LEN1,
sizeof(xfw[0][0]) * 2 * PART_LEN1);
// Windowed near fft
WindowData(fft, aec->dBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, dfw);
// Windowed error fft
WindowData(fft, aec->eBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, efw);
SmoothedPSD(aec, efw, dfw, xfw);
{
const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f);
// Subband coherence
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
const __m128 vec_sdse = _mm_add_ps(vec_1eminus10,
_mm_mul_ps(vec_sd, vec_se));
const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10,
_mm_mul_ps(vec_sd, vec_sx));
const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);
const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
_MM_SHUFFLE(2, 0, 2, 0));
const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
_MM_SHUFFLE(3, 1, 3, 1));
const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
_MM_SHUFFLE(2, 0, 2, 0));
const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
_MM_SHUFFLE(3, 1, 3, 1));
__m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0);
__m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0);
vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1));
vec_cohde = _mm_div_ps(vec_cohde, vec_sdse);
vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1));
vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx);
_mm_storeu_ps(&cohde[i], vec_cohde);
_mm_storeu_ps(&cohxd[i], vec_cohxd);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
cohde[i] =
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
(aec->sd[i] * aec->se[i] + 1e-10f);
cohxd[i] =
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
(aec->sx[i] * aec->sd[i] + 1e-10f);
}
}
}
void WebRtcAec_InitAec_SSE2(void) {
WebRtcAec_FilterFar = FilterFarSSE2;
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
}
#endif // WEBRTC_USE_SSE2

View File

@ -19,200 +19,193 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "aec_rdft.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include <math.h>
#include "system_wrappers/interface/cpu_features_wrapper.h"
#include "typedefs.h"
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/typedefs.h"
// constants shared by all paths (C, SSE2).
float rdft_w[64];
// constants used by the C path.
float rdft_wk3ri_first[32];
float rdft_wk3ri_second[32];
// constants used by SSE2 but initialized in C path.
ALIGN16_BEG float ALIGN16_END rdft_wk1r[32];
ALIGN16_BEG float ALIGN16_END rdft_wk2r[32];
ALIGN16_BEG float ALIGN16_END rdft_wk3r[32];
ALIGN16_BEG float ALIGN16_END rdft_wk1i[32];
ALIGN16_BEG float ALIGN16_END rdft_wk2i[32];
ALIGN16_BEG float ALIGN16_END rdft_wk3i[32];
ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
// These tables used to be computed at run-time. For example, refer to:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
// to see the initialization code.
const float rdft_w[64] = {
1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};
const float rdft_wk3ri_first[16] = {
1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
};
const float rdft_wk3ri_second[16] = {
-0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
-0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
-0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
-0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
-0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
-0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
-0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
-0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
-0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
-0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
-0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
-0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
-0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
-0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
-0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
-0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
-0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
-0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
-0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
-0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
-0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
-0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
-0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
-0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
-0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
-0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
-0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
-0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
-0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
-0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
};
ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
};
static int ip[16];
static void bitrv2_128_C(float* a) {
/*
The following things have been attempted but are no faster:
(a) Storing the swap indexes in a LUT (index calculations are done
for 'free' while waiting on memory/L1).
(b) Consolidate the load/store of two consecutive floats by a 64 bit
integer (execution is memory/L1 bound).
(c) Do a mix of floats and 64 bit integer to maximize register
utilization (execution is memory/L1 bound).
(d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
(e) Hard-coding the offsets to completely eliminate index
calculations.
*/
static void bitrv2_32or128(int n, int *ip, float *a) {
// n is 32 or 128
int j, j1, k, k1, m, m2;
unsigned int j, j1, k, k1;
float xr, xi, yr, yi;
ip[0] = 0;
{
int l = n;
m = 1;
while ((m << 3) < l) {
l >>= 1;
for (j = 0; j < m; j++) {
ip[m + j] = ip[j] + l;
}
m <<= 1;
}
}
m2 = 2 * m;
for (k = 0; k < m; k++) {
static const int ip[4] = {0, 64, 32, 96};
for (k = 0; k < 4; k++) {
for (j = 0; j < k; j++) {
j1 = 2 * j + ip[k];
k1 = 2 * k + ip[j];
xr = a[j1];
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += m2;
k1 += 2 * m2;
xr = a[j1];
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += m2;
k1 -= m2;
xr = a[j1];
j1 += 8;
k1 -= 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += m2;
k1 += 2 * m2;
xr = a[j1];
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
j1 = 2 * k + m2 + ip[k];
k1 = j1 + m2;
xr = a[j1];
j1 = 2 * k + 8 + ip[k];
k1 = j1 + 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
}
static void makewt_32(void) {
const int nw = 32;
int j, nwh;
float delta, x, y;
ip[0] = nw;
ip[1] = 1;
nwh = nw >> 1;
delta = atanf(1.0f) / nwh;
rdft_w[0] = 1;
rdft_w[1] = 0;
rdft_w[nwh] = cosf(delta * nwh);
rdft_w[nwh + 1] = rdft_w[nwh];
for (j = 2; j < nwh; j += 2) {
x = cosf(delta * j);
y = sinf(delta * j);
rdft_w[j] = x;
rdft_w[j + 1] = y;
rdft_w[nw - j] = y;
rdft_w[nw - j + 1] = x;
}
bitrv2_32or128(nw, ip + 2, rdft_w);
// pre-calculate constants used by cft1st_128 and cftmdl_128...
cftmdl_wk1r[0] = rdft_w[2];
cftmdl_wk1r[1] = rdft_w[2];
cftmdl_wk1r[2] = rdft_w[2];
cftmdl_wk1r[3] = -rdft_w[2];
{
int k1;
for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) {
const int k2 = 2 * k1;
const float wk2r = rdft_w[k1 + 0];
const float wk2i = rdft_w[k1 + 1];
float wk1r, wk1i;
// ... scalar version.
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i;
rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i;
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i;
rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i;
// ... vector version.
rdft_wk1r[k2 + 0] = rdft_w[k2 + 0];
rdft_wk1r[k2 + 1] = rdft_w[k2 + 0];
rdft_wk1r[k2 + 2] = rdft_w[k2 + 2];
rdft_wk1r[k2 + 3] = rdft_w[k2 + 2];
rdft_wk2r[k2 + 0] = rdft_w[k1 + 0];
rdft_wk2r[k2 + 1] = rdft_w[k1 + 0];
rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1];
rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1];
rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0];
rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0];
rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0];
rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0];
rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1];
rdft_wk1i[k2 + 1] = rdft_w[k2 + 1];
rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3];
rdft_wk1i[k2 + 3] = rdft_w[k2 + 3];
rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1];
rdft_wk2i[k2 + 1] = rdft_w[k1 + 1];
rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0];
rdft_wk2i[k2 + 3] = rdft_w[k1 + 0];
rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1];
rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1];
rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1];
rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1];
}
}
}
static void makect_32(void) {
float *c = rdft_w + 32;
const int nc = 32;
int j, nch;
float delta;
ip[1] = nc;
nch = nc >> 1;
delta = atanf(1.0f) / nch;
c[0] = cosf(delta * nch);
c[nch] = 0.5f * c[0];
for (j = 1; j < nch; j++) {
c[j] = 0.5f * cosf(delta * j);
c[nc - j] = 0.5f * sinf(delta * j);
}
}
static void cft1st_128_C(float* a) {
const int n = 128;
int j, k1, k2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
// The processing of the first set of elements was simplified in C to avoid
// some operations (multiplication by zero or one, addition of two elements
// multiplied by the same weight, ...).
x0r = a[0] + a[2];
x0i = a[1] + a[3];
x1r = a[0] - a[2];
@ -311,7 +304,7 @@ static void cft1st_128_C(float *a) {
}
}
static void cftmdl_128_C(float* a) {
const int l = 8;
const int n = 128;
const int m = 32;
@ -320,7 +313,7 @@ static void cftmdl_128_C(float *a) {
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
for (j0 = 0; j0 < l; j0 += 2) {
    j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -342,7 +335,7 @@ static void cftmdl_128_C(float *a) {
}
wk1r = rdft_w[2];
for (j0 = m; j0 < l + m; j0 += 2) {
    j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -378,7 +371,7 @@ static void cftmdl_128_C(float *a) {
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
for (j0 = k; j0 < l + k; j0 += 2) {
      j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -409,7 +402,7 @@ static void cftmdl_128_C(float *a) {
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
      j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -438,7 +431,7 @@ static void cftmdl_128_C(float *a) {
}
}
static void cftfsub_128_C(float* a) {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
@ -468,7 +461,7 @@ static void cftfsub_128(float *a) {
}
}
static void cftbsub_128_C(float* a) {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
@ -499,14 +492,14 @@ static void cftbsub_128(float *a) {
}
}
static void rftfsub_128_C(float* a) {
  const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -520,15 +513,15 @@ static void rftfsub_128_C(float *a) {
}
}
static void rftbsub_128_C(float* a) {
  const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
a[1] = -a[1];
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -543,11 +536,9 @@ static void rftbsub_128_C(float *a) {
a[65] = -a[65];
}
void aec_rdft_forward_128(float* a) {
  float xi;
  bitrv2_128(a);
cftfsub_128(a);
rftfsub_128(a);
xi = a[0] - a[1];
@ -555,33 +546,44 @@ void aec_rdft_forward_128(float *a) {
a[1] = xi;
}
void aec_rdft_inverse_128(float* a) {
  a[1] = 0.5f * (a[0] - a[1]);
  a[0] -= a[1];
  rftbsub_128(a);
  bitrv2_128(a);
cftbsub_128(a);
}
// code path selection
RftSub128 cft1st_128;
RftSub128 cftmdl_128;
RftSub128 rftfsub_128;
RftSub128 rftbsub_128;
RftSub128 cftfsub_128;
RftSub128 cftbsub_128;
RftSub128 bitrv2_128;
void aec_rdft_init(void) {
  cft1st_128 = cft1st_128_C;
  cftmdl_128 = cftmdl_128_C;
  rftfsub_128 = rftfsub_128_C;
  rftbsub_128 = rftbsub_128_C;
  cftfsub_128 = cftfsub_128_C;
  cftbsub_128 = cftbsub_128_C;
  bitrv2_128 = bitrv2_128_C;
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (WebRtc_GetCPUInfo(kSSE2)) {
    aec_rdft_init_sse2();
  }
#endif
#if defined(MIPS_FPU_LE)
  aec_rdft_init_mips();
#endif
#if defined(WEBRTC_HAS_NEON)
  aec_rdft_init_neon();
#elif defined(WEBRTC_DETECT_NEON)
  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
    aec_rdft_init_neon();
  }
#endif
}
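/* A minimal usage sketch of the entry points above (illustrative only, not
 * part of the upstream sources): aec_rdft_init() must be called once to bind
 * the platform-specific code path before either transform is used, and both
 * transforms operate in place on a 128-float buffer. In the Ooura rdft
 * convention this code follows, a forward/inverse round trip is expected to
 * leave the data scaled by n/2 = 64.
 *
 *   float buf[128];             // time-domain input samples
 *   aec_rdft_init();            // one-time code-path selection
 *   aec_rdft_forward_128(buf);  // in-place forward real FFT
 *   aec_rdft_inverse_128(buf);  // in-place inverse transform
 *   // buf[i] is now (approximately) 64 * the original input.
 */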


@ -11,6 +11,8 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#include "webrtc/modules/audio_processing/aec/aec_common.h"
// These intrinsics were unavailable before VS 2008.
// TODO(andrew): move to a common file.
#if defined(_MSC_VER) && _MSC_VER < 1500
@ -19,39 +21,41 @@ static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
#endif
// Constants shared by all paths (C, SSE2, NEON).
extern const float rdft_w[64];
// Constants used by the C path.
extern const float rdft_wk3ri_first[16];
extern const float rdft_wk3ri_second[16];
// Constants used by SSE2 and NEON but initialized in the C path.
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
// code path selection function pointers
typedef void (*RftSub128)(float* a);
extern RftSub128 rftfsub_128;
extern RftSub128 rftbsub_128;
extern RftSub128 cft1st_128;
extern RftSub128 cftmdl_128;
extern RftSub128 cftfsub_128;
extern RftSub128 cftbsub_128;
extern RftSub128 bitrv2_128;
// entry points
void aec_rdft_init(void);
void aec_rdft_init_sse2(void);
void aec_rdft_forward_128(float* a);
void aec_rdft_inverse_128(float* a);
#if defined(MIPS_FPU_LE)
void aec_rdft_init_mips(void);
#endif
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void aec_rdft_init_neon(void);
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_

File diff suppressed because it is too large.


@ -0,0 +1,355 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The rdft AEC algorithm, neon version of speed-critical functions.
*
* Based on the sse2 version.
*/
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include <arm_neon.h>
static const ALIGN16_BEG float ALIGN16_END
k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
static void cft1st_128_neon(float* a) {
const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
int j, k2;
for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
float32x4_t a00v = vld1q_f32(&a[j + 0]);
float32x4_t a04v = vld1q_f32(&a[j + 4]);
float32x4_t a08v = vld1q_f32(&a[j + 8]);
float32x4_t a12v = vld1q_f32(&a[j + 12]);
float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
float32x4_t x0v = vaddq_f32(a01v, a23v);
const float32x4_t x1v = vsubq_f32(a01v, a23v);
const float32x4_t x2v = vaddq_f32(a45v, a67v);
const float32x4_t x3v = vsubq_f32(a45v, a67v);
const float32x4_t x3w = vrev64q_f32(x3v);
float32x4_t x0w;
a01v = vaddq_f32(x0v, x2v);
x0v = vsubq_f32(x0v, x2v);
x0w = vrev64q_f32(x0v);
a45v = vmulq_f32(wk2rv, x0v);
a45v = vmlaq_f32(a45v, wk2iv, x0w);
x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
x0w = vrev64q_f32(x0v);
a23v = vmulq_f32(wk1rv, x0v);
a23v = vmlaq_f32(a23v, wk1iv, x0w);
x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
x0w = vrev64q_f32(x0v);
a67v = vmulq_f32(wk3rv, x0v);
a67v = vmlaq_f32(a67v, wk3iv, x0w);
a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
vst1q_f32(&a[j + 0], a00v);
vst1q_f32(&a[j + 4], a04v);
vst1q_f32(&a[j + 8], a08v);
vst1q_f32(&a[j + 12], a12v);
}
}
static void cftmdl_128_neon(float* a) {
int j;
const int l = 8;
const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
for (j = 0; j < l; j += 2) {
const float32x2_t a_00 = vld1_f32(&a[j + 0]);
const float32x2_t a_08 = vld1_f32(&a[j + 8]);
const float32x2_t a_32 = vld1_f32(&a[j + 32]);
const float32x2_t a_40 = vld1_f32(&a[j + 40]);
const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
const float32x2_t a_16 = vld1_f32(&a[j + 16]);
const float32x2_t a_24 = vld1_f32(&a[j + 24]);
const float32x2_t a_48 = vld1_f32(&a[j + 48]);
const float32x2_t a_56 = vld1_f32(&a[j + 56]);
const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
const float32x4_t x1_x3_add =
vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x4_t x1_x3_sub =
vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0);
const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0);
const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1);
const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1);
const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as);
const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
const float32x4_t xx1_rev = vrev64q_f32(xx1);
const float32x4_t yy4_rev = vrev64q_f32(yy4);
vst1_f32(&a[j + 0], vget_low_f32(xx0));
vst1_f32(&a[j + 32], vget_high_f32(xx0));
vst1_f32(&a[j + 16], vget_low_f32(xx1));
vst1_f32(&a[j + 48], vget_high_f32(xx1_rev));
a[j + 48] = -a[j + 48];
vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
vst1_f32(&a[j + 40], vget_low_f32(yy4));
vst1_f32(&a[j + 56], vget_high_f32(yy4_rev));
}
{
const int k = 64;
const int k1 = 2;
const int k2 = 2 * k1;
const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
for (j = k; j < l + k; j += 2) {
const float32x2_t a_00 = vld1_f32(&a[j + 0]);
const float32x2_t a_08 = vld1_f32(&a[j + 8]);
const float32x2_t a_32 = vld1_f32(&a[j + 32]);
const float32x2_t a_40 = vld1_f32(&a[j + 40]);
const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
const float32x2_t a_16 = vld1_f32(&a[j + 16]);
const float32x2_t a_24 = vld1_f32(&a[j + 24]);
const float32x2_t a_48 = vld1_f32(&a[j + 48]);
const float32x2_t a_56 = vld1_f32(&a[j + 56]);
const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
const float32x4_t x1_x3_add =
vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x4_t x1_x3_sub =
vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1));
xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add));
xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub));
vst1_f32(&a[j + 0], vget_low_f32(xx));
vst1_f32(&a[j + 32], vget_high_f32(xx));
vst1_f32(&a[j + 16], vget_low_f32(xx4));
vst1_f32(&a[j + 48], vget_high_f32(xx4));
vst1_f32(&a[j + 8], vget_low_f32(xx12));
vst1_f32(&a[j + 40], vget_high_f32(xx12));
vst1_f32(&a[j + 24], vget_low_f32(xx22));
vst1_f32(&a[j + 56], vget_high_f32(xx22));
}
}
}
__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
// A B C D -> C D A B
const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in));
// C D A B -> D C B A
return vrev64q_f32(rev);
}
static void rftfsub_128_neon(float* a) {
const float* c = rdft_w + 32;
int j1, j2;
const float32x4_t mm_half = vdupq_n_f32(0.5f);
// Vectorized code (four at once).
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
// Load and shuffle 'a'.
// 2, 4, 6, 8, 3, 5, 7, 9
float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
// 120, 122, 124, 126, 121, 123, 125, 127,
const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
// 126, 124, 122, 120
const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
// 127, 125, 123, 121
const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
// Calculate 'x'.
const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
// 2-126, 4-124, 6-122, 8-120,
const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
// 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr - wki * xi;
// yi = wkr * xi + wki * xr;
const float32x4_t a_ = vmulq_f32(wkr_, xr_);
const float32x4_t b_ = vmulq_f32(wki_, xi_);
const float32x4_t c_ = vmulq_f32(wkr_, xi_);
const float32x4_t d_ = vmulq_f32(wki_, xr_);
const float32x4_t yr_ = vsubq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const float32x4_t yi_ = vaddq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
// a[j2 + 0] -= yr;
// a[j2 + 1] -= yi;
// a[k2 + 0] += yr;
// a[k2 + 1] -= yi;
// 126, 124, 122, 120,
const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
// 127, 125, 123, 121,
const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
// Shuffle in right order and store.
const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
// 124, 125, 126, 127, 120, 121, 122, 123
const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
// 2, 4, 6, 8,
a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
// 3, 5, 7, 9,
a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
// 2, 3, 4, 5, 6, 7, 8, 9,
vst2q_f32(&a[0 + j2], a_j2_p);
vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
const int k2 = 128 - j2;
const int k1 = 32 - j1;
const float wkr = 0.5f - c[k1];
const float wki = c[j1];
const float xr = a[j2 + 0] - a[k2 + 0];
const float xi = a[j2 + 1] + a[k2 + 1];
const float yr = wkr * xr - wki * xi;
const float yi = wkr * xi + wki * xr;
a[j2 + 0] -= yr;
a[j2 + 1] -= yi;
a[k2 + 0] += yr;
a[k2 + 1] -= yi;
}
}
static void rftbsub_128_neon(float* a) {
const float* c = rdft_w + 32;
int j1, j2;
const float32x4_t mm_half = vdupq_n_f32(0.5f);
a[1] = -a[1];
// Vectorized code (four at once).
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
// Load and shuffle 'a'.
// 2, 4, 6, 8, 3, 5, 7, 9
float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
// 120, 122, 124, 126, 121, 123, 125, 127,
const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
// 126, 124, 122, 120
const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
// 127, 125, 123, 121
const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
// Calculate 'x'.
const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
// 2-126, 4-124, 6-122, 8-120,
const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
// 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr - wki * xi;
// yi = wkr * xi + wki * xr;
const float32x4_t a_ = vmulq_f32(wkr_, xr_);
const float32x4_t b_ = vmulq_f32(wki_, xi_);
const float32x4_t c_ = vmulq_f32(wkr_, xi_);
const float32x4_t d_ = vmulq_f32(wki_, xr_);
const float32x4_t yr_ = vaddq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const float32x4_t yi_ = vsubq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
    // a[j2 + 0] = a[j2 + 0] - yr;
    // a[j2 + 1] = yi - a[j2 + 1];
    // a[k2 + 0] = yr + a[k2 + 0];
    // a[k2 + 1] = yi - a[k2 + 1];
// 126, 124, 122, 120,
const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
// 127, 125, 123, 121,
const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
// Shuffle in right order and store.
// 2, 3, 4, 5, 6, 7, 8, 9,
const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
// 124, 125, 126, 127, 120, 121, 122, 123
const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
// 2, 4, 6, 8,
a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
// 3, 5, 7, 9,
a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
// 2, 3, 4, 5, 6, 7, 8, 9,
vst2q_f32(&a[0 + j2], a_j2_p);
vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
const int k2 = 128 - j2;
const int k1 = 32 - j1;
const float wkr = 0.5f - c[k1];
const float wki = c[j1];
const float xr = a[j2 + 0] - a[k2 + 0];
const float xi = a[j2 + 1] + a[k2 + 1];
const float yr = wkr * xr + wki * xi;
const float yi = wkr * xi - wki * xr;
a[j2 + 0] = a[j2 + 0] - yr;
a[j2 + 1] = yi - a[j2 + 1];
a[k2 + 0] = yr + a[k2 + 0];
a[k2 + 1] = yi - a[k2 + 1];
}
a[65] = -a[65];
}
void aec_rdft_init_neon(void) {
cft1st_128 = cft1st_128_neon;
cftmdl_128 = cftmdl_128_neon;
rftfsub_128 = rftfsub_128_neon;
rftbsub_128 = rftbsub_128_neon;
}


@ -8,172 +8,168 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "typedefs.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#if defined(WEBRTC_USE_SSE2)
#include <emmintrin.h>
#include "aec_rdft.h"
static const ALIGN16_BEG float ALIGN16_END
k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
static const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] =
{-1.f, 1.f, -1.f, 1.f};
static void cft1st_128_SSE2(float *a) {
static void cft1st_128_SSE2(float* a) {
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j, k2;
for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
__m128 a00v = _mm_loadu_ps(&a[j + 0]);
__m128 a04v = _mm_loadu_ps(&a[j + 4]);
__m128 a08v = _mm_loadu_ps(&a[j + 8]);
__m128 a12v = _mm_loadu_ps(&a[j + 12]);
__m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
__m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
__m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));
__m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));
const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
__m128 x0v = _mm_add_ps(a01v, a23v);
const __m128 x1v = _mm_sub_ps(a01v, a23v);
const __m128 x2v = _mm_add_ps(a45v, a67v);
const __m128 x3v = _mm_sub_ps(a45v, a67v);
__m128 x0w;
a01v = _mm_add_ps(x0v, x2v);
x0v = _mm_sub_ps(x0v, x2v);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
{
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
      a45v = _mm_add_ps(a45_0v, a45_1v);
}
{
__m128 a23_0v, a23_1v;
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
x0v = _mm_add_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
a23_0v = _mm_mul_ps(wk1rv, x0v);
a23_1v = _mm_mul_ps(wk1iv, x0w);
a23v = _mm_add_ps(a23_0v, a23_1v);
x0v = _mm_sub_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
}
{
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
      a67v = _mm_add_ps(a67_0v, a67_1v);
}
a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));
a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
_mm_storeu_ps(&a[j + 0], a00v);
_mm_storeu_ps(&a[j + 4], a04v);
_mm_storeu_ps(&a[j + 8], a08v);
_mm_storeu_ps(&a[j + 12], a12v);
}
}
static void cftmdl_128_SSE2(float* a) {
const int l = 8;
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j0;
__m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
for (j0 = 0; j0 < l; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
_mm_castsi128_ps(a_32),
_MM_SHUFFLE(1, 0, 1, 0));
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
_mm_castsi128_ps(a_40),
_MM_SHUFFLE(1, 0, 1, 0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
_MM_SHUFFLE(1, 0, 1, 0));
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
_MM_SHUFFLE(1, 0, 1, 0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 yy0 =
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));
const __m128 yy1 =
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));
const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
const __m128 yy3 = _mm_add_ps(yy0, yy2);
const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
_mm_storel_epi64(
(__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
_mm_storel_epi64(
(__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));
a[j0 + 48] = -a[j0 + 48];
    _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
_mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
_mm_storel_epi64(
(__m128i*)&a[j0 + 56],
_mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));
}
{
int k = 64;
int k1 = 2;
int k2 = 2 * k1;
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
for (j0 = k; j0 < l + k; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
      const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
                                            _mm_castsi128_ps(a_32),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
                                            _mm_castsi128_ps(a_40),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
@ -182,100 +178,102 @@ static void cftmdl_128_SSE2(float *a) {
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
                                            _MM_SHUFFLE(1, 0, 1, 0));
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
                                            _MM_SHUFFLE(1, 0, 1, 0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
const __m128 xx3 =
_mm_mul_ps(wk2iv,
_mm_castsi128_ps(_mm_shuffle_epi32(
_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx4 = _mm_add_ps(xx2, xx3);
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
const __m128 xx11 = _mm_mul_ps(
wk1iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx12 = _mm_add_ps(xx10, xx11);
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
const __m128 xx21 = _mm_mul_ps(
wk3iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
                                             _MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx22 = _mm_add_ps(xx20, xx21);
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
_mm_storel_epi64(
(__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
_mm_storel_epi64(
(__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
_mm_storel_epi64(
(__m128i*)&a[j0 + 40],
_mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
_mm_storel_epi64(
(__m128i*)&a[j0 + 56],
_mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
}
}
}
static void rftfsub_128_SSE2(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
static const ALIGN16_BEG float ALIGN16_END
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 mm_half = _mm_load_ps(k_half);
// Vectorized code (four at once).
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
    const __m128 wkr_ =
        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
    const __m128 wki_ = c_j1;  // 1, 2, 3, 4,
// Load and shuffle 'a'.
const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
const __m128 a_j2_p0 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
const __m128 a_j2_p1 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
const __m128 a_k2_p0 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
const __m128 a_k2_p1 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
// Calculate 'x'.
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr - wki * xi;
// yi = wkr * xi + wki * xr;
@ -283,12 +281,12 @@ static void rftfsub_128_SSE2(float *a) {
const __m128 b_ = _mm_mul_ps(wki_, xi_);
const __m128 c_ = _mm_mul_ps(wkr_, xi_);
const __m128 d_ = _mm_mul_ps(wki_, xr_);
const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
// a[j2 + 0] -= yr;
// a[j2 + 1] -= yi;
// a[k2 + 0] += yr;
// a[k2 + 1] -= yi;
const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9,
@ -296,26 +294,26 @@ static void rftfsub_128_SSE2(float *a) {
const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121,
// Shuffle in right order and store.
const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
    // 2, 3, 4, 5,
const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
    // 6, 7, 8, 9,
const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
    // 122, 123, 120, 121,
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
    // 126, 127, 124, 125,
    const __m128 a_k2_0n = _mm_shuffle_ps(
        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
    const __m128 a_k2_4n = _mm_shuffle_ps(
        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
_mm_storeu_ps(&a[126 - j2], a_k2_4n);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -329,13 +327,13 @@ static void rftfsub_128_SSE2(float *a) {
}
}
static void rftbsub_128_SSE2(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
static const ALIGN16_BEG float ALIGN16_END
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 mm_half = _mm_load_ps(k_half);
a[1] = -a[1];
@ -343,30 +341,30 @@ static void rftbsub_128_SSE2(float *a) {
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       // 1, 2, 3, 4,
    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
    const __m128 wkr_ =
        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
    const __m128 wki_ = c_j1;  // 1, 2, 3, 4,
// Load and shuffle 'a'.
const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
const __m128 a_j2_p0 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
const __m128 a_j2_p1 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
const __m128 a_k2_p0 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
const __m128 a_k2_p1 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
// Calculate 'x'.
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr + wki * xi;
// yi = wkr * xi - wki * xr;
@ -374,12 +372,12 @@ static void rftbsub_128_SSE2(float *a) {
const __m128 b_ = _mm_mul_ps(wki_, xi_);
const __m128 c_ = _mm_mul_ps(wkr_, xi_);
const __m128 d_ = _mm_mul_ps(wki_, xr_);
const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
// a[j2 + 0] = a[j2 + 0] - yr;
// a[j2 + 1] = yi - a[j2 + 1];
// a[k2 + 0] = yr + a[k2 + 0];
// a[k2 + 1] = yi - a[k2 + 1];
const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9,
@ -387,26 +385,26 @@ static void rftbsub_128_SSE2(float *a) {
const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121,
// Shuffle in right order and store.
const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
    // 2, 3, 4, 5,
const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
    // 6, 7, 8, 9,
const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
    // 122, 123, 120, 121,
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
    // 126, 127, 124, 125,
    const __m128 a_k2_0n = _mm_shuffle_ps(
        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
    const __m128 a_k2_4n = _mm_shuffle_ps(
        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
_mm_storeu_ps(&a[126 - j2], a_k2_4n);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -427,5 +425,3 @@ void aec_rdft_init_sse2(void) {
rftfsub_128 = rftfsub_128_SSE2;
rftbsub_128 = rftbsub_128_SSE2;
}


@ -0,0 +1,209 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
* clock skew by resampling the farend signal.
*/
#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "webrtc/modules/audio_processing/aec/aec_core.h"
enum {
kEstimateLengthFrames = 400
};
typedef struct {
float buffer[kResamplerBufferSize];
float position;
int deviceSampleRateHz;
int skewData[kEstimateLengthFrames];
int skewDataIndex;
float skewEstimate;
} AecResampler;
static int EstimateSkew(const int* rawSkew,
                        int size,
                        int deviceSampleRateHz,
                        float* skewEst);
void* WebRtcAec_CreateResampler() {
return malloc(sizeof(AecResampler));
}
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
AecResampler* obj = (AecResampler*)resampInst;
memset(obj->buffer, 0, sizeof(obj->buffer));
obj->position = 0.0;
obj->deviceSampleRateHz = deviceSampleRateHz;
memset(obj->skewData, 0, sizeof(obj->skewData));
obj->skewDataIndex = 0;
obj->skewEstimate = 0.0;
return 0;
}
void WebRtcAec_FreeResampler(void* resampInst) {
AecResampler* obj = (AecResampler*)resampInst;
free(obj);
}
void WebRtcAec_ResampleLinear(void* resampInst,
const float* inspeech,
size_t size,
float skew,
float* outspeech,
size_t* size_out) {
AecResampler* obj = (AecResampler*)resampInst;
float* y;
float be, tnew;
size_t tn, mm;
assert(size <= 2 * FRAME_LEN);
assert(resampInst != NULL);
assert(inspeech != NULL);
assert(outspeech != NULL);
assert(size_out != NULL);
// Add new frame data in lookahead
memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
inspeech,
size * sizeof(inspeech[0]));
// Sample rate ratio
be = 1 + skew;
// Loop over input frame
mm = 0;
y = &obj->buffer[FRAME_LEN]; // Point at current frame
tnew = be * mm + obj->position;
tn = (size_t)tnew;
while (tn < size) {
// Interpolation
outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
mm++;
tnew = be * mm + obj->position;
    tn = (size_t)tnew;
}
*size_out = mm;
obj->position += (*size_out) * be - size;
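  // Carry the fractional read position over to the next frame: the buffer is
  // shifted by 'size' samples below, so the position is rebased by -size.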
// Shift buffer
memmove(obj->buffer,
&obj->buffer[size],
(kResamplerBufferSize - size) * sizeof(obj->buffer[0]));
}
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
AecResampler* obj = (AecResampler*)resampInst;
int err = 0;
if (obj->skewDataIndex < kEstimateLengthFrames) {
obj->skewData[obj->skewDataIndex] = rawSkew;
obj->skewDataIndex++;
} else if (obj->skewDataIndex == kEstimateLengthFrames) {
err = EstimateSkew(
obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst);
obj->skewEstimate = *skewEst;
obj->skewDataIndex++;
} else {
*skewEst = obj->skewEstimate;
}
return err;
}
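/* EstimateSkew() below fits a least-squares line to the cumulative sum of the
 * outlier-filtered raw skew samples; with x the accepted-sample index and y
 * the cumulative sum, the slope is
 *   skew = (sum(x*y) - xAvg * sum(y)) / (sum(x^2) - xAvg * sum(x)).
 * Samples beyond +/- absLimitOuter are ignored when computing the mean and
 * mean absolute deviation; the regression then keeps samples within the inner
 * limit or within roughly five absolute deviations of the mean.
 */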
int EstimateSkew(const int* rawSkew,
int size,
int deviceSampleRateHz,
float* skewEst) {
const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
int i = 0;
int n = 0;
float rawAvg = 0;
float err = 0;
float rawAbsDev = 0;
int upperLimit = 0;
int lowerLimit = 0;
float cumSum = 0;
float x = 0;
float x2 = 0;
float y = 0;
float xy = 0;
float xAvg = 0;
float denom = 0;
float skew = 0;
*skewEst = 0; // Set in case of error below.
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
n++;
rawAvg += rawSkew[i];
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
rawAvg /= n;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
err = rawSkew[i] - rawAvg;
rawAbsDev += err >= 0 ? err : -err;
}
}
assert(n > 0);
rawAbsDev /= n;
upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling.
lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor.
n = 0;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) ||
(rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) {
n++;
cumSum += rawSkew[i];
x += n;
x2 += n * n;
y += cumSum;
xy += n * cumSum;
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
xAvg = x / n;
denom = x2 - xAvg * x;
if (denom != 0) {
skew = (xy - xAvg * y) / denom;
}
*skewEst = skew;
return 0;
}


@ -0,0 +1,39 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
#include "webrtc/modules/audio_processing/aec/aec_core.h"
enum {
kResamplingDelay = 1
};
enum {
kResamplerBufferSize = FRAME_LEN * 4
};
// Unless otherwise specified, functions return 0 on success and -1 on error.
void* WebRtcAec_CreateResampler(); // Returns NULL on error.
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);
void WebRtcAec_FreeResampler(void* resampInst);
// Estimates skew from raw measurement.
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);
// Resamples input using linear interpolation.
void WebRtcAec_ResampleLinear(void* resampInst,
const float* inspeech,
size_t size,
float skew,
float* outspeech,
size_t* size_out);
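/* A minimal usage sketch (illustrative only; the device rate and skew value
 * are assumptions, not requirements of this header):
 *
 *   float out[2 * FRAME_LEN];
 *   size_t out_len = 0;
 *   void* rs = WebRtcAec_CreateResampler();
 *   WebRtcAec_InitResampler(rs, 48000);
 *   // Per frame, with 'in' holding FRAME_LEN samples and 'skew' obtained
 *   // from WebRtcAec_GetSkew():
 *   WebRtcAec_ResampleLinear(rs, in, FRAME_LEN, skew, out, &out_len);
 *   WebRtcAec_FreeResampler(rs);
 */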
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_

File diff suppressed because it is too large.


@ -0,0 +1,67 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/modules/audio_processing/aec/aec_core.h"
typedef struct {
int delayCtr;
int sampFreq;
int splitSampFreq;
int scSampFreq;
float sampFactor; // scSampRate / sampFreq
short skewMode;
int bufSizeStart;
int knownDelay;
int rate_factor;
short initFlag; // indicates if AEC has been initialized
// Variables used for averaging far end buffer size
short counter;
int sum;
short firstVal;
short checkBufSizeCtr;
// Variables used for delay shifts
short msInSndCardBuf;
short filtDelay; // Filtered delay estimate.
int timeForDelayChange;
int startup_phase;
int checkBuffSize;
short lastDelayDiff;
#ifdef WEBRTC_AEC_DEBUG_DUMP
FILE* bufFile;
FILE* delayFile;
FILE* skewFile;
#endif
// Structures
void* resampler;
int skewFrCtr;
int resample; // if the skew is small enough we don't resample
int highSkewCtr;
float skew;
RingBuffer* far_pre_buf; // Time domain far-end pre-buffer.
int lastError;
int farend_started;
AecCore* aec;
} Aec;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_


@ -0,0 +1,245 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
// Errors
#define AEC_UNSPECIFIED_ERROR 12000
#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
#define AEC_UNINITIALIZED_ERROR 12002
#define AEC_NULL_POINTER_ERROR 12003
#define AEC_BAD_PARAMETER_ERROR 12004
// Warnings
#define AEC_BAD_PARAMETER_WARNING 12050
enum {
kAecNlpConservative = 0,
kAecNlpModerate,
kAecNlpAggressive
};
enum {
kAecFalse = 0,
kAecTrue
};
typedef struct {
int16_t nlpMode; // default kAecNlpModerate
int16_t skewMode; // default kAecFalse
int16_t metricsMode; // default kAecFalse
int delay_logging; // default kAecFalse
// float realSkew;
} AecConfig;
typedef struct {
int instant;
int average;
int max;
int min;
} AecLevel;
typedef struct {
AecLevel rerl;
AecLevel erl;
AecLevel erle;
AecLevel aNlp;
} AecMetrics;
struct AecCore;
#ifdef __cplusplus
extern "C" {
#endif
/*
* Allocates the memory needed by the AEC. The memory needs to be initialized
* separately using the WebRtcAec_Init() function. Returns a pointer to the
* object or NULL on error.
*/
void* WebRtcAec_Create();
/*
* This function releases the memory allocated by WebRtcAec_Create().
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
*/
void WebRtcAec_Free(void* aecInst);
/*
* Initializes an AEC instance.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* int32_t sampFreq Sampling frequency of data
* int32_t scSampFreq Soundcard sampling frequency
*
* Outputs Description
* -------------------------------------------------------------------
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
/*
* Inserts an 80 or 160 sample block of data into the farend buffer.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* const float* farend In buffer containing one frame of
* farend signal for L band
 * size_t nrOfSamples Number of samples in farend buffer
*
* Outputs Description
* -------------------------------------------------------------------
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_BufferFarend(void* aecInst,
const float* farend,
size_t nrOfSamples);
/*
 * Runs the echo canceller on an 80 or 160 sample block of data.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* float* const* nearend In buffer containing one frame of
* nearend+echo signal for each band
 * size_t num_bands Number of bands in nearend buffer
 * size_t nrOfSamples Number of samples in nearend buffer
* int16_t msInSndCardBuf Delay estimate for sound card and
* system buffers
 * int32_t skew Difference between number of samples played
* and recorded at the soundcard (for clock skew
* compensation)
*
* Outputs Description
* -------------------------------------------------------------------
* float* const* out Out buffer, one frame of processed nearend
* for each band
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_Process(void* aecInst,
const float* const* nearend,
size_t num_bands,
float* const* out,
size_t nrOfSamples,
int16_t msInSndCardBuf,
int32_t skew);
/*
* This function enables the user to set certain parameters on-the-fly.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
* AecConfig config Config instance that contains all
* properties to be set
*
* Outputs Description
* -------------------------------------------------------------------
* int return 0: OK
* -1: error
*/
int WebRtcAec_set_config(void* handle, AecConfig config);
/*
* Gets the current echo status of the nearend signal.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* status 0: Almost certainly nearend single-talk
 * 1: Might not be nearend single-talk
* int return 0: OK
* -1: error
*/
int WebRtcAec_get_echo_status(void* handle, int* status);
/*
* Gets the current echo metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecMetrics* metrics Struct which will be filled out with the
* current echo metrics.
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
/*
* Gets the current delay metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* median Delay median value.
* int* std Delay standard deviation.
* float* fraction_poor_delays Fraction of the delay estimates that may
* cause the AEC to perform poorly.
*
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetDelayMetrics(void* handle,
int* median,
int* std,
float* fraction_poor_delays);
/*
* Gets the last error code.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
 * int32_t return 12000-12050: error code
*/
int32_t WebRtcAec_get_error_code(void* aecInst);
// Returns a pointer to the low level AEC handle.
//
// Input:
// - handle : Pointer to the AEC instance.
//
// Return value:
// - AecCore pointer : NULL for error.
//
struct AecCore* WebRtcAec_aec_core(void* handle);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
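Tying the calls above together, a minimal single-frame sketch; the 16 kHz rates, the 40 ms delay estimate, and the zero skew are placeholder values, and the header above is assumed to be included:

#include <stdint.h>

// Sketch under stated assumptions: 160 samples per 10 ms frame at 16 kHz,
// a single band, no clock-skew compensation.
static int AecProcessOneFrame(const float* farend,
                              const float* const* nearend,
                              float* const* out) {
  AecConfig config;
  void* aec = WebRtcAec_Create();
  if (aec == NULL)
    return -1;
  if (WebRtcAec_Init(aec, 16000, 16000) != 0)
    goto fail;
  config.nlpMode = kAecNlpModerate;
  config.skewMode = kAecFalse;
  config.metricsMode = kAecFalse;
  config.delay_logging = kAecFalse;
  if (WebRtcAec_set_config(aec, config) != 0)
    goto fail;
  if (WebRtcAec_BufferFarend(aec, farend, 160) != 0)
    goto fail;
  if (WebRtcAec_Process(aec, nearend, 1, out, 160,
                        /*msInSndCardBuf=*/40, /*skew=*/0) != 0)
    goto fail;
  WebRtcAec_Free(aec);
  return 0;
fail:
  {
    int32_t err = WebRtcAec_get_error_code(aec);
    WebRtcAec_Free(aec);
    return (int)err;
  }
}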

View File

@ -1,278 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_
#include "typedefs.h"
// Errors
#define AEC_UNSPECIFIED_ERROR 12000
#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
#define AEC_UNINITIALIZED_ERROR 12002
#define AEC_NULL_POINTER_ERROR 12003
#define AEC_BAD_PARAMETER_ERROR 12004
// Warnings
#define AEC_BAD_PARAMETER_WARNING 12050
enum {
kAecNlpConservative = 0,
kAecNlpModerate,
kAecNlpAggressive
};
enum {
kAecFalse = 0,
kAecTrue
};
typedef struct {
WebRtc_Word16 nlpMode; // default kAecNlpModerate
WebRtc_Word16 skewMode; // default kAecFalse
WebRtc_Word16 metricsMode; // default kAecFalse
int delay_logging; // default kAecFalse
//float realSkew;
} AecConfig;
typedef struct {
WebRtc_Word16 instant;
WebRtc_Word16 average;
WebRtc_Word16 max;
WebRtc_Word16 min;
} AecLevel;
typedef struct {
AecLevel rerl;
AecLevel erl;
AecLevel erle;
AecLevel aNlp;
} AecMetrics;
#ifdef __cplusplus
extern "C" {
#endif
/*
* Allocates the memory needed by the AEC. The memory needs to be initialized
* separately using the WebRtcAec_Init() function.
*
* Inputs Description
* -------------------------------------------------------------------
* void **aecInst Pointer to the AEC instance to be created
* and initialized
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Create(void **aecInst);
/*
* This function releases the memory allocated by WebRtcAec_Create().
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Free(void *aecInst);
/*
* Initializes an AEC instance.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* WebRtc_Word32 sampFreq Sampling frequency of data
* WebRtc_Word32 scSampFreq Soundcard sampling frequency
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Init(void *aecInst,
WebRtc_Word32 sampFreq,
WebRtc_Word32 scSampFreq);
/*
* Inserts an 80 or 160 sample block of data into the farend buffer.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* WebRtc_Word16 *farend In buffer containing one frame of
* farend signal for L band
* WebRtc_Word16 nrOfSamples Number of samples in farend buffer
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_BufferFarend(void *aecInst,
const WebRtc_Word16 *farend,
WebRtc_Word16 nrOfSamples);
/*
 * Runs the echo canceller on an 80 or 160 sample block of data.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* WebRtc_Word16 *nearend In buffer containing one frame of
* nearend+echo signal for L band
* WebRtc_Word16 *nearendH In buffer containing one frame of
* nearend+echo signal for H band
* WebRtc_Word16 nrOfSamples Number of samples in nearend buffer
* WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and
* system buffers
* WebRtc_Word16 skew Difference between number of samples played
* and recorded at the soundcard (for clock skew
* compensation)
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word16 *out Out buffer, one frame of processed nearend
* for L band
* WebRtc_Word16 *outH Out buffer, one frame of processed nearend
* for H band
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Process(void *aecInst,
const WebRtc_Word16 *nearend,
const WebRtc_Word16 *nearendH,
WebRtc_Word16 *out,
WebRtc_Word16 *outH,
WebRtc_Word16 nrOfSamples,
WebRtc_Word16 msInSndCardBuf,
WebRtc_Word32 skew);
/*
* This function enables the user to set certain parameters on-the-fly.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* AecConfig config Config instance that contains all
* properties to be set
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config);
/*
 * Gets the on-the-fly parameters.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecConfig *config Pointer to the config instance that
* all properties will be written to
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config);
/*
* Gets the current echo status of the nearend signal.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word16 *status 0: Almost certainly nearend single-talk
 * 1: Might not be nearend single-talk
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status);
/*
* Gets the current echo metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecMetrics *metrics Struct which will be filled out with the
* current echo metrics.
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics);
/*
* Gets the current delay metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* median Delay median value.
* int* std Delay standard deviation.
*
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std);
/*
* Gets the last error code.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
 * WebRtc_Word32 return 12000-12050: error code
*/
WebRtc_Word32 WebRtcAec_get_error_code(void *aecInst);
/*
* Gets a version string.
*
* Inputs Description
* -------------------------------------------------------------------
* char *versionStr Pointer to a string array
* WebRtc_Word16 len The maximum length of the string
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word8 *versionStr Pointer to a string array
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len);
#ifdef __cplusplus
}
#endif
#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ */
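For contrast with the replacement header earlier in this commit, a hedged sketch of the same create/free pair in both styles (the prototypes conflict, so the two fragments would live in separate translation units):

// Against this (removed) header: instance via out-parameter, status returns.
static void OldStyleCreateFree(void) {
  void* inst = NULL;
  if (WebRtcAec_Create(&inst) == 0 && inst != NULL)
    WebRtcAec_Free(inst);  // Returns WebRtc_Word32.
}

// Against the new include/echo_cancellation.h: instance returned directly.
static void NewStyleCreateFree(void) {
  void* inst = WebRtcAec_Create();  // Returns the instance, or NULL on error.
  if (inst != NULL)
    WebRtcAec_Free(inst);  // Returns void.
}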

View File

@ -1,233 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for clock
* skew by resampling the farend signal.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "resampler.h"
#include "aec_core.h"
enum { kFrameBufferSize = FRAME_LEN * 4 };
enum { kEstimateLengthFrames = 400 };
typedef struct {
short buffer[kFrameBufferSize];
float position;
int deviceSampleRateHz;
int skewData[kEstimateLengthFrames];
int skewDataIndex;
float skewEstimate;
} resampler_t;
static int EstimateSkew(const int* rawSkew,
int size,
int absLimit,
float *skewEst);
int WebRtcAec_CreateResampler(void **resampInst)
{
resampler_t *obj = malloc(sizeof(resampler_t));
*resampInst = obj;
if (obj == NULL) {
return -1;
}
return 0;
}
int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz)
{
resampler_t *obj = (resampler_t*) resampInst;
memset(obj->buffer, 0, sizeof(obj->buffer));
obj->position = 0.0;
obj->deviceSampleRateHz = deviceSampleRateHz;
memset(obj->skewData, 0, sizeof(obj->skewData));
obj->skewDataIndex = 0;
obj->skewEstimate = 0.0;
return 0;
}
int WebRtcAec_FreeResampler(void *resampInst)
{
resampler_t *obj = (resampler_t*) resampInst;
free(obj);
return 0;
}
int WebRtcAec_ResampleLinear(void *resampInst,
const short *inspeech,
int size,
float skew,
short *outspeech)
{
resampler_t *obj = (resampler_t*) resampInst;
short *y;
float be, tnew, interp;
int tn, outsize, mm;
if (size < 0 || size > 2 * FRAME_LEN) {
return -1;
}
// Add new frame data in lookahead
memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
inspeech,
size * sizeof(short));
// Sample rate ratio
be = 1 + skew;
// Loop over input frame
mm = 0;
y = &obj->buffer[FRAME_LEN]; // Point at current frame
tnew = be * mm + obj->position;
tn = (int) tnew;
while (tn < size) {
// Interpolation
interp = y[tn] + (tnew - tn) * (y[tn+1] - y[tn]);
if (interp > 32767) {
interp = 32767;
}
else if (interp < -32768) {
interp = -32768;
}
outspeech[mm] = (short) interp;
mm++;
tnew = be * mm + obj->position;
tn = (int) tnew;
}
outsize = mm;
obj->position += outsize * be - size;
// Shift buffer
memmove(obj->buffer,
&obj->buffer[size],
(kFrameBufferSize - size) * sizeof(short));
return outsize;
}
int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst)
{
resampler_t *obj = (resampler_t*)resampInst;
int err = 0;
if (obj->skewDataIndex < kEstimateLengthFrames) {
obj->skewData[obj->skewDataIndex] = rawSkew;
obj->skewDataIndex++;
}
else if (obj->skewDataIndex == kEstimateLengthFrames) {
err = EstimateSkew(obj->skewData,
kEstimateLengthFrames,
obj->deviceSampleRateHz,
skewEst);
obj->skewEstimate = *skewEst;
obj->skewDataIndex++;
}
else {
*skewEst = obj->skewEstimate;
}
return err;
}
int EstimateSkew(const int* rawSkew,
int size,
int deviceSampleRateHz,
float *skewEst)
{
const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
int i = 0;
int n = 0;
float rawAvg = 0;
float err = 0;
float rawAbsDev = 0;
int upperLimit = 0;
int lowerLimit = 0;
float cumSum = 0;
float x = 0;
float x2 = 0;
float y = 0;
float xy = 0;
float xAvg = 0;
float denom = 0;
float skew = 0;
*skewEst = 0; // Set in case of error below.
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
n++;
rawAvg += rawSkew[i];
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
rawAvg /= n;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
err = rawSkew[i] - rawAvg;
rawAbsDev += err >= 0 ? err : -err;
}
}
assert(n > 0);
rawAbsDev /= n;
upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling.
lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor.
n = 0;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) ||
(rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) {
n++;
cumSum += rawSkew[i];
x += n;
x2 += n*n;
y += cumSum;
xy += n * cumSum;
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
xAvg = x / n;
denom = x2 - xAvg*x;
if (denom != 0) {
skew = (xy - xAvg*y) / denom;
}
*skewEst = skew;
return 0;
}
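The interpolation loop above can be read in isolation: with ratio be = 1 + skew, output sample m is drawn at fractional input position tnew = be * m + position and linearly interpolated between its two input neighbours. A standalone sketch of the same idea (bounds-checked, so it needs no lookahead buffer):

// Illustration of the linear interpolation used by WebRtcAec_ResampleLinear();
// not part of the original file.
static int ResampleLinearSketch(const short* y, int size, float skew,
                                float position, short* out) {
  const float be = 1.0f + skew;  // Sample rate ratio.
  int m = 0;
  float tnew = position;
  int tn = (int)tnew;
  while (tn + 1 < size) {  // The original relies on lookahead; we stop early.
    float interp = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
    if (interp > 32767.0f) interp = 32767.0f;
    if (interp < -32768.0f) interp = -32768.0f;
    out[m++] = (short)interp;
    tnew = be * m + position;
    tn = (int)tnew;
  }
  return m;  // Number of output samples produced.
}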

View File

@ -1,32 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_
enum { kResamplingDelay = 1 };
// Unless otherwise specified, functions return 0 on success and -1 on error
int WebRtcAec_CreateResampler(void **resampInst);
int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz);
int WebRtcAec_FreeResampler(void *resampInst);
// Estimates skew from raw measurement.
int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst);
// Resamples input using linear interpolation.
// Returns size of resampled array.
int WebRtcAec_ResampleLinear(void *resampInst,
const short *inspeech,
int size,
float skew,
short *outspeech);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_

View File

@ -1,9 +0,0 @@
noinst_LTLIBRARIES = libaecm.la
libaecm_la_SOURCES = interface/echo_control_mobile.h \
echo_control_mobile.c \
aecm_core.c \
aecm_core.h
libaecm_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility

View File

@ -1,34 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'aecm',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'apm_util'
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/echo_control_mobile.h',
'echo_control_mobile.c',
'aecm_core.c',
'aecm_core.h',
],
},
],
}

File diff suppressed because it is too large

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,217 +8,144 @@
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs echo control (suppression) with fft routines in fixed-point
// Performs echo control (suppression) with fft routines in fixed-point.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
#define AECM_DYNAMIC_Q // turn on/off dynamic Q-domain
//#define AECM_WITH_ABS_APPROX
//#define AECM_SHORT // for 32 sample partition length (otherwise 64)
#include "typedefs.h"
#include "signal_processing_library.h"
// Algorithm parameters
#define FRAME_LEN 80 // Total frame length, 10 ms
#ifdef AECM_SHORT
#define PART_LEN 32 // Length of partition
#define PART_LEN_SHIFT 6 // Length of (PART_LEN * 2) in base 2
#else
#define PART_LEN 64 // Length of partition
#define PART_LEN_SHIFT 7 // Length of (PART_LEN * 2) in base 2
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
#include "webrtc/typedefs.h"
#ifdef _MSC_VER // visual c++
#define ALIGN8_BEG __declspec(align(8))
#define ALIGN8_END
#else // gcc or icc
#define ALIGN8_BEG
#define ALIGN8_END __attribute__((aligned(8)))
#endif
#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
#define PART_LEN2 (PART_LEN << 1) // Length of partition * 2
#define PART_LEN4 (PART_LEN << 2) // Length of partition * 4
#define FAR_BUF_LEN PART_LEN4 // Length of buffers
#define MAX_DELAY 100
// Counter parameters
#ifdef AECM_SHORT
#define CONV_LEN 1024 // Convergence length used at startup
#else
#define CONV_LEN 512 // Convergence length used at startup
#endif
#define CONV_LEN2 (CONV_LEN << 1) // Convergence length * 2 used at startup
// Energy parameters
#define MAX_BUF_LEN 64 // History length of energy signals
#define FAR_ENERGY_MIN 1025 // Lowest Far energy level: At least 2 in energy
#define FAR_ENERGY_DIFF 929 // Allowed difference between max and min
#define ENERGY_DEV_OFFSET 0 // The energy error offset in Q8
#define ENERGY_DEV_TOL 400 // The energy estimation tolerance in Q8
#define FAR_ENERGY_VAD_REGION 230 // Far VAD tolerance region
// Stepsize parameters
#define MU_MIN 10 // Min stepsize 2^-MU_MIN (far end energy dependent)
#define MU_MAX 1 // Max stepsize 2^-MU_MAX (far end energy dependent)
#define MU_DIFF 9 // MU_MIN - MU_MAX
// Channel parameters
#define MIN_MSE_COUNT 20 // Min number of consecutive blocks with enough far end
// energy to compare channel estimates
#define MIN_MSE_DIFF 29 // The ratio between adapted and stored channel to
// accept a new storage (0.8 in Q-MSE_RESOLUTION)
#define MSE_RESOLUTION 5 // MSE parameter resolution
#define RESOLUTION_CHANNEL16 12 // W16 Channel in Q-RESOLUTION_CHANNEL16
#define RESOLUTION_CHANNEL32 28 // W32 Channel in Q-RESOLUTION_CHANNEL
#define CHANNEL_VAD 16 // Minimum energy in frequency band to update channel
// Suppression gain parameters: SUPGAIN_ parameters in Q-(RESOLUTION_SUPGAIN)
#define RESOLUTION_SUPGAIN 8 // Channel in Q-(RESOLUTION_SUPGAIN)
#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) // Default suppression gain
#define SUPGAIN_ERROR_PARAM_A 3072 // Estimation error parameter (Maximum gain) (8 in Q8)
#define SUPGAIN_ERROR_PARAM_B 1536 // Estimation error parameter (Gain before going down)
#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT // Estimation error parameter
// (Should be the same as Default) (1 in Q8)
#define SUPGAIN_EPC_DT 200 // = SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL
// Defines for "check delay estimation"
#define CORR_WIDTH 31 // Number of samples to correlate over.
#define CORR_MAX 16 // Maximum correlation offset
#define CORR_MAX_BUF 63
#define CORR_DEV 4
#define CORR_MAX_LEVEL 20
#define CORR_MAX_LOW 4
#define CORR_BUF_LEN ((CORR_MAX << 1) + 1)  // Parenthesized for safe expansion.
// Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN
#define ONE_Q14 (1 << 14)
// NLP defines
#define NLP_COMP_LOW 3277 // 0.2 in Q14
#define NLP_COMP_HIGH ONE_Q14 // 1 in Q14
extern const WebRtc_Word16 WebRtcAecm_kSqrtHanning[];
typedef struct {
WebRtc_Word16 real;
WebRtc_Word16 imag;
} complex16_t;
int16_t real;
int16_t imag;
} ComplexInt16;
typedef struct
{
typedef struct {
int farBufWritePos;
int farBufReadPos;
int knownDelay;
int lastKnownDelay;
int firstVAD; // Parameter to control poorly initialized channels
int firstVAD; // Parameter to control poorly initialized channels
void *farFrameBuf;
void *nearNoisyFrameBuf;
void *nearCleanFrameBuf;
void *outFrameBuf;
RingBuffer* farFrameBuf;
RingBuffer* nearNoisyFrameBuf;
RingBuffer* nearCleanFrameBuf;
RingBuffer* outFrameBuf;
WebRtc_Word16 farBuf[FAR_BUF_LEN];
int16_t farBuf[FAR_BUF_LEN];
WebRtc_Word16 mult;
WebRtc_UWord32 seed;
int16_t mult;
uint32_t seed;
// Delay estimation variables
void* delay_estimator_farend;
void* delay_estimator;
WebRtc_UWord16 currentDelay;
uint16_t currentDelay;
// Far end history variables
// TODO(bjornv): Replace |far_history| with ring_buffer.
uint16_t far_history[PART_LEN1 * MAX_DELAY];
int far_history_pos;
int far_q_domains[MAX_DELAY];
WebRtc_Word16 nlpFlag;
WebRtc_Word16 fixedDelay;
int16_t nlpFlag;
int16_t fixedDelay;
WebRtc_UWord32 totCount;
uint32_t totCount;
WebRtc_Word16 dfaCleanQDomain;
WebRtc_Word16 dfaCleanQDomainOld;
WebRtc_Word16 dfaNoisyQDomain;
WebRtc_Word16 dfaNoisyQDomainOld;
int16_t dfaCleanQDomain;
int16_t dfaCleanQDomainOld;
int16_t dfaNoisyQDomain;
int16_t dfaNoisyQDomainOld;
WebRtc_Word16 nearLogEnergy[MAX_BUF_LEN];
WebRtc_Word16 farLogEnergy;
WebRtc_Word16 echoAdaptLogEnergy[MAX_BUF_LEN];
WebRtc_Word16 echoStoredLogEnergy[MAX_BUF_LEN];
int16_t nearLogEnergy[MAX_BUF_LEN];
int16_t farLogEnergy;
int16_t echoAdaptLogEnergy[MAX_BUF_LEN];
int16_t echoStoredLogEnergy[MAX_BUF_LEN];
// The extra 16 or 32 bytes in the following buffers are for alignment based Neon code.
// It's designed this way since the current GCC compiler can't align a buffer in 16 or 32
// byte boundaries properly.
WebRtc_Word16 channelStored_buf[PART_LEN1 + 8];
WebRtc_Word16 channelAdapt16_buf[PART_LEN1 + 8];
WebRtc_Word32 channelAdapt32_buf[PART_LEN1 + 8];
WebRtc_Word16 xBuf_buf[PART_LEN2 + 16]; // farend
WebRtc_Word16 dBufClean_buf[PART_LEN2 + 16]; // nearend
WebRtc_Word16 dBufNoisy_buf[PART_LEN2 + 16]; // nearend
WebRtc_Word16 outBuf_buf[PART_LEN + 8];
// The extra 16 or 32 bytes in the following buffers are for alignment based
// Neon code.
// It's designed this way since the current GCC compiler can't align a
// buffer in 16 or 32 byte boundaries properly.
int16_t channelStored_buf[PART_LEN1 + 8];
int16_t channelAdapt16_buf[PART_LEN1 + 8];
int32_t channelAdapt32_buf[PART_LEN1 + 8];
int16_t xBuf_buf[PART_LEN2 + 16]; // farend
int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend
int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend
int16_t outBuf_buf[PART_LEN + 8];
// Pointers to the above buffers
WebRtc_Word16 *channelStored;
WebRtc_Word16 *channelAdapt16;
WebRtc_Word32 *channelAdapt32;
WebRtc_Word16 *xBuf;
WebRtc_Word16 *dBufClean;
WebRtc_Word16 *dBufNoisy;
WebRtc_Word16 *outBuf;
int16_t *channelStored;
int16_t *channelAdapt16;
int32_t *channelAdapt32;
int16_t *xBuf;
int16_t *dBufClean;
int16_t *dBufNoisy;
int16_t *outBuf;
WebRtc_Word32 echoFilt[PART_LEN1];
WebRtc_Word16 nearFilt[PART_LEN1];
WebRtc_Word32 noiseEst[PART_LEN1];
int32_t echoFilt[PART_LEN1];
int16_t nearFilt[PART_LEN1];
int32_t noiseEst[PART_LEN1];
int noiseEstTooLowCtr[PART_LEN1];
int noiseEstTooHighCtr[PART_LEN1];
WebRtc_Word16 noiseEstCtr;
WebRtc_Word16 cngMode;
int16_t noiseEstCtr;
int16_t cngMode;
WebRtc_Word32 mseAdaptOld;
WebRtc_Word32 mseStoredOld;
WebRtc_Word32 mseThreshold;
int32_t mseAdaptOld;
int32_t mseStoredOld;
int32_t mseThreshold;
WebRtc_Word16 farEnergyMin;
WebRtc_Word16 farEnergyMax;
WebRtc_Word16 farEnergyMaxMin;
WebRtc_Word16 farEnergyVAD;
WebRtc_Word16 farEnergyMSE;
int16_t farEnergyMin;
int16_t farEnergyMax;
int16_t farEnergyMaxMin;
int16_t farEnergyVAD;
int16_t farEnergyMSE;
int currentVADValue;
WebRtc_Word16 vadUpdateCount;
int16_t vadUpdateCount;
WebRtc_Word16 startupState;
WebRtc_Word16 mseChannelCount;
WebRtc_Word16 supGain;
WebRtc_Word16 supGainOld;
int16_t startupState;
int16_t mseChannelCount;
int16_t supGain;
int16_t supGainOld;
WebRtc_Word16 supGainErrParamA;
WebRtc_Word16 supGainErrParamD;
WebRtc_Word16 supGainErrParamDiffAB;
WebRtc_Word16 supGainErrParamDiffBD;
int16_t supGainErrParamA;
int16_t supGainErrParamD;
int16_t supGainErrParamDiffAB;
int16_t supGainErrParamDiffBD;
struct RealFFT* real_fft;
#ifdef AEC_DEBUG
FILE *farFile;
FILE *nearFile;
FILE *outFile;
#endif
} AecmCore_t;
} AecmCore;
///////////////////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CreateCore(...)
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CreateCore()
//
// Allocates the memory needed by the AECM. The memory needs to be
// initialized separately using the WebRtcAecm_InitCore() function.
//
// Input:
// - aecm : Instance that should be created
//
// Output:
// - aecm : Created instance
//
// Return value : 0 - Ok
// -1 - Error
//
int WebRtcAecm_CreateCore(AecmCore_t **aecm);
// Returns a pointer to the instance, or NULL on failure.
AecmCore* WebRtcAecm_CreateCore();
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_InitCore(...)
//
// This function initializes the AECM instance created with WebRtcAecm_CreateCore(...)
// This function initializes the AECM instance created with
// WebRtcAecm_CreateCore()
// Input:
// - aecm : Pointer to the AECM instance
// - samplingFreq : Sampling Frequency
@ -229,57 +156,58 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecm);
// Return value : 0 - Ok
// -1 - Error
//
int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq);
int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_FreeCore(...)
//
// This function releases the memory allocated by WebRtcAecm_CreateCore()
// Input:
// - aecm : Pointer to the AECM instance
//
// Return value : 0 - Ok
// -1 - Error
// 11001-11016: Error
//
int WebRtcAecm_FreeCore(AecmCore_t *aecm);
void WebRtcAecm_FreeCore(AecmCore* aecm);
int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag);
int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_InitEchoPathCore(...)
//
// This function resets the echo channel adaptation with the specified channel.
// Input:
// - aecm : Pointer to the AECM instance
// - echo_path : Pointer to the data that should initialize the echo path
// - echo_path : Pointer to the data that should initialize the echo
// path
//
// Output:
// - aecm : Initialized instance
//
void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const WebRtc_Word16* echo_path);
void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_ProcessFrame(...)
//
// This function processes frames and sends blocks to WebRtcAecm_ProcessBlock(...)
// This function processes frames and sends blocks to
// WebRtcAecm_ProcessBlock(...)
//
// Inputs:
// - aecm : Pointer to the AECM instance
// - farend : In buffer containing one frame of echo signal
// - nearendNoisy : In buffer containing one frame of nearend+echo signal without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal with NS
// - nearendNoisy : In buffer containing one frame of nearend+echo signal
// without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal
// with NS
//
// Output:
// - out : Out buffer, one frame of nearend signal :
//
//
int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, const WebRtc_Word16 * farend,
const WebRtc_Word16 * nearendNoisy,
const WebRtc_Word16 * nearendClean,
WebRtc_Word16 * out);
int WebRtcAecm_ProcessFrame(AecmCore* aecm,
const int16_t* farend,
const int16_t* nearendNoisy,
const int16_t* nearendClean,
int16_t* out);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_ProcessBlock(...)
//
// This function is called for every block within one frame
@ -288,19 +216,22 @@ int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, const WebRtc_Word16 * farend,
// Inputs:
// - aecm : Pointer to the AECM instance
// - farend : In buffer containing one block of echo signal
// - nearendNoisy : In buffer containing one frame of nearend+echo signal without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal with NS
// - nearendNoisy : In buffer containing one frame of nearend+echo signal
// without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal
// with NS
//
// Output:
// - out : Out buffer, one block of nearend signal :
//
//
int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, const WebRtc_Word16 * farend,
const WebRtc_Word16 * nearendNoisy,
const WebRtc_Word16 * noisyClean,
WebRtc_Word16 * out);
int WebRtcAecm_ProcessBlock(AecmCore* aecm,
const int16_t* farend,
const int16_t* nearendNoisy,
const int16_t* noisyClean,
int16_t* out);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_BufferFarFrame()
//
// Inserts a frame of data into farend buffer.
@ -310,10 +241,11 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, const WebRtc_Word16 * farend,
// - farend : In buffer containing one frame of farend signal
// - farLen : Length of frame
//
void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend,
void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
const int16_t* const farend,
const int farLen);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_FetchFarFrame()
//
// Read the farend buffer to account for known delay
@ -324,35 +256,179 @@ void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * co
// - farLen : Length of frame
// - knownDelay : known delay
//
void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const farend,
const int farLen, const int knownDelay);
void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
int16_t* const farend,
const int farLen,
const int knownDelay);
///////////////////////////////////////////////////////////////////////////////////////////////
// Some internal functions shared by ARM NEON and generic C code:
// All the functions below are intended to be private
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_UpdateFarHistory()
//
// Moves the pointer to the next entry and inserts |far_spectrum| and
// corresponding Q-domain in its buffer.
//
// Inputs:
// - self : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum
// - far_q : Q-domain of far end spectrum
//
void WebRtcAecm_UpdateFarHistory(AecmCore* self,
uint16_t* far_spectrum,
int far_q);
void WebRtcAecm_CalcLinearEnergies(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echoEst,
WebRtc_UWord32* far_energy,
WebRtc_UWord32* echo_energy_adapt,
WebRtc_UWord32* echo_energy_stored);
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_AlignedFarend()
//
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
// called before AlignedFarend(...). Otherwise, you get the pointer to the
// previous frame. The memory is only valid until the next call of
// WebRtc_DelayEstimatorProcessFix(...).
//
// Inputs:
// - self : Pointer to the AECM instance.
// - delay : Current delay estimate.
//
// Output:
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay);
void WebRtcAecm_StoreAdaptiveChannel(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echo_est);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CalcSuppressionGain()
//
// This function calculates the suppression gain that is used in the
// Wiener filter.
//
// Inputs:
// - aecm : Pointer to the AECM instance.
//
// Return value:
// - supGain : Suppression gain with which to scale the noise
// level (Q14).
//
int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm);
void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t *aecm);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CalcEnergies()
//
// This function calculates the log of energies for nearend, farend and
// estimated echoes. There is also an update of energy decision levels,
// i.e. internal VAD.
//
// Inputs:
// - aecm : Pointer to the AECM instance.
// - far_spectrum : Pointer to farend spectrum.
// - far_q : Q-domain of farend spectrum.
// - nearEner : Near end energy for current block in
// Q(aecm->dfaQDomain).
//
// Output:
// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16).
//
void WebRtcAecm_CalcEnergies(AecmCore* aecm,
const uint16_t* far_spectrum,
const int16_t far_q,
const uint32_t nearEner,
int32_t* echoEst);
void WebRtcAecm_WindowAndFFT(WebRtc_Word16* fft,
const WebRtc_Word16* time_signal,
complex16_t* freq_signal,
int time_signal_scaling);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CalcStepSize()
//
// This function calculates the step size used in channel estimation
//
// Inputs:
// - aecm : Pointer to the AECM instance.
//
// Return value:
// - mu : Stepsize in log2(), i.e. number of shifts.
//
int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm);
void WebRtcAecm_InverseFFTAndWindow(AecmCore_t* aecm,
WebRtc_Word16* fft,
complex16_t* efw,
WebRtc_Word16* output,
const WebRtc_Word16* nearendClean);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_UpdateChannel(...)
//
// This function performs channel estimation.
// NLMS and decision on channel storage.
//
// Inputs:
// - aecm : Pointer to the AECM instance.
// - far_spectrum : Absolute value of the farend signal in Q(far_q)
// - far_q : Q-domain of the farend signal
// - dfa : Absolute value of the nearend signal
// (Q[aecm->dfaQDomain])
// - mu : NLMS step size.
// Input/Output:
// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
//
void WebRtcAecm_UpdateChannel(AecmCore* aecm,
const uint16_t* far_spectrum,
const int16_t far_q,
const uint16_t* const dfa,
const int16_t mu,
int32_t* echoEst);
extern const int16_t WebRtcAecm_kCosTable[];
extern const int16_t WebRtcAecm_kSinTable[];
///////////////////////////////////////////////////////////////////////////////
// Some function pointers, for internal functions shared by ARM NEON and
// generic C code.
//
typedef void (*CalcLinearEnergies)(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echoEst,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored);
extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
typedef void (*StoreAdaptiveChannel)(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est);
extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
typedef void (*ResetAdaptiveChannel)(AecmCore* aecm);
extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file aecm_core.c, while those for ARM Neon platforms
// are declared below and defined in file aecm_core_neon.c.
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored);
void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est);
void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm);
#endif
#if defined(MIPS32_LE)
void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored);
#if defined(MIPS_DSP_R1_LE)
void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est);
void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm);
#endif
#endif
#endif
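The function pointers above are resolved once at start-up so the hot path can call an optimized implementation directly. A hedged sketch of that dispatch (WebRtcAecm_CalcLinearEnergiesC is a hypothetical name; per the note above, the generic versions are defined as static inside aecm_core.c):

// Sketch only; the real selection happens during AECM initialization.
static void AecmDispatchSketch(void) {
  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesC;  // Hypothetical.
#if defined(WEBRTC_HAS_NEON)
  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
#endif
}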

View File

@ -0,0 +1,771 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
#include "webrtc/system_wrappers/interface/compile_assert_c.h"
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/typedefs.h"
// Square root of Hanning window in Q14.
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
// Table is defined in an ARM assembly file.
extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
#else
static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
};
#endif
#ifdef AECM_WITH_ABS_APPROX
// Q15 alpha = 0.99439986968132 (factor for magnitude approximation).
static const uint16_t kAlpha1 = 32584;
// Q15 beta = 0.12967166976970 (factor for magnitude approximation).
static const uint16_t kBeta1 = 4249;
// Q15 alpha = 0.94234827210087 (factor for magnitude approximation).
static const uint16_t kAlpha2 = 30879;
// Q15 beta = 0.33787806009150 (factor for magnitude approximation).
static const uint16_t kBeta2 = 11072;
// Q15 alpha = 0.82247698684306 (factor for magnitude approximation).
static const uint16_t kAlpha3 = 26951;
// Q15 beta = 0.57762063060713 (factor for magnitude approximation).
static const uint16_t kBeta3 = 18927;
#endif
static const int16_t kNoiseEstQDomain = 15;
static const int16_t kNoiseEstIncCount = 5;
static void ComfortNoise(AecmCore* aecm,
const uint16_t* dfa,
ComplexInt16* out,
const int16_t* lambda);
static void WindowAndFFT(AecmCore* aecm,
int16_t* fft,
const int16_t* time_signal,
ComplexInt16* freq_signal,
int time_signal_scaling) {
int i = 0;
// FFT of signal
for (i = 0; i < PART_LEN; i++) {
// Window time domain signal and insert into real part of
// transformation array |fft|
int16_t scaled_time_signal = time_signal[i] << time_signal_scaling;
fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling;
fft[PART_LEN + i] = (int16_t)((
scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
}
// Do forward FFT, then take only the first PART_LEN complex samples,
// and change signs of the imaginary parts.
WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
for (i = 0; i < PART_LEN; i++) {
freq_signal[i].imag = -freq_signal[i].imag;
}
}
static void InverseFFTAndWindow(AecmCore* aecm,
int16_t* fft,
ComplexInt16* efw,
int16_t* output,
const int16_t* nearendClean) {
int i, j, outCFFT;
int32_t tmp32no1;
// Reuse |efw| for the inverse FFT output after transferring
// the contents to |fft|.
int16_t* ifft_out = (int16_t*)efw;
// Synthesis
for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
fft[j] = efw[i].real;
fft[j + 1] = -efw[i].imag;
}
fft[0] = efw[0].real;
fft[1] = -efw[0].imag;
fft[PART_LEN2] = efw[PART_LEN].real;
fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
// Inverse FFT. Keep outCFFT to scale the samples in the next block.
outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
for (i = 0; i < PART_LEN; i++) {
ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
outCFFT - aecm->dfaCleanQDomain);
output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp32no1 + aecm->outBuf[i],
WEBRTC_SPL_WORD16_MIN);
tmp32no1 = (ifft_out[PART_LEN + i] *
WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14;
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
outCFFT - aecm->dfaCleanQDomain);
aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp32no1,
WEBRTC_SPL_WORD16_MIN);
}
// Copy the current block to the old position
// (aecm->outBuf is shifted elsewhere)
memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
memcpy(aecm->dBufNoisy,
aecm->dBufNoisy + PART_LEN,
sizeof(int16_t) * PART_LEN);
if (nearendClean != NULL)
{
memcpy(aecm->dBufClean,
aecm->dBufClean + PART_LEN,
sizeof(int16_t) * PART_LEN);
}
}
// Transforms a time domain signal into the frequency domain, outputting the
// complex valued signal, absolute value and sum of absolute values.
//
// time_signal [in] Pointer to time domain signal
// freq_signal_real [out] Pointer to real part of frequency domain array
// freq_signal_imag [out] Pointer to imaginary part of frequency domain
// array
// freq_signal_abs [out] Pointer to absolute value of frequency domain
// array
// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in
// the frequency domain array
// return value The Q-domain of current frequency values
//
static int TimeToFrequencyDomain(AecmCore* aecm,
const int16_t* time_signal,
ComplexInt16* freq_signal,
uint16_t* freq_signal_abs,
uint32_t* freq_signal_sum_abs) {
int i = 0;
int time_signal_scaling = 0;
int32_t tmp32no1 = 0;
int32_t tmp32no2 = 0;
// In fft_buf, +16 for 32-byte alignment.
int16_t fft_buf[PART_LEN4 + 16];
int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
int16_t tmp16no1;
#ifndef WEBRTC_ARCH_ARM_V7
int16_t tmp16no2;
#endif
#ifdef AECM_WITH_ABS_APPROX
int16_t max_value = 0;
int16_t min_value = 0;
uint16_t alpha = 0;
uint16_t beta = 0;
#endif
#ifdef AECM_DYNAMIC_Q
tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
#endif
WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
// Extract imaginary and real part, calculate the magnitude for
// all frequency bins
freq_signal[0].imag = 0;
freq_signal[PART_LEN].imag = 0;
freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
freq_signal[PART_LEN].real);
(*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
(uint32_t)(freq_signal_abs[PART_LEN]);
for (i = 1; i < PART_LEN; i++)
{
if (freq_signal[i].real == 0)
{
freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
}
else if (freq_signal[i].imag == 0)
{
freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
}
else
{
// Approximation for magnitude of complex fft output
// magn = sqrt(real^2 + imag^2)
// magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
//
// The parameters alpha and beta are stored in Q15
#ifdef AECM_WITH_ABS_APPROX
tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
if(tmp16no1 > tmp16no2)
{
max_value = tmp16no1;
min_value = tmp16no2;
} else
{
max_value = tmp16no2;
min_value = tmp16no1;
}
// Magnitude in Q(-6)
if ((max_value >> 2) > min_value)
{
alpha = kAlpha1;
beta = kBeta1;
} else if ((max_value >> 1) > min_value)
{
alpha = kAlpha2;
beta = kBeta2;
} else
{
alpha = kAlpha3;
beta = kBeta3;
}
tmp16no1 = (int16_t)((max_value * alpha) >> 15);
tmp16no2 = (int16_t)((min_value * beta) >> 15);
freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
#else
#ifdef WEBRTC_ARCH_ARM_V7
__asm __volatile(
"smulbb %[tmp32no1], %[real], %[real]\n\t"
"smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
:[tmp32no1]"+&r"(tmp32no1),
[tmp32no2]"=r"(tmp32no2)
:[real]"r"(freq_signal[i].real),
[imag]"r"(freq_signal[i].imag)
);
#else
tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
tmp32no1 = tmp16no1 * tmp16no1;
tmp32no2 = tmp16no2 * tmp16no2;
tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
#endif // WEBRTC_ARCH_ARM_V7
tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
freq_signal_abs[i] = (uint16_t)tmp32no1;
#endif // AECM_WITH_ABS_APPROX
}
(*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
}
return time_signal_scaling;
}
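// Floating-point reference for the alpha * max + beta * min magnitude
// approximation above; illustration only, never called by the AECM code.
// The constants mirror kAlpha1..3 / kBeta1..3 divided by 2^15.
static float ApproxMagnitude(float real, float imag) {
  float a = real < 0 ? -real : real;
  float b = imag < 0 ? -imag : imag;
  float max_v = a > b ? a : b;
  float min_v = a > b ? b : a;
  if (max_v > 4 * min_v)
    return 0.99440f * max_v + 0.12967f * min_v;
  if (max_v > 2 * min_v)
    return 0.94235f * max_v + 0.33788f * min_v;
  return 0.82248f * max_v + 0.57762f * min_v;
}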
int WebRtcAecm_ProcessBlock(AecmCore* aecm,
const int16_t* farend,
const int16_t* nearendNoisy,
const int16_t* nearendClean,
int16_t* output) {
int i;
uint32_t xfaSum;
uint32_t dfaNoisySum;
uint32_t dfaCleanSum;
uint32_t echoEst32Gained;
uint32_t tmpU32;
int32_t tmp32no1;
uint16_t xfa[PART_LEN1];
uint16_t dfaNoisy[PART_LEN1];
uint16_t dfaClean[PART_LEN1];
uint16_t* ptrDfaClean = dfaClean;
const uint16_t* far_spectrum_ptr = NULL;
// 32 byte aligned buffers (with +8 or +16).
// TODO(kma): define fft with ComplexInt16.
int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
int32_t echoEst32_buf[PART_LEN1 + 8];
int32_t dfw_buf[PART_LEN2 + 8];
int32_t efw_buf[PART_LEN2 + 8];
int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);
int16_t hnl[PART_LEN1];
int16_t numPosCoef = 0;
int16_t nlpGain = ONE_Q14;
int delay;
int16_t tmp16no1;
int16_t tmp16no2;
int16_t mu;
int16_t supGain;
int16_t zeros32, zeros16;
int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
int far_q;
int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
const int kMinPrefBand = 4;
const int kMaxPrefBand = 24;
int32_t avgHnl32 = 0;
// Determine startup state. There are three states:
// (0) the first CONV_LEN blocks
// (1) another CONV_LEN blocks
// (2) the rest
if (aecm->startupState < 2)
{
aecm->startupState = (aecm->totCount >= CONV_LEN) +
(aecm->totCount >= CONV_LEN2);
}
// END: Determine startup state
// Buffer near and far end signals
memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
if (nearendClean != NULL)
{
memcpy(aecm->dBufClean + PART_LEN,
nearendClean,
sizeof(int16_t) * PART_LEN);
}
// Transform far end signal from time domain to frequency domain.
far_q = TimeToFrequencyDomain(aecm,
aecm->xBuf,
dfw,
xfa,
&xfaSum);
// Transform noisy near end signal from time domain to frequency domain.
zerosDBufNoisy = TimeToFrequencyDomain(aecm,
aecm->dBufNoisy,
dfw,
dfaNoisy,
&dfaNoisySum);
aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
if (nearendClean == NULL)
{
ptrDfaClean = dfaNoisy;
aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
dfaCleanSum = dfaNoisySum;
} else
{
// Transform clean near end signal from time domain to frequency domain.
zerosDBufClean = TimeToFrequencyDomain(aecm,
aecm->dBufClean,
dfw,
dfaClean,
&dfaCleanSum);
aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
}
// Get the delay
// Save far-end history and estimate delay
WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
xfa,
PART_LEN1,
far_q) == -1) {
return -1;
}
delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
dfaNoisy,
PART_LEN1,
zerosDBufNoisy);
if (delay == -1)
{
return -1;
}
else if (delay == -2)
{
// If the delay is unknown, we assume zero.
// NOTE: this will have to be adjusted if we ever add lookahead.
delay = 0;
}
if (aecm->fixedDelay >= 0)
{
// Use fixed delay
delay = aecm->fixedDelay;
}
// Get aligned far end spectrum
far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
zerosXBuf = (int16_t) far_q;
if (far_spectrum_ptr == NULL)
{
return -1;
}
// Calculate log(energy) and update energy threshold levels
WebRtcAecm_CalcEnergies(aecm,
far_spectrum_ptr,
zerosXBuf,
dfaNoisySum,
echoEst32);
// Calculate stepsize
mu = WebRtcAecm_CalcStepSize(aecm);
// Update counters
aecm->totCount++;
// This is the channel estimation algorithm.
// It is based on NLMS but has a variable step length,
// which was calculated above.
WebRtcAecm_UpdateChannel(aecm,
far_spectrum_ptr,
zerosXBuf,
dfaNoisy,
mu,
echoEst32);
supGain = WebRtcAecm_CalcSuppressionGain(aecm);
// Calculate Wiener filter hnl[]
for (i = 0; i < PART_LEN1; i++)
{
// Far end signal through channel estimate in Q8
// How much can we shift right to preserve resolution
tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
zeros16 = WebRtcSpl_NormW16(supGain) + 1;
if (zeros32 + zeros16 > 16)
{
// Multiplication is safe
// Result in
// Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
// aecm->xfaQDomainBuf[diff])
echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
(uint16_t)supGain);
resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
} else
{
tmp16no1 = 17 - zeros32 - zeros16;
resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
RESOLUTION_SUPGAIN;
resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
if (zeros32 > tmp16no1)
{
echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
supGain >> tmp16no1);
} else
{
// Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
}
}
zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative.
dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
tmp16no1 = aecm->nearFilt[i] << zeros16;
qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
} else {
tmp16no1 = dfa_clean_q_domain_diff < 0
? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
: aecm->nearFilt[i] << dfa_clean_q_domain_diff;
qDomainDiff = 0;
tmp16no2 = ptrDfaClean[i];
}
tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
tmp16no2 = (int16_t)(tmp32no1 >> 4);
tmp16no2 += tmp16no1;
zeros16 = WebRtcSpl_NormW16(tmp16no2);
if ((tmp16no2) & (-qDomainDiff > zeros16)) {
aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
} else {
aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
: tmp16no2 >> qDomainDiff;
}
// Wiener filter coefficients, resulting hnl in Q14
if (echoEst32Gained == 0)
{
hnl[i] = ONE_Q14;
} else if (aecm->nearFilt[i] == 0)
{
hnl[i] = 0;
} else
{
// Multiply the suppression gain
// Rounding
echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
(uint16_t)aecm->nearFilt[i]);
// Current resolution is
// Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
// Make sure we are in Q14
tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
if (tmp32no1 > ONE_Q14)
{
hnl[i] = 0;
} else if (tmp32no1 < 0)
{
hnl[i] = ONE_Q14;
} else
{
// 1-echoEst/dfa
hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
if (hnl[i] < 0)
{
hnl[i] = 0;
}
}
}
if (hnl[i])
{
numPosCoef++;
}
}
// Only in wideband. Prevent the gain in upper band from being larger than
// in lower band.
if (aecm->mult == 2)
{
// TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
// speech distortion in double-talk.
for (i = 0; i < PART_LEN1; i++)
{
hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
}
for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
{
avgHnl32 += (int32_t)hnl[i];
}
assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
for (i = kMaxPrefBand; i < PART_LEN1; i++)
{
if (hnl[i] > (int16_t)avgHnl32)
{
hnl[i] = (int16_t)avgHnl32;
}
}
}
// Calculate NLP gain, result is in Q14
if (aecm->nlpFlag)
{
for (i = 0; i < PART_LEN1; i++)
{
// Truncate values close to zero and one.
if (hnl[i] > NLP_COMP_HIGH)
{
hnl[i] = ONE_Q14;
} else if (hnl[i] < NLP_COMP_LOW)
{
hnl[i] = 0;
}
// Remove outliers
if (numPosCoef < 3)
{
nlpGain = 0;
} else
{
nlpGain = ONE_Q14;
}
// NLP
if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
{
hnl[i] = ONE_Q14;
} else
{
hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
}
// multiply with Wiener coefficients
efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
hnl[i], 14));
efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
hnl[i], 14));
}
}
else
{
// multiply with Wiener coefficients
for (i = 0; i < PART_LEN1; i++)
{
efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
hnl[i], 14));
efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
hnl[i], 14));
}
}
if (aecm->cngMode == AecmTrue)
{
ComfortNoise(aecm, ptrDfaClean, efw, hnl);
}
InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
return 0;
}
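The loop above builds the Wiener gain hnl[] in Q14: ONE_Q14 (1 << 14) represents a gain of 1.0, and the final multiply with dfw[] uses WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND to scale the spectrum by that gain. As a rough, self-contained sketch of that fixed-point idiom (apply_q14_gain is a hypothetical helper, not part of these sources, assuming the macro's usual multiply, round, shift definition):

#include <stdint.h>

static int16_t apply_q14_gain(int16_t sample, int16_t gain_q14) {
  /* Multiply, add the rounding constant for a 14-bit shift, shift down. */
  int32_t product = (int32_t)sample * gain_q14 + (1 << 13);
  return (int16_t)(product >> 14);
}

A gain of ONE_Q14 (16384) returns the sample unchanged and a gain of 0 mutes it, matching the two extremes that hnl[] is truncated to in the NLP step above.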
static void ComfortNoise(AecmCore* aecm,
const uint16_t* dfa,
ComplexInt16* out,
const int16_t* lambda) {
int16_t i;
int16_t tmp16;
int32_t tmp32;
int16_t randW16[PART_LEN];
int16_t uReal[PART_LEN1];
int16_t uImag[PART_LEN1];
int32_t outLShift32;
int16_t noiseRShift16[PART_LEN1];
int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
int16_t minTrackShift;
assert(shiftFromNearToNoise >= 0);
assert(shiftFromNearToNoise < 16);
if (aecm->noiseEstCtr < 100)
{
// Track the minimum more quickly initially.
aecm->noiseEstCtr++;
minTrackShift = 6;
} else
{
minTrackShift = 9;
}
// Estimate noise power.
for (i = 0; i < PART_LEN1; i++)
{
// Shift to the noise domain.
tmp32 = (int32_t)dfa[i];
outLShift32 = tmp32 << shiftFromNearToNoise;
if (outLShift32 < aecm->noiseEst[i])
{
// Reset "too low" counter
aecm->noiseEstTooLowCtr[i] = 0;
// Track the minimum.
if (aecm->noiseEst[i] < (1 << minTrackShift))
{
// For small values, decrease noiseEst[i] every
// |kNoiseEstIncCount| blocks. The regular approach below
// cannot go further down due to truncation.
aecm->noiseEstTooHighCtr[i]++;
if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
{
aecm->noiseEst[i]--;
aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
}
}
else
{
aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
>> minTrackShift);
}
} else
{
// Reset "too high" counter
aecm->noiseEstTooHighCtr[i] = 0;
// Ramp slowly upwards until we hit the minimum again.
if ((aecm->noiseEst[i] >> 19) > 0)
{
// Avoid overflow.
// Multiplication with 2049 will cause wrap around. Scale
// down first and then multiply
aecm->noiseEst[i] >>= 11;
aecm->noiseEst[i] *= 2049;
}
else if ((aecm->noiseEst[i] >> 11) > 0)
{
// Large enough for relative increase
aecm->noiseEst[i] *= 2049;
aecm->noiseEst[i] >>= 11;
}
else
{
// Make incremental increases based on size every
// |kNoiseEstIncCount| blocks.
aecm->noiseEstTooLowCtr[i]++;
if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
{
aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
}
}
}
}
for (i = 0; i < PART_LEN1; i++)
{
tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;
if (tmp32 > 32767)
{
tmp32 = 32767;
aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
}
noiseRShift16[i] = (int16_t)tmp32;
tmp16 = ONE_Q14 - lambda[i];
noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
}
// Generate a uniform random array on [0 2^15-1].
WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
// Generate noise according to estimated energy.
uReal[0] = 0; // Reject LF noise.
uImag[0] = 0;
for (i = 1; i < PART_LEN1; i++)
{
// Get a random index for the cos and sin tables over [0 359].
tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
// Tables are in Q13.
uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >>
13);
uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >>
13);
}
uImag[PART_LEN] = 0;
for (i = 0; i < PART_LEN1; i++)
{
out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
}
}
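ComfortNoise() above fills suppressed bins with synthetic noise: each bin's noise magnitude estimate is weighted by (ONE_Q14 - lambda[i]) in Q14, so fully suppressed bins (lambda == 0) receive full comfort noise while passed-through bins receive none, and the result is then rotated by a random phase drawn from the Q13 sine/cosine tables. A minimal per-bin sketch of that shaping (shape_noise_bin is a hypothetical helper, not upstream code):

#include <stdint.h>

static void shape_noise_bin(int16_t noise_mag,   /* per-bin noise estimate */
                            int16_t lambda_q14,  /* Wiener gain, Q14 */
                            int16_t cos_q13,     /* random-phase cosine, Q13 */
                            int16_t sin_q13,     /* random-phase sine, Q13 */
                            int16_t* u_real,
                            int16_t* u_imag) {
  /* Weight the noise magnitude by (1 - gain) in Q14; 16384 == ONE_Q14. */
  int16_t amp = (int16_t)(((16384 - lambda_q14) * noise_mag) >> 14);
  /* Rotate by the random phase; the imaginary part is negated as above. */
  *u_real = (int16_t)((amp * cos_q13) >> 13);
  *u_imag = (int16_t)((-amp * sin_q13) >> 13);
}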

File diff suppressed because it is too large


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -7,308 +7,206 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)
#include "aecm_core.h"
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
#include <arm_neon.h>
#include <assert.h>
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
// TODO(kma): Re-write the corresponding assembly file, the offset
// generating script and makefile, to replace these C functions.
// Square root of Hanning window in Q14.
static const WebRtc_Word16 kSqrtHanningReversed[] __attribute__ ((aligned (8))) = {
16384, 16373, 16354, 16325,
16286, 16237, 16179, 16111,
16034, 15947, 15851, 15746,
15631, 15506, 15373, 15231,
15079, 14918, 14749, 14571,
14384, 14189, 13985, 13773,
13553, 13325, 13089, 12845,
12594, 12335, 12068, 11795,
11514, 11227, 10933, 10633,
10326, 10013, 9695, 9370,
9040, 8705, 8364, 8019,
7668, 7313, 6954, 6591,
6224, 5853, 5478, 5101,
4720, 4337, 3951, 3562,
3172, 2780, 2386, 1990,
1594, 1196, 798, 399
const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
0,
399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
};
void WebRtcAecm_WindowAndFFT(WebRtc_Word16* fft,
const WebRtc_Word16* time_signal,
complex16_t* freq_signal,
int time_signal_scaling)
{
int i, j;
int16x4_t tmp16x4_scaling = vdup_n_s16(time_signal_scaling);
__asm__("vmov.i16 d21, #0" ::: "d21");
for(i = 0, j = 0; i < PART_LEN; i += 4, j += 8)
{
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int32x4_t tmp32x4_0;
/* Window near end */
// fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((time_signal[i]
// << time_signal_scaling), WebRtcAecm_kSqrtHanning[i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&time_signal[i]));
tmp16x4_0 = vshl_s16(tmp16x4_0, tmp16x4_scaling);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&WebRtcAecm_kSqrtHanning[i]));
tmp32x4_0 = vmull_s16(tmp16x4_0, tmp16x4_1);
__asm__("vshrn.i32 d20, %q0, #14" : : "w"(tmp32x4_0) : "d20");
__asm__("vst2.16 {d20, d21}, [%0, :128]" : : "r"(&fft[j]) : "q10");
// fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
// (time_signal[PART_LEN + i] << time_signal_scaling),
// WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&time_signal[i + PART_LEN]));
tmp16x4_0 = vshl_s16(tmp16x4_0, tmp16x4_scaling);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&kSqrtHanningReversed[i]));
tmp32x4_0 = vmull_s16(tmp16x4_0, tmp16x4_1);
__asm__("vshrn.i32 d20, %q0, #14" : : "w"(tmp32x4_0) : "d20");
__asm__("vst2.16 {d20, d21}, [%0, :128]" : : "r"(&fft[PART_LEN2 + j]) : "q10");
}
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
// Take only the first PART_LEN2 samples, and switch the sign of the imaginary part.
for(i = 0, j = 0; j < PART_LEN2; i += 8, j += 16)
{
__asm__("vld2.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&fft[j]) : "q10", "q11");
__asm__("vneg.s16 d22, d22" : : : "q10");
__asm__("vneg.s16 d23, d23" : : : "q11");
__asm__("vst2.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&freq_signal[i].real): "q10", "q11");
}
static inline void AddLanes(uint32_t* ptr, uint32x4_t v) {
#if defined(WEBRTC_ARCH_ARM64)
*(ptr) = vaddvq_u32(v);
#else
uint32x2_t tmp_v;
tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v));
tmp_v = vpadd_u32(tmp_v, tmp_v);
*(ptr) = vget_lane_u32(tmp_v, 0);
#endif
}
void WebRtcAecm_InverseFFTAndWindow(AecmCore_t* aecm,
WebRtc_Word16* fft,
complex16_t* efw,
WebRtc_Word16* output,
const WebRtc_Word16* nearendClean)
{
int i, j, outCFFT;
WebRtc_Word32 tmp32no1;
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored) {
int16_t* start_stored_p = aecm->channelStored;
int16_t* start_adapt_p = aecm->channelAdapt16;
int32_t* echo_est_p = echo_est;
const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
const uint16_t* far_spectrum_p = far_spectrum;
int16x8_t store_v, adapt_v;
uint16x8_t spectrum_v;
uint32x4_t echo_est_v_low, echo_est_v_high;
uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v;
// Synthesis
for(i = 0, j = 0; i < PART_LEN; i += 4, j += 8)
{
// We overwrite two more elements in fft[], but it's ok.
__asm__("vld2.16 {d20, d21}, [%0, :128]" : : "r"(&(efw[i].real)) : "q10");
__asm__("vmov q11, q10" : : : "q10", "q11");
far_energy_v = vdupq_n_u32(0);
echo_adapt_v = vdupq_n_u32(0);
echo_stored_v = vdupq_n_u32(0);
__asm__("vneg.s16 d23, d23" : : : "q11");
__asm__("vst2.16 {d22, d23}, [%0, :128]" : : "r"(&fft[j]): "q11");
// Get energy for the delayed far end signal and estimated
// echo using both stored and adapted channels.
// The C code:
// for (i = 0; i < PART_LEN1; i++) {
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
// far_spectrum[i]);
// (*far_energy) += (uint32_t)(far_spectrum[i]);
// *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
// (*echo_energy_stored) += (uint32_t)echo_est[i];
// }
while (start_stored_p < end_stored_p) {
spectrum_v = vld1q_u16(far_spectrum_p);
adapt_v = vld1q_s16(start_adapt_p);
store_v = vld1q_s16(start_stored_p);
__asm__("vrev64.16 q10, q10" : : : "q10");
__asm__("vst2.16 {d20, d21}, [%0]" : : "r"(&fft[PART_LEN4 - j - 6]): "q10");
}
far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v));
far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v));
fft[PART_LEN2] = efw[PART_LEN].real;
fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)),
vget_low_u16(spectrum_v));
echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)),
vget_high_u16(spectrum_v));
vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
// Inverse FFT, result should be scaled with outCFFT.
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v);
echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v);
// Take only the real values and scale with outCFFT.
for (i = 0, j = 0; i < PART_LEN2; i += 8, j+= 16)
{
__asm__("vld2.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&fft[j]) : "q10", "q11");
__asm__("vst1.16 {d20, d21}, [%0, :128]" : : "r"(&fft[i]): "q10");
}
echo_adapt_v = vmlal_u16(echo_adapt_v,
vreinterpret_u16_s16(vget_low_s16(adapt_v)),
vget_low_u16(spectrum_v));
echo_adapt_v = vmlal_u16(echo_adapt_v,
vreinterpret_u16_s16(vget_high_s16(adapt_v)),
vget_high_u16(spectrum_v));
int32x4_t tmp32x4_2;
__asm__("vdup.32 %q0, %1" : "=w"(tmp32x4_2) : "r"((WebRtc_Word32)
(outCFFT - aecm->dfaCleanQDomain)));
for (i = 0; i < PART_LEN; i += 4)
{
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int32x4_t tmp32x4_0;
int32x4_t tmp32x4_1;
start_stored_p += 8;
start_adapt_p += 8;
far_spectrum_p += 8;
echo_est_p += 8;
}
// fft[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
// fft[i], WebRtcAecm_kSqrtHanning[i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&fft[i]));
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&WebRtcAecm_kSqrtHanning[i]));
__asm__("vmull.s16 %q0, %P1, %P2" : "=w"(tmp32x4_0) : "w"(tmp16x4_0), "w"(tmp16x4_1));
__asm__("vrshr.s32 %q0, %q1, #14" : "=w"(tmp32x4_0) : "0"(tmp32x4_0));
AddLanes(far_energy, far_energy_v);
AddLanes(echo_energy_stored, echo_stored_v);
AddLanes(echo_energy_adapt, echo_adapt_v);
// tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)fft[i],
// outCFFT - aecm->dfaCleanQDomain);
__asm__("vshl.s32 %q0, %q1, %q2" : "=w"(tmp32x4_0) : "0"(tmp32x4_0), "w"(tmp32x4_2));
// fft[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
// tmp32no1 + outBuf[i], WEBRTC_SPL_WORD16_MIN);
// output[i] = fft[i];
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&aecm->outBuf[i]));
__asm__("vmovl.s16 %q0, %P1" : "=w"(tmp32x4_1) : "w"(tmp16x4_0));
__asm__("vadd.i32 %q0, %q1" : : "w"(tmp32x4_0), "w"(tmp32x4_1));
__asm__("vqshrn.s32 %P0, %q1, #0" : "=w"(tmp16x4_0) : "w"(tmp32x4_0));
__asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&fft[i]));
__asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&output[i]));
// tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(
// fft[PART_LEN + i], WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&fft[PART_LEN + i]));
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&kSqrtHanningReversed[i]));
__asm__("vmull.s16 %q0, %P1, %P2" : "=w"(tmp32x4_0) : "w"(tmp16x4_0), "w"(tmp16x4_1));
__asm__("vshr.s32 %q0, %q1, #14" : "=w"(tmp32x4_0) : "0"(tmp32x4_0));
// tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, outCFFT - aecm->dfaCleanQDomain);
__asm__("vshl.s32 %q0, %q1, %q2" : "=w"(tmp32x4_0) : "0"(tmp32x4_0), "w"(tmp32x4_2));
// outBuf[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(
// WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN);
__asm__("vqshrn.s32 %P0, %q1, #0" : "=w"(tmp16x4_0) : "w"(tmp32x4_0));
__asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&aecm->outBuf[i]));
}
// Copy the current block to the old position (outBuf is shifted elsewhere).
for (i = 0; i < PART_LEN; i += 16)
{
__asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->xBuf[i + PART_LEN]) : "q10");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&aecm->xBuf[i]): "q10");
}
for (i = 0; i < PART_LEN; i += 16)
{
__asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufNoisy[i + PART_LEN]) : "q10");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufNoisy[i]): "q10");
}
if (nearendClean != NULL) {
for (i = 0; i < PART_LEN; i += 16)
{
__asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufClean[i + PART_LEN]) : "q10");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufClean[i]): "q10");
}
}
echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
far_spectrum[PART_LEN]);
*echo_energy_stored += (uint32_t)echo_est[PART_LEN];
*far_energy += (uint32_t)far_spectrum[PART_LEN];
*echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
}
void WebRtcAecm_CalcLinearEnergies(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echo_est,
WebRtc_UWord32* far_energy,
WebRtc_UWord32* echo_energy_adapt,
WebRtc_UWord32* echo_energy_stored)
{
int i;
void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est) {
assert((uintptr_t)echo_est % 32 == 0);
assert((uintptr_t)(aecm->channelStored) % 16 == 0);
assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
register WebRtc_UWord32 far_energy_r;
register WebRtc_UWord32 echo_energy_stored_r;
register WebRtc_UWord32 echo_energy_adapt_r;
uint32x4_t tmp32x4_0;
// This is the C code of the following optimized code.
// During startup we store the channel every block.
// memcpy(aecm->channelStored,
// aecm->channelAdapt16,
// sizeof(int16_t) * PART_LEN1);
// Recalculate echo estimate
// for (i = 0; i < PART_LEN; i += 4) {
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
// far_spectrum[i]);
// echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1],
// far_spectrum[i + 1]);
// echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2],
// far_spectrum[i + 2]);
// echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3],
// far_spectrum[i + 3]);
// }
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
// far_spectrum[i]);
const uint16_t* far_spectrum_p = far_spectrum;
int16_t* start_adapt_p = aecm->channelAdapt16;
int16_t* start_stored_p = aecm->channelStored;
const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
int32_t* echo_est_p = echo_est;
__asm__("vmov.i32 q14, #0" : : : "q14"); // far_energy
__asm__("vmov.i32 q8, #0" : : : "q8"); // echo_energy_stored
__asm__("vmov.i32 q9, #0" : : : "q9"); // echo_energy_adapt
uint16x8_t far_spectrum_v;
int16x8_t adapt_v;
uint32x4_t echo_est_v_low, echo_est_v_high;
for(i = 0; i < PART_LEN -7; i += 8)
{
// far_energy += (WebRtc_UWord32)(far_spectrum[i]);
__asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&far_spectrum[i]) : "q13");
__asm__("vaddw.u16 q14, q14, d26" : : : "q14", "q13");
__asm__("vaddw.u16 q14, q14, d27" : : : "q14", "q13");
while (start_stored_p < end_stored_p) {
far_spectrum_v = vld1q_u16(far_spectrum_p);
adapt_v = vld1q_s16(start_adapt_p);
// Get estimated echo energies for adaptive channel and stored channel.
// echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
__asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelStored[i]) : "q12");
__asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10");
__asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11");
__asm__("vst1.32 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&echo_est[i]):
"q10", "q11");
vst1q_s16(start_stored_p, adapt_v);
// echo_energy_stored += (WebRtc_UWord32)echoEst[i];
__asm__("vadd.u32 q8, q10" : : : "q10", "q8");
__asm__("vadd.u32 q8, q11" : : : "q11", "q8");
echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v),
vget_low_u16(vreinterpretq_u16_s16(adapt_v)));
echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v),
vget_high_u16(vreinterpretq_u16_s16(adapt_v)));
// echo_energy_adapt += WEBRTC_SPL_UMUL_16_16(
// aecm->channelAdapt16[i], far_spectrum[i]);
__asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelAdapt16[i]) : "q12");
__asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10");
__asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11");
__asm__("vadd.u32 q9, q10" : : : "q9", "q15");
__asm__("vadd.u32 q9, q11" : : : "q9", "q11");
}
vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
__asm__("vadd.u32 d28, d29" : : : "q14");
__asm__("vpadd.u32 d28, d28" : : : "q14");
__asm__("vmov.32 %0, d28[0]" : "=r"(far_energy_r): : "q14");
__asm__("vadd.u32 d18, d19" : : : "q9");
__asm__("vpadd.u32 d18, d18" : : : "q9");
__asm__("vmov.32 %0, d18[0]" : "=r"(echo_energy_adapt_r): : "q9");
__asm__("vadd.u32 d16, d17" : : : "q8");
__asm__("vpadd.u32 d16, d16" : : : "q8");
__asm__("vmov.32 %0, d16[0]" : "=r"(echo_energy_stored_r): : "q8");
// Get estimated echo energies for adaptive channel and stored channel.
echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
*echo_energy_stored = echo_energy_stored_r + (WebRtc_UWord32)echo_est[i];
*far_energy = far_energy_r + (WebRtc_UWord32)(far_spectrum[i]);
*echo_energy_adapt = echo_energy_adapt_r + WEBRTC_SPL_UMUL_16_16(
aecm->channelAdapt16[i], far_spectrum[i]);
far_spectrum_p += 8;
start_adapt_p += 8;
start_stored_p += 8;
echo_est_p += 8;
}
aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN];
echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
far_spectrum[PART_LEN]);
}
void WebRtcAecm_StoreAdaptiveChannel(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echo_est)
{
int i;
void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) {
assert((uintptr_t)(aecm->channelStored) % 16 == 0);
assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0);
// During startup we store the channel every block.
// Recalculate echo estimate.
for(i = 0; i < PART_LEN -7; i += 8)
{
// aecm->channelStored[i] = aecm->channelAdapt16[i];
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
__asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&far_spectrum[i]) : "q13");
__asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelAdapt16[i]) : "q12");
__asm__("vst1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelStored[i]) : "q12");
__asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10");
__asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&echo_est[i]) : "q10", "q11");
}
aecm->channelStored[i] = aecm->channelAdapt16[i];
echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
// The C code of the following optimized code.
// for (i = 0; i < PART_LEN1; i++) {
// aecm->channelAdapt16[i] = aecm->channelStored[i];
// aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
// (int32_t)aecm->channelStored[i], 16);
// }
int16_t* start_stored_p = aecm->channelStored;
int16_t* start_adapt16_p = aecm->channelAdapt16;
int32_t* start_adapt32_p = aecm->channelAdapt32;
const int16_t* end_stored_p = start_stored_p + PART_LEN;
int16x8_t stored_v;
int32x4_t adapt32_v_low, adapt32_v_high;
while (start_stored_p < end_stored_p) {
stored_v = vld1q_s16(start_stored_p);
vst1q_s16(start_adapt16_p, stored_v);
adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16);
adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16);
vst1q_s32(start_adapt32_p, adapt32_v_low);
vst1q_s32(start_adapt32_p + 4, adapt32_v_high);
start_stored_p += 8;
start_adapt16_p += 8;
start_adapt32_p += 8;
}
aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN];
aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16;
}
void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm)
{
int i;
for(i = 0; i < PART_LEN -7; i += 8)
{
// aecm->channelAdapt16[i] = aecm->channelStored[i];
// aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)
// aecm->channelStored[i], 16);
__asm__("vld1.16 {d24, d25}, [%0, :128]" : :
"r"(&aecm->channelStored[i]) : "q12");
__asm__("vst1.16 {d24, d25}, [%0, :128]" : :
"r"(&aecm->channelAdapt16[i]) : "q12");
__asm__("vshll.s16 q10, d24, #16" : : : "q12", "q13", "q10");
__asm__("vshll.s16 q11, d25, #16" : : : "q12", "q13", "q11");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->channelAdapt32[i]): "q10", "q11");
}
aecm->channelAdapt16[i] = aecm->channelStored[i];
aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
(WebRtc_Word32)aecm->channelStored[i], 16);
}
#endif // #if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)
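The NEON version above processes eight bins per iteration and keeps the three energy accumulators in vector registers until the final horizontal add (AddLanes). A scalar reference, reconstructed from the C code quoted in the function's own comments (a sketch for orientation, not the shipped implementation; the accumulators are zeroed here to keep it self-contained):

#include <stdint.h>

static void CalcLinearEnergiesRef(const int16_t* channel_stored,
                                  const int16_t* channel_adapt16,
                                  const uint16_t* far_spectrum,
                                  int num_bins, /* PART_LEN1 upstream */
                                  int32_t* echo_est,
                                  uint32_t* far_energy,
                                  uint32_t* echo_energy_adapt,
                                  uint32_t* echo_energy_stored) {
  int i;
  *far_energy = *echo_energy_adapt = *echo_energy_stored = 0;
  for (i = 0; i < num_bins; i++) {
    /* Echo estimate: stored channel response times far-end spectrum. */
    echo_est[i] = channel_stored[i] * (int32_t)far_spectrum[i];
    *far_energy += (uint32_t)far_spectrum[i];
    *echo_energy_adapt += (uint32_t)(channel_adapt16[i] *
                                     (int32_t)far_spectrum[i]);
    *echo_energy_stored += (uint32_t)echo_est[i];
  }
}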


@ -0,0 +1,87 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */
/* Algorithm parameters */
#define FRAME_LEN 80 /* Total frame length, 10 ms. */
#define PART_LEN 64 /* Length of partition. */
#define PART_LEN_SHIFT 7 /* Log2 of (PART_LEN * 2). */
#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */
#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */
#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */
#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */
#define MAX_DELAY 100
/* Counter parameters */
#define CONV_LEN 512 /* Convergence length used at startup. */
#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */
/* Energy parameters */
#define MAX_BUF_LEN 64 /* History length of energy signals. */
#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */
/* in energy. */
#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */
/* and min. */
#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */
#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */
#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */
/* Stepsize parameters */
#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */
/* dependent). */
#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */
/* dependent). */
#define MU_DIFF 9 /* MU_MIN - MU_MAX */
/* Channel parameters */
#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */
/* far end energy to compare channel estimates. */
#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */
/* accept a new storage (0.8 in Q-MSE_RESOLUTION). */
#define MSE_RESOLUTION 5 /* MSE parameter resolution. */
#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */
#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */
#define CHANNEL_VAD 16 /* Minimum energy in frequency band */
/* to update channel. */
/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */
#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */
#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */
#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */
/* (Maximum gain) (8 in Q8). */
#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */
/* (Gain before going down). */
#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */
/* (Should be the same as Default) (1 in Q8). */
#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */
/* Defines for "check delay estimation" */
#define CORR_WIDTH 31 /* Number of samples to correlate over. */
#define CORR_MAX 16 /* Maximum correlation offset. */
#define CORR_MAX_BUF 63
#define CORR_DEV 4
#define CORR_MAX_LEVEL 20
#define CORR_MAX_LOW 4
#define CORR_BUF_LEN (CORR_MAX << 1) + 1
/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */
#define ONE_Q14 (1 << 14)
/* NLP defines */
#define NLP_COMP_LOW 3277 /* 0.2 in Q14 */
#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */
#endif
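Most of the constants above are Q-format bookkeeping: a value "in Q-N" stores x * 2^N as an integer, so RESOLUTION_CHANNEL16 == 12 means the 16-bit channel holds Q12 values and ONE_Q14 is 1.0 in Q14. A small illustrative converter (hypothetical helper, shown only to make the notation concrete):

#include <stdint.h>

static float q_to_float(int32_t value, int q_bits) {
  /* Interpret |value| as fixed point with |q_bits| fraction bits. */
  return (float)value / (float)(1 << q_bits);
}

/* e.g. q_to_float(channelStored[i], RESOLUTION_CHANNEL16)  -- Q12 channel
 *      q_to_float(supGain, RESOLUTION_SUPGAIN)             -- Q8 gain
 *      q_to_float(hnl[i], 14)                              -- Q14 Wiener gain */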


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,22 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
//#include <string.h>
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
#include "echo_control_mobile.h"
#include "aecm_core.h"
#include "ring_buffer.h"
#ifdef AEC_DEBUG
#include <stdio.h>
#endif
#ifdef MAC_IPHONE_PRINT
#include <time.h>
#include <stdio.h>
#elif defined ARM_WINM_LOG
#include "windows.h"
extern HANDLE logFile;
#endif
#include <stdlib.h>
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
#define BUF_SIZE_FRAMES 50 // buffer size (frames)
// Maximum length of resampled signal. Must be an integer multiple of frames
@ -31,7 +25,7 @@ extern HANDLE logFile;
// The factor of 2 handles wb, and the + 1 is as a safety margin
#define MAX_RESAMP_LEN (5 * FRAME_LEN)
static const int kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
static const int kSampMsNb = 8; // samples per ms in nb
// Target suppression levels for nlp modes
// log{0.001, 0.00001, 0.00000001}
@ -63,7 +57,7 @@ typedef struct
int delayChange;
short lastDelayDiff;
WebRtc_Word16 echoMode;
int16_t echoMode;
#ifdef AEC_DEBUG
FILE *bufFile;
@ -72,47 +66,37 @@ typedef struct
FILE *postCompFile;
#endif // AEC_DEBUG
// Structures
void *farendBuf;
RingBuffer *farendBuf;
int lastError;
AecmCore_t *aecmCore;
} aecmob_t;
AecmCore* aecmCore;
} AecMobile;
// Estimates delay to set the position of the farend buffer read pointer
// (controlled by knownDelay)
static int WebRtcAecm_EstBufDelay(aecmob_t *aecmInst, short msInSndCardBuf);
static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf);
// Stuffs the farend buffer if the estimated delay is too large
static int WebRtcAecm_DelayComp(aecmob_t *aecmInst);
static int WebRtcAecm_DelayComp(AecMobile* aecmInst);
WebRtc_Word32 WebRtcAecm_Create(void **aecmInst)
{
aecmob_t *aecm;
if (aecmInst == NULL)
{
return -1;
void* WebRtcAecm_Create() {
AecMobile* aecm = malloc(sizeof(AecMobile));
WebRtcSpl_Init();
aecm->aecmCore = WebRtcAecm_CreateCore();
if (!aecm->aecmCore) {
WebRtcAecm_Free(aecm);
return NULL;
}
aecm = malloc(sizeof(aecmob_t));
*aecmInst = aecm;
if (aecm == NULL)
{
return -1;
}
if (WebRtcAecm_CreateCore(&aecm->aecmCore) == -1)
aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp,
sizeof(int16_t));
if (!aecm->farendBuf)
{
WebRtcAecm_Free(aecm);
aecm = NULL;
return -1;
}
if (WebRtcApm_CreateBuffer(&aecm->farendBuf, kBufSizeSamp) == -1)
{
WebRtcAecm_Free(aecm);
aecm = NULL;
return -1;
return NULL;
}
aecm->initFlag = 0;
@ -129,16 +113,14 @@ WebRtc_Word32 WebRtcAecm_Create(void **aecmInst)
aecm->preCompFile = fopen("preComp.pcm", "wb");
aecm->postCompFile = fopen("postComp.pcm", "wb");
#endif // AEC_DEBUG
return 0;
return aecm;
}
WebRtc_Word32 WebRtcAecm_Free(void *aecmInst)
{
aecmob_t *aecm = aecmInst;
void WebRtcAecm_Free(void* aecmInst) {
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
return -1;
if (aecm == NULL) {
return;
}
#ifdef AEC_DEBUG
@ -153,15 +135,13 @@ WebRtc_Word32 WebRtcAecm_Free(void *aecmInst)
fclose(aecm->postCompFile);
#endif // AEC_DEBUG
WebRtcAecm_FreeCore(aecm->aecmCore);
WebRtcApm_FreeBuffer(aecm->farendBuf);
WebRtc_FreeBuffer(aecm->farendBuf);
free(aecm);
return 0;
}
WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
AecmConfig aecConfig;
if (aecm == NULL)
@ -184,11 +164,7 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
}
// Initialize farend buffer
if (WebRtcApm_InitBuffer(aecm->farendBuf) == -1)
{
aecm->lastError = AECM_UNSPECIFIED_ERROR;
return -1;
}
WebRtc_InitBuffer(aecm->farendBuf);
aecm->initFlag = kInitCheck; // indicates that initialization has been done
@ -222,11 +198,11 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
return 0;
}
WebRtc_Word32 WebRtcAecm_BufferFarend(void *aecmInst, const WebRtc_Word16 *farend,
WebRtc_Word16 nrOfSamples)
int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend,
size_t nrOfSamples)
{
aecmob_t *aecm = aecmInst;
WebRtc_Word32 retVal = 0;
AecMobile* aecm = aecmInst;
int32_t retVal = 0;
if (aecm == NULL)
{
@ -257,38 +233,25 @@ WebRtc_Word32 WebRtcAecm_BufferFarend(void *aecmInst, const WebRtc_Word16 *faren
WebRtcAecm_DelayComp(aecm);
}
WebRtcApm_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
return retVal;
}
WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoisy,
const WebRtc_Word16 *nearendClean, WebRtc_Word16 *out,
WebRtc_Word16 nrOfSamples, WebRtc_Word16 msInSndCardBuf)
int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy,
const int16_t *nearendClean, int16_t *out,
size_t nrOfSamples, int16_t msInSndCardBuf)
{
aecmob_t *aecm = aecmInst;
WebRtc_Word32 retVal = 0;
short i;
short farend[FRAME_LEN];
AecMobile* aecm = aecmInst;
int32_t retVal = 0;
size_t i;
short nmbrOfFilledBuffers;
short nBlocks10ms;
short nFrames;
size_t nBlocks10ms;
size_t nFrames;
#ifdef AEC_DEBUG
short msInAECBuf;
#endif
#ifdef ARM_WINM_LOG
__int64 freq, start, end, diff;
unsigned int milliseconds;
DWORD temp;
#elif defined MAC_IPHONE_PRINT
// double endtime = 0, starttime = 0;
struct timeval starttime;
struct timeval endtime;
static long int timeused = 0;
static int timecount = 0;
#endif
if (aecm == NULL)
{
return -1;
@ -339,13 +302,17 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
{
if (nearendClean == NULL)
{
memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
} else
if (out != nearendNoisy)
{
memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
}
} else if (out != nearendClean)
{
memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
}
nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN;
nmbrOfFilledBuffers =
(short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
// The AECM is in the start up mode
// AECM is disabled until the soundcard buffer and farend buffers are OK
@ -407,10 +374,9 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
aecm->ECstartup = 0; // Enable the AECM
} else if (nmbrOfFilledBuffers > aecm->bufSizeStart)
{
WebRtcApm_FlushBuffer(
aecm->farendBuf,
WebRtcApm_get_buffer_size(aecm->farendBuf)
- aecm->bufSizeStart * FRAME_LEN);
WebRtc_MoveReadPtr(aecm->farendBuf,
(int) WebRtc_available_read(aecm->farendBuf)
- (int) aecm->bufSizeStart * FRAME_LEN);
aecm->ECstartup = 0;
}
}
@ -422,20 +388,27 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
// Note only 1 block supported for nb and 2 blocks for wb
for (i = 0; i < nFrames; i++)
{
nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN;
int16_t farend[FRAME_LEN];
const int16_t* farend_ptr = NULL;
nmbrOfFilledBuffers =
(short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
// Check that there is data in the far end buffer
if (nmbrOfFilledBuffers > 0)
{
// Get the next 80 samples from the farend buffer
WebRtcApm_ReadBuffer(aecm->farendBuf, farend, FRAME_LEN);
WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend,
FRAME_LEN);
// Always store the last frame for use when we run out of data
memcpy(&(aecm->farendOld[i][0]), farend, FRAME_LEN * sizeof(short));
memcpy(&(aecm->farendOld[i][0]), farend_ptr,
FRAME_LEN * sizeof(short));
} else
{
// We have no data so we use the last played frame
memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short));
farend_ptr = farend;
}
// Call buffer delay estimator when all data is extracted,
@ -445,77 +418,23 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf);
}
#ifdef ARM_WINM_LOG
// measure tick start
QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
QueryPerformanceCounter((LARGE_INTEGER*)&start);
#elif defined MAC_IPHONE_PRINT
// starttime = clock()/(double)CLOCKS_PER_SEC;
gettimeofday(&starttime, NULL);
#endif
// Call the AECM
/*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i],
&out[FRAME_LEN * i], aecm->knownDelay);*/
if (nearendClean == NULL)
{
if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
farend,
&nearendNoisy[FRAME_LEN * i],
NULL,
&out[FRAME_LEN * i]) == -1)
{
return -1;
}
} else
{
if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
farend,
&nearendNoisy[FRAME_LEN * i],
&nearendClean[FRAME_LEN * i],
&out[FRAME_LEN * i]) == -1)
{
return -1;
}
}
#ifdef ARM_WINM_LOG
// measure tick end
QueryPerformanceCounter((LARGE_INTEGER*)&end);
if(end > start)
{
diff = ((end - start) * 1000) / (freq/1000);
milliseconds = (unsigned int)(diff & 0xffffffff);
WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
}
#elif defined MAC_IPHONE_PRINT
// endtime = clock()/(double)CLOCKS_PER_SEC;
// printf("%f\n", endtime - starttime);
gettimeofday(&endtime, NULL);
if( endtime.tv_usec > starttime.tv_usec)
{
timeused += endtime.tv_usec - starttime.tv_usec;
} else
{
timeused += endtime.tv_usec + 1000000 - starttime.tv_usec;
}
if(++timecount == 1000)
{
timecount = 0;
printf("AEC: %ld\n", timeused);
timeused = 0;
}
#endif
if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
farend_ptr,
&nearendNoisy[FRAME_LEN * i],
(nearendClean
? &nearendClean[FRAME_LEN * i]
: NULL),
&out[FRAME_LEN * i]) == -1)
return -1;
}
}
#ifdef AEC_DEBUG
msInAECBuf = WebRtcApm_get_buffer_size(aecm->farendBuf) / (kSampMsNb*aecm->aecmCore->mult);
msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) /
(kSampMsNb * aecm->aecmCore->mult);
fwrite(&msInAECBuf, 2, 1, aecm->bufFile);
fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile);
#endif
@ -523,9 +442,9 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
return retVal;
}
WebRtc_Word32 WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
@ -605,9 +524,9 @@ WebRtc_Word32 WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
return 0;
}
WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
@ -632,17 +551,19 @@ WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
return 0;
}
WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes)
int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes)
{
aecmob_t *aecm = aecmInst;
const WebRtc_Word16* echo_path_ptr = echo_path;
AecMobile* aecm = aecmInst;
const int16_t* echo_path_ptr = echo_path;
if ((aecm == NULL) || (echo_path == NULL))
{
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
if (aecmInst == NULL) {
return -1;
}
if (echo_path == NULL) {
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
}
if (size_bytes != WebRtcAecm_echo_path_size_bytes())
{
@ -661,17 +582,19 @@ WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
return 0;
}
WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes)
int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes)
{
aecmob_t *aecm = aecmInst;
WebRtc_Word16* echo_path_ptr = echo_path;
AecMobile* aecm = aecmInst;
int16_t* echo_path_ptr = echo_path;
if ((aecm == NULL) || (echo_path == NULL))
{
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
if (aecmInst == NULL) {
return -1;
}
if (echo_path == NULL) {
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
}
if (size_bytes != WebRtcAecm_echo_path_size_bytes())
{
@ -691,31 +614,12 @@ WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst,
size_t WebRtcAecm_echo_path_size_bytes()
{
return (PART_LEN1 * sizeof(WebRtc_Word16));
return (PART_LEN1 * sizeof(int16_t));
}
WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len)
int32_t WebRtcAecm_get_error_code(void *aecmInst)
{
const char version[] = "AECM 1.2.0";
const short versionLen = (short)strlen(version) + 1; // +1 for null-termination
if (versionStr == NULL)
{
return -1;
}
if (versionLen > len)
{
return -1;
}
strncpy(versionStr, version, versionLen);
return 0;
}
WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
@ -725,19 +629,18 @@ WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst)
return aecm->lastError;
}
static int WebRtcAecm_EstBufDelay(aecmob_t *aecm, short msInSndCardBuf)
{
short delayNew, nSampFar, nSampSndCard;
static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
short delayNew, nSampSndCard;
short nSampFar = (short) WebRtc_available_read(aecm->farendBuf);
short diff;
nSampFar = WebRtcApm_get_buffer_size(aecm->farendBuf);
nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
delayNew = nSampSndCard - nSampFar;
if (delayNew < FRAME_LEN)
{
WebRtcApm_FlushBuffer(aecm->farendBuf, FRAME_LEN);
WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
delayNew += FRAME_LEN;
}
@ -775,12 +678,11 @@ static int WebRtcAecm_EstBufDelay(aecmob_t *aecm, short msInSndCardBuf)
return 0;
}
static int WebRtcAecm_DelayComp(aecmob_t *aecm)
{
int nSampFar, nSampSndCard, delayNew, nSampAdd;
static int WebRtcAecm_DelayComp(AecMobile* aecm) {
int nSampFar = (int) WebRtc_available_read(aecm->farendBuf);
int nSampSndCard, delayNew, nSampAdd;
const int maxStuffSamp = 10 * FRAME_LEN;
nSampFar = WebRtcApm_get_buffer_size(aecm->farendBuf);
nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
delayNew = nSampSndCard - nSampFar;
@ -792,7 +694,7 @@ static int WebRtcAecm_DelayComp(aecmob_t *aecm)
FRAME_LEN));
nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp);
WebRtcApm_StuffBuffer(aecm->farendBuf, nSampAdd);
WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd);
aecm->delayChange = 1; // the delay needs to be updated
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,10 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
#include "typedefs.h"
#include <stdlib.h>
#include "webrtc/typedefs.h"
enum {
AecmFalse = 0,
@ -29,8 +31,8 @@ enum {
#define AECM_BAD_PARAMETER_WARNING 12100
typedef struct {
WebRtc_Word16 cngMode; // AECM_FALSE, AECM_TRUE (default)
WebRtc_Word16 echoMode; // 0, 1, 2, 3 (default), 4
int16_t cngMode; // AECM_FALSE, AECM_TRUE (default)
int16_t echoMode; // 0, 1, 2, 3 (default), 4
} AecmConfig;
#ifdef __cplusplus
@ -40,133 +42,116 @@ extern "C" {
/*
* Allocates the memory needed by the AECM. The memory needs to be
* initialized separately using the WebRtcAecm_Init() function.
*
* Inputs Description
* -------------------------------------------------------------------
* void **aecmInst Pointer to the AECM instance to be
* created and initialized
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
* Returns a pointer to the instance, or a nullptr on failure.
*/
WebRtc_Word32 WebRtcAecm_Create(void **aecmInst);
void* WebRtcAecm_Create();
/*
* This function releases the memory allocated by WebRtcAecm_Create()
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
* void* aecmInst Pointer to the AECM instance
*/
WebRtc_Word32 WebRtcAecm_Free(void *aecmInst);
void WebRtcAecm_Free(void* aecmInst);
/*
* Initializes an AECM instance.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* WebRtc_Word32 sampFreq Sampling frequency of data
* void* aecmInst Pointer to the AECM instance
* int32_t sampFreq Sampling frequency of data
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_Init(void* aecmInst,
WebRtc_Word32 sampFreq);
int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
/*
* Inserts an 80 or 160 sample block of data into the farend buffer.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* WebRtc_Word16 *farend In buffer containing one frame of
* void* aecmInst Pointer to the AECM instance
* int16_t* farend In buffer containing one frame of
* farend signal
* WebRtc_Word16 nrOfSamples Number of samples in farend buffer
* int16_t nrOfSamples Number of samples in farend buffer
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_BufferFarend(void* aecmInst,
const WebRtc_Word16* farend,
WebRtc_Word16 nrOfSamples);
int32_t WebRtcAecm_BufferFarend(void* aecmInst,
const int16_t* farend,
size_t nrOfSamples);
/*
* Runs the AECM on an 80 or 160 sample block of data.
*
* Inputs Description
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* WebRtc_Word16 *nearendNoisy In buffer containing one frame of
* void* aecmInst Pointer to the AECM instance
* int16_t* nearendNoisy In buffer containing one frame of
* reference nearend+echo signal. If
* noise reduction is active, provide
* the noisy signal here.
* WebRtc_Word16 *nearendClean In buffer containing one frame of
* int16_t* nearendClean In buffer containing one frame of
* nearend+echo signal. If noise
* reduction is active, provide the
* clean signal here. Otherwise pass a
* NULL pointer.
* WebRtc_Word16 nrOfSamples Number of samples in nearend buffer
* WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and
* int16_t nrOfSamples Number of samples in nearend buffer
* int16_t msInSndCardBuf Delay estimate for sound card and
* system buffers
*
* Outputs Description
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word16 *out Out buffer, one frame of processed nearend
* WebRtc_Word32 return 0: OK
* -1: error
* int16_t* out Out buffer, one frame of processed nearend
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_Process(void* aecmInst,
const WebRtc_Word16* nearendNoisy,
const WebRtc_Word16* nearendClean,
WebRtc_Word16* out,
WebRtc_Word16 nrOfSamples,
WebRtc_Word16 msInSndCardBuf);
int32_t WebRtcAecm_Process(void* aecmInst,
const int16_t* nearendNoisy,
const int16_t* nearendClean,
int16_t* out,
size_t nrOfSamples,
int16_t msInSndCardBuf);
/*
* This function enables the user to set certain parameters on-the-fly
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* AecmConfig config Config instance that contains all
* void* aecmInst Pointer to the AECM instance
* AecmConfig config Config instance that contains all
* properties to be set
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_set_config(void* aecmInst,
AecmConfig config);
int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);
/*
* This function enables the user to retrieve the current parameter settings on-the-fly
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* void* aecmInst Pointer to the AECM instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecmConfig *config Pointer to the config instance that
* AecmConfig* config Pointer to the config instance that
* all properties will be written to
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst,
AecmConfig *config);
int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config);
/*
* This function enables the user to set the echo path on-the-fly.
@ -179,12 +164,12 @@ WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst,
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes);
int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes);
/*
* This function enables the user to get the currently used echo path
@ -198,19 +183,19 @@ WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes);
int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes);
/*
* This function enables the user to get the echo path size in bytes
*
* Outputs Description
* -------------------------------------------------------------------
* size_t return : size in bytes
* size_t return Size in bytes
*/
size_t WebRtcAecm_echo_path_size_bytes();
@ -219,32 +204,15 @@ size_t WebRtcAecm_echo_path_size_bytes();
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* void* aecmInst Pointer to the AECM instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 11000-11100: error code
* int32_t return 11000-11100: error code
*/
WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst);
/*
* Gets a version string
*
* Inputs Description
* -------------------------------------------------------------------
* char *versionStr Pointer to a string array
* WebRtc_Word16 len The maximum length of the string
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word8 *versionStr Pointer to a string array
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr,
WebRtc_Word16 len);
int32_t WebRtcAecm_get_error_code(void *aecmInst);
#ifdef __cplusplus
}
#endif
#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_ */
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
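Taken together, the interface now follows a create/init/process/free pattern with the instance passed around as an opaque pointer. A minimal calling sketch under assumptions not stated in the header (8 kHz mono, one 80-sample frame, no separate clean near-end input, error handling trimmed):

#include <stdint.h>
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"

static void run_aecm_once(const int16_t* farend, const int16_t* nearend,
                          int16_t* out, int16_t delay_ms) {
  void* inst = WebRtcAecm_Create();
  if (inst == NULL)
    return;
  if (WebRtcAecm_Init(inst, 8000) == 0) {
    /* Queue one far-end frame, then process the matching near-end frame. */
    WebRtcAecm_BufferFarend(inst, farend, 80);
    WebRtcAecm_Process(inst, nearend, NULL, out, 80, delay_ms);
  }
  WebRtcAecm_Free(inst);
}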


@ -1,10 +0,0 @@
noinst_LTLIBRARIES = libagc.la
libagc_la_SOURCES = interface/gain_control.h \
analog_agc.c \
analog_agc.h \
digital_agc.c \
digital_agc.h
libagc_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility


@ -0,0 +1,101 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc.h"
#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include "webrtc/modules/audio_processing/agc/utility.h"
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
namespace {
const int kDefaultLevelDbfs = -18;
const int kNumAnalysisFrames = 100;
const double kActivityThreshold = 0.3;
} // namespace
Agc::Agc()
: target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
target_level_dbfs_(kDefaultLevelDbfs),
histogram_(Histogram::Create(kNumAnalysisFrames)),
inactive_histogram_(Histogram::Create()) {
}
Agc::~Agc() {}
float Agc::AnalyzePreproc(const int16_t* audio, size_t length) {
assert(length > 0);
size_t num_clipped = 0;
for (size_t i = 0; i < length; ++i) {
if (audio[i] == 32767 || audio[i] == -32768)
++num_clipped;
}
return 1.0f * num_clipped / length;
}
int Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) {
vad_.ProcessChunk(audio, length, sample_rate_hz);
const std::vector<double>& rms = vad_.chunkwise_rms();
const std::vector<double>& probabilities =
vad_.chunkwise_voice_probabilities();
RTC_DCHECK_EQ(rms.size(), probabilities.size());
for (size_t i = 0; i < rms.size(); ++i) {
histogram_->Update(rms[i], probabilities[i]);
}
return 0;
}
bool Agc::GetRmsErrorDb(int* error) {
if (!error) {
assert(false);
return false;
}
if (histogram_->num_updates() < kNumAnalysisFrames) {
// We haven't yet received enough frames.
return false;
}
if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
// We are likely in an inactive segment.
return false;
}
double loudness = Linear2Loudness(histogram_->CurrentRms());
*error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
histogram_->Reset();
return true;
}
void Agc::Reset() {
histogram_->Reset();
}
int Agc::set_target_level_dbfs(int level) {
// TODO(turajs): just some arbitrary sanity check. We can come up with better
// limits. The upper limit should be chosen such that the risk of clipping is
// low. The lower limit should not result in a too quiet signal.
if (level >= 0 || level <= -100)
return -1;
target_level_dbfs_ = level;
target_level_loudness_ = Dbfs2Loudness(level);
return 0;
}
} // namespace webrtc


@ -1,34 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'agc',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/gain_control.h',
'analog_agc.c',
'analog_agc.h',
'digital_agc.c',
'digital_agc.h',
],
},
],
}


@ -0,0 +1,58 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class AudioFrame;
class Histogram;
class Agc {
public:
Agc();
virtual ~Agc();
// Returns the proportion of samples in the buffer which are at full-scale
// (and presumably clipped).
virtual float AnalyzePreproc(const int16_t* audio, size_t length);
// |audio| must be mono; in a multi-channel stream, provide the first (usually
// left) channel.
virtual int Process(const int16_t* audio, size_t length, int sample_rate_hz);
// Retrieves the difference between the target RMS level and the current
// signal RMS level in dB. Returns true if an update is available and false
// otherwise, in which case |error| should be ignored and no action taken.
virtual bool GetRmsErrorDb(int* error);
virtual void Reset();
virtual int set_target_level_dbfs(int level);
virtual int target_level_dbfs() const { return target_level_dbfs_; }
virtual float voice_probability() const {
return vad_.last_voice_probability();
}
private:
double target_level_loudness_;
int target_level_dbfs_;
rtc::scoped_ptr<Histogram> histogram_;
rtc::scoped_ptr<Histogram> inactive_histogram_;
VoiceActivityDetector vad_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
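
Note: a minimal usage sketch of the Agc class declared above, assuming 10 ms mono frames at 16 kHz (the frame source and gain stage below are hypothetical placeholders, not WebRTC APIs):

#include <cstdint>
#include "webrtc/modules/audio_processing/agc/agc.h"

// Placeholder for the caller's audio source; returns false when out of data.
bool GetNextFrame(int16_t* frame, size_t length);
// Placeholder for the caller's gain stage.
void ApplyGainDb(int gain_db);

void RunAgcLoop() {
  webrtc::Agc agc;
  agc.set_target_level_dbfs(-3);  // Valid range is (-100, 0).
  const size_t kFrameLen = 160;   // 10 ms at 16 kHz.
  int16_t frame[kFrameLen];
  while (GetNextFrame(frame, kFrameLen)) {
    agc.Process(frame, kFrameLen, 16000);
    int error_db = 0;
    if (agc.GetRmsErrorDb(&error_db)) {
      // A positive error means the signal is below target; apply more gain.
      ApplyGainDb(error_db);
    }
  }
}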

View File

@ -0,0 +1,442 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
#include <cassert>
#include <cmath>
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <cstdio>
#endif
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
namespace {
// Lowest level to which the microphone can be lowered due to clipping.
const int kClippedLevelMin = 170;
// Amount the microphone level is lowered with every clipping event.
const int kClippedLevelStep = 15;
// Proportion of clipped samples required to declare a clipping event.
const float kClippedRatioThreshold = 0.1f;
// Time in frames to wait after a clipping event before checking again.
const int kClippedWaitFrames = 300;
// Amount of error we tolerate in the microphone level (presumably due to OS
// quantization) before we assume the user has manually adjusted the microphone.
const int kLevelQuantizationSlack = 25;
const int kDefaultCompressionGain = 7;
const int kMaxCompressionGain = 12;
const int kMinCompressionGain = 2;
// Controls the rate of compression changes towards the target.
const float kCompressionGainStep = 0.05f;
const int kMaxMicLevel = 255;
static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
const int kMinMicLevel = 12;
// Prevent very large microphone level changes.
const int kMaxResidualGainChange = 15;
// Maximum additional gain allowed to compensate for microphone level
// restrictions from clipping events.
const int kSurplusCompressionGain = 6;
int ClampLevel(int mic_level) {
return std::min(std::max(kMinMicLevel, mic_level), kMaxMicLevel);
}
int LevelFromGainError(int gain_error, int level) {
assert(level >= 0 && level <= kMaxMicLevel);
if (gain_error == 0) {
return level;
}
// TODO(ajm): Could be made more efficient with a binary search.
int new_level = level;
if (gain_error > 0) {
while (kGainMap[new_level] - kGainMap[level] < gain_error &&
new_level < kMaxMicLevel) {
++new_level;
}
} else {
while (kGainMap[new_level] - kGainMap[level] > gain_error &&
new_level > kMinMicLevel) {
--new_level;
}
}
return new_level;
}
} // namespace
// Facility for dumping debug audio files. All methods are no-ops in the
// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
class DebugFile {
#ifdef WEBRTC_AGC_DEBUG_DUMP
public:
explicit DebugFile(const char* filename)
: file_(fopen(filename, "wb")) {
assert(file_);
}
~DebugFile() {
fclose(file_);
}
void Write(const int16_t* data, size_t length_samples) {
fwrite(data, 1, length_samples * sizeof(int16_t), file_);
}
private:
FILE* file_;
#else
public:
explicit DebugFile(const char* filename) {
}
~DebugFile() {
}
void Write(const int16_t* data, size_t length_samples) {
}
#endif // WEBRTC_AGC_DEBUG_DUMP
};
AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level)
: agc_(new Agc()),
gctrl_(gctrl),
volume_callbacks_(volume_callbacks),
frames_since_clipped_(kClippedWaitFrames),
level_(0),
max_level_(kMaxMicLevel),
max_compression_gain_(kMaxCompressionGain),
target_compression_(kDefaultCompressionGain),
compression_(target_compression_),
compression_accumulator_(compression_),
capture_muted_(false),
check_volume_on_next_process_(true), // Check at startup.
startup_(true),
startup_min_level_(ClampLevel(startup_min_level)),
file_preproc_(new DebugFile("agc_preproc.pcm")),
file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
AgcManagerDirect::AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level)
: agc_(agc),
gctrl_(gctrl),
volume_callbacks_(volume_callbacks),
frames_since_clipped_(kClippedWaitFrames),
level_(0),
max_level_(kMaxMicLevel),
max_compression_gain_(kMaxCompressionGain),
target_compression_(kDefaultCompressionGain),
compression_(target_compression_),
compression_accumulator_(compression_),
capture_muted_(false),
check_volume_on_next_process_(true), // Check at startup.
startup_(true),
startup_min_level_(ClampLevel(startup_min_level)),
file_preproc_(new DebugFile("agc_preproc.pcm")),
file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
AgcManagerDirect::~AgcManagerDirect() {}
int AgcManagerDirect::Initialize() {
max_level_ = kMaxMicLevel;
max_compression_gain_ = kMaxCompressionGain;
target_compression_ = kDefaultCompressionGain;
compression_ = target_compression_;
compression_accumulator_ = compression_;
capture_muted_ = false;
check_volume_on_next_process_ = true;
// TODO(bjornv): Investigate if we need to reset |startup_| as well. For
// example, what happens when we change devices.
if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) {
LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital);
return -1;
}
if (gctrl_->set_target_level_dbfs(2) != 0) {
LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2);
return -1;
}
if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) {
LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain);
return -1;
}
if (gctrl_->enable_limiter(true) != 0) {
LOG_FERR1(LS_ERROR, enable_limiter, true);
return -1;
}
return 0;
}
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
int num_channels,
size_t samples_per_channel) {
size_t length = num_channels * samples_per_channel;
if (capture_muted_) {
return;
}
file_preproc_->Write(audio, length);
if (frames_since_clipped_ < kClippedWaitFrames) {
++frames_since_clipped_;
return;
}
// Check for clipped samples, as the AGC has difficulty detecting pitch
// under clipping distortion. We do this in the preprocessing phase in order
// to catch clipped echo as well.
//
// If we find a sufficiently clipped frame, drop the current microphone level
// and enforce a new maximum level, dropped by the same amount from the current
// maximum. This harsh treatment is an effort to avoid repeated clipped echo
// events. As compensation for this restriction, the maximum compression
// gain is increased, through SetMaxLevel().
float clipped_ratio = agc_->AnalyzePreproc(audio, length);
if (clipped_ratio > kClippedRatioThreshold) {
LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
<< clipped_ratio;
// Always decrease the maximum level, even if the current level is below
// threshold.
SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep));
if (level_ > kClippedLevelMin) {
// Don't try to adjust the level if we're already below the limit. As
// a consequence, if the user has brought the level above the limit, we
// will still not react until the postproc updates the level.
SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep));
// Reset the AGC since the level has changed.
agc_->Reset();
}
frames_since_clipped_ = 0;
}
}
void AgcManagerDirect::Process(const int16_t* audio,
size_t length,
int sample_rate_hz) {
if (capture_muted_) {
return;
}
if (check_volume_on_next_process_) {
check_volume_on_next_process_ = false;
// We have to wait until the first process call to check the volume,
// because Chromium doesn't guarantee it to be valid any earlier.
CheckVolumeAndReset();
}
if (agc_->Process(audio, length, sample_rate_hz) != 0) {
LOG_FERR0(LS_ERROR, Agc::Process);
assert(false);
}
UpdateGain();
UpdateCompressor();
file_postproc_->Write(audio, length);
}
void AgcManagerDirect::SetLevel(int new_level) {
int voe_level = volume_callbacks_->GetMicVolume();
if (voe_level < 0) {
return;
}
if (voe_level == 0) {
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
return;
}
if (voe_level > kMaxMicLevel) {
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level;
return;
}
if (voe_level > level_ + kLevelQuantizationSlack ||
voe_level < level_ - kLevelQuantizationSlack) {
LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
<< "stored level from " << level_ << " to " << voe_level;
level_ = voe_level;
// Always allow the user to increase the volume.
if (level_ > max_level_) {
SetMaxLevel(level_);
}
// Take no action in this case, since we can't be sure when the volume
// was manually adjusted. The compressor will still provide some of the
// desired gain change.
agc_->Reset();
return;
}
new_level = std::min(new_level, max_level_);
if (new_level == level_) {
return;
}
volume_callbacks_->SetMicVolume(new_level);
LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", "
<< "level_=" << level_ << ", "
<< "new_level=" << new_level;
level_ = new_level;
}
void AgcManagerDirect::SetMaxLevel(int level) {
assert(level >= kClippedLevelMin);
max_level_ = level;
// Scale the |kSurplusCompressionGain| linearly across the restricted
// level range.
max_compression_gain_ = kMaxCompressionGain + std::floor(
(1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin) *
kSurplusCompressionGain + 0.5f);
LOG(LS_INFO) << "[agc] max_level_=" << max_level_
<< ", max_compression_gain_=" << max_compression_gain_;
}
void AgcManagerDirect::SetCaptureMuted(bool muted) {
if (capture_muted_ == muted) {
return;
}
capture_muted_ = muted;
if (!muted) {
// When we unmute, we should reset things to be safe.
check_volume_on_next_process_ = true;
}
}
float AgcManagerDirect::voice_probability() {
return agc_->voice_probability();
}
int AgcManagerDirect::CheckVolumeAndReset() {
int level = volume_callbacks_->GetMicVolume();
if (level < 0) {
return -1;
}
// Reasons for taking action at startup:
// 1) A person starting a call is expected to be heard.
// 2) Independent of the interpretation of |level| == 0, we should raise it so
//    the AGC can do its job properly.
if (level == 0 && !startup_) {
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
return 0;
}
if (level > kMaxMicLevel) {
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level;
return -1;
}
LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
int minLevel = startup_ ? startup_min_level_ : kMinMicLevel;
if (level < minLevel) {
level = minLevel;
LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
volume_callbacks_->SetMicVolume(level);
}
agc_->Reset();
level_ = level;
startup_ = false;
return 0;
}
// Requests the RMS error from AGC and distributes the required gain change
// between the digital compression stage and volume slider. We use the
// compressor first, providing a slack region around the current slider
// position to reduce movement.
//
// If the slider needs to be moved, we check first if the user has adjusted
// it, in which case we take no action and cache the updated level.
void AgcManagerDirect::UpdateGain() {
int rms_error = 0;
if (!agc_->GetRmsErrorDb(&rms_error)) {
// No error update ready.
return;
}
// The compressor will always add at least kMinCompressionGain. In effect,
// this adjusts our target gain upward by the same amount and rms_error
// needs to reflect that.
rms_error += kMinCompressionGain;
// Handle as much error as possible with the compressor first.
int raw_compression = std::max(std::min(rms_error, max_compression_gain_),
kMinCompressionGain);
// Deemphasize the compression gain error. Move halfway between the current
// target and the newly received target. This serves to soften perceptible
// intra-talkspurt adjustments, at the cost of some adaptation speed.
if ((raw_compression == max_compression_gain_ &&
target_compression_ == max_compression_gain_ - 1) ||
(raw_compression == kMinCompressionGain &&
target_compression_ == kMinCompressionGain + 1)) {
// Special case to allow the target to reach the endpoints of the
// compression range. The deemphasis would otherwise halt it at 1 dB shy.
target_compression_ = raw_compression;
} else {
target_compression_ = (raw_compression - target_compression_) / 2
+ target_compression_;
}
// Residual error will be handled by adjusting the volume slider. Use the
// raw rather than deemphasized compression here as we would otherwise
// shrink the amount of slack the compressor provides.
int residual_gain = rms_error - raw_compression;
residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange),
kMaxResidualGainChange);
LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", "
<< "target_compression=" << target_compression_ << ", "
<< "residual_gain=" << residual_gain;
if (residual_gain == 0)
return;
SetLevel(LevelFromGainError(residual_gain, level_));
}
void AgcManagerDirect::UpdateCompressor() {
if (compression_ == target_compression_) {
return;
}
// Adapt the compression gain slowly towards the target, in order to avoid
// highly perceptible changes.
if (target_compression_ > compression_) {
compression_accumulator_ += kCompressionGainStep;
} else {
compression_accumulator_ -= kCompressionGainStep;
}
// The compressor accepts integer gains in dB. Adjust the gain when
// we've come within half a stepsize of the nearest integer. (We don't
// check for equality due to potential floating point imprecision).
int new_compression = compression_;
int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
if (std::fabs(compression_accumulator_ - nearest_neighbor) <
kCompressionGainStep / 2) {
new_compression = nearest_neighbor;
}
// Set the new compression gain.
if (new_compression != compression_) {
compression_ = new_compression;
compression_accumulator_ = new_compression;
if (gctrl_->set_compression_gain_db(compression_) != 0) {
LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_);
}
}
}
} // namespace webrtc
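
Note: as a sanity check on the UpdateCompressor() pacing above, a standalone sketch (illustrative, not upstream code) that mirrors the accumulator and counts how many calls a 1 dB compression change takes; at one call per 10 ms frame this works out to roughly 200 ms:

#include <cmath>
#include <cstdio>

int main() {
  const float kCompressionGainStep = 0.05f;  // Matches the constant above.
  int compression = 7;                       // kDefaultCompressionGain.
  const int target = 8;
  float accumulator = compression;
  int calls = 0;
  while (compression != target && calls < 1000) {
    accumulator += kCompressionGainStep;
    const int nearest = static_cast<int>(std::floor(accumulator + 0.5f));
    // Commit only within half a step of the nearest integer, as above.
    if (std::fabs(accumulator - nearest) < kCompressionGainStep / 2) {
      compression = nearest;
      accumulator = nearest;
    }
    ++calls;
  }
  printf("1 dB compression change committed after %d calls\n", calls);
  return 0;
}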

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/agc/agc.h"
namespace webrtc {
class AudioFrame;
class DebugFile;
class GainControl;
// Callbacks that need to be injected into AgcManagerDirect to read and control
// the volume values. This is done to remove the VoiceEngine dependency in
// AgcManagerDirect.
// TODO(aluebs): Remove VolumeCallbacks.
class VolumeCallbacks {
public:
virtual ~VolumeCallbacks() {}
virtual void SetMicVolume(int volume) = 0;
virtual int GetMicVolume() = 0;
};
// Direct interface to use AGC to set volume and compression values.
// AudioProcessing uses this interface directly to integrate the callback-less
// AGC.
//
// This class is not thread-safe.
class AgcManagerDirect final {
public:
// AgcManagerDirect will configure GainControl internally. The user is
// responsible for processing the audio using it after the call to Process.
// The operating range of startup_min_level is [12, 255] and any input value
// outside that range will be clamped.
AgcManagerDirect(GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level);
// Dependency injection for testing. Don't delete |agc| as the memory is owned
// by the manager.
AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level);
~AgcManagerDirect();
int Initialize();
void AnalyzePreProcess(int16_t* audio,
int num_channels,
size_t samples_per_channel);
void Process(const int16_t* audio, size_t length, int sample_rate_hz);
// Call when the capture stream has been muted/unmuted. This causes the
// manager to disregard all incoming audio; chances are good it's background
// noise to which we'd like to avoid adapting.
void SetCaptureMuted(bool muted);
bool capture_muted() { return capture_muted_; }
float voice_probability();
private:
// Sets a new microphone level, after first checking that it hasn't been
// updated by the user, in which case no action is taken.
void SetLevel(int new_level);
// Set the maximum level the AGC is allowed to apply. Also updates the
// maximum compression gain to compensate. The level must be at least
// |kClippedLevelMin|.
void SetMaxLevel(int level);
int CheckVolumeAndReset();
void UpdateGain();
void UpdateCompressor();
rtc::scoped_ptr<Agc> agc_;
GainControl* gctrl_;
VolumeCallbacks* volume_callbacks_;
int frames_since_clipped_;
int level_;
int max_level_;
int max_compression_gain_;
int target_compression_;
int compression_;
float compression_accumulator_;
bool capture_muted_;
bool check_volume_on_next_process_;
bool startup_;
int startup_min_level_;
rtc::scoped_ptr<DebugFile> file_preproc_;
rtc::scoped_ptr<DebugFile> file_postproc_;
RTC_DISALLOW_COPY_AND_ASSIGN(AgcManagerDirect);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
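
Note: a minimal sketch of wiring AgcManagerDirect up through VolumeCallbacks, assuming hypothetical platform mixer hooks (GetSystemMicVolume/SetSystemMicVolume are placeholders, and |gain_control| would come from the surrounding AudioProcessing instance):

#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"

// Hypothetical platform mixer hooks; not WebRTC APIs.
int GetSystemMicVolume();
void SetSystemMicVolume(int volume);

class PlatformVolumeCallbacks : public webrtc::VolumeCallbacks {
 public:
  void SetMicVolume(int volume) override { SetSystemMicVolume(volume); }
  int GetMicVolume() override { return GetSystemMicVolume(); }
};

void SetUpAgc(webrtc::GainControl* gain_control) {
  static PlatformVolumeCallbacks callbacks;
  // A startup_min_level outside [12, 255] is clamped, per the header comment.
  webrtc::AgcManagerDirect manager(gain_control, &callbacks, 50);
  if (manager.Initialize() != 0) {
    // Gain control configuration failed; bail out.
    return;
  }
  // Per frame: AnalyzePreProcess() before the audio is processed through
  // GainControl, then Process() on the processed audio.
}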

View File

@ -1,133 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
#include "typedefs.h"
#include "gain_control.h"
#include "digital_agc.h"
//#define AGC_DEBUG
//#define MIC_LEVEL_FEEDBACK
#ifdef AGC_DEBUG
#include <stdio.h>
#endif
/* Analog Automatic Gain Control variables:
* Constant declarations (inner limits inside which no changes are done)
* In the beginning the range is narrower to widen as soon as the measure
* 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
* and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
* go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
* The limits are created by running the AGC with a file having the desired
* signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
* by out=10*log10(in/260537279.7); Set the target level to the average level
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
*/
#define RXX_BUFFER_LEN 10
static const WebRtc_Word16 kMsecSpeechInner = 520;
static const WebRtc_Word16 kMsecSpeechOuter = 340;
static const WebRtc_Word16 kNormalVadThreshold = 400;
static const WebRtc_Word16 kAlphaShortTerm = 6; // 1 >> 6 = 0.0156
static const WebRtc_Word16 kAlphaLongTerm = 10; // 1 >> 10 = 0.000977
typedef struct
{
// Configurable parameters/variables
WebRtc_UWord32 fs; // Sampling frequency
WebRtc_Word16 compressionGaindB; // Fixed gain level in dB
WebRtc_Word16 targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
WebRtc_Word16 agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
WebRtc_UWord8 limiterEnable; // Enabling limiter (on/off (default off))
WebRtcAgc_config_t defaultConfig;
WebRtcAgc_config_t usedConfig;
// General variables
WebRtc_Word16 initFlag;
WebRtc_Word16 lastError;
// Target level parameters
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
WebRtc_Word32 analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs
WebRtc_Word32 startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs
WebRtc_Word32 startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs
WebRtc_Word32 upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs
WebRtc_Word32 lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs
WebRtc_Word32 upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs
WebRtc_Word32 lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs
WebRtc_UWord16 targetIdx; // Table index for corresponding target level
#ifdef MIC_LEVEL_FEEDBACK
WebRtc_UWord16 targetIdxOffset; // Table index offset for level compensation
#endif
WebRtc_Word16 analogTarget; // Digital reference level in ENV scale
// Analog AGC specific variables
WebRtc_Word32 filterState[8]; // For downsampling wb to nb
WebRtc_Word32 upperLimit; // Upper limit for mic energy
WebRtc_Word32 lowerLimit; // Lower limit for mic energy
WebRtc_Word32 Rxx160w32; // Average energy for one frame
WebRtc_Word32 Rxx16_LPw32; // Low pass filtered subframe energies
WebRtc_Word32 Rxx160_LPw32; // Low pass filtered frame energies
WebRtc_Word32 Rxx16_LPw32Max; // Keeps track of largest energy subframe
WebRtc_Word32 Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies
WebRtc_Word32 Rxx16w32_array[2][5];// Energy values of microphone signal
WebRtc_Word32 env[2][10]; // Envelope values of subframes
WebRtc_Word16 Rxx16pos; // Current position in the Rxx16_vectorw32
WebRtc_Word16 envSum; // Filtered scaled envelope in subframes
WebRtc_Word16 vadThreshold; // Threshold for VAD decision
WebRtc_Word16 inActive; // Inactive time in milliseconds
WebRtc_Word16 msTooLow; // Milliseconds of speech at a too low level
WebRtc_Word16 msTooHigh; // Milliseconds of speech at a too high level
WebRtc_Word16 changeToSlowMode; // Change to slow mode after some time at target
WebRtc_Word16 firstCall; // First call to the process-function
WebRtc_Word16 msZero; // Milliseconds of zero input
WebRtc_Word16 msecSpeechOuterChange;// Min ms of speech between volume changes
WebRtc_Word16 msecSpeechInnerChange;// Min ms of speech between volume changes
WebRtc_Word16 activeSpeech; // Milliseconds of active speech
WebRtc_Word16 muteGuardMs; // Counter to prevent mute action
WebRtc_Word16 inQueue; // 10 ms batch indicator
// Microphone level variables
WebRtc_Word32 micRef; // Remember ref. mic level for virtual mic
WebRtc_UWord16 gainTableIdx; // Current position in virtual gain table
WebRtc_Word32 micGainIdx; // Gain index of mic level to increase slowly
WebRtc_Word32 micVol; // Remember volume between frames
WebRtc_Word32 maxLevel; // Max possible vol level, incl dig gain
WebRtc_Word32 maxAnalog; // Maximum possible analog volume level
WebRtc_Word32 maxInit; // Initial value of "max"
WebRtc_Word32 minLevel; // Minimum possible volume level
WebRtc_Word32 minOutput; // Minimum output volume level
WebRtc_Word32 zeroCtrlMax; // Remember max gain => don't amp low input
WebRtc_Word16 scale; // Scale factor for internal volume levels
#ifdef MIC_LEVEL_FEEDBACK
WebRtc_Word16 numBlocksMicLvlSat;
WebRtc_UWord8 micLvlSat;
#endif
// Structs for VAD and digital_agc
AgcVad_t vadMic;
DigitalAgc_t digitalAgc;
#ifdef AGC_DEBUG
FILE* fpt;
FILE* agcLog;
WebRtc_Word32 fcount;
#endif
WebRtc_Word16 lowLevelSignal;
} Agc_t;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_

View File

@ -1,76 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_
#ifdef AGC_DEBUG
#include <stdio.h>
#endif
#include "typedefs.h"
#include "signal_processing_library.h"
// the 32 most significant bits of A(19) * B(26) >> 13
#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))
// C + the 32 most significant bits of A * B
#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 ))
typedef struct
{
WebRtc_Word32 downState[8];
WebRtc_Word16 HPstate;
WebRtc_Word16 counter;
WebRtc_Word16 logRatio; // log( P(active) / P(inactive) ) (Q10)
WebRtc_Word16 meanLongTerm; // Q10
WebRtc_Word32 varianceLongTerm; // Q8
WebRtc_Word16 stdLongTerm; // Q10
WebRtc_Word16 meanShortTerm; // Q10
WebRtc_Word32 varianceShortTerm; // Q8
WebRtc_Word16 stdShortTerm; // Q10
} AgcVad_t; // total = 54 bytes
typedef struct
{
WebRtc_Word32 capacitorSlow;
WebRtc_Word32 capacitorFast;
WebRtc_Word32 gain;
WebRtc_Word32 gainTable[32];
WebRtc_Word16 gatePrevious;
WebRtc_Word16 agcMode;
AgcVad_t vadNearend;
AgcVad_t vadFarend;
#ifdef AGC_DEBUG
FILE* logFile;
int frameCounter;
#endif
} DigitalAgc_t;
WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, WebRtc_Word16 agcMode);
WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inNear,
const WebRtc_Word16 *inNear_H, WebRtc_Word16 *out,
WebRtc_Word16 *out_H, WebRtc_UWord32 FS,
WebRtc_Word16 lowLevelSignal);
WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inFar,
WebRtc_Word16 nrSamples);
void WebRtcAgc_InitVad(AgcVad_t *vadInst);
WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *vadInst, // (i) VAD state
const WebRtc_Word16 *in, // (i) Speech signal
WebRtc_Word16 nrSamples); // (i) number of samples
WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
WebRtc_Word16 compressionGaindB, // Q0 (in dB)
WebRtc_Word16 targetLevelDbfs,// Q0 (in dB)
WebRtc_UWord8 limiterEnable, WebRtc_Word16 analogTarget);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_

View File

@ -0,0 +1,275 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
static const int kGainMapSize = 256;
// Uses parameters: si = 2, sf = 0.25, D = 8/256
static const int kGainMap[kGainMapSize] = {
-56,
-54,
-52,
-50,
-48,
-47,
-45,
-43,
-42,
-40,
-38,
-37,
-35,
-34,
-33,
-31,
-30,
-29,
-27,
-26,
-25,
-24,
-23,
-22,
-20,
-19,
-18,
-17,
-16,
-15,
-14,
-14,
-13,
-12,
-11,
-10,
-9,
-8,
-8,
-7,
-6,
-5,
-5,
-4,
-3,
-2,
-2,
-1,
0,
0,
1,
1,
2,
3,
3,
4,
4,
5,
5,
6,
6,
7,
7,
8,
8,
9,
9,
10,
10,
11,
11,
12,
12,
13,
13,
13,
14,
14,
15,
15,
15,
16,
16,
17,
17,
17,
18,
18,
18,
19,
19,
19,
20,
20,
21,
21,
21,
22,
22,
22,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
26,
26,
26,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33,
34,
34,
34,
35,
35,
35,
35,
36,
36,
36,
36,
37,
37,
37,
38,
38,
38,
38,
39,
39,
39,
39,
40,
40,
40,
40,
41,
41,
41,
41,
42,
42,
42,
42,
43,
43,
43,
44,
44,
44,
44,
45,
45,
45,
45,
46,
46,
46,
46,
47,
47,
47,
47,
48,
48,
48,
48,
49,
49,
49,
49,
50,
50,
50,
50,
51,
51,
51,
51,
52,
52,
52,
52,
53,
53,
53,
53,
54,
54,
54,
54,
55,
55,
55,
55,
56,
56,
56,
56,
57,
57,
57,
57,
58,
58,
58,
58,
59,
59,
59,
59,
60,
60,
60,
60,
61,
61,
61,
61,
62,
62,
62,
62,
63,
63,
63,
63,
64
};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
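
Note: the table maps a mic level in [0, 255] to a cumulative analog gain in dB, so moving the slider from level a to level b changes the gain by roughly kGainMap[b] - kGainMap[a]; this is the relation that LevelFromGainError() in agc_manager_direct.cc inverts. A standalone sketch (illustrative only):

#include <cstdio>
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"

int main() {
  const int from = 128;
  const int to = 180;
  // Per the table, this slider move adds kGainMap[180] - kGainMap[128] dB.
  printf("gain change: %d dB\n", kGainMap[to] - kGainMap[from]);
  return 0;
}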

View File

@ -0,0 +1,228 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include <cmath>
#include <cstring>
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
static const double kHistBinCenters[] = {
7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
3.00339145144454e+04, 3.56647189489147e+04};
static const double kProbQDomain = 1024.0;
// Loudness of -15 dB (smallest expected loudness) in log domain,
// loudness_db = 13.5 * log10(rms);
static const double kLogDomainMinBinCenter = -2.57752062648587;
// Loudness step of 1 dB in log domain
static const double kLogDomainStepSizeInverse = 5.81954605750359;
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
static const int kLowProbThresholdQ10 = static_cast<int>(
kLowProbabilityThreshold * kProbQDomain);
Histogram::Histogram()
: num_updates_(0),
audio_content_q10_(0),
bin_count_q10_(),
activity_probability_(),
hist_bin_index_(),
buffer_index_(0),
buffer_is_full_(false),
len_circular_buffer_(0),
len_high_activity_(0) {
static_assert(
kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]),
"histogram bin centers incorrect size");
}
Histogram::Histogram(int window_size)
: num_updates_(0),
audio_content_q10_(0),
bin_count_q10_(),
activity_probability_(new int[window_size]),
hist_bin_index_(new int[window_size]),
buffer_index_(0),
buffer_is_full_(false),
len_circular_buffer_(window_size),
len_high_activity_(0) {}
Histogram::~Histogram() {}
void Histogram::Update(double rms, double activity_probability) {
// If circular histogram is activated then remove the oldest entry.
if (len_circular_buffer_ > 0)
RemoveOldestEntryAndUpdate();
// Find the corresponding bin.
int hist_index = GetBinIndex(rms);
// To Q10 domain.
int prob_q10 = static_cast<int16_t>(floor(activity_probability *
kProbQDomain));
InsertNewestEntryAndUpdate(prob_q10, hist_index);
}
// Does nothing if the buffer is not yet full.
void Histogram::RemoveOldestEntryAndUpdate() {
assert(len_circular_buffer_ > 0);
// Do nothing if circular buffer is not full.
if (!buffer_is_full_)
return;
int oldest_prob = activity_probability_[buffer_index_];
int oldest_hist_index = hist_bin_index_[buffer_index_];
UpdateHist(-oldest_prob, oldest_hist_index);
}
void Histogram::RemoveTransient() {
// Don't expect to be here if high-activity region is longer than
// |kTransientWidthThreshold| or there has not been any transient.
assert(len_high_activity_ <= kTransientWidthThreshold);
int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
len_circular_buffer_ - 1;
while (len_high_activity_ > 0) {
UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
activity_probability_[index] = 0;
index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
len_high_activity_--;
}
}
void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
int hist_index) {
// Update the circular buffer if it is enabled.
if (len_circular_buffer_ > 0) {
// Removing transient.
if (activity_prob_q10 <= kLowProbThresholdQ10) {
// Lower than threshold probability, set it to zero.
activity_prob_q10 = 0;
// Check if this has been a transient.
if (len_high_activity_ <= kTransientWidthThreshold)
RemoveTransient(); // Remove this transient.
len_high_activity_ = 0;
} else if (len_high_activity_ <= kTransientWidthThreshold) {
len_high_activity_++;
}
// Updating the circular buffer.
activity_probability_[buffer_index_] = activity_prob_q10;
hist_bin_index_[buffer_index_] = hist_index;
// Increment the buffer index and check for wrap-around.
buffer_index_++;
if (buffer_index_ >= len_circular_buffer_) {
buffer_index_ = 0;
buffer_is_full_ = true;
}
}
num_updates_++;
// Guard against |num_updates_| wrapping negative on overflow.
if (num_updates_ < 0)
num_updates_--;
UpdateHist(activity_prob_q10, hist_index);
}
void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
bin_count_q10_[hist_index] += activity_prob_q10;
audio_content_q10_ += activity_prob_q10;
}
double Histogram::AudioContent() const {
return audio_content_q10_ / kProbQDomain;
}
Histogram* Histogram::Create() {
return new Histogram;
}
Histogram* Histogram::Create(int window_size) {
if (window_size < 0)
return NULL;
return new Histogram(window_size);
}
void Histogram::Reset() {
// Reset the histogram, audio-content and number of updates.
memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
audio_content_q10_ = 0;
num_updates_ = 0;
// Empty the circular buffer.
buffer_index_ = 0;
buffer_is_full_ = false;
len_high_activity_ = 0;
}
int Histogram::GetBinIndex(double rms) {
// First exclude overload cases.
if (rms <= kHistBinCenters[0]) {
return 0;
} else if (rms >= kHistBinCenters[kHistSize - 1]) {
return kHistSize - 1;
} else {
// The quantizer is uniform in log domain. Alternatively we could do binary
// search in linear domain.
double rms_log = log(rms);
int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
kLogDomainStepSizeInverse));
// The final decision is in linear domain.
double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
if (rms > b) {
return index + 1;
}
return index;
}
}
double Histogram::CurrentRms() const {
double p;
double mean_val = 0;
if (audio_content_q10_ > 0) {
double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
for (int n = 0; n < kHistSize; n++) {
p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
mean_val += p * kHistBinCenters[n];
}
} else {
mean_val = kHistBinCenters[0];
}
return mean_val;
}
} // namespace webrtc
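
Note: the two log-domain constants above follow from the bin centers themselves: kLogDomainMinBinCenter is ln(kHistBinCenters[0]) and kLogDomainStepSizeInverse is 1 / ln(kHistBinCenters[1] / kHistBinCenters[0]), since the bins are uniformly spaced in the log domain. A quick standalone check (illustrative only):

#include <cmath>
#include <cstdio>

int main() {
  const double c0 = 7.59621091765857e-02;  // kHistBinCenters[0]
  const double c1 = 9.02036021061016e-02;  // kHistBinCenters[1]
  // Expect values close to -2.57752062648587 and 5.81954605750359.
  printf("log of first bin center: %.14f\n", std::log(c0));
  printf("inverse log step size:   %.14f\n", 1.0 / std::log(c1 / c0));
  return 0;
}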

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#include <string.h>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class implements a histogram of loudness using circular buffers, so
// that the histogram tracks the loudness over the last T seconds.
class Histogram {
public:
// Create a non-sliding Histogram.
static Histogram* Create();
// Create a sliding Histogram, i.e. the histogram represents the last
// |window_size| samples.
static Histogram* Create(int window_size);
~Histogram();
// Insert RMS and the corresponding activity probability.
void Update(double rms, double activity_probability);
// Reset the histogram, forget the past.
void Reset();
// Current loudness, which is actually the mean of histogram in loudness
// domain.
double CurrentRms() const;
// Sum of the histogram content.
double AudioContent() const;
// Number of times the histogram has been updated.
int num_updates() const { return num_updates_; }
private:
Histogram();
explicit Histogram(int window);
// Find the histogram bin associated with the given |rms|.
int GetBinIndex(double rms);
void RemoveOldestEntryAndUpdate();
void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
void UpdateHist(int activity_prob_q10, int hist_index);
void RemoveTransient();
// Number of histogram bins.
static const int kHistSize = 77;
// Number of times the histogram is updated
int num_updates_;
// Audio content, this should be equal to the sum of the components of
// |bin_count_q10_|.
int64_t audio_content_q10_;
// Histogram of input RMS in Q10 with |kHistSize| bins. In each Update() we
// increment the associated histogram bin with the given probability. The
// increment is implemented in Q10 to avoid rounding errors.
int64_t bin_count_q10_[kHistSize];
// Circular buffer for probabilities
rtc::scoped_ptr<int[]> activity_probability_;
// Circular buffer for histogram-indices of probabilities.
rtc::scoped_ptr<int[]> hist_bin_index_;
// Current index of the circular buffer, where the newest data is written;
// when the buffer is full this also points at the oldest data.
int buffer_index_;
// Indicates whether the buffer is full and has wrapped around.
int buffer_is_full_;
// Size of circular buffer.
int len_circular_buffer_;
int len_high_activity_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
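
Note: a minimal usage sketch of the interface above (the 10 ms frame cadence is an assumption, not a requirement of the class):

#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/agc/histogram.h"

void TrackLoudness() {
  // Sliding histogram over the last 100 updates, e.g. ~1 s of 10 ms frames.
  rtc::scoped_ptr<webrtc::Histogram> hist(webrtc::Histogram::Create(100));
  // Per frame: feed the measured RMS and the VAD voice probability.
  hist->Update(/*rms=*/0.5, /*activity_probability=*/0.9);
  if (hist->num_updates() > 0) {
    const double rms = hist->CurrentRms();  // Activity-weighted mean RMS.
    (void)rms;
  }
}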

View File

@ -0,0 +1,133 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
//#define MIC_LEVEL_FEEDBACK
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
#include "webrtc/typedefs.h"
/* Analog Automatic Gain Control variables:
* Constant declarations (inner limits inside which no changes are done)
* In the beginning the range is narrower to widen as soon as the measure
* 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
* and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
* go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
* The limits are created by running the AGC with a file having the desired
* signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
* by out=10*log10(in/260537279.7); Set the target level to the average level
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
*/
#define RXX_BUFFER_LEN 10
static const int16_t kMsecSpeechInner = 520;
static const int16_t kMsecSpeechOuter = 340;
static const int16_t kNormalVadThreshold = 400;
static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156
static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977
typedef struct
{
// Configurable parameters/variables
uint32_t fs; // Sampling frequency
int16_t compressionGaindB; // Fixed gain level in dB
int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
uint8_t limiterEnable; // Enabling limiter (on/off (default off))
WebRtcAgcConfig defaultConfig;
WebRtcAgcConfig usedConfig;
// General variables
int16_t initFlag;
int16_t lastError;
// Target level parameters
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
int32_t analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs
int32_t startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs
int32_t startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs
int32_t upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs
int32_t lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs
int32_t upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs
int32_t lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs
uint16_t targetIdx; // Table index for corresponding target level
#ifdef MIC_LEVEL_FEEDBACK
uint16_t targetIdxOffset; // Table index offset for level compensation
#endif
int16_t analogTarget; // Digital reference level in ENV scale
// Analog AGC specific variables
int32_t filterState[8]; // For downsampling wb to nb
int32_t upperLimit; // Upper limit for mic energy
int32_t lowerLimit; // Lower limit for mic energy
int32_t Rxx160w32; // Average energy for one frame
int32_t Rxx16_LPw32; // Low pass filtered subframe energies
int32_t Rxx160_LPw32; // Low pass filtered frame energies
int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe
int32_t Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies
int32_t Rxx16w32_array[2][5];// Energy values of microphone signal
int32_t env[2][10]; // Envelope values of subframes
int16_t Rxx16pos; // Current position in the Rxx16_vectorw32
int16_t envSum; // Filtered scaled envelope in subframes
int16_t vadThreshold; // Threshold for VAD decision
int16_t inActive; // Inactive time in milliseconds
int16_t msTooLow; // Milliseconds of speech at a too low level
int16_t msTooHigh; // Milliseconds of speech at a too high level
int16_t changeToSlowMode; // Change to slow mode after some time at target
int16_t firstCall; // First call to the process-function
int16_t msZero; // Milliseconds of zero input
int16_t msecSpeechOuterChange;// Min ms of speech between volume changes
int16_t msecSpeechInnerChange;// Min ms of speech between volume changes
int16_t activeSpeech; // Milliseconds of active speech
int16_t muteGuardMs; // Counter to prevent mute action
int16_t inQueue; // 10 ms batch indicator
// Microphone level variables
int32_t micRef; // Remember ref. mic level for virtual mic
uint16_t gainTableIdx; // Current position in virtual gain table
int32_t micGainIdx; // Gain index of mic level to increase slowly
int32_t micVol; // Remember volume between frames
int32_t maxLevel; // Max possible vol level, incl dig gain
int32_t maxAnalog; // Maximum possible analog volume level
int32_t maxInit; // Initial value of "max"
int32_t minLevel; // Minimum possible volume level
int32_t minOutput; // Minimum output volume level
int32_t zeroCtrlMax; // Remember max gain => don't amp low input
int32_t lastInMicLevel;
int16_t scale; // Scale factor for internal volume levels
#ifdef MIC_LEVEL_FEEDBACK
int16_t numBlocksMicLvlSat;
uint8_t micLvlSat;
#endif
// Structs for VAD and digital_agc
AgcVad vadMic;
DigitalAgc digitalAgc;
#ifdef WEBRTC_AGC_DEBUG_DUMP
FILE* fpt;
FILE* agcLog;
int32_t fcount;
#endif
int16_t lowLevelSignal;
} LegacyAgc;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
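
Note: the commented derivation of analogTargetLevel above can be reproduced directly; a standalone sketch (illustrative only) of the -22 dBfs per-element value:

#include <cmath>
#include <cstdio>

int main() {
  // analogTargetLevel element = round((32767 * 10^(-22/20))^2 * 16 / 2^7);
  // the struct comment lists 846805 (scaled by RXX_BUFFER_LEN in use).
  const double env = 32767.0 * std::pow(10.0, -22.0 / 20.0);
  printf("%.0f\n", std::floor(env * env * 16.0 / 128.0 + 0.5));
  return 0;
}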

View File

@ -12,12 +12,15 @@
*
*/
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
#include <assert.h>
#include <string.h>
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "digital_agc.h"
#include "gain_control.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
// To generate the gaintable, copy&paste the following lines to a Matlab window:
// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
@ -33,7 +36,8 @@
// zoom on;
// Generator table for y=log2(1+e^x) in Q8.
static const WebRtc_UWord16 kGenFuncTable[128] = {
enum { kGenFuncTableSize = 128 };
static const uint16_t kGenFuncTable[kGenFuncTableSize] = {
256, 485, 786, 1126, 1484, 1849, 2217, 2586,
2955, 3324, 3693, 4063, 4432, 4801, 5171, 5540,
5909, 6279, 6648, 7017, 7387, 7756, 8125, 8495,
@ -52,29 +56,29 @@ static const WebRtc_UWord16 kGenFuncTable[128] = {
44320, 44689, 45058, 45428, 45797, 46166, 46536, 46905
};
static const WebRtc_Word16 kAvgDecayTime = 250; // frames; < 3000
static const int16_t kAvgDecayTime = 250; // frames; < 3000
WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
WebRtc_Word16 digCompGaindB, // Q0
WebRtc_Word16 targetLevelDbfs,// Q0
WebRtc_UWord8 limiterEnable,
WebRtc_Word16 analogTarget) // Q0
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t digCompGaindB, // Q0
int16_t targetLevelDbfs,// Q0
uint8_t limiterEnable,
int16_t analogTarget) // Q0
{
// This function generates the compressor gain table used in the fixed digital part.
WebRtc_UWord32 tmpU32no1, tmpU32no2, absInLevel, logApprox;
WebRtc_Word32 inLevel, limiterLvl;
WebRtc_Word32 tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
const WebRtc_UWord16 kLog10 = 54426; // log2(10) in Q14
const WebRtc_UWord16 kLog10_2 = 49321; // 10*log10(2) in Q14
const WebRtc_UWord16 kLogE_1 = 23637; // log2(e) in Q14
WebRtc_UWord16 constMaxGain;
WebRtc_UWord16 tmpU16, intPart, fracPart;
const WebRtc_Word16 kCompRatio = 3;
const WebRtc_Word16 kSoftLimiterLeft = 1;
WebRtc_Word16 limiterOffset = 0; // Limiter offset
WebRtc_Word16 limiterIdx, limiterLvlX;
WebRtc_Word16 constLinApprox, zeroGainLvl, maxGain, diffGain;
WebRtc_Word16 i, tmp16, tmp16no1;
uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox;
int32_t inLevel, limiterLvl;
int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
const uint16_t kLog10 = 54426; // log2(10) in Q14
const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14
const uint16_t kLogE_1 = 23637; // log2(e) in Q14
uint16_t constMaxGain;
uint16_t tmpU16, intPart, fracPart;
const int16_t kCompRatio = 3;
const int16_t kSoftLimiterLeft = 1;
int16_t limiterOffset = 0; // Limiter offset
int16_t limiterIdx, limiterLvlX;
int16_t constLinApprox, zeroGainLvl, maxGain, diffGain;
int16_t i, tmp16, tmp16no1;
int zeros, zerosScale;
// Constants
@ -83,11 +87,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// kLog10_2 = 49321; // 10*log10(2) in Q14
// Calculate maximum digital gain and zero gain level
tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB - analogTarget, kCompRatio - 1);
tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1);
tmp16no1 = analogTarget - targetLevelDbfs;
tmp16no1 += WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs));
tmp32no1 = WEBRTC_SPL_MUL_16_16(maxGain, kCompRatio);
tmp32no1 = maxGain * kCompRatio;
zeroGainLvl = digCompGaindB;
zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1),
kCompRatio - 1);
@ -100,10 +104,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// Calculate the difference between maximum gain and gain at 0dB0v:
// diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio
// = (compRatio-1)*digCompGaindB/compRatio
tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB, kCompRatio - 1);
tmp32no1 = digCompGaindB * (kCompRatio - 1);
diffGain = WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
if (diffGain < 0)
if (diffGain < 0 || diffGain >= kGenFuncTableSize)
{
assert(0);
return -1;
}
@ -111,9 +116,8 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// limiterLvlX = analogTarget - limiterOffset
// limiterLvl = targetLevelDbfs + limiterOffset/compRatio
limiterLvlX = analogTarget - limiterOffset;
limiterIdx = 2
+ WebRtcSpl_DivW32W16ResW16(WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)limiterLvlX, 13),
WEBRTC_SPL_RSHIFT_U16(kLog10_2, 1));
limiterIdx =
2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX << 13, kLog10_2 / 2);
tmp16no1 = WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio);
limiterLvl = targetLevelDbfs + tmp16no1;
@ -134,23 +138,23 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
{
// Calculate scaled input level (compressor):
// inLevel = fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio)
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(kCompRatio - 1, i - 1); // Q0
tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0
tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14
inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14
// Calculate diffGain-inLevel, to map using the genFuncTable
inLevel = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)diffGain, 14) - inLevel; // Q14
inLevel = ((int32_t)diffGain << 14) - inLevel; // Q14
// Make calculations on abs(inLevel) and compensate for the sign afterwards.
absInLevel = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(inLevel); // Q14
absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14
// LUT with interpolation
intPart = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(absInLevel, 14);
fracPart = (WebRtc_UWord16)(absInLevel & 0x00003FFF); // extract the fractional part
intPart = (uint16_t)(absInLevel >> 14);
fracPart = (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part
tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8
tmpU32no1 = WEBRTC_SPL_UMUL_16_16(tmpU16, fracPart); // Q22
tmpU32no1 += WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)kGenFuncTable[intPart], 14); // Q22
logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 8); // Q14
tmpU32no1 = tmpU16 * fracPart; // Q22
tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22
logApprox = tmpU32no1 >> 8; // Q14
// Compensate for negative exponent using the relation:
// log2(1 + 2^-x) = log2(1 + 2^x) - x
if (inLevel < 0)
@ -160,83 +164,89 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
if (zeros < 15)
{
// Not enough space for multiplication
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(absInLevel, 15 - zeros); // Q(zeros-1)
tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1)
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13)
if (zeros < 9)
{
tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 9 - zeros); // Q(zeros+13)
zerosScale = 9 - zeros;
tmpU32no1 >>= zerosScale; // Q(zeros+13)
} else
{
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, zeros - 9); // Q22
tmpU32no2 >>= zeros - 9; // Q22
}
} else
{
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); // Q22
tmpU32no2 >>= 6; // Q22
}
logApprox = 0;
if (tmpU32no2 < tmpU32no1)
{
logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1 - tmpU32no2, 8 - zerosScale); //Q14
logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); //Q14
}
}
numFIX = WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_U16(maxGain, constMaxGain), 6); // Q14
numFIX -= WEBRTC_SPL_MUL_32_16((WebRtc_Word32)logApprox, diffGain); // Q14
numFIX = (maxGain * constMaxGain) << 6; // Q14
numFIX -= (int32_t)logApprox * diffGain; // Q14
// Calculate ratio
// Shift numFIX as much as possible
zeros = WebRtcSpl_NormW32(numFIX);
numFIX = WEBRTC_SPL_LSHIFT_W32(numFIX, zeros); // Q(14+zeros)
// Shift |numFIX| as much as possible.
// Ensure we avoid wrap-around in |den| as well.
if (numFIX > (den >> 8)) // |den| is Q8.
{
zeros = WebRtcSpl_NormW32(numFIX);
} else
{
zeros = WebRtcSpl_NormW32(den) + 8;
}
numFIX <<= zeros; // Q(14+zeros)
// Shift den so we end up in Qy1
tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 8); // Q(zeros)
if (numFIX < 0)
{
numFIX -= WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1);
numFIX -= tmp32no1 / 2;
} else
{
numFIX += WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1);
numFIX += tmp32no1 / 2;
}
y32 = WEBRTC_SPL_DIV(numFIX, tmp32no1); // in Q14
y32 = numFIX / tmp32no1; // in Q14
if (limiterEnable && (i < limiterIdx))
{
tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14
tmp32 -= WEBRTC_SPL_LSHIFT_W32(limiterLvl, 14); // Q14
tmp32 -= limiterLvl << 14; // Q14
y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20);
}
if (y32 > 39000)
{
tmp32 = WEBRTC_SPL_MUL(y32 >> 1, kLog10) + 4096; // in Q27
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 13); // in Q14
tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27
tmp32 >>= 13; // In Q14.
} else
{
tmp32 = WEBRTC_SPL_MUL(y32, kLog10) + 8192; // in Q28
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 14); // in Q14
tmp32 = y32 * kLog10 + 8192; // in Q28
tmp32 >>= 14; // In Q14.
}
tmp32 += WEBRTC_SPL_LSHIFT_W32(16, 14); // in Q14 (Make sure final output is in Q16)
tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16)
// Calculate power
if (tmp32 > 0)
{
intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 14);
fracPart = (WebRtc_UWord16)(tmp32 & 0x00003FFF); // in Q14
if (WEBRTC_SPL_RSHIFT_W32(fracPart, 13))
intPart = (int16_t)(tmp32 >> 14);
fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14
if ((fracPart >> 13) != 0)
{
tmp16 = WEBRTC_SPL_LSHIFT_W16(2, 14) - constLinApprox;
tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - fracPart;
tmp32no2 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16);
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13);
tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - tmp32no2;
tmp16 = (2 << 14) - constLinApprox;
tmp32no2 = (1 << 14) - fracPart;
tmp32no2 *= tmp16;
tmp32no2 >>= 13;
tmp32no2 = (1 << 14) - tmp32no2;
} else
{
tmp16 = constLinApprox - WEBRTC_SPL_LSHIFT_W16(1, 14);
tmp32no2 = WEBRTC_SPL_MUL_32_16(fracPart, tmp16);
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13);
tmp16 = constLinApprox - (1 << 14);
tmp32no2 = (fracPart * tmp16) >> 13;
}
fracPart = (WebRtc_UWord16)tmp32no2;
gainTable[i] = WEBRTC_SPL_LSHIFT_W32(1, intPart)
+ WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
fracPart = (uint16_t)tmp32no2;
gainTable[i] =
(1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
} else
{
gainTable[i] = 0;
@ -246,9 +256,7 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
return 0;
}
WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
{
int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) {
if (agcMode == kAgcModeFixedDigital)
{
// start at minimum to find correct gain faster
@ -256,13 +264,13 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
} else
{
// start out with 0 dB gain
stt->capacitorSlow = 134217728; // (WebRtc_Word32)(0.125f * 32768.0f * 32768.0f);
stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f);
}
stt->capacitorFast = 0;
stt->gain = 65536;
stt->gatePrevious = 0;
stt->agcMode = agcMode;
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
stt->frameCounter = 0;
#endif
@ -273,52 +281,45 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
return 0;
}
WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_far,
WebRtc_Word16 nrSamples)
{
// Check for valid pointer
if (&stt->vadFarend == NULL)
{
return -1;
}
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt,
const int16_t* in_far,
size_t nrSamples) {
assert(stt != NULL);
// VAD for far end
WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples);
return 0;
}
WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_near,
const WebRtc_Word16 *in_near_H, WebRtc_Word16 *out,
WebRtc_Word16 *out_H, WebRtc_UWord32 FS,
WebRtc_Word16 lowlevelSignal)
{
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* stt,
const int16_t* const* in_near,
size_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowlevelSignal) {
// array for gains (one value per ms, incl start & end)
WebRtc_Word32 gains[11];
int32_t gains[11];
WebRtc_Word32 out_tmp, tmp32;
WebRtc_Word32 env[10];
WebRtc_Word32 nrg, max_nrg;
WebRtc_Word32 cur_level;
WebRtc_Word32 gain32, delta;
WebRtc_Word16 logratio;
WebRtc_Word16 lower_thr, upper_thr;
WebRtc_Word16 zeros, zeros_fast, frac;
WebRtc_Word16 decay;
WebRtc_Word16 gate, gain_adj;
WebRtc_Word16 k, n;
WebRtc_Word16 L, L2; // samples/subframe
int32_t out_tmp, tmp32;
int32_t env[10];
int32_t max_nrg;
int32_t cur_level;
int32_t gain32, delta;
int16_t logratio;
int16_t lower_thr, upper_thr;
int16_t zeros = 0, zeros_fast, frac = 0;
int16_t decay;
int16_t gate, gain_adj;
int16_t k;
size_t n, i, L;
int16_t L2; // samples/subframe
// determine number of samples per ms
if (FS == 8000)
{
L = 8;
L2 = 3;
} else if (FS == 16000)
{
L = 16;
L2 = 4;
} else if (FS == 32000)
} else if (FS == 16000 || FS == 32000 || FS == 48000)
{
L = 16;
L2 = 4;
@ -327,27 +328,22 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
return -1;
}
// TODO(andrew): again, we don't need input and output pointers...
if (in_near != out)
for (i = 0; i < num_bands; ++i)
{
// Only needed if they don't already point to the same place.
memcpy(out, in_near, 10 * L * sizeof(WebRtc_Word16));
}
if (FS == 32000)
{
if (in_near_H != out_H)
if (in_near[i] != out[i])
{
memcpy(out_H, in_near_H, 10 * L * sizeof(WebRtc_Word16));
// Only needed if they don't already point to the same place.
memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
}
}
// VAD for near end
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10);
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10);
// Account for far end VAD
if (stt->vadFarend.counter > 10)
{
tmp32 = WEBRTC_SPL_MUL_16_16(3, logratio);
logratio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 - stt->vadFarend.logRatio, 2);
tmp32 = 3 * logratio;
logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2);
}
// Determine decay factor depending on VAD
@ -364,11 +360,11 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
} else
{
// decay = (WebRtc_Word16)(((lower_thr - logratio)
// decay = (int16_t)(((lower_thr - logratio)
// * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10);
// SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65
tmp32 = WEBRTC_SPL_MUL_16_16((lower_thr - logratio), 65);
decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 10);
tmp32 = (lower_thr - logratio) * 65;
decay = (int16_t)(tmp32 >> 10);
}
// adjust decay factor for long silence (detected as low standard deviation)
@ -380,9 +376,9 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
} else if (stt->vadNearend.stdLongTerm < 8096)
{
// decay = (WebRtc_Word16)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12);
tmp32 = WEBRTC_SPL_MUL_16_16((stt->vadNearend.stdLongTerm - 4000), decay);
decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 12);
// decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12);
tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay;
decay = (int16_t)(tmp32 >> 12);
}
if (lowlevelSignal != 0)
@ -390,9 +386,14 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
}
}
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
stt->frameCounter++;
fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100, logratio, decay, stt->vadNearend.stdLongTerm);
fprintf(stt->logFile,
"%5.2f\t%d\t%d\t%d\t",
(float)(stt->frameCounter) / 100,
logratio,
decay,
stt->vadNearend.stdLongTerm);
#endif
// Find max amplitude per sub frame
// iterate over sub frames
@ -402,7 +403,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
max_nrg = 0;
for (n = 0; n < L; n++)
{
nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]);
int32_t nrg = out[0][k * L + n] * out[0][k * L + n];
if (nrg > max_nrg)
{
max_nrg = nrg;
@ -445,34 +446,39 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
}
// Translate signal level into gain, using a piecewise linear approximation
// find number of leading zeros
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)cur_level);
zeros = WebRtcSpl_NormU32((uint32_t)cur_level);
if (cur_level == 0)
{
zeros = 31;
}
tmp32 = (WEBRTC_SPL_LSHIFT_W32(cur_level, zeros) & 0x7FFFFFFF);
frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 19); // Q12
tmp32 = WEBRTC_SPL_MUL((stt->gainTable[zeros-1] - stt->gainTable[zeros]), frac);
gains[k + 1] = stt->gainTable[zeros] + WEBRTC_SPL_RSHIFT_W32(tmp32, 12);
#ifdef AGC_DEBUG
if (k == 0)
{
fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level, stt->capacitorFast, stt->capacitorSlow, zeros);
tmp32 = (cur_level << zeros) & 0x7FFFFFFF;
frac = (int16_t)(tmp32 >> 19); // Q12.
tmp32 = (stt->gainTable[zeros-1] - stt->gainTable[zeros]) * frac;
gains[k + 1] = stt->gainTable[zeros] + (tmp32 >> 12);
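// Clarifying note (not in the original source): |zeros| is the leading-zero
// count of cur_level, so 31 - zeros is the integer part of log2(cur_level)
// and |frac| carries the next 12 bits (Q12). The gain is then interpolated
// linearly between adjacent table entries; e.g. cur_level = 0x18000 gives
// zeros = 15, frac = 0x800 (0.5 in Q12), landing halfway between
// gainTable[15] and gainTable[14].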
#ifdef WEBRTC_AGC_DEBUG_DUMP
if (k == 0) {
fprintf(stt->logFile,
"%d\t%d\t%d\t%d\t%d\n",
env[0],
cur_level,
stt->capacitorFast,
stt->capacitorSlow,
zeros);
}
#endif
}
// Gate processing (lower gain during absence of speech)
zeros = WEBRTC_SPL_LSHIFT_W16(zeros, 9) - WEBRTC_SPL_RSHIFT_W16(frac, 3);
zeros = (zeros << 9) - (frac >> 3);
// find number of leading zeros
zeros_fast = WebRtcSpl_NormU32((WebRtc_UWord32)stt->capacitorFast);
zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast);
if (stt->capacitorFast == 0)
{
zeros_fast = 31;
}
tmp32 = (WEBRTC_SPL_LSHIFT_W32(stt->capacitorFast, zeros_fast) & 0x7FFFFFFF);
zeros_fast = WEBRTC_SPL_LSHIFT_W16(zeros_fast, 9);
zeros_fast -= (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 22);
tmp32 = (stt->capacitorFast << zeros_fast) & 0x7FFFFFFF;
zeros_fast <<= 9;
zeros_fast -= (int16_t)(tmp32 >> 22);
gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm;
@ -481,8 +487,8 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
stt->gatePrevious = 0;
} else
{
tmp32 = WEBRTC_SPL_MUL_16_16(stt->gatePrevious, 7);
gate = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)gate + tmp32, 3);
tmp32 = stt->gatePrevious * 7;
gate = (int16_t)((gate + tmp32) >> 3);
stt->gatePrevious = gate;
}
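// Clarifying note (added): this branch smooths the gate with a one-pole
// filter, gate = (new_gate + 7 * gatePrevious) / 8, so the gate level
// adapts over roughly eight 10 ms frames.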
// gate < 0 -> no gate
@ -491,7 +497,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
{
if (gate < 2500)
{
gain_adj = WEBRTC_SPL_RSHIFT_W16(2500 - gate, 5);
gain_adj = (2500 - gate) >> 5;
} else
{
gain_adj = 0;
@ -501,12 +507,12 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
if ((gains[k + 1] - stt->gainTable[0]) > 8388608)
{
// To prevent wraparound
tmp32 = WEBRTC_SPL_RSHIFT_W32((gains[k+1] - stt->gainTable[0]), 8);
tmp32 = WEBRTC_SPL_MUL(tmp32, (178 + gain_adj));
tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8;
tmp32 *= 178 + gain_adj;
} else
{
tmp32 = WEBRTC_SPL_MUL((gains[k+1] - stt->gainTable[0]), (178 + gain_adj));
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 8);
tmp32 = (gains[k+1] - stt->gainTable[0]) * (178 + gain_adj);
tmp32 >>= 8;
}
gains[k + 1] = stt->gainTable[0] + tmp32;
}
@ -521,23 +527,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
{
zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]);
}
gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1;
gain32 = WEBRTC_SPL_MUL(gain32, gain32);
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
// check for overflow
while (AGC_MUL32(WEBRTC_SPL_RSHIFT_W32(env[k], 12) + 1, gain32)
> WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)32767, 2 * (1 - zeros + 10)))
while (AGC_MUL32((env[k] >> 12) + 1, gain32)
> WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10)))
{
// multiply by 253/256 ==> -0.1 dB
if (gains[k + 1] > 8388607)
{
// Prevent wrap around
gains[k + 1] = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(gains[k+1], 8), 253);
gains[k + 1] = (gains[k+1] / 256) * 253;
} else
{
gains[k + 1] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(gains[k+1], 253), 8);
gains[k + 1] = (gains[k+1] * 253) / 256;
}
gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1;
gain32 = WEBRTC_SPL_MUL(gain32, gain32);
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
}
}
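// Clarifying note (added): gain32 holds the squared, right-shifted gain, so
// the while-condition above compares the worst-case output power of the
// subframe against the int16 ceiling; each pass trims the gain by 253/256,
// i.e. about 0.1 dB, until the product fits.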
// gain reductions should be done 1 ms earlier than gain increases
@ -553,42 +559,25 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
// Apply gain
// handle first sub frame separately
delta = WEBRTC_SPL_LSHIFT_W32(gains[1] - gains[0], (4 - L2));
gain32 = WEBRTC_SPL_LSHIFT_W32(gains[0], 4);
delta = (gains[1] - gains[0]) << (4 - L2);
gain32 = gains[0] << 4;
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7));
out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
if (out_tmp > 4095)
for (i = 0; i < num_bands; ++i)
{
out[n] = (WebRtc_Word16)32767;
} else if (out_tmp < -4096)
{
out[n] = (WebRtc_Word16)-32768;
} else
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
}
// For higher band
if (FS == 32000)
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n],
WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7));
out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][n] * ((gain32 + 127) >> 7);
out_tmp = tmp32 >> 16;
if (out_tmp > 4095)
{
out_H[n] = (WebRtc_Word16)32767;
out[i][n] = (int16_t)32767;
} else if (out_tmp < -4096)
{
out_H[n] = (WebRtc_Word16)-32768;
out[i][n] = (int16_t)-32768;
} else
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out_H[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][n] * (gain32 >> 4);
out[i][n] = (int16_t)(tmp32 >> 16);
}
}
//
@ -598,21 +587,15 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
// iterate over subframes
for (k = 1; k < 10; k++)
{
delta = WEBRTC_SPL_LSHIFT_W32(gains[k+1] - gains[k], (4 - L2));
gain32 = WEBRTC_SPL_LSHIFT_W32(gains[k], 4);
delta = (gains[k+1] - gains[k]) << (4 - L2);
gain32 = gains[k] << 4;
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[k * L + n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
// For higher band
if (FS == 32000)
for (i = 0; i < num_bands; ++i)
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[k * L + n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out_H[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][k * L + n] * (gain32 >> 4);
out[i][k * L + n] = (int16_t)(tmp32 >> 16);
}
gain32 += delta;
}
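// Clarifying note (added): gains[] are Q16 and gain32 runs in Q20
// (gains[k] << 4). Since L2 = log2(L), the (4 - L2) shift makes the L
// per-sample increments of delta sum to exactly gains[k+1] - gains[k];
// each sample is scaled by the Q16 value gain32 >> 4 and the product is
// shifted down 16 bits back to the int16 domain.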
@ -621,24 +604,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
return 0;
}
void WebRtcAgc_InitVad(AgcVad_t *state)
{
WebRtc_Word16 k;
void WebRtcAgc_InitVad(AgcVad* state) {
int16_t k;
state->HPstate = 0; // state of high pass filter
state->logRatio = 0; // log( P(active) / P(inactive) )
// average input level (Q10)
state->meanLongTerm = WEBRTC_SPL_LSHIFT_W16(15, 10);
state->meanLongTerm = 15 << 10;
// variance of input level (Q8)
state->varianceLongTerm = WEBRTC_SPL_LSHIFT_W32(500, 8);
state->varianceLongTerm = 500 << 8;
state->stdLongTerm = 0; // standard deviation of input level in dB
// short-term average input level (Q10)
state->meanShortTerm = WEBRTC_SPL_LSHIFT_W16(15, 10);
state->meanShortTerm = 15 << 10;
// short-term variance of input level (Q8)
state->varianceShortTerm = WEBRTC_SPL_LSHIFT_W32(500, 8);
state->varianceShortTerm = 500 << 8;
state->stdShortTerm = 0; // short-term standard deviation of input level in dB
state->counter = 3; // counts updates
@ -649,17 +631,17 @@ void WebRtcAgc_InitVad(AgcVad_t *state)
}
}
WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
const WebRtc_Word16 *in, // (i) Speech signal
WebRtc_Word16 nrSamples) // (i) number of samples
int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state
const int16_t* in, // (i) Speech signal
size_t nrSamples) // (i) number of samples
{
WebRtc_Word32 out, nrg, tmp32, tmp32b;
WebRtc_UWord16 tmpU16;
WebRtc_Word16 k, subfr, tmp16;
WebRtc_Word16 buf1[8];
WebRtc_Word16 buf2[4];
WebRtc_Word16 HPstate;
WebRtc_Word16 zeros, dB;
int32_t out, nrg, tmp32, tmp32b;
uint16_t tmpU16;
int16_t k, subfr, tmp16;
int16_t buf1[8];
int16_t buf2[4];
int16_t HPstate;
int16_t zeros, dB;
// process in 10 sub frames of 1 ms (to save on memory)
nrg = 0;
@ -671,9 +653,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
{
for (k = 0; k < 8; k++)
{
tmp32 = (WebRtc_Word32)in[2 * k] + (WebRtc_Word32)in[2 * k + 1];
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 1);
buf1[k] = (WebRtc_Word16)tmp32;
tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1];
tmp32 >>= 1;
buf1[k] = (int16_t)tmp32;
}
in += 16;
@ -688,10 +670,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
for (k = 0; k < 4; k++)
{
out = buf2[k] + HPstate;
tmp32 = WEBRTC_SPL_MUL(600, out);
HPstate = (WebRtc_Word16)(WEBRTC_SPL_RSHIFT_W32(tmp32, 10) - buf2[k]);
tmp32 = WEBRTC_SPL_MUL(out, out);
nrg += WEBRTC_SPL_RSHIFT_W32(tmp32, 6);
tmp32 = 600 * out;
HPstate = (int16_t)((tmp32 >> 10) - buf2[k]);
nrg += (out * out) >> 6;
}
}
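// Clarifying note (added): with y = buf2[k] + HPstate and
// HPstate' = (600 * y >> 10) - buf2[k], each iteration realizes the
// first-order high-pass y[n] = x[n] - x[n-1] + (600/1024) * y[n-1] on the
// decimated signal, and |nrg| accumulates its energy scaled down by 2^6.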
state->HPstate = HPstate;
@ -722,7 +703,7 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
}
// energy level (range {-32..30}) (Q10)
dB = WEBRTC_SPL_LSHIFT_W16(15 - zeros, 11);
dB = (15 - zeros) << 11;
// Update statistics
@ -733,44 +714,49 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
}
// update short-term estimate of mean energy level (Q10)
tmp32 = (WEBRTC_SPL_MUL_16_16(state->meanShortTerm, 15) + (WebRtc_Word32)dB);
state->meanShortTerm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 4);
tmp32 = state->meanShortTerm * 15 + dB;
state->meanShortTerm = (int16_t)(tmp32 >> 4);
// update short-term estimate of variance in energy level (Q8)
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12);
tmp32 += WEBRTC_SPL_MUL(state->varianceShortTerm, 15);
state->varianceShortTerm = WEBRTC_SPL_RSHIFT_W32(tmp32, 4);
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceShortTerm * 15;
state->varianceShortTerm = tmp32 / 16;
// update short-term estimate of standard deviation in energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanShortTerm, state->meanShortTerm);
tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceShortTerm, 12) - tmp32;
state->stdShortTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32);
tmp32 = state->meanShortTerm * state->meanShortTerm;
tmp32 = (state->varianceShortTerm << 12) - tmp32;
state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
// update long-term estimate of mean energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->counter) + (WebRtc_Word32)dB;
state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(tmp32,
WEBRTC_SPL_ADD_SAT_W16(state->counter, 1));
tmp32 = state->meanLongTerm * state->counter + dB;
state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
// update long-term estimate of variance in energy level (Q8)
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12);
tmp32 += WEBRTC_SPL_MUL(state->varianceLongTerm, state->counter);
state->varianceLongTerm = WebRtcSpl_DivW32W16(tmp32,
WEBRTC_SPL_ADD_SAT_W16(state->counter, 1));
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceLongTerm * state->counter;
state->varianceLongTerm = WebRtcSpl_DivW32W16(
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
// update long-term estimate of standard deviation in energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->meanLongTerm);
tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceLongTerm, 12) - tmp32;
state->stdLongTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32);
tmp32 = state->meanLongTerm * state->meanLongTerm;
tmp32 = (state->varianceLongTerm << 12) - tmp32;
state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
// update voice activity measure (Q10)
tmp16 = WEBRTC_SPL_LSHIFT_W16(3, 12);
tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, (dB - state->meanLongTerm));
tmp16 = 3 << 12;
// TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in
// ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16()
// was used, which did an intermediate cast to (int16_t), hence losing
// significant bits. This cause logRatio to max out positive, rather than
// negative. This is a bug, but has very little significance.
tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm);
tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm);
tmpU16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)13, 12);
tmpU16 = (13 << 12);
tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16);
tmp32 += WEBRTC_SPL_RSHIFT_W32(tmp32b, 10);
tmp32 += tmp32b >> 10;
state->logRatio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 6);
state->logRatio = (int16_t)(tmp32 >> 6);
// limit
if (state->logRatio > 2048)


@ -0,0 +1,80 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"
// the 32 most significant bits of A(19) * B(26) >> 13
#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))
// C + the 32 most significant bits of A * B
#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 ))
typedef struct
{
int32_t downState[8];
int16_t HPstate;
int16_t counter;
int16_t logRatio; // log( P(active) / P(inactive) ) (Q10)
int16_t meanLongTerm; // Q10
int32_t varianceLongTerm; // Q8
int16_t stdLongTerm; // Q10
int16_t meanShortTerm; // Q10
int32_t varianceShortTerm; // Q8
int16_t stdShortTerm; // Q10
} AgcVad; // total = 54 bytes
typedef struct
{
int32_t capacitorSlow;
int32_t capacitorFast;
int32_t gain;
int32_t gainTable[32];
int16_t gatePrevious;
int16_t agcMode;
AgcVad vadNearend;
AgcVad vadFarend;
#ifdef WEBRTC_AGC_DEBUG_DUMP
FILE* logFile;
int frameCounter;
#endif
} DigitalAgc;
int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode);
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* digitalAgcInst,
const int16_t* const* inNear,
size_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowLevelSignal);
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst,
const int16_t* inFar,
size_t nrSamples);
void WebRtcAgc_InitVad(AgcVad* vadInst);
int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state
const int16_t* in, // (i) Speech signal
size_t nrSamples); // (i) number of samples
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t compressionGaindB, // Q0 (in dB)
int16_t targetLevelDbfs, // Q0 (in dB)
uint8_t limiterEnable,
int16_t analogTarget);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
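The AGC_MUL32 macro in this header is worth a second look: it splits B into its high bits and its low 13 bits so that (A * B) >> 13 can be formed in 32-bit arithmetic, exact whenever A fits in 19 bits, B in 26, and signed right shift is arithmetic. A minimal standalone check (illustrative only, not part of the tree):

#include <assert.h>
#include <stdint.h>

#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))

int main(void) {
  int32_t a = 123456;                     // < 2^19
  int32_t b = 45000000;                   // < 2^26
  int64_t wide = ((int64_t)a * b) >> 13;  // 64-bit reference result
  assert(AGC_MUL32(a, b) == (int32_t)wide);
  return 0;
}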


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
#include "typedefs.h"
#include "webrtc/typedefs.h"
// Errors
#define AGC_UNSPECIFIED_ERROR 18000
@ -39,10 +39,10 @@ enum
typedef struct
{
WebRtc_Word16 targetLevelDbfs; // default 3 (-3 dBOv)
WebRtc_Word16 compressionGaindB; // default 9 dB
WebRtc_UWord8 limiterEnable; // default kAgcTrue (on)
} WebRtcAgc_config_t;
int16_t targetLevelDbfs; // default 3 (-3 dBOv)
int16_t compressionGaindB; // default 9 dB
uint8_t limiterEnable; // default kAgcTrue (on)
} WebRtcAgcConfig;
#if defined(__cplusplus)
extern "C"
@ -50,14 +50,14 @@ extern "C"
#endif
/*
* This function processes a 10/20ms frame of far-end speech to determine
* if there is active speech. Far-end speech length can be either 10ms or
* 20ms. The length of the input speech vector must be given in samples
* (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000).
* This function processes a 10 ms frame of far-end speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inFar : Far-end input speech vector (10 or 20ms)
* - inFar : Far-end input speech vector
* - samples : Number of samples in input vector
*
* Return value:
@ -65,26 +65,23 @@ extern "C"
* : -1 - Error
*/
int WebRtcAgc_AddFarend(void* agcInst,
const WebRtc_Word16* inFar,
WebRtc_Word16 samples);
const int16_t* inFar,
size_t samples);
/*
* This function processes a 10/20ms frame of microphone speech to determine
* if there is active speech. Microphone speech length can be either 10ms or
* 20ms. The length of the input speech vector must be given in samples
* (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000). For very low
* input levels, the input signal is increased in level by multiplying and
* overwriting the samples in inMic[].
* This function processes a 10 ms frame of microphone speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000). For very low input levels, the input signal is increased in level
* by multiplying and overwriting the samples in inMic[].
*
* This function should be called before any further processing of the
* near-end microphone signal.
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector (10 or 20 ms) for
* L band
* - inMic_H : Microphone input speech vector (10 or 20 ms) for
* H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
*
* Return value:
@ -92,24 +89,21 @@ int WebRtcAgc_AddFarend(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_AddMic(void* agcInst,
WebRtc_Word16* inMic,
WebRtc_Word16* inMic_H,
WebRtc_Word16 samples);
int16_t* const* inMic,
size_t num_bands,
size_t samples);
/*
* This function replaces the analog microphone with a virtual one.
* It is a digital gain applied to the input signal and is used in the
* agcAdaptiveDigital mode where no microphone level is adjustable.
* Microphone speech length can be either 10ms or 20ms. The length of the
* input speech vector must be given in samples (80/160 when FS=8000, and
* 160/320 when FS=16000 or FS=32000).
* agcAdaptiveDigital mode where no microphone level is adjustable. The length
* of the input speech vector must be given in samples (80 when FS=8000, and 160
* when FS=16000, FS=32000 or FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector for (10 or 20 ms)
* L band
* - inMic_H : Microphone input speech vector for (10 or 20 ms)
* H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
* - micLevelIn : Input level of microphone (static)
*
@ -123,30 +117,27 @@ int WebRtcAgc_AddMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_VirtualMic(void* agcInst,
WebRtc_Word16* inMic,
WebRtc_Word16* inMic_H,
WebRtc_Word16 samples,
WebRtc_Word32 micLevelIn,
WebRtc_Word32* micLevelOut);
int16_t* const* inMic,
size_t num_bands,
size_t samples,
int32_t micLevelIn,
int32_t* micLevelOut);
/*
* This function processes a 10/20ms frame and adjusts (normalizes) the gain
* both analog and digitally. The gain adjustments are done only during
* active periods of speech. The input speech length can be either 10ms or
* 20ms and the output is of the same length. The length of the speech
* vectors must be given in samples (80/160 when FS=8000, and 160/320 when
* FS=16000 or FS=32000). The echo parameter can be used to ensure the AGC will
* not adjust upward in the presence of echo.
* This function processes a 10 ms frame and adjusts (normalizes) the gain both
* analog and digitally. The gain adjustments are done only during active
* periods of speech. The length of the speech vectors must be given in samples
* (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
* parameter can be used to ensure the AGC will not adjust upward in the
* presence of echo.
*
* This function should be called after processing the near-end microphone
* signal, in any case after any echo cancellation.
*
* Input:
* - agcInst : AGC instance
* - inNear : Near-end input speech vector (10 or 20 ms) for
* L band
* - inNear_H : Near-end input speech vector (10 or 20 ms) for
* H band
* - inNear : Near-end input speech vector for each band
* - num_bands : Number of bands in input/output vector
* - samples : Number of samples in input/output vector
* - inMicLevel : Current microphone volume level
* - echo : Set to 0 if the signal passed to add_mic is
@ -156,9 +147,8 @@ int WebRtcAgc_VirtualMic(void* agcInst,
*
* Output:
* - outMicLevel : Adjusted microphone volume level
* - out : Gain-adjusted near-end speech vector (L band)
* - out : Gain-adjusted near-end speech vector
* : May be the same vector as the input.
* - out_H : Gain-adjusted near-end speech vector (H band)
* - saturationWarning : A returned value of 1 indicates a saturation event
* has occurred and the volume cannot be further
* reduced. Otherwise will be set to 0.
@ -168,15 +158,14 @@ int WebRtcAgc_VirtualMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_Process(void* agcInst,
const WebRtc_Word16* inNear,
const WebRtc_Word16* inNear_H,
WebRtc_Word16 samples,
WebRtc_Word16* out,
WebRtc_Word16* out_H,
WebRtc_Word32 inMicLevel,
WebRtc_Word32* outMicLevel,
WebRtc_Word16 echo,
WebRtc_UWord8* saturationWarning);
const int16_t* const* inNear,
size_t num_bands,
size_t samples,
int16_t* const* out,
int32_t inMicLevel,
int32_t* outMicLevel,
int16_t echo,
uint8_t* saturationWarning);
/*
* This function sets the config parameters (targetLevelDbfs,
@ -192,7 +181,7 @@ int WebRtcAgc_Process(void* agcInst,
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config);
int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config);
/*
* This function returns the config parameters (targetLevelDbfs,
@ -208,27 +197,21 @@ int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config);
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_get_config(void* agcInst, WebRtcAgc_config_t* config);
int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config);
/*
* This function creates an AGC instance, which will contain the state
* information for one (duplex) channel.
*
* Return value : AGC instance if successful
* : 0 (i.e., a NULL pointer) if unsuccessful
* This function creates and returns an AGC instance, which will contain the
* state information for one (duplex) channel.
*/
int WebRtcAgc_Create(void **agcInst);
void* WebRtcAgc_Create();
/*
* This function frees the AGC instance created at the beginning.
*
* Input:
* - agcInst : AGC instance.
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcAgc_Free(void *agcInst);
void WebRtcAgc_Free(void* agcInst);
/*
* This function initializes an AGC instance.
@ -247,27 +230,13 @@ int WebRtcAgc_Free(void *agcInst);
* -1 - Error
*/
int WebRtcAgc_Init(void *agcInst,
WebRtc_Word32 minLevel,
WebRtc_Word32 maxLevel,
WebRtc_Word16 agcMode,
WebRtc_UWord32 fs);
/*
* This function returns a text string containing the version.
*
* Input:
* - length : Length of the char array pointed to by version
* Output:
* - version : Pointer to a char array of to which the version
* : string will be copied.
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcAgc_Version(WebRtc_Word8 *versionStr, WebRtc_Word16 length);
int32_t minLevel,
int32_t maxLevel,
int16_t agcMode,
uint32_t fs);
#if defined(__cplusplus)
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
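With the band-based signatures in place, a minimal fixed-digital session looks roughly as follows. This is a sketch under assumptions: kAgcModeFixedDigital comes from the mode enum elided above, and one 10 ms mono band at 16 kHz is processed in place (the header allows out to alias the input).

#include <stdint.h>
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"

void agc_demo(int16_t frame[160]) {
  void* agc = WebRtcAgc_Create();
  WebRtcAgc_Init(agc, 0, 255, kAgcModeFixedDigital, 16000);
  WebRtcAgcConfig cfg = {3, 9, 1};  // -3 dBFS target, 9 dB gain, limiter on
  WebRtcAgc_set_config(agc, cfg);
  int16_t* bands[1] = {frame};
  int32_t mic_level_out = 0;
  uint8_t saturation_warning = 0;
  WebRtcAgc_Process(agc, (const int16_t* const*)bands, 1, 160, bands,
                    0, &mic_level_out, 0, &saturation_warning);
  WebRtcAgc_Free(agc);
}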


@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/utility.h"
#include <math.h>
static const double kLog10 = 2.30258509299;
static const double kLinear2DbScale = 20.0 / kLog10;
static const double kLinear2LoudnessScale = 13.4 / kLog10;
double Loudness2Db(double loudness) {
return loudness * kLinear2DbScale / kLinear2LoudnessScale;
}
double Linear2Loudness(double rms) {
if (rms == 0)
return -15;
return kLinear2LoudnessScale * log(rms);
}
double Db2Loudness(double db) {
return db * kLinear2LoudnessScale / kLinear2DbScale;
}
double Dbfs2Loudness(double dbfs) {
return Db2Loudness(90 + dbfs);
}
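Both scale constants share the 1/ln 10 factor, so each conversion is a pure change of slope: dB moves 20 units per decade of amplitude while loudness moves 13.4. A short worked example (illustrative):

#include "webrtc/modules/audio_processing/agc/utility.h"

double utility_demo() {
  double loudness = Dbfs2Loudness(-18.0);  // Db2Loudness(72) = 72 * 13.4 / 20 = 48.24
  return Loudness2Db(loudness);            // 48.24 * 20 / 13.4 = 72.0 again
}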


@ -0,0 +1,23 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
// TODO(turajs): Add description of function.
double Loudness2Db(double loudness);
double Linear2Loudness(double rms);
double Db2Loudness(double db);
double Dbfs2Loudness(double dbfs);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,173 +8,331 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "audio_buffer.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/common.h"
namespace webrtc {
namespace {
enum {
kSamplesPer8kHzChannel = 80,
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320
};
const size_t kSamplesPer16kHzChannel = 160;
const size_t kSamplesPer32kHzChannel = 320;
const size_t kSamplesPer48kHzChannel = 480;
void StereoToMono(const WebRtc_Word16* left, const WebRtc_Word16* right,
WebRtc_Word16* out, int samples_per_channel) {
WebRtc_Word32 data_int32 = 0;
for (int i = 0; i < samples_per_channel; i++) {
data_int32 = (left[i] + right[i]) >> 1;
if (data_int32 > 32767) {
data_int32 = 32767;
} else if (data_int32 < -32768) {
data_int32 = -32768;
}
out[i] = static_cast<WebRtc_Word16>(data_int32);
int KeyboardChannelIndex(const StreamConfig& stream_config) {
if (!stream_config.has_keyboard()) {
assert(false);
return -1;
}
return stream_config.num_channels();
}
size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
size_t num_bands = 1;
if (num_frames == kSamplesPer32kHzChannel ||
num_frames == kSamplesPer48kHzChannel) {
num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
}
return num_bands;
}
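// Clarifying note (added): a 10 ms frame is 160 samples at 16 kHz, 320 at
// 32 kHz and 480 at 48 kHz, so this yields 1, 2 and 3 bands of 160 samples
// respectively; 8 kHz input (80 samples) likewise stays in a single band.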
} // namespace
struct AudioChannel {
AudioChannel() {
memset(data, 0, sizeof(data));
}
WebRtc_Word16 data[kSamplesPer32kHzChannel];
};
struct SplitAudioChannel {
SplitAudioChannel() {
memset(low_pass_data, 0, sizeof(low_pass_data));
memset(high_pass_data, 0, sizeof(high_pass_data));
memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1));
memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2));
memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1));
memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2));
}
WebRtc_Word16 low_pass_data[kSamplesPer16kHzChannel];
WebRtc_Word16 high_pass_data[kSamplesPer16kHzChannel];
WebRtc_Word32 analysis_filter_state1[6];
WebRtc_Word32 analysis_filter_state2[6];
WebRtc_Word32 synthesis_filter_state1[6];
WebRtc_Word32 synthesis_filter_state2[6];
};
// TODO(andrew): check range of input parameters?
AudioBuffer::AudioBuffer(int max_num_channels,
int samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
AudioBuffer::AudioBuffer(size_t input_num_frames,
int num_input_channels,
size_t process_num_frames,
int num_process_channels,
size_t output_num_frames)
: input_num_frames_(input_num_frames),
num_input_channels_(num_input_channels),
proc_num_frames_(process_num_frames),
num_proc_channels_(num_process_channels),
output_num_frames_(output_num_frames),
num_channels_(num_process_channels),
num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
mixed_low_pass_valid_(false),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
if (max_num_channels_ > 1) {
channels_ = new AudioChannel[max_num_channels_];
mixed_low_pass_channels_ = new AudioChannel[max_num_channels_];
}
low_pass_reference_channels_ = new AudioChannel[max_num_channels_];
keyboard_data_(NULL),
data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
assert(input_num_frames_ > 0);
assert(proc_num_frames_ > 0);
assert(output_num_frames_ > 0);
assert(num_input_channels_ > 0);
assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
if (samples_per_channel_ == kSamplesPer32kHzChannel) {
split_channels_ = new SplitAudioChannel[max_num_channels_];
samples_per_split_channel_ = kSamplesPer16kHzChannel;
if (input_num_frames_ != proc_num_frames_ ||
output_num_frames_ != proc_num_frames_) {
// Create an intermediate buffer for resampling.
process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
num_proc_channels_));
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_.push_back(
new PushSincResampler(input_num_frames_,
proc_num_frames_));
}
}
if (output_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
output_resamplers_.push_back(
new PushSincResampler(proc_num_frames_,
output_num_frames_));
}
}
}
if (num_bands_ > 1) {
split_data_.reset(new IFChannelBuffer(proc_num_frames_,
num_proc_channels_,
num_bands_));
splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
num_bands_,
proc_num_frames_));
}
}
AudioBuffer::~AudioBuffer() {
if (channels_ != NULL) {
delete [] channels_;
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::CopyFrom(const float* const* data,
const StreamConfig& stream_config) {
assert(stream_config.num_frames() == input_num_frames_);
assert(stream_config.num_channels() == num_input_channels_);
InitForNewData();
// Initialized lazily because there's a different condition in
// DeinterleaveFrom.
const bool need_to_downmix =
num_input_channels_ > 1 && num_proc_channels_ == 1;
if (need_to_downmix && !input_buffer_) {
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
if (mixed_low_pass_channels_ != NULL) {
delete [] mixed_low_pass_channels_;
if (stream_config.has_keyboard()) {
keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
}
if (low_pass_reference_channels_ != NULL) {
delete [] low_pass_reference_channels_;
// Downmix.
const float* const* data_ptr = data;
if (need_to_downmix) {
DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
input_buffer_->fbuf()->channels()[0]);
data_ptr = input_buffer_->fbuf_const()->channels();
}
if (split_channels_ != NULL) {
delete [] split_channels_;
// Resample.
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(data_ptr[i],
input_num_frames_,
process_buffer_->channels()[i],
proc_num_frames_);
}
data_ptr = process_buffer_->channels();
}
// Convert to the S16 range.
for (int i = 0; i < num_proc_channels_; ++i) {
FloatToFloatS16(data_ptr[i],
proc_num_frames_,
data_->fbuf()->channels()[i]);
}
}
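// Clarifying note (added): CopyFrom thus runs (optional) downmix, then
// (optional) resampling, then FloatToFloatS16, leaving |data_| in the
// [-32768, 32767] float range that the fixed-point components expect.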
WebRtc_Word16* AudioBuffer::data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (data_ != NULL) {
return data_;
void AudioBuffer::CopyTo(const StreamConfig& stream_config,
float* const* data) {
assert(stream_config.num_frames() == output_num_frames_);
assert(stream_config.num_channels() == num_channels_);
// Convert to the float range.
float* const* data_ptr = data;
if (output_num_frames_ != proc_num_frames_) {
// Convert to an intermediate buffer for subsequent resampling.
data_ptr = process_buffer_->channels();
}
for (int i = 0; i < num_channels_; ++i) {
FloatS16ToFloat(data_->fbuf()->channels()[i],
proc_num_frames_,
data_ptr[i]);
}
return channels_[channel].data;
// Resample.
if (output_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_channels_; ++i) {
output_resamplers_[i]->Resample(data_ptr[i],
proc_num_frames_,
data[i],
output_num_frames_);
}
}
}
WebRtc_Word16* AudioBuffer::low_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return data(channel);
void AudioBuffer::InitForNewData() {
keyboard_data_ = NULL;
mixed_low_pass_valid_ = false;
reference_copied_ = false;
activity_ = AudioFrame::kVadUnknown;
num_channels_ = num_proc_channels_;
}
const int16_t* const* AudioBuffer::channels_const() const {
return data_->ibuf_const()->channels();
}
int16_t* const* AudioBuffer::channels() {
mixed_low_pass_valid_ = false;
return data_->ibuf()->channels();
}
const int16_t* const* AudioBuffer::split_bands_const(int channel) const {
return split_data_.get() ?
split_data_->ibuf_const()->bands(channel) :
data_->ibuf_const()->bands(channel);
}
int16_t* const* AudioBuffer::split_bands(int channel) {
mixed_low_pass_valid_ = false;
return split_data_.get() ?
split_data_->ibuf()->bands(channel) :
data_->ibuf()->bands(channel);
}
const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
if (split_data_.get()) {
return split_data_->ibuf_const()->channels(band);
} else {
return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
}
}
int16_t* const* AudioBuffer::split_channels(Band band) {
mixed_low_pass_valid_ = false;
if (split_data_.get()) {
return split_data_->ibuf()->channels(band);
} else {
return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
}
}
ChannelBuffer<int16_t>* AudioBuffer::data() {
mixed_low_pass_valid_ = false;
return data_->ibuf();
}
const ChannelBuffer<int16_t>* AudioBuffer::data() const {
return data_->ibuf_const();
}
ChannelBuffer<int16_t>* AudioBuffer::split_data() {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
}
const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
}
const float* const* AudioBuffer::channels_const_f() const {
return data_->fbuf_const()->channels();
}
float* const* AudioBuffer::channels_f() {
mixed_low_pass_valid_ = false;
return data_->fbuf()->channels();
}
const float* const* AudioBuffer::split_bands_const_f(int channel) const {
return split_data_.get() ?
split_data_->fbuf_const()->bands(channel) :
data_->fbuf_const()->bands(channel);
}
float* const* AudioBuffer::split_bands_f(int channel) {
mixed_low_pass_valid_ = false;
return split_data_.get() ?
split_data_->fbuf()->bands(channel) :
data_->fbuf()->bands(channel);
}
const float* const* AudioBuffer::split_channels_const_f(Band band) const {
if (split_data_.get()) {
return split_data_->fbuf_const()->channels(band);
} else {
return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
}
}
float* const* AudioBuffer::split_channels_f(Band band) {
mixed_low_pass_valid_ = false;
if (split_data_.get()) {
return split_data_->fbuf()->channels(band);
} else {
return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
}
}
ChannelBuffer<float>* AudioBuffer::data_f() {
mixed_low_pass_valid_ = false;
return data_->fbuf();
}
const ChannelBuffer<float>* AudioBuffer::data_f() const {
return data_->fbuf_const();
}
ChannelBuffer<float>* AudioBuffer::split_data_f() {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
}
const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
}
const int16_t* AudioBuffer::mixed_low_pass_data() {
if (num_proc_channels_ == 1) {
return split_bands_const(0)[kBand0To8kHz];
}
return split_channels_[channel].low_pass_data;
}
if (!mixed_low_pass_valid_) {
if (!mixed_low_pass_channels_.get()) {
mixed_low_pass_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_, 1));
}
WebRtc_Word16* AudioBuffer::high_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return NULL;
DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
num_split_frames_, num_channels_,
mixed_low_pass_channels_->channels()[0]);
mixed_low_pass_valid_ = true;
}
return split_channels_[channel].high_pass_data;
return mixed_low_pass_channels_->channels()[0];
}
WebRtc_Word16* AudioBuffer::mixed_low_pass_data(int channel) const {
assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
return mixed_low_pass_channels_[channel].data;
}
WebRtc_Word16* AudioBuffer::low_pass_reference(int channel) const {
assert(channel >= 0 && channel < num_channels_);
const int16_t* AudioBuffer::low_pass_reference(int channel) const {
if (!reference_copied_) {
return NULL;
}
return low_pass_reference_channels_[channel].data;
return low_pass_reference_channels_->channels()[channel];
}
WebRtc_Word32* AudioBuffer::analysis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state1;
}
WebRtc_Word32* AudioBuffer::analysis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state2;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state1;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state2;
const float* AudioBuffer::keyboard_data() const {
return keyboard_data_;
}
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
activity_ = activity;
}
AudioFrame::VADActivity AudioBuffer::activity() {
AudioFrame::VADActivity AudioBuffer::activity() const {
return activity_;
}
@ -182,107 +340,123 @@ int AudioBuffer::num_channels() const {
return num_channels_;
}
int AudioBuffer::samples_per_channel() const {
return samples_per_channel_;
void AudioBuffer::set_num_channels(int num_channels) {
num_channels_ = num_channels;
}
int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_;
size_t AudioBuffer::num_frames() const {
return proc_num_frames_;
}
// TODO(andrew): Do deinterleaving and mixing in one step?
size_t AudioBuffer::num_frames_per_band() const {
return num_split_frames_;
}
size_t AudioBuffer::num_keyboard_frames() const {
// We don't resample the keyboard channel.
return input_num_frames_;
}
size_t AudioBuffer::num_bands() const {
return num_bands_;
}
// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(frame->_audioChannel <= max_num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
assert(frame->num_channels_ == num_input_channels_);
assert(frame->samples_per_channel_ == input_num_frames_);
InitForNewData();
// Initialized lazily because there's a different condition in CopyFrom.
if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
activity_ = frame->vad_activity_;
num_channels_ = frame->_audioChannel;
num_mixed_channels_ = 0;
num_mixed_low_pass_channels_ = 0;
reference_copied_ = false;
activity_ = frame->_vadActivity;
int16_t* const* deinterleaved;
if (input_num_frames_ == proc_num_frames_) {
deinterleaved = data_->ibuf()->channels();
} else {
deinterleaved = input_buffer_->ibuf()->channels();
}
if (num_proc_channels_ == 1) {
// Downmix and deinterleave simultaneously.
DownmixInterleavedToMono(frame->data_, input_num_frames_,
num_input_channels_, deinterleaved[0]);
} else {
assert(num_proc_channels_ == num_input_channels_);
Deinterleave(frame->data_,
input_num_frames_,
num_proc_channels_,
deinterleaved);
}
if (num_channels_ == 1) {
// We can get away with a pointer assignment in this case.
data_ = frame->_payloadData;
// Resample.
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
input_num_frames_,
data_->fbuf()->channels()[i],
proc_num_frames_);
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
frame->vad_activity_ = activity_;
if (!data_changed) {
return;
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
deinterleaved[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels_;
assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
assert(frame->samples_per_channel_ == output_num_frames_);
// Resample if necessary.
IFChannelBuffer* data_ptr = data_.get();
if (proc_num_frames_ != output_num_frames_) {
if (!output_buffer_) {
output_buffer_.reset(
new IFChannelBuffer(output_num_frames_, num_channels_));
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
assert(frame->_audioChannel == num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
frame->_vadActivity = activity_;
if (num_channels_ == 1) {
if (num_mixed_channels_ == 1) {
memcpy(frame->_payloadData,
channels_[0].data,
sizeof(WebRtc_Word16) * samples_per_channel_);
} else {
// These should point to the same buffer in this case.
assert(data_ == frame->_payloadData);
for (int i = 0; i < num_channels_; ++i) {
output_resamplers_[i]->Resample(
data_->fbuf()->channels()[i], proc_num_frames_,
output_buffer_->fbuf()->channels()[i], output_num_frames_);
}
return;
data_ptr = output_buffer_.get();
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
interleaved_idx += num_channels_;
}
if (frame->num_channels_ == num_channels_) {
Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_,
frame->data_);
} else {
UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_,
frame->num_channels_, frame->data_);
}
}
// TODO(andrew): would be good to support the no-mix case with pointer
// assignment.
// TODO(andrew): handle mixing to multiple channels?
void AudioBuffer::Mix(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
StereoToMono(channels_[0].data,
channels_[1].data,
channels_[0].data,
samples_per_channel_);
num_channels_ = num_mixed_channels;
num_mixed_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
StereoToMono(low_pass_split_data(0),
low_pass_split_data(1),
mixed_low_pass_channels_[0].data,
samples_per_split_channel_);
num_mixed_low_pass_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyLowPassToReference() {
reference_copied_ = true;
for (int i = 0; i < num_channels_; i++) {
memcpy(low_pass_reference_channels_[i].data,
low_pass_split_data(i),
sizeof(WebRtc_Word16) * samples_per_split_channel_);
if (!low_pass_reference_channels_.get() ||
low_pass_reference_channels_->num_channels() != num_channels_) {
low_pass_reference_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_,
num_proc_channels_));
}
for (int i = 0; i < num_proc_channels_; i++) {
memcpy(low_pass_reference_channels_->channels()[i],
split_bands_const(i)[kBand0To8kHz],
low_pass_reference_channels_->num_frames_per_band() *
sizeof(split_bands_const(i)[kBand0To8kHz][0]));
}
}
void AudioBuffer::SplitIntoFrequencyBands() {
splitting_filter_->Analysis(data_.get(), split_data_.get());
}
void AudioBuffer::MergeFrequencyBands() {
splitting_filter_->Synthesis(split_data_.get(), data_.get());
}
} // namespace webrtc


@ -8,64 +8,156 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#include "module_common_types.h"
#include "typedefs.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/splitting_filter.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/scoped_vector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
struct AudioChannel;
struct SplitAudioChannel;
class PushSincResampler;
class IFChannelBuffer;
enum Band {
kBand0To8kHz = 0,
kBand8To16kHz = 1,
kBand16To24kHz = 2
};
class AudioBuffer {
public:
AudioBuffer(int max_num_channels, int samples_per_channel);
// TODO(ajm): Switch to take ChannelLayouts.
AudioBuffer(size_t input_num_frames,
int num_input_channels,
size_t process_num_frames,
int num_process_channels,
size_t output_num_frames);
virtual ~AudioBuffer();
int num_channels() const;
int samples_per_channel() const;
int samples_per_split_channel() const;
void set_num_channels(int num_channels);
size_t num_frames() const;
size_t num_frames_per_band() const;
size_t num_keyboard_frames() const;
size_t num_bands() const;
WebRtc_Word16* data(int channel) const;
WebRtc_Word16* low_pass_split_data(int channel) const;
WebRtc_Word16* high_pass_split_data(int channel) const;
WebRtc_Word16* mixed_low_pass_data(int channel) const;
WebRtc_Word16* low_pass_reference(int channel) const;
// Returns a pointer array to the full-band channels.
// Usage:
// channels()[channel][sample].
// Where:
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |proc_num_frames_|
int16_t* const* channels();
const int16_t* const* channels_const() const;
float* const* channels_f();
const float* const* channels_const_f() const;
WebRtc_Word32* analysis_filter_state1(int channel) const;
WebRtc_Word32* analysis_filter_state2(int channel) const;
WebRtc_Word32* synthesis_filter_state1(int channel) const;
WebRtc_Word32* synthesis_filter_state2(int channel) const;
// Returns a pointer array to the bands for a specific channel.
// Usage:
// split_bands(channel)[band][sample].
// Where:
// 0 <= channel < |num_proc_channels_|
// 0 <= band < |num_bands_|
// 0 <= sample < |num_split_frames_|
int16_t* const* split_bands(int channel);
const int16_t* const* split_bands_const(int channel) const;
float* const* split_bands_f(int channel);
const float* const* split_bands_const_f(int channel) const;
// Returns a pointer array to the channels for a specific band.
// Usage:
// split_channels(band)[channel][sample].
// Where:
// 0 <= band < |num_bands_|
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |num_split_frames_|
int16_t* const* split_channels(Band band);
const int16_t* const* split_channels_const(Band band) const;
float* const* split_channels_f(Band band);
const float* const* split_channels_const_f(Band band) const;
// Returns a pointer to the ChannelBuffer that encapsulates the full-band
// data.
ChannelBuffer<int16_t>* data();
const ChannelBuffer<int16_t>* data() const;
ChannelBuffer<float>* data_f();
const ChannelBuffer<float>* data_f() const;
// Returns a pointer to the ChannelBuffer that encapsulates the split data.
ChannelBuffer<int16_t>* split_data();
const ChannelBuffer<int16_t>* split_data() const;
ChannelBuffer<float>* split_data_f();
const ChannelBuffer<float>* split_data_f() const;
// Returns a pointer to the low-pass data downmixed to mono. If this data
// isn't already available it re-calculates it.
const int16_t* mixed_low_pass_data();
const int16_t* low_pass_reference(int channel) const;
const float* keyboard_data() const;
void set_activity(AudioFrame::VADActivity activity);
AudioFrame::VADActivity activity();
AudioFrame::VADActivity activity() const;
// Use for int16 interleaved data.
void DeinterleaveFrom(AudioFrame* audioFrame);
void InterleaveTo(AudioFrame* audioFrame) const;
void Mix(int num_mixed_channels);
void CopyAndMixLowPass(int num_mixed_channels);
// If |data_changed| is false, only the non-audio data members will be copied
// to |frame|.
void InterleaveTo(AudioFrame* frame, bool data_changed);
// Use for float deinterleaved data.
void CopyFrom(const float* const* data, const StreamConfig& stream_config);
void CopyTo(const StreamConfig& stream_config, float* const* data);
void CopyLowPassToReference();
// Splits the signal into different bands.
void SplitIntoFrequencyBands();
// Recombine the different bands into one signal.
void MergeFrequencyBands();
private:
const int max_num_channels_;
// Called from DeinterleaveFrom() and CopyFrom().
void InitForNewData();
// The audio is passed into DeinterleaveFrom() or CopyFrom() with input
// format (samples per channel and number of channels).
const size_t input_num_frames_;
const int num_input_channels_;
// The audio is stored by DeinterleaveFrom() or CopyFrom() with processing
// format.
const size_t proc_num_frames_;
const int num_proc_channels_;
// The audio is returned by InterleaveTo() and CopyTo() with output samples
// per channel and the current number of channels, which can be changed at
// any time using set_num_channels().
const size_t output_num_frames_;
int num_channels_;
int num_mixed_channels_;
int num_mixed_low_pass_channels_;
const int samples_per_channel_;
int samples_per_split_channel_;
size_t num_bands_;
size_t num_split_frames_;
bool mixed_low_pass_valid_;
bool reference_copied_;
AudioFrame::VADActivity activity_;
WebRtc_Word16* data_;
// TODO(andrew): use vectors here.
AudioChannel* channels_;
SplitAudioChannel* split_channels_;
// TODO(andrew): improve this, we don't need the full 32 kHz space here.
AudioChannel* mixed_low_pass_channels_;
AudioChannel* low_pass_reference_channels_;
const float* keyboard_data_;
rtc::scoped_ptr<IFChannelBuffer> data_;
rtc::scoped_ptr<IFChannelBuffer> split_data_;
rtc::scoped_ptr<SplittingFilter> splitting_filter_;
rtc::scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
rtc::scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;
rtc::scoped_ptr<IFChannelBuffer> input_buffer_;
rtc::scoped_ptr<IFChannelBuffer> output_buffer_;
rtc::scoped_ptr<ChannelBuffer<float> > process_buffer_;
ScopedVector<PushSincResampler> input_resamplers_;
ScopedVector<PushSincResampler> output_resamplers_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
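A rough sketch of how these accessors compose for a mono 48 kHz stream (hypothetical values; it assumes StreamConfig's (rate, channels) constructor from include/audio_processing.h, and the real call sites live in audio_processing_impl.cc, whose diff is suppressed below):

#include "webrtc/modules/audio_processing/audio_buffer.h"

void audio_buffer_demo() {
  webrtc::AudioBuffer buf(480, 1, 480, 1, 480);  // 10 ms at 48 kHz, mono, same rate out
  float frame[480] = {0.f};
  const float* in[1] = {frame};
  webrtc::StreamConfig cfg(48000, 1);
  buf.CopyFrom(in, cfg);                // deinterleaved float input
  buf.SplitIntoFrequencyBands();        // three bands of 160 samples each
  int16_t* low = buf.split_bands(0)[webrtc::kBand0To8kHz];
  (void)low;                            // per-band processing would go here
  buf.MergeFrequencyBands();
  float* out[1] = {frame};
  buf.CopyTo(cfg, out);
}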

File diff suppressed because it is too large

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,81 +8,140 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
#include <list>
#include <string>
#include <vector>
#include "audio_processing.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/base/thread_annotations.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
namespace webrtc {
namespace audioproc {
class Event;
} // audioproc
class AgcManagerDirect;
class AudioBuffer;
class AudioConverter;
template<typename T>
class Beamformer;
class CriticalSectionWrapper;
class EchoCancellationImpl;
class EchoControlMobileImpl;
class FileWrapper;
class GainControlImpl;
class GainControlForNewAgc;
class HighPassFilterImpl;
class LevelEstimatorImpl;
class NoiseSuppressionImpl;
class ProcessingComponent;
class TransientSuppressor;
class VoiceDetectionImpl;
class IntelligibilityEnhancer;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
namespace audioproc {
class Event;
} // namespace audioproc
#endif
class AudioProcessingImpl : public AudioProcessing {
public:
enum {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000
};
explicit AudioProcessingImpl(const Config& config);
explicit AudioProcessingImpl(int id);
// AudioProcessingImpl takes ownership of beamformer.
AudioProcessingImpl(const Config& config, Beamformer<float>* beamformer);
virtual ~AudioProcessingImpl();
CriticalSectionWrapper* crit() const;
int split_sample_rate_hz() const;
bool was_stream_delay_set() const;
// AudioProcessing methods.
virtual int Initialize();
virtual int InitializeLocked();
virtual int set_sample_rate_hz(int rate);
virtual int sample_rate_hz() const;
virtual int set_num_channels(int input_channels, int output_channels);
virtual int num_input_channels() const;
virtual int num_output_channels() const;
virtual int set_num_reverse_channels(int channels);
virtual int num_reverse_channels() const;
virtual int ProcessStream(AudioFrame* frame);
virtual int AnalyzeReverseStream(AudioFrame* frame);
virtual int set_stream_delay_ms(int delay);
virtual int stream_delay_ms() const;
virtual int StartDebugRecording(const char filename[kMaxFilenameSize]);
virtual int StopDebugRecording();
virtual EchoCancellation* echo_cancellation() const;
virtual EchoControlMobile* echo_control_mobile() const;
virtual GainControl* gain_control() const;
virtual HighPassFilter* high_pass_filter() const;
virtual LevelEstimator* level_estimator() const;
virtual NoiseSuppression* noise_suppression() const;
virtual VoiceDetection* voice_detection() const;
int Initialize() override;
int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout) override;
int Initialize(const ProcessingConfig& processing_config) override;
void SetExtraOptions(const Config& config) override;
int proc_sample_rate_hz() const override;
int proc_split_sample_rate_hz() const override;
int num_input_channels() const override;
int num_output_channels() const override;
int num_reverse_channels() const override;
void set_output_will_be_muted(bool muted) override;
int ProcessStream(AudioFrame* frame) override;
int ProcessStream(const float* const* src,
size_t samples_per_channel,
int input_sample_rate_hz,
ChannelLayout input_layout,
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest) override;
int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) override;
int AnalyzeReverseStream(AudioFrame* frame) override;
int ProcessReverseStream(AudioFrame* frame) override;
int AnalyzeReverseStream(const float* const* data,
size_t samples_per_channel,
int sample_rate_hz,
ChannelLayout layout) override;
int ProcessReverseStream(const float* const* src,
const StreamConfig& reverse_input_config,
const StreamConfig& reverse_output_config,
float* const* dest) override;
int set_stream_delay_ms(int delay) override;
int stream_delay_ms() const override;
bool was_stream_delay_set() const override;
void set_delay_offset_ms(int offset) override;
int delay_offset_ms() const override;
void set_stream_key_pressed(bool key_pressed) override;
int StartDebugRecording(const char filename[kMaxFilenameSize]) override;
int StartDebugRecording(FILE* handle) override;
int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) override;
int StopDebugRecording() override;
void UpdateHistogramsOnCallEnd() override;
EchoCancellation* echo_cancellation() const override;
EchoControlMobile* echo_control_mobile() const override;
GainControl* gain_control() const override;
HighPassFilter* high_pass_filter() const override;
LevelEstimator* level_estimator() const override;
NoiseSuppression* noise_suppression() const override;
VoiceDetection* voice_detection() const override;
// Module methods.
virtual WebRtc_Word32 Version(WebRtc_Word8* version,
WebRtc_UWord32& remainingBufferInBytes,
WebRtc_UWord32& position) const;
virtual WebRtc_Word32 ChangeUniqueId(const WebRtc_Word32 id);
protected:
// Overridden in a mock.
virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
private:
int WriteMessageToDebugFile();
int WriteInitMessage();
int InitializeLocked(const ProcessingConfig& config)
EXCLUSIVE_LOCKS_REQUIRED(crit_);
int MaybeInitializeLocked(const ProcessingConfig& config)
EXCLUSIVE_LOCKS_REQUIRED(crit_);
// TODO(ekm): Remove once all clients updated to new interface.
int AnalyzeReverseStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config);
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
int id_;
bool is_data_processed() const;
bool output_copy_needed(bool is_data_processed) const;
bool synthesis_needed(bool is_data_processed) const;
bool analysis_needed(bool is_data_processed) const;
bool is_rev_processed() const;
bool rev_conversion_needed() const;
void InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeBeamformer() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeIntelligibility() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_);
EchoCancellationImpl* echo_cancellation_;
EchoControlMobileImpl* echo_control_mobile_;
@ -91,27 +150,69 @@ class AudioProcessingImpl : public AudioProcessing {
LevelEstimatorImpl* level_estimator_;
NoiseSuppressionImpl* noise_suppression_;
VoiceDetectionImpl* voice_detection_;
rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc_;
std::list<ProcessingComponent*> component_list_;
FileWrapper* debug_file_;
audioproc::Event* event_msg_; // Protobuf message.
std::string event_str_; // Memory for protobuf serialization.
CriticalSectionWrapper* crit_;
rtc::scoped_ptr<AudioBuffer> render_audio_;
rtc::scoped_ptr<AudioBuffer> capture_audio_;
rtc::scoped_ptr<AudioConverter> render_converter_;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// TODO(andrew): make this more graceful. Ideally we would split this stuff
// out into a separate class with an "enabled" and "disabled" implementation.
int WriteMessageToDebugFile();
int WriteInitMessage();
AudioBuffer* render_audio_;
AudioBuffer* capture_audio_;
// Writes Config message. If not |forced|, only writes the current config if
// it is different from the last saved one; if |forced|, writes the config
// regardless of the last saved one.
int WriteConfigMessage(bool forced);
rtc::scoped_ptr<FileWrapper> debug_file_;
rtc::scoped_ptr<audioproc::Event> event_msg_; // Protobuf message.
std::string event_str_; // Memory for protobuf serialization.
// Serialized string of last saved APM configuration.
std::string last_serialized_config_;
#endif
// Format of processing streams at input/output call sites.
ProcessingConfig api_format_;
// Only the rate and samples fields of fwd_proc_format_ are used because the
// forward processing number of channels is mutable and is tracked by
// capture_audio_.
StreamConfig fwd_proc_format_;
StreamConfig rev_proc_format_;
int split_rate_;
int sample_rate_hz_;
int split_sample_rate_hz_;
int samples_per_channel_;
int stream_delay_ms_;
int delay_offset_ms_;
bool was_stream_delay_set_;
int last_stream_delay_ms_;
int last_aec_system_delay_ms_;
int stream_delay_jumps_;
int aec_system_delay_jumps_;
int num_reverse_channels_;
int num_input_channels_;
int num_output_channels_;
bool output_will_be_muted_ GUARDED_BY(crit_);
bool key_pressed_;
// Only set through the constructor's Config parameter.
const bool use_new_agc_;
rtc::scoped_ptr<AgcManagerDirect> agc_manager_ GUARDED_BY(crit_);
int agc_startup_min_volume_;
bool transient_suppressor_enabled_;
rtc::scoped_ptr<TransientSuppressor> transient_suppressor_;
const bool beamformer_enabled_;
rtc::scoped_ptr<Beamformer<float>> beamformer_;
const std::vector<Point> array_geometry_;
bool intelligibility_enabled_;
rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
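To make the call flow above concrete, here is a hedged sketch of driving the float ProcessStream() overload through the public AudioProcessing interface. The StreamConfig(rate, channels) constructor is assumed from this era of the API and is not shown in this diff.

#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Sketch: process one 10 ms chunk of deinterleaved float capture audio,
// downmixing stereo input to mono output.
int ProcessCaptureChunk(webrtc::AudioProcessing* apm,
                        const float* const* near_end,
                        float* const* out) {
  webrtc::StreamConfig input_config(48000, 2);   // assumed ctor
  webrtc::StreamConfig output_config(48000, 1);  // assumed ctor
  apm->set_stream_delay_ms(60);  // render-to-capture delay estimate
  return apm->ProcessStream(near_end, input_config, output_config, out);
}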

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#include <cmath>
namespace webrtc {
// Coordinates in meters.
template<typename T>
struct CartesianPoint {
CartesianPoint(T x, T y, T z) {
c[0] = x;
c[1] = y;
c[2] = z;
}
T x() const { return c[0]; }
T y() const { return c[1]; }
T z() const { return c[2]; }
T c[3];
};
using Point = CartesianPoint<float>;
template<typename T>
float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
(a.y() - b.y()) * (a.y() - b.y()) +
(a.z() - b.z()) * (a.z() - b.z()));
}
template <typename T>
struct SphericalPoint {
SphericalPoint(T azimuth, T elevation, T radius) {
s[0] = azimuth;
s[1] = elevation;
s[2] = radius;
}
T azimuth() const { return s[0]; }
T elevation() const { return s[1]; }
T distance() const { return s[2]; }
T s[3];
};
using SphericalPointf = SphericalPoint<float>;
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
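A minimal usage example of the geometry helpers above; everything here is grounded in the header itself.

#include "webrtc/modules/audio_processing/beamformer/array_util.h"

void ArrayGeometryExample() {
  // Two-element linear array along the x axis, 5 cm apart.
  webrtc::Point mic0(0.f, 0.f, 0.f);
  webrtc::Point mic1(0.05f, 0.f, 0.f);
  const float spacing = webrtc::Distance(mic0, mic1);  // == 0.05f
  static_cast<void>(spacing);  // silence unused-variable warnings
}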

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
namespace webrtc {
template<typename T>
class Beamformer {
public:
virtual ~Beamformer() {}
// Process one time-domain chunk of audio. The audio is expected to be split
// into frequency bands inside the ChannelBuffer. The number of frames and
// channels must correspond to the constructor parameters. The same
// ChannelBuffer can be passed in as |input| and |output|.
virtual void ProcessChunk(const ChannelBuffer<T>& input,
ChannelBuffer<T>* output) = 0;
// Sample rate corresponds to the lower band.
// Needs to be called before the Beamformer can be used.
virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
// Indicates whether a given point is inside of the beam.
virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }
// Returns true if the current data contains the target signal.
// Which signals are considered "targets" is implementation dependent.
virtual bool is_target_present() = 0;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
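The expected call order against this abstract interface, as a sketch; the concrete type behind |bf| would be e.g. the NonlinearBeamformer defined later in this commit.

#include "webrtc/modules/audio_processing/beamformer/beamformer.h"

// Sketch: Initialize() once, then feed band-split chunks and gate further
// processing on is_target_present().
void RunBeamformer(webrtc::Beamformer<float>* bf,
                   const webrtc::ChannelBuffer<float>& split_input,
                   webrtc::ChannelBuffer<float>* split_output) {
  bf->Initialize(10, 16000);  // 10 ms chunks; rate of the lower band
  bf->ProcessChunk(split_input, split_output);
  if (bf->is_target_present()) {
    // Treat this chunk as target speech rather than interference.
  }
}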

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
#include <complex>
#include "webrtc/base/checks.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"
namespace webrtc {
using std::complex;
// An extension of Matrix for operations that only work on a complex type.
template <typename T>
class ComplexMatrix : public Matrix<complex<T> > {
public:
ComplexMatrix() : Matrix<complex<T> >() {}
ComplexMatrix(int num_rows, int num_columns)
: Matrix<complex<T> >(num_rows, num_columns) {}
ComplexMatrix(const complex<T>* data, int num_rows, int num_columns)
: Matrix<complex<T> >(data, num_rows, num_columns) {}
// Complex Matrix operations.
ComplexMatrix& PointwiseConjugate() {
complex<T>* const data = this->data();
size_t size = this->num_rows() * this->num_columns();
for (size_t i = 0; i < size; ++i) {
data[i] = conj(data[i]);
}
return *this;
}
ComplexMatrix& PointwiseConjugate(const ComplexMatrix& operand) {
this->CopyFrom(operand);
return PointwiseConjugate();
}
ComplexMatrix& ConjugateTranspose() {
this->CopyDataToScratch();
int num_rows = this->num_rows();
this->SetNumRows(this->num_columns());
this->SetNumColumns(num_rows);
this->Resize();
return ConjugateTranspose(this->scratch_elements());
}
ComplexMatrix& ConjugateTranspose(const ComplexMatrix& operand) {
RTC_CHECK_EQ(operand.num_rows(), this->num_columns());
RTC_CHECK_EQ(operand.num_columns(), this->num_rows());
return ConjugateTranspose(operand.elements());
}
ComplexMatrix& ZeroImag() {
complex<T>* const data = this->data();
size_t size = this->num_rows() * this->num_columns();
for (size_t i = 0; i < size; ++i) {
data[i] = complex<T>(data[i].real(), 0);
}
return *this;
}
ComplexMatrix& ZeroImag(const ComplexMatrix& operand) {
this->CopyFrom(operand);
return ZeroImag();
}
private:
ComplexMatrix& ConjugateTranspose(const complex<T>* const* src) {
complex<T>* const* elements = this->elements();
for (int i = 0; i < this->num_rows(); ++i) {
for (int j = 0; j < this->num_columns(); ++j) {
elements[i][j] = conj(src[j][i]);
}
}
return *this;
}
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
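A short illustration of the in-place ConjugateTranspose() declared above, grounded in this header.

#include <complex>
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

void HermitianTransposeExample() {
  const std::complex<float> values[] = {
      {1.f, 2.f}, {3.f, -1.f},   // row 0
      {0.f, 4.f}, {5.f, 0.f}};   // row 1
  webrtc::ComplexMatrix<float> m(values, 2, 2);
  // In place: m[i][j] becomes the conjugate of the old m[j][i].
  m.ConjugateTranspose();
}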

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
#include <cmath>
namespace {
float BesselJ0(float x) {
#if WEBRTC_WIN
return _j0(x);
#else
return j0(x);
#endif
}
} // namespace
namespace webrtc {
void CovarianceMatrixGenerator::UniformCovarianceMatrix(
float wave_number,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
complex<float>* const* mat_els = mat->elements();
for (size_t i = 0; i < geometry.size(); ++i) {
for (size_t j = 0; j < geometry.size(); ++j) {
if (wave_number > 0.f) {
mat_els[i][j] =
BesselJ0(wave_number * Distance(geometry[i], geometry[j]));
} else {
mat_els[i][j] = i == j ? 1.f : 0.f;
}
}
}
}
void CovarianceMatrixGenerator::AngledCovarianceMatrix(
float sound_speed,
float angle,
size_t frequency_bin,
size_t fft_size,
size_t num_freq_bins,
int sample_rate,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
ComplexMatrix<float> interf_cov_vector(1, geometry.size());
ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1);
PhaseAlignmentMasks(frequency_bin,
fft_size,
sample_rate,
sound_speed,
geometry,
angle,
&interf_cov_vector);
interf_cov_vector_transposed.Transpose(interf_cov_vector);
interf_cov_vector.PointwiseConjugate();
mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);
}
void CovarianceMatrixGenerator::PhaseAlignmentMasks(
size_t frequency_bin,
size_t fft_size,
int sample_rate,
float sound_speed,
const std::vector<Point>& geometry,
float angle,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(1, mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
float freq_in_hertz =
(static_cast<float>(frequency_bin) / fft_size) * sample_rate;
complex<float>* const* mat_els = mat->elements();
for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) {
float distance = std::cos(angle) * geometry[c_ix].x() +
std::sin(angle) * geometry[c_ix].y();
float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;
// Euler's formula for mat[0][c_ix] = e^(j * phase_shift).
mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift));
}
}
} // namespace webrtc

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
namespace webrtc {
// Helper class for Beamformer in charge of generating covariance matrices.
// For the covariance functions, the passed-in ComplexMatrix is expected to be
// of size |num_input_channels| x |num_input_channels|; PhaseAlignmentMasks
// expects a |1| x |num_input_channels| matrix.
class CovarianceMatrixGenerator {
public:
// A uniform covariance matrix with a gap at the target location. WARNING:
// The target angle is assumed to be 0.
static void UniformCovarianceMatrix(float wave_number,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat);
// The covariance matrix of a source at the given angle.
static void AngledCovarianceMatrix(float sound_speed,
float angle,
size_t frequency_bin,
size_t fft_size,
size_t num_freq_bins,
int sample_rate,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat);
// Calculates phase shifts that, when applied to a multichannel signal and
// added together, cause constructive interference for sources located at
// the given angle.
static void PhaseAlignmentMasks(size_t frequency_bin,
size_t fft_size,
int sample_rate,
float sound_speed,
const std::vector<Point>& geometry,
float angle,
ComplexMatrix<float>* mat);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
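A usage sketch for the uniform covariance matrix, grounded in this header and the .cc above; the wave number follows the k = 2*pi*f/c definition used in nonlinear_beamformer.cc.

#include <vector>
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

void UniformCovarianceExample() {
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
  // Wave number for 1 kHz at c = 343 m/s.
  const float wave_number = 2.f * 3.14159265f * 1000.f / 343.f;
  // Must be |num_input_channels| x |num_input_channels|.
  webrtc::ComplexMatrix<float> cov(2, 2);
  webrtc::CovarianceMatrixGenerator::UniformCovarianceMatrix(
      wave_number, geometry, &cov);
}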

View File

@ -0,0 +1,368 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
#include <algorithm>
#include <cstring>
#include <string>
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/scoped_ptr.h"
namespace {
// Wrappers to get around the compiler warning resulting from the fact that
// there's no std::sqrt overload for ints. We cast all non-complex types to
// a double for the sqrt method.
template <typename T>
T sqrt_wrapper(T x) {
return sqrt(static_cast<double>(x));
}
template <typename S>
std::complex<S> sqrt_wrapper(std::complex<S> x) {
return sqrt(x);
}
} // namespace
namespace webrtc {
// Matrix is a class for doing standard matrix operations on 2 dimensional
// matrices of any size. Results of matrix operations are stored in the
// calling object. Function overloads exist for both in-place (the calling
// object is used as both an operand and the result) and out-of-place (all
// operands are passed in as parameters) operations. If operand dimensions
// mismatch, the program crashes. Out-of-place operations change the size of
// the calling object, if necessary, before operating.
//
// 'In-place' operations that inherently change the size of the matrix (e.g.
// Transpose, Multiply on different-sized matrices) must make temporary copies
// (|scratch_elements_| and |scratch_data_|) of existing data to complete the
// operations.
//
// The data is stored contiguously. Data can be accessed internally as a flat
// array, |data_|, or as an array of row pointers, |elements_|, but is
// available to users only as an array of row pointers through |elements()|.
// Memory for storage is allocated when a matrix is resized only if the new
// size overflows capacity. Memory needed temporarily for any operations is
// similarly resized only if the new size overflows capacity.
//
// If you pass in storage through the ctor, that storage is copied into the
// matrix. TODO(claguna): albeit tricky, allow for data to be referenced
// instead of copied, and owned by the user.
template <typename T>
class Matrix {
public:
Matrix() : num_rows_(0), num_columns_(0) {}
// Allocates space for the elements and initializes all values to zero.
Matrix(int num_rows, int num_columns)
: num_rows_(num_rows), num_columns_(num_columns) {
Resize();
scratch_data_.resize(num_rows_ * num_columns_);
scratch_elements_.resize(num_rows_);
}
// Copies |data| into the new Matrix.
Matrix(const T* data, int num_rows, int num_columns)
: num_rows_(0), num_columns_(0) {
CopyFrom(data, num_rows, num_columns);
scratch_data_.resize(num_rows_ * num_columns_);
scratch_elements_.resize(num_rows_);
}
virtual ~Matrix() {}
// Deep copy an existing matrix.
void CopyFrom(const Matrix& other) {
CopyFrom(&other.data_[0], other.num_rows_, other.num_columns_);
}
// Copy |data| into the Matrix. The current data is lost.
void CopyFrom(const T* const data, int num_rows, int num_columns) {
Resize(num_rows, num_columns);
memcpy(&data_[0], data, num_rows_ * num_columns_ * sizeof(data_[0]));
}
Matrix& CopyFromColumn(const T* const* src,
size_t column_index,
int num_rows) {
Resize(1, num_rows);
for (int i = 0; i < num_columns_; ++i) {
data_[i] = src[i][column_index];
}
return *this;
}
void Resize(int num_rows, int num_columns) {
if (num_rows != num_rows_ || num_columns != num_columns_) {
num_rows_ = num_rows;
num_columns_ = num_columns;
Resize();
}
}
// Accessors and mutators.
int num_rows() const { return num_rows_; }
int num_columns() const { return num_columns_; }
T* const* elements() { return &elements_[0]; }
const T* const* elements() const { return &elements_[0]; }
T Trace() {
RTC_CHECK_EQ(num_rows_, num_columns_);
T trace = 0;
for (int i = 0; i < num_rows_; ++i) {
trace += elements_[i][i];
}
return trace;
}
// Matrix Operations. Returns *this to support method chaining.
Matrix& Transpose() {
CopyDataToScratch();
Resize(num_columns_, num_rows_);
return Transpose(scratch_elements());
}
Matrix& Transpose(const Matrix& operand) {
RTC_CHECK_EQ(operand.num_rows_, num_columns_);
RTC_CHECK_EQ(operand.num_columns_, num_rows_);
return Transpose(operand.elements());
}
template <typename S>
Matrix& Scale(const S& scalar) {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= scalar;
}
return *this;
}
template <typename S>
Matrix& Scale(const Matrix& operand, const S& scalar) {
CopyFrom(operand);
return Scale(scalar);
}
Matrix& Add(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] += operand.data_[i];
}
return *this;
}
Matrix& Add(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return Add(rhs);
}
Matrix& Subtract(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] -= operand.data_[i];
}
return *this;
}
Matrix& Subtract(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return Subtract(rhs);
}
Matrix& PointwiseMultiply(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= operand.data_[i];
}
return *this;
}
Matrix& PointwiseMultiply(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return PointwiseMultiply(rhs);
}
Matrix& PointwiseDivide(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] /= operand.data_[i];
}
return *this;
}
Matrix& PointwiseDivide(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return PointwiseDivide(rhs);
}
Matrix& PointwiseSquareRoot() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = sqrt_wrapper(data_[i]);
}
return *this;
}
Matrix& PointwiseSquareRoot(const Matrix& operand) {
CopyFrom(operand);
return PointwiseSquareRoot();
}
Matrix& PointwiseAbsoluteValue() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = abs(data_[i]);
}
return *this;
}
Matrix& PointwiseAbsoluteValue(const Matrix& operand) {
CopyFrom(operand);
return PointwiseAbsoluteValue();
}
Matrix& PointwiseSquare() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= data_[i];
}
return *this;
}
Matrix& PointwiseSquare(const Matrix& operand) {
CopyFrom(operand);
return PointwiseSquare();
}
Matrix& Multiply(const Matrix& lhs, const Matrix& rhs) {
RTC_CHECK_EQ(lhs.num_columns_, rhs.num_rows_);
RTC_CHECK_EQ(num_rows_, lhs.num_rows_);
RTC_CHECK_EQ(num_columns_, rhs.num_columns_);
return Multiply(lhs.elements(), rhs.num_rows_, rhs.elements());
}
Matrix& Multiply(const Matrix& rhs) {
RTC_CHECK_EQ(num_columns_, rhs.num_rows_);
CopyDataToScratch();
Resize(num_rows_, rhs.num_columns_);
return Multiply(scratch_elements(), rhs.num_rows_, rhs.elements());
}
std::string ToString() const {
std::ostringstream ss;
ss << std::endl << "Matrix" << std::endl;
for (int i = 0; i < num_rows_; ++i) {
for (int j = 0; j < num_columns_; ++j) {
ss << elements_[i][j] << " ";
}
ss << std::endl;
}
ss << std::endl;
return ss.str();
}
protected:
void SetNumRows(const int num_rows) { num_rows_ = num_rows; }
void SetNumColumns(const int num_columns) { num_columns_ = num_columns; }
T* data() { return &data_[0]; }
const T* data() const { return &data_[0]; }
const T* const* scratch_elements() const { return &scratch_elements_[0]; }
// Resize the matrix. If an increase in capacity is required, the current
// data is lost.
void Resize() {
size_t size = num_rows_ * num_columns_;
data_.resize(size);
elements_.resize(num_rows_);
for (int i = 0; i < num_rows_; ++i) {
elements_[i] = &data_[i * num_columns_];
}
}
// Copies data_ into scratch_data_ and updates scratch_elements_ accordingly.
void CopyDataToScratch() {
scratch_data_ = data_;
scratch_elements_.resize(num_rows_);
for (int i = 0; i < num_rows_; ++i) {
scratch_elements_[i] = &scratch_data_[i * num_columns_];
}
}
private:
int num_rows_;
int num_columns_;
std::vector<T> data_;
std::vector<T*> elements_;
// Stores temporary copies of |data_| and |elements_| for in-place operations
// where referring to original data is necessary.
std::vector<T> scratch_data_;
std::vector<T*> scratch_elements_;
// Helpers for Transpose and Multiply operations that unify in-place and
// out-of-place solutions.
Matrix& Transpose(const T* const* src) {
for (int i = 0; i < num_rows_; ++i) {
for (int j = 0; j < num_columns_; ++j) {
elements_[i][j] = src[j][i];
}
}
return *this;
}
Matrix& Multiply(const T* const* lhs, int num_rows_rhs, const T* const* rhs) {
for (int row = 0; row < num_rows_; ++row) {
for (int col = 0; col < num_columns_; ++col) {
T cur_element = 0;
for (int i = 0; i < num_rows_rhs; ++i) {
cur_element += lhs[row][i] * rhs[i][col];
}
elements_[row][col] = cur_element;
}
}
return *this;
}
RTC_DISALLOW_COPY_AND_ASSIGN(Matrix);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
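A small example of the chaining convention described above (operations return *this), grounded in this header.

#include "webrtc/modules/audio_processing/beamformer/matrix.h"

void MatrixChainingExample() {
  const float data[] = {1.f, 2.f,
                        3.f, 4.f};
  webrtc::Matrix<float> a(data, 2, 2);
  webrtc::Matrix<float> b(data, 2, 2);
  webrtc::Matrix<float> product(2, 2);
  product.Multiply(a, b);           // out-of-place: product = a * b
  product.Transpose().Scale(0.5f);  // in-place operations, chained
}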

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"
namespace {
const float kTolerance = 0.001f;
}
namespace webrtc {
using std::complex;
// Functions used in both matrix_unittest and complex_matrix_unittest.
class MatrixTestHelpers {
public:
template <typename T>
static void ValidateMatrixEquality(const Matrix<T>& expected,
const Matrix<T>& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const T* const* expected_elements = expected.elements();
const T* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_EQ(expected_elements[i][j], actual_elements[i][j]);
}
}
}
static void ValidateMatrixEqualityFloat(const Matrix<float>& expected,
const Matrix<float>& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const float* const* expected_elements = expected.elements();
const float* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j], actual_elements[i][j], kTolerance);
}
}
}
static void ValidateMatrixEqualityComplexFloat(
const Matrix<complex<float> >& expected,
const Matrix<complex<float> >& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const complex<float>* const* expected_elements = expected.elements();
const complex<float>* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j].real(),
actual_elements[i][j].real(),
kTolerance);
EXPECT_NEAR(expected_elements[i][j].imag(),
actual_elements[i][j].imag(),
kTolerance);
}
}
}
static void ValidateMatrixNearEqualityComplexFloat(
const Matrix<complex<float> >& expected,
const Matrix<complex<float> >& actual,
float tolerance) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const complex<float>* const* expected_elements = expected.elements();
const complex<float>* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j].real(),
actual_elements[i][j].real(),
tolerance);
EXPECT_NEAR(expected_elements[i][j].imag(),
actual_elements[i][j].imag(),
tolerance);
}
}
}
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
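A sketch of how these helpers are meant to be used from a gtest test case:

#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h"

TEST(MatrixTest, TransposeTwiceRestoresMatrix) {
  const float data[] = {1.f, 2.f, 3.f,
                        4.f, 5.f, 6.f};
  webrtc::Matrix<float> expected(data, 2, 3);
  webrtc::Matrix<float> actual(data, 2, 3);
  actual.Transpose().Transpose();  // transposing twice is the identity
  webrtc::MatrixTestHelpers::ValidateMatrixEqualityFloat(expected, actual);
}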

View File

@ -0,0 +1,516 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>
#include "webrtc/base/arraysize.h"
#include "webrtc/common_audio/window_generator.h"
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
namespace webrtc {
namespace {
// Alpha for the Kaiser Bessel Derived window.
const float kKbdAlpha = 1.5f;
// The minimum value a post-processing mask can take.
const float kMaskMinimum = 0.01f;
const float kSpeedOfSoundMeterSeconds = 343;
// For both target and interference angles, PI / 2 is perpendicular to the
// microphone array, facing forwards. The positive direction goes
// counterclockwise.
// The angle at which we amplify sound.
const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
// The angle at which we suppress sound. Suppression is symmetric around PI / 2
// radians, so sound is suppressed at both +|kInterfAngleRadians| and
// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
// suppress sound coming from close angles as well.
const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
// When calculating the interference covariance matrix, this is the weight for
// the weighted average between the uniform covariance matrix and the angled
// covariance matrix.
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
const float kBalance = 0.4f;
const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;
// TODO(claguna): need comment here.
const float kBeamwidthConstant = 0.00002f;
// Alpha coefficients for mask smoothing.
const float kMaskTimeSmoothAlpha = 0.2f;
const float kMaskFrequencySmoothAlpha = 0.6f;
// The average mask is computed from masks in this mid-frequency range. If these
// ranges are changed, |kMaskQuantile| might need to be adjusted.
const int kLowMeanStartHz = 200;
const int kLowMeanEndHz = 400;
const int kHighMeanStartHz = 3000;
const int kHighMeanEndHz = 5000;
// Quantile of mask values which is used to estimate target presence.
const float kMaskQuantile = 0.7f;
// Mask threshold over which the data is considered signal and not interference.
const float kMaskTargetThreshold = 0.3f;
// Time in seconds after which the data is considered interference if the mask
// does not pass |kMaskTargetThreshold|.
const float kHoldTargetSeconds = 0.25f;
// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
// used; to accomplish this, we compute both multiplications in the same loop.
// The returned norm is clamped to be non-negative.
float Norm(const ComplexMatrix<float>& mat,
const ComplexMatrix<float>& norm_mat) {
RTC_CHECK_EQ(norm_mat.num_rows(), 1);
RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns());
complex<float> first_product = complex<float>(0.f, 0.f);
complex<float> second_product = complex<float>(0.f, 0.f);
const complex<float>* const* mat_els = mat.elements();
const complex<float>* const* norm_mat_els = norm_mat.elements();
for (int i = 0; i < norm_mat.num_columns(); ++i) {
for (int j = 0; j < norm_mat.num_columns(); ++j) {
first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
}
second_product += first_product * norm_mat_els[0][i];
first_product = 0.f;
}
return std::max(second_product.real(), 0.f);
}
// Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
const ComplexMatrix<float>& rhs) {
RTC_CHECK_EQ(lhs.num_rows(), 1);
RTC_CHECK_EQ(rhs.num_rows(), 1);
RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns());
const complex<float>* const* lhs_elements = lhs.elements();
const complex<float>* const* rhs_elements = rhs.elements();
complex<float> result = complex<float>(0.f, 0.f);
for (int i = 0; i < lhs.num_columns(); ++i) {
result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
}
return result;
}
// Works for positive numbers only.
size_t Round(float x) {
return static_cast<size_t>(std::floor(x + 0.5f));
}
// Calculates the sum of absolute values of a complex matrix.
float SumAbs(const ComplexMatrix<float>& mat) {
float sum_abs = 0.f;
const complex<float>* const* mat_els = mat.elements();
for (int i = 0; i < mat.num_rows(); ++i) {
for (int j = 0; j < mat.num_columns(); ++j) {
sum_abs += std::abs(mat_els[i][j]);
}
}
return sum_abs;
}
// Calculates the sum of squares of a complex matrix.
float SumSquares(const ComplexMatrix<float>& mat) {
float sum_squares = 0.f;
const complex<float>* const* mat_els = mat.elements();
for (int i = 0; i < mat.num_rows(); ++i) {
for (int j = 0; j < mat.num_columns(); ++j) {
float abs_value = std::abs(mat_els[i][j]);
sum_squares += abs_value * abs_value;
}
}
return sum_squares;
}
// Does |out| = |in|.' * conj(|in|) for row vector |in|.
void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
ComplexMatrix<float>* out) {
RTC_CHECK_EQ(in.num_rows(), 1);
RTC_CHECK_EQ(out->num_rows(), in.num_columns());
RTC_CHECK_EQ(out->num_columns(), in.num_columns());
const complex<float>* in_elements = in.elements()[0];
complex<float>* const* out_elements = out->elements();
for (int i = 0; i < out->num_rows(); ++i) {
for (int j = 0; j < out->num_columns(); ++j) {
out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
}
}
}
std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
for (int dim = 0; dim < 3; ++dim) {
float center = 0.f;
for (size_t i = 0; i < array_geometry.size(); ++i) {
center += array_geometry[i].c[dim];
}
center /= array_geometry.size();
for (size_t i = 0; i < array_geometry.size(); ++i) {
array_geometry[i].c[dim] -= center;
}
}
return array_geometry;
}
} // namespace
// static
const size_t NonlinearBeamformer::kNumFreqBins;
NonlinearBeamformer::NonlinearBeamformer(
const std::vector<Point>& array_geometry)
: num_input_channels_(array_geometry.size()),
array_geometry_(GetCenteredArray(array_geometry)) {
WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
}
void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
chunk_length_ =
static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
sample_rate_hz_ = sample_rate_hz;
low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
// These bin indexes determine the regions over which a mean is taken. This
// is applied as a constant value over the adjacent end "frequency correction"
// regions.
//
// low_mean_start_bin_ high_mean_start_bin_
// v v constant
// |----------------|--------|----------------|-------|----------------|
// constant ^ ^
// low_mean_end_bin_ high_mean_end_bin_
//
RTC_DCHECK_GT(low_mean_start_bin_, 0U);
RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
high_pass_postfilter_mask_ = 1.f;
is_target_present_ = false;
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
lapped_transform_.reset(new LappedTransform(num_input_channels_,
1,
chunk_length_,
window_,
kFftSize,
kFftSize / 2,
this));
for (size_t i = 0; i < kNumFreqBins; ++i) {
time_smooth_mask_[i] = 1.f;
final_mask_[i] = 1.f;
float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
kBeamwidthConstant * wave_numbers_[i] *
wave_numbers_[i];
}
// Initialize all nonadaptive values before looping through the frames.
InitDelaySumMasks();
InitTargetCovMats();
InitInterfCovMats();
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
reflected_rpsiws_[i] =
Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
}
}
void NonlinearBeamformer::InitDelaySumMasks() {
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
kFftSize,
sample_rate_hz_,
kSpeedOfSoundMeterSeconds,
array_geometry_,
kTargetAngleRadians,
&delay_sum_masks_[f_ix]);
complex_f norm_factor = sqrt(
ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
normalized_delay_sum_masks_[f_ix]));
}
}
void NonlinearBeamformer::InitTargetCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
complex_f normalization_factor = target_cov_mats_[i].Trace();
target_cov_mats_[i].Scale(1.f / normalization_factor);
}
}
void NonlinearBeamformer::InitInterfCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
array_geometry_,
&uniform_cov_mat);
CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
kInterfAngleRadians,
i,
kFftSize,
kNumFreqBins,
sample_rate_hz_,
array_geometry_,
&angled_cov_mat);
// Normalize matrices before averaging them.
complex_f normalization_factor = uniform_cov_mat.Trace();
uniform_cov_mat.Scale(1.f / normalization_factor);
normalization_factor = angled_cov_mat.Trace();
angled_cov_mat.Scale(1.f / normalization_factor);
// Average matrices.
uniform_cov_mat.Scale(1 - kBalance);
angled_cov_mat.Scale(kBalance);
interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
}
}
void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) {
RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
float old_high_pass_mask = high_pass_postfilter_mask_;
lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
// Ramp up/down for smoothing. 1 mask per 10ms results in audible
// discontinuities.
const float ramp_increment =
(high_pass_postfilter_mask_ - old_high_pass_mask) /
input.num_frames_per_band();
// Apply delay and sum and post-filter in the time domain. WARNING: only works
// because delay-and-sum is not frequency dependent.
for (size_t i = 1; i < input.num_bands(); ++i) {
float smoothed_mask = old_high_pass_mask;
for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
smoothed_mask += ramp_increment;
// Applying the delay and sum (at zero degrees, this is equivalent to
// averaging).
float sum = 0.f;
for (int k = 0; k < input.num_channels(); ++k) {
sum += input.channels(i)[k][j];
}
output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
}
}
}
bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
// If more than half-beamwidth degrees away from the beam's center,
// you are out of the beam.
return fabs(spherical_point.azimuth() - kTargetAngleRadians) <
kHalfBeamWidthRadians;
}
void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
int num_input_channels,
size_t num_freq_bins,
int num_output_channels,
complex_f* const* output) {
RTC_CHECK_EQ(num_freq_bins, kNumFreqBins);
RTC_CHECK_EQ(num_input_channels, num_input_channels_);
RTC_CHECK_EQ(num_output_channels, 1);
// Calculating the post-filter masks. Note that we need two for each
// frequency bin to account for the positive and negative interferer
// angle.
for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
eig_m_.CopyFromColumn(input, i, num_input_channels_);
float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
if (eig_m_norm_factor != 0.f) {
eig_m_.Scale(1.f / eig_m_norm_factor);
}
float rxim = Norm(target_cov_mats_[i], eig_m_);
float ratio_rxiw_rxim = 0.f;
if (rxim > 0.f) {
ratio_rxiw_rxim = rxiws_[i] / rxim;
}
complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
rmw *= rmw;
float rmw_r = rmw.real();
new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
rpsiws_[i],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
reflected_rpsiws_[i],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
}
ApplyMaskTimeSmoothing();
EstimateTargetPresence();
ApplyLowFrequencyCorrection();
ApplyHighFrequencyCorrection();
ApplyMaskFrequencySmoothing();
ApplyMasks(input, output);
}
float NonlinearBeamformer::CalculatePostfilterMask(
const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmw_r,
float mask_threshold) {
float rpsim = Norm(interf_cov_mat, eig_m_);
// Find lambda.
float ratio = 0.f;
if (rpsim > 0.f) {
ratio = rpsiw / rpsim;
}
float numerator = rmw_r - ratio;
float denominator = ratio_rxiw_rxim - ratio;
float mask = 1.f;
if (denominator > mask_threshold) {
float lambda = numerator / denominator;
mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
}
return mask;
}
void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
complex_f* const* output) {
complex_f* output_channel = output[0];
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
output_channel[f_ix] = complex_f(0.f, 0.f);
const complex_f* delay_sum_mask_els =
normalized_delay_sum_masks_[f_ix].elements()[0];
for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
}
output_channel[f_ix] *= final_mask_[f_ix];
}
}
// Smooth new_mask_ into time_smooth_mask_.
void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
(1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
}
}
// Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
// Smooth over frequency in both directions. The "frequency correction"
// regions have constant value, but we enter them to smooth over the jump
// that exists at the boundary. However, this does mean that when smoothing
// "away" from the region, only its last element needs to be used.
//
// Upward smoothing:
// low_mean_start_bin_
// v
// |------|------------|------|
// ^------------------>^
//
// Downward smoothing:
// high_mean_end_bin_
// v
// |------|------------|------|
// ^<------------------^
std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
(1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
}
for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) {
final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] +
(1 - kMaskFrequencySmoothAlpha) * final_mask_[i];
}
}
// Apply low frequency correction to time_smooth_mask_.
void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
const float low_frequency_mask =
MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
low_frequency_mask);
}
// Apply high frequency correction to time_smooth_mask_. Update
// high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
high_pass_postfilter_mask_ =
MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
}
// Compute mean over the given range of time_smooth_mask_, [first, last).
float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) {
RTC_DCHECK_GT(last, first);
const float sum = std::accumulate(time_smooth_mask_ + first,
time_smooth_mask_ + last, 0.f);
return sum / (last - first);
}
void NonlinearBeamformer::EstimateTargetPresence() {
const size_t quantile = static_cast<size_t>(
(high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
low_mean_start_bin_);
std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
new_mask_ + high_mean_end_bin_ + 1);
if (new_mask_[quantile] > kMaskTargetThreshold) {
is_target_present_ = true;
interference_blocks_count_ = 0;
} else {
is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
}
}
} // namespace webrtc
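For reference, CalculatePostfilterMask() above can be restated in math notation (the symbol-to-variable mapping is ours, not upstream's). With \rho = rpsiw / rpsim, \rho_x = ratio_rxiw_rxim and r_{mw} = rmw_r:

\lambda = \frac{r_{mw} - \rho}{\rho_x - \rho}, \qquad
\text{mask} = \max\!\left(\lambda \,\frac{\rho_x}{r_{mw}},\; 0.01\right)
\quad \text{when } \rho_x - \rho > \text{mask\_threshold}, \text{ else } 1.

The smoothing steps are plain exponential averages, in time with \alpha = 0.2 per block and in frequency with \alpha = 0.6 per bin:

M_t[i] = \alpha\, M_{\text{new}}[i] + (1-\alpha)\, M_{t-1}[i].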

View File

@ -0,0 +1,177 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#include <vector>
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
namespace webrtc {
// Enhances sound sources coming directly in front of a uniform linear array
// and suppresses sound sources coming from all other directions. Operates on
// multichannel signals and produces single-channel output.
//
// The implemented nonlinear postfilter algorithm is taken from "A Robust
// Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn.
//
// TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.
class NonlinearBeamformer
: public Beamformer<float>,
public LappedTransform::Callback {
public:
// At the moment it only accepts uniform linear microphone arrays. Using the
// first microphone as a reference position [0, 0, 0] is a natural choice.
explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);
// Sample rate corresponds to the lower band.
// Needs to be called before the NonlinearBeamformer can be used.
void Initialize(int chunk_size_ms, int sample_rate_hz) override;
// Process one time-domain chunk of audio. The audio is expected to be split
// into frequency bands inside the ChannelBuffer. The number of frames and
// channels must correspond to the constructor parameters. The same
// ChannelBuffer can be passed in as |input| and |output|.
void ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) override;
bool IsInBeam(const SphericalPointf& spherical_point) override;
// After processing each block, |is_target_present_| is set to true if the
// target signal is present and to false otherwise. This method can be called
// to determine whether the data is target signal or interference, and to
// process it accordingly.
bool is_target_present() override { return is_target_present_; }
protected:
// Process one frequency-domain block of audio. This is where the fun
// happens. Implements LappedTransform::Callback.
void ProcessAudioBlock(const complex<float>* const* input,
int num_input_channels,
size_t num_freq_bins,
int num_output_channels,
complex<float>* const* output) override;
private:
typedef Matrix<float> MatrixF;
typedef ComplexMatrix<float> ComplexMatrixF;
typedef complex<float> complex_f;
void InitDelaySumMasks();
void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle.
void InitInterfCovMats();
// An implementation of equation 18, which calculates postfilter masks that,
// when applied, minimize the mean-square error of our estimation of the
// desired signal. A sub-task is to calculate lambda, which is solved via
// equation 13.
float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmxi_r,
float mask_threshold);
// Prevents the postfilter masks from degenerating too quickly (a cause of
// musical noise).
void ApplyMaskTimeSmoothing();
void ApplyMaskFrequencySmoothing();
// The postfilter masks are unreliable at low frequencies. Calculates a better
// mask by averaging mid-low frequency values.
void ApplyLowFrequencyCorrection();
// Postfilter masks are also unreliable at high frequencies. Average mid-high
// frequency masks to calculate a single mask per block which can be applied
// in the time-domain. Further, we average these block-masks over a chunk,
// resulting in one postfilter mask per audio chunk. This allows us to skip
// both transforming and blocking the high-frequency signal.
void ApplyHighFrequencyCorrection();
// Compute the means needed for the above frequency correction.
float MaskRangeMean(size_t start_bin, size_t end_bin);
// Applies both sets of masks to |input| and store in |output|.
void ApplyMasks(const complex_f* const* input, complex_f* const* output);
void EstimateTargetPresence();
static const size_t kFftSize = 256;
static const size_t kNumFreqBins = kFftSize / 2 + 1;
// Deals with the fft transform and blocking.
size_t chunk_length_;
rtc::scoped_ptr<LappedTransform> lapped_transform_;
float window_[kFftSize];
// Parameters exposed to the user.
const int num_input_channels_;
int sample_rate_hz_;
const std::vector<Point> array_geometry_;
// Calculated based on user-input and constants in the .cc file.
size_t low_mean_start_bin_;
size_t low_mean_end_bin_;
size_t high_mean_start_bin_;
size_t high_mean_end_bin_;
// Quickly varying mask updated every block.
float new_mask_[kNumFreqBins];
// Time smoothed mask.
float time_smooth_mask_[kNumFreqBins];
// Time and frequency smoothed mask.
float final_mask_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |1| x |num_input_channels_|.
ComplexMatrixF delay_sum_masks_[kNumFreqBins];
ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF target_cov_mats_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF interf_cov_mats_[kNumFreqBins];
ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];
// Of length |kNumFreqBins|.
float mask_thresholds_[kNumFreqBins];
float wave_numbers_[kNumFreqBins];
// Preallocated for ProcessAudioBlock()
// Of length |kNumFreqBins|.
float rxiws_[kNumFreqBins];
float rpsiws_[kNumFreqBins];
float reflected_rpsiws_[kNumFreqBins];
// The microphone normalization factor.
ComplexMatrixF eig_m_;
// For processing the high-frequency input signal.
float high_pass_postfilter_mask_;
// True when the target signal is present.
bool is_target_present_;
// Number of blocks after which the data is considered interference if the
// mask does not pass |kMaskSignalThreshold|.
size_t hold_target_blocks_;
// Number of blocks since the last mask that passed |kMaskSignalThreshold|.
size_t interference_blocks_count_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
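An end-to-end construction sketch, grounded in this header; the microphone spacing is an arbitrary example value, and a real caller would construct and Initialize() once rather than per chunk.

#include <vector>
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

void BeamformChunk(const webrtc::ChannelBuffer<float>& split_input,
                   webrtc::ChannelBuffer<float>* split_output) {
  // Two-mic uniform linear array, 5 cm spacing, first mic at the origin.
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
  webrtc::NonlinearBeamformer bf(geometry);
  bf.Initialize(10, 16000);  // 10 ms chunks; sample rate of the lower band
  bf.ProcessChunk(split_input, split_output);  // input must be band-split
}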

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
#include <assert.h>
#include "webrtc/modules/audio_processing/include/audio_processing.h"
namespace webrtc {
static inline int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kMonoAndKeyboard:
return 1;
case AudioProcessing::kStereo:
case AudioProcessing::kStereoAndKeyboard:
return 2;
}
assert(false);
return -1;
}
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
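A usage note: ChannelsFromLayout() counts only primary channels, so the keyboard-mic layouts map to the same counts as their plain counterparts. A hypothetical caller might use it for validation:

#include "webrtc/modules/audio_processing/common.h"

// Hypothetical helper: check that a buffer's channel count matches its
// declared layout. kMonoAndKeyboard still reports 1 because the keyboard
// mic is not a primary channel.
bool LayoutMatchesChannels(webrtc::AudioProcessing::ChannelLayout layout,
                           int num_channels) {
  return webrtc::ChannelsFromLayout(layout) == num_channels;
}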


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,23 +8,24 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "echo_cancellation_impl.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
#include <cassert>
#include <assert.h>
#include <string.h>
#include "critical_section_wrapper.h"
#include "echo_cancellation.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
extern "C" {
#include "webrtc/modules/audio_processing/aec/aec_core.h"
}
#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
namespace webrtc {
typedef void Handle;
namespace {
WebRtc_Word16 MapSetting(EchoCancellation::SuppressionLevel level) {
int16_t MapSetting(EchoCancellation::SuppressionLevel level) {
switch (level) {
case EchoCancellation::kLowSuppression:
return kAecNlpConservative;
@ -32,22 +33,19 @@ WebRtc_Word16 MapSetting(EchoCancellation::SuppressionLevel level) {
return kAecNlpModerate;
case EchoCancellation::kHighSuppression:
return kAecNlpAggressive;
default:
return -1;
}
assert(false);
return -1;
}
int MapError(int err) {
AudioProcessing::Error MapError(int err) {
switch (err) {
case AEC_UNSUPPORTED_FUNCTION_ERROR:
return AudioProcessing::kUnsupportedFunctionError;
break;
case AEC_BAD_PARAMETER_ERROR:
return AudioProcessing::kBadParameterError;
break;
case AEC_BAD_PARAMETER_WARNING:
return AudioProcessing::kBadStreamParameterWarning;
break;
default:
// AEC_UNSPECIFIED_ERROR
// AEC_UNINITIALIZED_ERROR
@ -57,17 +55,21 @@ int MapError(int err) {
}
} // namespace
EchoCancellationImpl::EchoCancellationImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
apm_(apm),
drift_compensation_enabled_(false),
metrics_enabled_(false),
suppression_level_(kModerateSuppression),
device_sample_rate_hz_(48000),
stream_drift_samples_(0),
was_stream_drift_set_(false),
stream_has_echo_(false),
delay_logging_enabled_(false) {}
EchoCancellationImpl::EchoCancellationImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
drift_compensation_enabled_(false),
metrics_enabled_(false),
suppression_level_(kModerateSuppression),
stream_drift_samples_(0),
was_stream_drift_set_(false),
stream_has_echo_(false),
delay_logging_enabled_(false),
extended_filter_enabled_(false),
delay_agnostic_enabled_(false) {
}
EchoCancellationImpl::~EchoCancellationImpl() {}
@ -76,7 +78,7 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_reverse_channels());
int err = apm_->kNoError;
@ -88,8 +90,8 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(handle_index));
err = WebRtcAec_BufferFarend(
my_handle,
audio->low_pass_split_data(j),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->split_bands_const_f(j)[kBand0To8kHz],
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle); // TODO(ajm): warning possible?
@ -115,7 +117,7 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kStreamParameterNotSetError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_output_channels());
int err = apm_->kNoError;
@ -128,11 +130,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
Handle* my_handle = handle(handle_index);
err = WebRtcAec_Process(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
audio->split_bands_const_f(i),
audio->num_bands(),
audio->split_bands_f(i),
audio->num_frames_per_band(),
apm_->stream_delay_ms(),
stream_drift_samples_);
@ -144,7 +145,7 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
}
WebRtc_Word16 status = 0;
int status = 0;
err = WebRtcAec_get_echo_status(my_handle, &status);
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -163,7 +164,7 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
int EchoCancellationImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
// Ensure AEC and AECM are not both enabled.
if (enable && apm_->echo_control_mobile()->is_enabled()) {
return apm_->kBadParameterError;
@ -177,7 +178,7 @@ bool EchoCancellationImpl::is_enabled() const {
}
int EchoCancellationImpl::set_suppression_level(SuppressionLevel level) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(level) == -1) {
return apm_->kBadParameterError;
}
@ -192,7 +193,7 @@ EchoCancellation::SuppressionLevel EchoCancellationImpl::suppression_level()
}
int EchoCancellationImpl::enable_drift_compensation(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
drift_compensation_enabled_ = enable;
return Configure();
}
@ -201,24 +202,9 @@ bool EchoCancellationImpl::is_drift_compensation_enabled() const {
return drift_compensation_enabled_;
}
int EchoCancellationImpl::set_device_sample_rate_hz(int rate) {
CriticalSectionScoped crit_scoped(*apm_->crit());
if (rate < 8000 || rate > 96000) {
return apm_->kBadParameterError;
}
device_sample_rate_hz_ = rate;
return Initialize();
}
int EchoCancellationImpl::device_sample_rate_hz() const {
return device_sample_rate_hz_;
}
int EchoCancellationImpl::set_stream_drift_samples(int drift) {
void EchoCancellationImpl::set_stream_drift_samples(int drift) {
was_stream_drift_set_ = true;
stream_drift_samples_ = drift;
return apm_->kNoError;
}
int EchoCancellationImpl::stream_drift_samples() const {
@ -226,7 +212,7 @@ int EchoCancellationImpl::stream_drift_samples() const {
}
int EchoCancellationImpl::enable_metrics(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
metrics_enabled_ = enable;
return Configure();
}
@ -238,7 +224,7 @@ bool EchoCancellationImpl::are_metrics_enabled() const {
// TODO(ajm): we currently just use the metrics from the first AEC. Think more
// about the best way to extend this to multi-channel.
int EchoCancellationImpl::GetMetrics(Metrics* metrics) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (metrics == NULL) {
return apm_->kNullPointerError;
}
@ -285,7 +271,7 @@ bool EchoCancellationImpl::stream_has_echo() const {
}
int EchoCancellationImpl::enable_delay_logging(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
delay_logging_enabled_ = enable;
return Configure();
}
@ -294,9 +280,23 @@ bool EchoCancellationImpl::is_delay_logging_enabled() const {
return delay_logging_enabled_;
}
bool EchoCancellationImpl::is_delay_agnostic_enabled() const {
return delay_agnostic_enabled_;
}
bool EchoCancellationImpl::is_extended_filter_enabled() const {
return extended_filter_enabled_;
}
// TODO(bjornv): How should we handle the multi-channel case?
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
CriticalSectionScoped crit_scoped(*apm_->crit());
float fraction_poor_delays = 0;
return GetDelayMetrics(median, std, &fraction_poor_delays);
}
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std,
float* fraction_poor_delays) {
CriticalSectionScoped crit_scoped(crit_);
if (median == NULL) {
return apm_->kNullPointerError;
}
@ -309,7 +309,7 @@ int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
}
Handle* my_handle = static_cast<Handle*>(handle(0));
if (WebRtcAec_GetDelayMetrics(my_handle, median, std) !=
if (WebRtcAec_GetDelayMetrics(my_handle, median, std, fraction_poor_delays) !=
apm_->kNoError) {
return GetHandleError(my_handle);
}
@ -317,47 +317,47 @@ int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
return apm_->kNoError;
}
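The three-argument GetDelayMetrics() overload added here also surfaces the fraction of poor delays; the legacy two-argument form now simply discards it. A usage sketch, assuming an initialized |apm| with AEC and delay logging enabled:

// Hypothetical caller polling delay metrics after a processed stream.
int median_ms = 0;
int std_ms = 0;
float fraction_poor_delays = 0.f;
if (apm->echo_cancellation()->GetDelayMetrics(
        &median_ms, &std_ms, &fraction_poor_delays) ==
    webrtc::AudioProcessing::kNoError) {
  // A large |fraction_poor_delays| suggests the reported system delays are
  // unreliable; the delay-agnostic mode targets such platforms.
}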
struct AecCore* EchoCancellationImpl::aec_core() const {
CriticalSectionScoped crit_scoped(crit_);
if (!is_component_enabled()) {
return NULL;
}
Handle* my_handle = static_cast<Handle*>(handle(0));
return WebRtcAec_aec_core(my_handle);
}
int EchoCancellationImpl::Initialize() {
int err = ProcessingComponent::Initialize();
if (err != apm_->kNoError || !is_component_enabled()) {
return err;
}
was_stream_drift_set_ = false;
return apm_->kNoError;
}
int EchoCancellationImpl::get_version(char* version,
int version_len_bytes) const {
if (WebRtcAec_get_version(version, version_len_bytes) != 0) {
return apm_->kBadParameterError;
}
return apm_->kNoError;
void EchoCancellationImpl::SetExtraOptions(const Config& config) {
extended_filter_enabled_ = config.Get<ExtendedFilter>().enabled;
delay_agnostic_enabled_ = config.Get<DelayAgnostic>().enabled;
Configure();
}
void* EchoCancellationImpl::CreateHandle() const {
Handle* handle = NULL;
if (WebRtcAec_Create(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
return WebRtcAec_Create();
}
int EchoCancellationImpl::DestroyHandle(void* handle) const {
void EchoCancellationImpl::DestroyHandle(void* handle) const {
assert(handle != NULL);
return WebRtcAec_Free(static_cast<Handle*>(handle));
WebRtcAec_Free(static_cast<Handle*>(handle));
}
int EchoCancellationImpl::InitializeHandle(void* handle) const {
assert(handle != NULL);
// TODO(ajm): Drift compensation is disabled in practice. If restored, it
// should be managed internally and not depend on the hardware sample rate.
// For now, just hardcode a 48 kHz value.
return WebRtcAec_Init(static_cast<Handle*>(handle),
apm_->sample_rate_hz(),
device_sample_rate_hz_);
apm_->proc_sample_rate_hz(),
48000);
}
int EchoCancellationImpl::ConfigureHandle(void* handle) const {
@ -368,6 +368,12 @@ int EchoCancellationImpl::ConfigureHandle(void* handle) const {
config.skewMode = drift_compensation_enabled_;
config.delay_logging = delay_logging_enabled_;
WebRtcAec_enable_extended_filter(
WebRtcAec_aec_core(static_cast<Handle*>(handle)),
extended_filter_enabled_ ? 1 : 0);
WebRtcAec_enable_delay_agnostic(
WebRtcAec_aec_core(static_cast<Handle*>(handle)),
delay_agnostic_enabled_ ? 1 : 0);
return WebRtcAec_set_config(static_cast<Handle*>(handle), config);
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,69 +8,79 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class EchoCancellationImpl : public EchoCancellation,
public ProcessingComponent {
public:
explicit EchoCancellationImpl(const AudioProcessingImpl* apm);
EchoCancellationImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~EchoCancellationImpl();
int ProcessRenderAudio(const AudioBuffer* audio);
int ProcessCaptureAudio(AudioBuffer* audio);
// EchoCancellation implementation.
virtual bool is_enabled() const;
virtual int device_sample_rate_hz() const;
virtual int stream_drift_samples() const;
bool is_enabled() const override;
int stream_drift_samples() const override;
SuppressionLevel suppression_level() const override;
bool is_drift_compensation_enabled() const override;
// ProcessingComponent implementation.
virtual int Initialize();
virtual int get_version(char* version, int version_len_bytes) const;
int Initialize() override;
void SetExtraOptions(const Config& config) override;
bool is_delay_agnostic_enabled() const;
bool is_extended_filter_enabled() const;
private:
// EchoCancellation implementation.
virtual int Enable(bool enable);
virtual int enable_drift_compensation(bool enable);
virtual bool is_drift_compensation_enabled() const;
virtual int set_device_sample_rate_hz(int rate);
virtual int set_stream_drift_samples(int drift);
virtual int set_suppression_level(SuppressionLevel level);
virtual SuppressionLevel suppression_level() const;
virtual int enable_metrics(bool enable);
virtual bool are_metrics_enabled() const;
virtual bool stream_has_echo() const;
virtual int GetMetrics(Metrics* metrics);
virtual int enable_delay_logging(bool enable);
virtual bool is_delay_logging_enabled() const;
virtual int GetDelayMetrics(int* median, int* std);
int Enable(bool enable) override;
int enable_drift_compensation(bool enable) override;
void set_stream_drift_samples(int drift) override;
int set_suppression_level(SuppressionLevel level) override;
int enable_metrics(bool enable) override;
bool are_metrics_enabled() const override;
bool stream_has_echo() const override;
int GetMetrics(Metrics* metrics) override;
int enable_delay_logging(bool enable) override;
bool is_delay_logging_enabled() const override;
int GetDelayMetrics(int* median, int* std) override;
int GetDelayMetrics(int* median,
int* std,
float* fraction_poor_delays) override;
struct AecCore* aec_core() const override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
bool drift_compensation_enabled_;
bool metrics_enabled_;
SuppressionLevel suppression_level_;
int device_sample_rate_hz_;
int stream_drift_samples_;
bool was_stream_drift_set_;
bool stream_has_echo_;
bool delay_logging_enabled_;
bool extended_filter_enabled_;
bool delay_agnostic_enabled_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,23 +8,22 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "echo_control_mobile_impl.h"
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
#include <cassert>
#include <cstring>
#include <assert.h>
#include <string.h>
#include "critical_section_wrapper.h"
#include "echo_control_mobile.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
typedef void Handle;
namespace {
WebRtc_Word16 MapSetting(EchoControlMobile::RoutingMode mode) {
int16_t MapSetting(EchoControlMobile::RoutingMode mode) {
switch (mode) {
case EchoControlMobile::kQuietEarpieceOrHeadset:
return 0;
@ -36,12 +35,12 @@ WebRtc_Word16 MapSetting(EchoControlMobile::RoutingMode mode) {
return 3;
case EchoControlMobile::kLoudSpeakerphone:
return 4;
default:
return -1;
}
assert(false);
return -1;
}
int MapError(int err) {
AudioProcessing::Error MapError(int err) {
switch (err) {
case AECM_UNSUPPORTED_FUNCTION_ERROR:
return AudioProcessing::kUnsupportedFunctionError;
@ -63,9 +62,11 @@ size_t EchoControlMobile::echo_path_size_bytes() {
return WebRtcAecm_echo_path_size_bytes();
}
EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
routing_mode_(kSpeakerphone),
comfort_noise_enabled_(true),
external_echo_path_(NULL) {}
@ -82,7 +83,7 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_reverse_channels());
int err = apm_->kNoError;
@ -94,8 +95,8 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(handle_index));
err = WebRtcAecm_BufferFarend(
my_handle,
audio->low_pass_split_data(j),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->split_bands_const(j)[kBand0To8kHz],
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle); // TODO(ajm): warning possible?
@ -117,7 +118,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kStreamParameterNotSetError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_output_channels());
int err = apm_->kNoError;
@ -127,8 +128,8 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
for (int i = 0; i < audio->num_channels(); i++) {
// TODO(ajm): improve how this works, possibly inside AECM.
// This is kind of hacked up.
WebRtc_Word16* noisy = audio->low_pass_reference(i);
WebRtc_Word16* clean = audio->low_pass_split_data(i);
const int16_t* noisy = audio->low_pass_reference(i);
const int16_t* clean = audio->split_bands_const(i)[kBand0To8kHz];
if (noisy == NULL) {
noisy = clean;
clean = NULL;
@ -139,8 +140,8 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
my_handle,
noisy,
clean,
audio->low_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
audio->split_bands(i)[kBand0To8kHz],
audio->num_frames_per_band(),
apm_->stream_delay_ms());
if (err != apm_->kNoError) {
@ -155,7 +156,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
int EchoControlMobileImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
// Ensure AEC and AECM are not both enabled.
if (enable && apm_->echo_cancellation()->is_enabled()) {
return apm_->kBadParameterError;
@ -169,7 +170,7 @@ bool EchoControlMobileImpl::is_enabled() const {
}
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(mode) == -1) {
return apm_->kBadParameterError;
}
@ -184,7 +185,7 @@ EchoControlMobile::RoutingMode EchoControlMobileImpl::routing_mode()
}
int EchoControlMobileImpl::enable_comfort_noise(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
comfort_noise_enabled_ = enable;
return Configure();
}
@ -195,7 +196,7 @@ bool EchoControlMobileImpl::is_comfort_noise_enabled() const {
int EchoControlMobileImpl::SetEchoPath(const void* echo_path,
size_t size_bytes) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (echo_path == NULL) {
return apm_->kNullPointerError;
}
@ -214,7 +215,7 @@ int EchoControlMobileImpl::SetEchoPath(const void* echo_path,
int EchoControlMobileImpl::GetEchoPath(void* echo_path,
size_t size_bytes) const {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (echo_path == NULL) {
return apm_->kNullPointerError;
}
@ -240,42 +241,26 @@ int EchoControlMobileImpl::Initialize() {
return apm_->kNoError;
}
if (apm_->sample_rate_hz() == apm_->kSampleRate32kHz) {
// AECM doesn't support super-wideband.
if (apm_->proc_sample_rate_hz() > apm_->kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
return apm_->kBadSampleRateError;
}
return ProcessingComponent::Initialize();
}
int EchoControlMobileImpl::get_version(char* version,
int version_len_bytes) const {
if (WebRtcAecm_get_version(version, version_len_bytes) != 0) {
return apm_->kBadParameterError;
}
return apm_->kNoError;
}
void* EchoControlMobileImpl::CreateHandle() const {
Handle* handle = NULL;
if (WebRtcAecm_Create(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
return WebRtcAecm_Create();
}
int EchoControlMobileImpl::DestroyHandle(void* handle) const {
return WebRtcAecm_Free(static_cast<Handle*>(handle));
void EchoControlMobileImpl::DestroyHandle(void* handle) const {
WebRtcAecm_Free(static_cast<Handle*>(handle));
}
int EchoControlMobileImpl::InitializeHandle(void* handle) const {
assert(handle != NULL);
Handle* my_handle = static_cast<Handle*>(handle);
if (WebRtcAecm_Init(my_handle, apm_->sample_rate_hz()) != 0) {
if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) {
return GetHandleError(my_handle);
}
if (external_echo_path_ != NULL) {


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,55 +8,57 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class EchoControlMobileImpl : public EchoControlMobile,
public ProcessingComponent {
public:
explicit EchoControlMobileImpl(const AudioProcessingImpl* apm);
EchoControlMobileImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~EchoControlMobileImpl();
int ProcessRenderAudio(const AudioBuffer* audio);
int ProcessCaptureAudio(AudioBuffer* audio);
// EchoControlMobile implementation.
virtual bool is_enabled() const;
bool is_enabled() const override;
RoutingMode routing_mode() const override;
bool is_comfort_noise_enabled() const override;
// ProcessingComponent implementation.
virtual int Initialize();
virtual int get_version(char* version, int version_len_bytes) const;
int Initialize() override;
private:
// EchoControlMobile implementation.
virtual int Enable(bool enable);
virtual int set_routing_mode(RoutingMode mode);
virtual RoutingMode routing_mode() const;
virtual int enable_comfort_noise(bool enable);
virtual bool is_comfort_noise_enabled() const;
virtual int SetEchoPath(const void* echo_path, size_t size_bytes);
virtual int GetEchoPath(void* echo_path, size_t size_bytes) const;
int Enable(bool enable) override;
int set_routing_mode(RoutingMode mode) override;
int enable_comfort_noise(bool enable) override;
int SetEchoPath(const void* echo_path, size_t size_bytes) override;
int GetEchoPath(void* echo_path, size_t size_bytes) const override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
RoutingMode routing_mode_;
bool comfort_noise_enabled_;
unsigned char* external_echo_path_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,54 +8,38 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "gain_control_impl.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include <cassert>
#include <assert.h>
#include "critical_section_wrapper.h"
#include "gain_control.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
namespace webrtc {
typedef void Handle;
/*template <class T>
class GainControlHandle : public ComponentHandle<T> {
public:
GainControlHandle();
virtual ~GainControlHandle();
virtual int Create();
virtual T* ptr() const;
private:
T* handle;
};*/
namespace {
WebRtc_Word16 MapSetting(GainControl::Mode mode) {
int16_t MapSetting(GainControl::Mode mode) {
switch (mode) {
case GainControl::kAdaptiveAnalog:
return kAgcModeAdaptiveAnalog;
break;
case GainControl::kAdaptiveDigital:
return kAgcModeAdaptiveDigital;
break;
case GainControl::kFixedDigital:
return kAgcModeFixedDigital;
break;
default:
return -1;
}
assert(false);
return -1;
}
} // namespace
GainControlImpl::GainControlImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
GainControlImpl::GainControlImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
mode_(kAdaptiveAnalog),
minimum_capture_level_(0),
maximum_capture_level_(255),
@ -73,20 +57,14 @@ int GainControlImpl::ProcessRenderAudio(AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
if (audio->num_channels() > 1) {
audio->CopyAndMixLowPass(1);
mixed_data = audio->mixed_low_pass_data(0);
}
assert(audio->num_frames_per_band() <= 160);
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
int err = WebRtcAgc_AddFarend(
my_handle,
mixed_data,
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->mixed_low_pass_data(),
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -101,19 +79,20 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
int err = apm_->kNoError;
if (mode_ == kAdaptiveAnalog) {
capture_levels_.assign(num_handles(), analog_capture_level_);
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
err = WebRtcAgc_AddMic(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->split_bands(i),
audio->num_bands(),
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -123,14 +102,13 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
WebRtc_Word32 capture_level_out = 0;
int32_t capture_level_out = 0;
err = WebRtcAgc_VirtualMic(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
//capture_levels_[i],
audio->split_bands(i),
audio->num_bands(),
audio->num_frames_per_band(),
analog_capture_level_,
&capture_level_out);
@ -155,22 +133,21 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kStreamParameterNotSetError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
stream_is_saturated_ = false;
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
WebRtc_Word32 capture_level_out = 0;
WebRtc_UWord8 saturation_warning = 0;
int32_t capture_level_out = 0;
uint8_t saturation_warning = 0;
int err = WebRtcAgc_Process(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->split_bands_const(i),
audio->num_bands(),
audio->num_frames_per_band(),
audio->split_bands(i),
capture_levels_[i],
&capture_level_out,
apm_->echo_cancellation()->stream_has_echo(),
@ -202,17 +179,11 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
// TODO(ajm): ensure this is called under kAdaptiveAnalog.
int GainControlImpl::set_stream_analog_level(int level) {
CriticalSectionScoped crit_scoped(crit_);
was_analog_level_set_ = true;
if (level < minimum_capture_level_ || level > maximum_capture_level_) {
return apm_->kBadParameterError;
}
if (mode_ == kAdaptiveAnalog) {
if (level != analog_capture_level_) {
// The analog level has been changed; update our internal levels.
capture_levels_.assign(num_handles(), level);
}
}
analog_capture_level_ = level;
return apm_->kNoError;
@ -226,7 +197,7 @@ int GainControlImpl::stream_analog_level() {
}
int GainControlImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
@ -235,7 +206,7 @@ bool GainControlImpl::is_enabled() const {
}
int GainControlImpl::set_mode(Mode mode) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(mode) == -1) {
return apm_->kBadParameterError;
}
@ -250,7 +221,7 @@ GainControl::Mode GainControlImpl::mode() const {
int GainControlImpl::set_analog_level_limits(int minimum,
int maximum) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (minimum < 0) {
return apm_->kBadParameterError;
}
@ -282,7 +253,7 @@ bool GainControlImpl::stream_is_saturated() const {
}
int GainControlImpl::set_target_level_dbfs(int level) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (level > 31 || level < 0) {
return apm_->kBadParameterError;
}
@ -296,7 +267,7 @@ int GainControlImpl::target_level_dbfs() const {
}
int GainControlImpl::set_compression_gain_db(int gain) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (gain < 0 || gain > 90) {
return apm_->kBadParameterError;
}
@ -310,7 +281,7 @@ int GainControlImpl::compression_gain_db() const {
}
int GainControlImpl::enable_limiter(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
limiter_enabled_ = enable;
return Configure();
}
@ -325,35 +296,16 @@ int GainControlImpl::Initialize() {
return err;
}
analog_capture_level_ =
(maximum_capture_level_ - minimum_capture_level_) >> 1;
capture_levels_.assign(num_handles(), analog_capture_level_);
was_analog_level_set_ = false;
return apm_->kNoError;
}
int GainControlImpl::get_version(char* version, int version_len_bytes) const {
if (WebRtcAgc_Version(version, version_len_bytes) != 0) {
return apm_->kBadParameterError;
}
return apm_->kNoError;
}
void* GainControlImpl::CreateHandle() const {
Handle* handle = NULL;
if (WebRtcAgc_Create(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
return WebRtcAgc_Create();
}
int GainControlImpl::DestroyHandle(void* handle) const {
return WebRtcAgc_Free(static_cast<Handle*>(handle));
void GainControlImpl::DestroyHandle(void* handle) const {
WebRtcAgc_Free(static_cast<Handle*>(handle));
}
int GainControlImpl::InitializeHandle(void* handle) const {
@ -361,18 +313,18 @@ int GainControlImpl::InitializeHandle(void* handle) const {
minimum_capture_level_,
maximum_capture_level_,
MapSetting(mode_),
apm_->sample_rate_hz());
apm_->proc_sample_rate_hz());
}
int GainControlImpl::ConfigureHandle(void* handle) const {
WebRtcAgc_config_t config;
WebRtcAgcConfig config;
// TODO(ajm): Flip the sign here (since AGC expects a positive value) if we
// change the interface.
//assert(target_level_dbfs_ <= 0);
//config.targetLevelDbfs = static_cast<WebRtc_Word16>(-target_level_dbfs_);
config.targetLevelDbfs = static_cast<WebRtc_Word16>(target_level_dbfs_);
//config.targetLevelDbfs = static_cast<int16_t>(-target_level_dbfs_);
config.targetLevelDbfs = static_cast<int16_t>(target_level_dbfs_);
config.compressionGaindB =
static_cast<WebRtc_Word16>(compression_gain_db_);
static_cast<int16_t>(compression_gain_db_);
config.limiterEnable = limiter_enabled_;
return WebRtcAgc_set_config(static_cast<Handle*>(handle), config);
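The WebRtcAgcConfig filled in above is driven entirely by the public GainControl setters, so a typical client never touches it directly. A sketch with hypothetical values (target level 0-31 dBFS and gain 0-90 dB, per the range checks earlier in this file):

// Hypothetical caller configuring AGC through the public interface.
webrtc::GainControl* agc = apm->gain_control();
agc->set_mode(webrtc::GainControl::kAdaptiveDigital);
agc->set_target_level_dbfs(3);   // Passed through positive; see TODO above.
agc->set_compression_gain_db(9);
agc->enable_limiter(true);
agc->Enable(true);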


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,22 +8,24 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
#include <vector>
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class GainControlImpl : public GainControl,
public ProcessingComponent {
public:
explicit GainControlImpl(const AudioProcessingImpl* apm);
GainControlImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~GainControlImpl();
int ProcessRenderAudio(AudioBuffer* audio);
@ -31,39 +33,39 @@ class GainControlImpl : public GainControl,
int ProcessCaptureAudio(AudioBuffer* audio);
// ProcessingComponent implementation.
virtual int Initialize();
virtual int get_version(char* version, int version_len_bytes) const;
int Initialize() override;
// GainControl implementation.
virtual bool is_enabled() const;
virtual int stream_analog_level();
bool is_enabled() const override;
int stream_analog_level() override;
bool is_limiter_enabled() const override;
Mode mode() const override;
private:
// GainControl implementation.
virtual int Enable(bool enable);
virtual int set_stream_analog_level(int level);
virtual int set_mode(Mode mode);
virtual Mode mode() const;
virtual int set_target_level_dbfs(int level);
virtual int target_level_dbfs() const;
virtual int set_compression_gain_db(int gain);
virtual int compression_gain_db() const;
virtual int enable_limiter(bool enable);
virtual bool is_limiter_enabled() const;
virtual int set_analog_level_limits(int minimum, int maximum);
virtual int analog_level_minimum() const;
virtual int analog_level_maximum() const;
virtual bool stream_is_saturated() const;
int Enable(bool enable) override;
int set_stream_analog_level(int level) override;
int set_mode(Mode mode) override;
int set_target_level_dbfs(int level) override;
int target_level_dbfs() const override;
int set_compression_gain_db(int gain) override;
int compression_gain_db() const override;
int enable_limiter(bool enable) override;
int set_analog_level_limits(int minimum, int maximum) override;
int analog_level_minimum() const override;
int analog_level_maximum() const override;
bool stream_is_saturated() const override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
Mode mode_;
int minimum_capture_level_;
int maximum_capture_level_;
@ -77,4 +79,4 @@ class GainControlImpl : public GainControl,
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,35 +8,34 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "high_pass_filter_impl.h"
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
#include <cassert>
#include <assert.h>
#include "critical_section_wrapper.h"
#include "typedefs.h"
#include "signal_processing_library.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/typedefs.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
namespace webrtc {
namespace {
const WebRtc_Word16 kFilterCoefficients8kHz[5] =
const int16_t kFilterCoefficients8kHz[5] =
{3798, -7596, 3798, 7807, -3733};
const WebRtc_Word16 kFilterCoefficients[5] =
const int16_t kFilterCoefficients[5] =
{4012, -8024, 4012, 8002, -3913};
struct FilterState {
WebRtc_Word16 y[4];
WebRtc_Word16 x[2];
const WebRtc_Word16* ba;
int16_t y[4];
int16_t x[2];
const int16_t* ba;
};
int InitializeFilter(FilterState* hpf, int sample_rate_hz) {
assert(hpf != NULL);
if (sample_rate_hz == AudioProcessingImpl::kSampleRate8kHz) {
if (sample_rate_hz == AudioProcessing::kSampleRate8kHz) {
hpf->ba = kFilterCoefficients8kHz;
} else {
hpf->ba = kFilterCoefficients;
@ -48,32 +47,28 @@ int InitializeFilter(FilterState* hpf, int sample_rate_hz) {
return AudioProcessing::kNoError;
}
int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
int Filter(FilterState* hpf, int16_t* data, size_t length) {
assert(hpf != NULL);
WebRtc_Word32 tmp_int32 = 0;
WebRtc_Word16* y = hpf->y;
WebRtc_Word16* x = hpf->x;
const WebRtc_Word16* ba = hpf->ba;
int32_t tmp_int32 = 0;
int16_t* y = hpf->y;
int16_t* x = hpf->x;
const int16_t* ba = hpf->ba;
for (int i = 0; i < length; i++) {
for (size_t i = 0; i < length; i++) {
// y[i] = b[0] * x[i] + b[1] * x[i-1] + b[2] * x[i-2]
// + -a[1] * y[i-1] + -a[2] * y[i-2];
tmp_int32 =
WEBRTC_SPL_MUL_16_16(y[1], ba[3]); // -a[1] * y[i-1] (low part)
tmp_int32 +=
WEBRTC_SPL_MUL_16_16(y[3], ba[4]); // -a[2] * y[i-2] (low part)
tmp_int32 = y[1] * ba[3]; // -a[1] * y[i-1] (low part)
tmp_int32 += y[3] * ba[4]; // -a[2] * y[i-2] (low part)
tmp_int32 = (tmp_int32 >> 15);
tmp_int32 +=
WEBRTC_SPL_MUL_16_16(y[0], ba[3]); // -a[1] * y[i-1] (high part)
tmp_int32 +=
WEBRTC_SPL_MUL_16_16(y[2], ba[4]); // -a[2] * y[i-2] (high part)
tmp_int32 += y[0] * ba[3]; // -a[1] * y[i-1] (high part)
tmp_int32 += y[2] * ba[4]; // -a[2] * y[i-2] (high part)
tmp_int32 = (tmp_int32 << 1);
tmp_int32 += WEBRTC_SPL_MUL_16_16(data[i], ba[0]); // b[0]*x[0]
tmp_int32 += WEBRTC_SPL_MUL_16_16(x[0], ba[1]); // b[1]*x[i-1]
tmp_int32 += WEBRTC_SPL_MUL_16_16(x[1], ba[2]); // b[2]*x[i-2]
tmp_int32 += data[i] * ba[0]; // b[0]*x[0]
tmp_int32 += x[0] * ba[1]; // b[1]*x[i-1]
tmp_int32 += x[1] * ba[2]; // b[2]*x[i-2]
// Update state (input part)
x[1] = x[0];
@ -82,21 +77,20 @@ int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
// Update state (filtered part)
y[2] = y[0];
y[3] = y[1];
y[0] = static_cast<WebRtc_Word16>(tmp_int32 >> 13);
y[1] = static_cast<WebRtc_Word16>((tmp_int32 -
WEBRTC_SPL_LSHIFT_W32(static_cast<WebRtc_Word32>(y[0]), 13)) << 2);
y[0] = static_cast<int16_t>(tmp_int32 >> 13);
y[1] = static_cast<int16_t>(
(tmp_int32 - (static_cast<int32_t>(y[0]) << 13)) << 2);
// Rounding in Q12, i.e. add 2^11
tmp_int32 += 2048;
// Saturate (to 2^27) so that the HP filtered signal does not overflow
tmp_int32 = WEBRTC_SPL_SAT(static_cast<WebRtc_Word32>(134217727),
tmp_int32 = WEBRTC_SPL_SAT(static_cast<int32_t>(134217727),
tmp_int32,
static_cast<WebRtc_Word32>(-134217728));
// Convert back to Q0 and use rounding
data[i] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp_int32, 12);
static_cast<int32_t>(-134217728));
// Convert back to Q0 and use rounding.
data[i] = (int16_t)(tmp_int32 >> 12);
}
return AudioProcessing::kNoError;
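For readability: the fixed-point loop above is a direct-form biquad whose coefficients are Q12 (divide by 4096 to get the float values). A plain floating-point model of the same recurrence — a sketch only, omitting the split high/low state words and the saturation step — looks like this:

#include <cstddef>

// Floating-point model of the high-pass biquad above. |b| holds b[0..2] and
// |minus_a| holds -a[1], -a[2] (the Q12 coefficients divided by 4096).
void HighPassFloat(const float b[3], const float minus_a[2],
                   float x_state[2], float y_state[2],
                   float* data, size_t length) {
  for (size_t i = 0; i < length; ++i) {
    const float y = b[0] * data[i] + b[1] * x_state[0] + b[2] * x_state[1] +
                    minus_a[0] * y_state[0] + minus_a[1] * y_state[1];
    x_state[1] = x_state[0];  // x[i-2] <- x[i-1]
    x_state[0] = data[i];     // x[i-1] <- x[i]
    y_state[1] = y_state[0];  // y[i-2] <- y[i-1]
    y_state[0] = y;           // y[i-1] <- y[i]
    data[i] = y;
  }
}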
@ -105,9 +99,11 @@ int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
typedef FilterState Handle;
HighPassFilterImpl::HighPassFilterImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
apm_(apm) {}
HighPassFilterImpl::HighPassFilterImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit) {}
HighPassFilterImpl::~HighPassFilterImpl() {}
@ -118,13 +114,13 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
err = Filter(my_handle,
audio->low_pass_split_data(i),
audio->samples_per_split_channel());
audio->split_bands(i)[kBand0To8kHz],
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -135,7 +131,7 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
int HighPassFilterImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
@ -143,25 +139,17 @@ bool HighPassFilterImpl::is_enabled() const {
return is_component_enabled();
}
int HighPassFilterImpl::get_version(char* version,
int version_len_bytes) const {
// An empty string is used to indicate no version information.
memset(version, 0, version_len_bytes);
return apm_->kNoError;
}
void* HighPassFilterImpl::CreateHandle() const {
return new FilterState;
}
int HighPassFilterImpl::DestroyHandle(void* handle) const {
void HighPassFilterImpl::DestroyHandle(void* handle) const {
delete static_cast<Handle*>(handle);
return apm_->kNoError;
}
int HighPassFilterImpl::InitializeHandle(void* handle) const {
return InitializeFilter(static_cast<Handle*>(handle),
apm_->sample_rate_hz());
apm_->proc_sample_rate_hz());
}
int HighPassFilterImpl::ConfigureHandle(void* /*handle*/) const {


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,44 +8,43 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class HighPassFilterImpl : public HighPassFilter,
public ProcessingComponent {
public:
explicit HighPassFilterImpl(const AudioProcessingImpl* apm);
HighPassFilterImpl(const AudioProcessing* apm, CriticalSectionWrapper* crit);
virtual ~HighPassFilterImpl();
int ProcessCaptureAudio(AudioBuffer* audio);
// HighPassFilter implementation.
virtual bool is_enabled() const;
// ProcessingComponent implementation.
virtual int get_version(char* version, int version_len_bytes) const;
bool is_enabled() const override;
private:
// HighPassFilter implementation.
virtual int Enable(bool enable);
int Enable(bool enable) override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,17 +8,31 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
#include <stddef.h> // size_t
#include <stddef.h> // size_t
#include <stdio.h> // FILE
#include <vector>
#include "typedefs.h"
#include "module.h"
#include "webrtc/base/arraysize.h"
#include "webrtc/base/platform_file.h"
#include "webrtc/common.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
#include "webrtc/typedefs.h"
struct AecCore;
namespace webrtc {
class AudioFrame;
template<typename T>
class Beamformer;
class StreamConfig;
class ProcessingConfig;
class EchoCancellation;
class EchoControlMobile;
class GainControl;
@ -27,6 +41,94 @@ class LevelEstimator;
class NoiseSuppression;
class VoiceDetection;
// Use to enable the extended filter mode in the AEC, along with robustness
// measures around the reported system delays. It comes with a significant
// increase in AEC complexity, but is much more robust to unreliable reported
// delays.
//
// Detailed changes to the algorithm:
// - The filter length is changed from 48 to 128 ms. This comes with tuning of
// several parameters: i) filter adaptation stepsize and error threshold;
// ii) non-linear processing smoothing and overdrive.
// - Option to ignore the reported delays on platforms which we deem
// sufficiently unreliable. See WEBRTC_UNTRUSTED_DELAY in echo_cancellation.c.
// - Faster startup times by removing the excessive "startup phase" processing
// of reported delays.
// - Much more conservative adjustments to the far-end read pointer. We smooth
// the delay difference more heavily, and back off from the difference more.
// Adjustments force a readaptation of the filter, so they should be avoided
// except when really necessary.
struct ExtendedFilter {
ExtendedFilter() : enabled(false) {}
explicit ExtendedFilter(bool enabled) : enabled(enabled) {}
bool enabled;
};
// Enables delay-agnostic echo cancellation. This feature relies on internally
// estimated delays between the process and reverse streams, thus not relying
// on reported system delays. This configuration only applies to
// EchoCancellation and not EchoControlMobile. It can be set in the constructor
// or using AudioProcessing::SetExtraOptions().
struct DelayAgnostic {
DelayAgnostic() : enabled(false) {}
explicit DelayAgnostic(bool enabled) : enabled(enabled) {}
bool enabled;
};
// Use to enable experimental gain control (AGC). At startup the experimental
// AGC moves the microphone volume up to |startup_min_volume| if the current
// microphone volume is set too low. The value is clamped to its operating range
// [12, 255]. Here, 255 maps to 100%.
//
// Must be provided through AudioProcessing::Create(Config&).
#if defined(WEBRTC_CHROMIUM_BUILD)
static const int kAgcStartupMinVolume = 85;
#else
static const int kAgcStartupMinVolume = 0;
#endif // defined(WEBRTC_CHROMIUM_BUILD)
struct ExperimentalAgc {
ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
explicit ExperimentalAgc(bool enabled)
: enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
ExperimentalAgc(bool enabled, int startup_min_volume)
: enabled(enabled), startup_min_volume(startup_min_volume) {}
bool enabled;
int startup_min_volume;
};
// Use to enable experimental noise suppression. It can be set in the
// constructor or using AudioProcessing::SetExtraOptions().
struct ExperimentalNs {
ExperimentalNs() : enabled(false) {}
explicit ExperimentalNs(bool enabled) : enabled(enabled) {}
bool enabled;
};
// Use to enable beamforming. Must be provided through the constructor. It will
// have no impact if used with AudioProcessing::SetExtraOptions().
struct Beamforming {
Beamforming()
: enabled(false),
array_geometry() {}
Beamforming(bool enabled, const std::vector<Point>& array_geometry)
: enabled(enabled),
array_geometry(array_geometry) {}
const bool enabled;
const std::vector<Point> array_geometry;
};
// Use to enable the intelligibility enhancer in audio processing. Must be
// provided through the constructor. It will have no impact if used with
// AudioProcessing::SetExtraOptions().
//
// Note: If enabled and the reverse stream has more than one output channel,
// the reverse stream will become an upmixed mono signal.
struct Intelligibility {
Intelligibility() : enabled(false) {}
explicit Intelligibility(bool enabled) : enabled(enabled) {}
bool enabled;
};
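Note the split among these options: ExperimentalAgc and Beamforming only take effect when passed to Create(config), while ExtendedFilter, DelayAgnostic and ExperimentalNs can also be applied later via SetExtraOptions(). A create-time sketch (the two-mic geometry is hypothetical):

// Hypothetical setup passing create-time-only options to the factory.
webrtc::Config config;
config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));
std::vector<webrtc::Point> geometry;
geometry.push_back(webrtc::Point(-0.05f, 0.f, 0.f));  // 10 cm mic spacing.
geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));
rtc::scoped_ptr<webrtc::AudioProcessing> apm(
    webrtc::AudioProcessing::Create(config));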
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
@ -56,16 +158,12 @@ class VoiceDetection;
// 2. Parameter getters are never called concurrently with the corresponding
// setter.
//
// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple
// channels should be interleaved.
// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
// interfaces use interleaved data, while the float interfaces use deinterleaved
// data.
//
// Usage example, omitting error checking:
// AudioProcessing* apm = AudioProcessing::Create(0);
// apm->set_sample_rate_hz(32000); // Super-wideband processing.
//
// // Mono capture and stereo render.
// apm->set_num_channels(1, 1);
// apm->set_num_reverse_channels(2);
//
// apm->high_pass_filter()->Enable(true);
//
@ -102,44 +200,84 @@ class VoiceDetection;
// apm->Initialize();
//
// // Close the application...
// AudioProcessing::Destroy(apm);
// apm = NULL;
// delete apm;
//
class AudioProcessing : public Module {
class AudioProcessing {
public:
// Creates a APM instance, with identifier |id|. Use one instance for every
// primary audio stream requiring processing. On the client-side, this would
// typically be one instance for the near-end stream, and additional instances
// for each far-end stream which requires processing. On the server-side,
// this would typically be one instance for every incoming stream.
static AudioProcessing* Create(int id);
// TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
enum ChannelLayout {
kMono,
// Left, right.
kStereo,
// Mono, keyboard mic.
kMonoAndKeyboard,
// Left, right, keyboard mic.
kStereoAndKeyboard
};
// Destroys a |apm| instance.
static void Destroy(AudioProcessing* apm);
// Creates an APM instance. Use one instance for every primary audio stream
// requiring processing. On the client-side, this would typically be one
// instance for the near-end stream, and additional instances for each far-end
// stream which requires processing. On the server-side, this would typically
// be one instance for every incoming stream.
static AudioProcessing* Create();
// Allows passing in an optional configuration at create-time.
static AudioProcessing* Create(const Config& config);
// Only for testing.
static AudioProcessing* Create(const Config& config,
Beamformer<float>* beamformer);
virtual ~AudioProcessing() {}
// Initializes internal states, while retaining all user settings. This
// should be called before beginning to process a new audio stream. However,
// it is not necessary to call before processing the first stream after
// creation.
//
// It is also not necessary to call if the audio parameters (sample
// rate and number of channels) have changed. Passing updated parameters
// directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
// If the parameters are known at init-time though, they may be provided.
virtual int Initialize() = 0;
// Sets the sample |rate| in Hz for both the primary and reverse audio
// streams. 8000, 16000 or 32000 Hz are permitted.
virtual int set_sample_rate_hz(int rate) = 0;
virtual int sample_rate_hz() const = 0;
// The int16 interfaces require:
// - that only |NativeRate|s be used
// - that the input, output and reverse rates match
// - that |processing_config.output_stream()| matches
// |processing_config.input_stream()|.
//
// The float interfaces accept arbitrary rates and support differing input and
// output layouts, but the output must have either one channel or the same
// number of channels as the input.
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
// Sets the number of channels for the primary audio stream. Input frames must
// contain a number of channels given by |input_channels|, while output frames
// will be returned with number of channels given by |output_channels|.
virtual int set_num_channels(int input_channels, int output_channels) = 0;
// Initialize with unpacked parameters. See Initialize() above for details.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout) = 0;
// Pass down additional options which don't have explicit setters. This
// ensures the options are applied immediately.
virtual void SetExtraOptions(const Config& config) = 0;
// TODO(ajm): Only intended for internal use. Make private and friend the
// necessary classes?
virtual int proc_sample_rate_hz() const = 0;
virtual int proc_split_sample_rate_hz() const = 0;
virtual int num_input_channels() const = 0;
virtual int num_output_channels() const = 0;
virtual int num_reverse_channels() const = 0;
// Set to true when the output of AudioProcessing will be muted or in some
// other way not used. Ideally, the captured audio would still be processed,
// but some components may change behavior based on this information.
// Default false.
virtual void set_output_will_be_muted(bool muted) = 0;
// Processes a 10 ms |frame| of the primary audio stream. On the client-side,
// this is the near-end (or captured) audio.
//
@ -147,11 +285,40 @@ class AudioProcessing : public Module {
// must be called prior to processing the current frame. Any getter function
// with the stream_ tag which is needed should be called after processing.
//
// The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
// members of |frame| must be valid. If changed from the previous call to this
// method, it will trigger an initialization.
virtual int ProcessStream(AudioFrame* frame) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |src| points to a channel buffer, arranged according to
// |input_layout|. At output, the channels will be arranged according to
// |output_layout| at |output_sample_rate_hz| in |dest|.
//
// The output layout must have one channel or as many channels as the input.
// |src| and |dest| may use the same memory, if desired.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int ProcessStream(const float* const* src,
size_t samples_per_channel,
int input_sample_rate_hz,
ChannelLayout input_layout,
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |src| points to a channel buffer, arranged according to |input_stream|. At
// output, the channels will be arranged according to |output_stream| in
// |dest|.
//
// The output must have one channel or as many channels as the input. |src|
// and |dest| may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
// will not be modified. On the client-side, this is the far-end (or to be
// rendered) audio.
@ -162,12 +329,34 @@ class AudioProcessing : public Module {
// typically will not be used. If you're not sure what to pass in here,
// chances are you don't need to use it.
//
// The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
// members of |frame| must be valid. |sample_rate_hz_| must correspond to
// |input_sample_rate_hz()|
//
// TODO(ajm): add const to input; requires an implementation fix.
// DEPRECATED: Use |ProcessReverseStream| instead.
// TODO(ekm): Remove once all users have updated to |ProcessReverseStream|.
virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
// Same as |AnalyzeReverseStream|, but may modify |frame| if intelligibility
// is enabled.
virtual int ProcessReverseStream(AudioFrame* frame) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |data| points to a channel buffer, arranged according to |layout|.
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int AnalyzeReverseStream(const float* const* data,
size_t samples_per_channel,
int rev_sample_rate_hz,
ChannelLayout layout) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |src| points to a channel buffer, arranged according to
// |reverse_input_config|.
virtual int ProcessReverseStream(const float* const* src,
const StreamConfig& reverse_input_config,
const StreamConfig& reverse_output_config,
float* const* dest) = 0;
// This must be called if and only if echo processing is enabled.
//
// Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
@ -183,18 +372,46 @@ class AudioProcessing : public Module {
// ProcessStream().
virtual int set_stream_delay_ms(int delay) = 0;
virtual int stream_delay_ms() const = 0;
virtual bool was_stream_delay_set() const = 0;
// Call to signal that a key press occurred (true) or did not occur (false)
// with this chunk of audio.
virtual void set_stream_key_pressed(bool key_pressed) = 0;
// Sets a delay |offset| in ms to add to the values passed in through
// set_stream_delay_ms(). May be positive or negative.
//
// Note that this could cause an otherwise valid value passed to
// set_stream_delay_ms() to return an error.
virtual void set_delay_offset_ms(int offset) = 0;
virtual int delay_offset_ms() const = 0;
// Starts recording debugging information to a file specified by |filename|,
// a NULL-terminated string. If there is an ongoing recording, the old file
// will be closed, and recording will continue in the newly specified file.
// An already existing file will be overwritten without warning.
static const size_t kMaxFilenameSize = 1024;
virtual int StartDebugRecording(const char filename[kMaxFilenameSize]) = 0;
// Same as above but uses an existing file handle. Takes ownership
// of |handle| and closes it at StopDebugRecording().
virtual int StartDebugRecording(FILE* handle) = 0;
// Same as above but uses an existing PlatformFile handle. Takes ownership
// of |handle| and closes it at StopDebugRecording().
// TODO(xians): Make this interface pure virtual.
virtual int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) {
return -1;
}
// Stops recording debugging information, and closes the file. Recording
// cannot be resumed in the same file (without overwriting it).
virtual int StopDebugRecording() = 0;
// Use to send UMA histograms at end of a call. Note that all histogram
// specific member variables are reset.
virtual void UpdateHistogramsOnCallEnd() = 0;
// These provide access to the component interfaces and should never return
// NULL. The pointers will be valid for the lifetime of the APM instance.
// The memory for these objects is entirely managed internally.
@ -213,8 +430,8 @@ class AudioProcessing : public Module {
int minimum; // Long-term minimum.
};
enum Error {
  // Fatal errors.
kNoError = 0,
kUnspecifiedError = -1,
kCreationFailedError = -2,
@ -227,22 +444,134 @@ class AudioProcessing : public Module {
kBadNumberChannelsError = -9,
kFileError = -10,
kStreamParameterNotSetError = -11,
  kNotEnabledError = -12,

  // Warnings are non-fatal.
  // This results when a set_stream_ parameter is out of range. Processing
  // will continue, but the parameter may have been truncated.
  kBadStreamParameterWarning = -13
};
enum NativeRate {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000,
kSampleRate48kHz = 48000
};
static const int kNativeSampleRatesHz[];
static const size_t kNumNativeSampleRates;
static const int kMaxNativeSampleRateHz;
static const int kMaxAECMSampleRateHz;
static const int kChunkSizeMs = 10;
};
class StreamConfig {
public:
// sample_rate_hz: The sampling rate of the stream.
//
// num_channels: The number of audio channels in the stream, excluding the
// keyboard channel if it is present. When passing a
// StreamConfig with an array of arrays T*[N],
//
// N == {num_channels + 1 if has_keyboard
// {num_channels if !has_keyboard
//
// has_keyboard: True if the stream has a keyboard channel. When has_keyboard
// is true, the last channel in any corresponding list of
// channels is the keyboard channel.
StreamConfig(int sample_rate_hz = 0,
int num_channels = 0,
bool has_keyboard = false)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
has_keyboard_(has_keyboard),
num_frames_(calculate_frames(sample_rate_hz)) {}
void set_sample_rate_hz(int value) {
sample_rate_hz_ = value;
num_frames_ = calculate_frames(value);
}
void set_num_channels(int value) { num_channels_ = value; }
void set_has_keyboard(bool value) { has_keyboard_ = value; }
int sample_rate_hz() const { return sample_rate_hz_; }
// The number of channels in the stream, not including the keyboard channel if
// present.
int num_channels() const { return num_channels_; }
bool has_keyboard() const { return has_keyboard_; }
size_t num_frames() const { return num_frames_; }
size_t num_samples() const { return num_channels_ * num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
num_channels_ == other.num_channels_ &&
has_keyboard_ == other.has_keyboard_;
}
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
private:
static size_t calculate_frames(int sample_rate_hz) {
return static_cast<size_t>(
AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000);
}
int sample_rate_hz_;
int num_channels_;
bool has_keyboard_;
size_t num_frames_;
};
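// Example (illustrative, not upstream code): a 10 ms chunk of 48 kHz stereo
// audio without a keyboard channel is described by
//
//   StreamConfig config(48000, 2);  // has_keyboard defaults to false.
//
// for which num_frames() == 480 (kChunkSizeMs * 48000 / 1000) and
// num_samples() == 960 (2 channels * 480 frames).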
class ProcessingConfig {
public:
enum StreamName {
kInputStream,
kOutputStream,
kReverseInputStream,
kReverseOutputStream,
kNumStreamNames,
};
const StreamConfig& input_stream() const {
return streams[StreamName::kInputStream];
}
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
const StreamConfig& reverse_input_stream() const {
return streams[StreamName::kReverseInputStream];
}
const StreamConfig& reverse_output_stream() const {
return streams[StreamName::kReverseOutputStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
StreamConfig& reverse_input_stream() {
return streams[StreamName::kReverseInputStream];
}
StreamConfig& reverse_output_stream() {
return streams[StreamName::kReverseOutputStream];
}
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
if (this->streams[i] != other.streams[i]) {
return false;
}
}
return true;
}
bool operator!=(const ProcessingConfig& other) const {
return !(*this == other);
}
StreamConfig streams[StreamName::kNumStreamNames];
};
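// Example (illustrative, not upstream code; error handling omitted): driving
// the float interface with the configs above, assuming |apm|, |src| and
// |dest| are provided by the caller:
//
//   ProcessingConfig config;
//   config.input_stream() = StreamConfig(48000, 2);
//   config.output_stream() = StreamConfig(48000, 1);
//   config.reverse_input_stream() = StreamConfig(16000, 1);
//   config.reverse_output_stream() = StreamConfig(16000, 1);
//   apm->Initialize(config);
//
//   // Then, for every 10 ms chunk: |src| holds two channels of 480 float
//   // samples in [-1, 1], and |dest| receives one channel at the same rate.
//   apm->ProcessStream(src, config.input_stream(), config.output_stream(),
//                      dest);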
// The acoustic echo cancellation (AEC) component provides better performance
@ -263,20 +592,14 @@ class EchoCancellation {
// render and capture devices are used, particularly with webcams.
//
// This enables a compensation mechanism, and requires that
// set_stream_drift_samples() be called.
virtual int enable_drift_compensation(bool enable) = 0;
virtual bool is_drift_compensation_enabled() const = 0;
// Sets the difference between the number of samples rendered and captured by
// the audio devices since the last call to |ProcessStream()|. Must be called
// if drift compensation is enabled, prior to |ProcessStream()|.
virtual void set_stream_drift_samples(int drift) = 0;
virtual int stream_drift_samples() const = 0;
enum SuppressionLevel {
@ -328,12 +651,26 @@ class EchoCancellation {
virtual bool is_delay_logging_enabled() const = 0;
// The delay metrics consists of the delay |median| and the delay standard
// deviation |std|. It also consists of the fraction of delay estimates
// |fraction_poor_delays| that can make the echo cancellation perform poorly.
// The values are aggregated until the first call to |GetDelayMetrics()| and
// afterwards aggregated and updated every second.
// Note that if there are several clients pulling metrics from
// |GetDelayMetrics()| during a session the first call from any of them will
// change to one second aggregation window for all.
// TODO(bjornv): Deprecated, remove.
virtual int GetDelayMetrics(int* median, int* std) = 0;
virtual int GetDelayMetrics(int* median, int* std,
float* fraction_poor_delays) = 0;
// Returns a pointer to the low level AEC component. In case of multiple
// channels, the pointer to the first one is returned. A NULL pointer is
// returned when the AEC component is disabled or has not been initialized
// successfully.
virtual struct AecCore* aec_core() const = 0;
protected:
virtual ~EchoCancellation() {}
};
// The acoustic echo control for mobile (AECM) component is a low complexity
@ -389,7 +726,7 @@ class EchoControlMobile {
static size_t echo_path_size_bytes();
protected:
virtual ~EchoControlMobile() {}
};
// The automatic gain control (AGC) component brings the signal to an
@ -481,7 +818,7 @@ class GainControl {
virtual bool stream_is_saturated() const = 0;
protected:
virtual ~GainControl() {}
};
// A filtering component which removes DC offset and low-frequency noise.
@ -492,34 +829,29 @@ class HighPassFilter {
virtual bool is_enabled() const = 0;
protected:
virtual ~HighPassFilter() {}
};
// An estimation component used to retrieve level metrics.
class LevelEstimator {
public:
virtual int Enable(bool enable) = 0;
virtual bool is_enabled() const = 0;
// Returns the root mean square (RMS) level in dBFs (decibels from digital
// full-scale), or alternately dBov. It is computed over all primary stream
// frames since the last call to RMS(). The returned value is positive but
// should be interpreted as negative. It is constrained to [0, 127].
//
// The computation follows: https://tools.ietf.org/html/rfc6465
// with the intent that it can provide the RTP audio level indication.
//
// Frames passed to ProcessStream() with an |_energy| of zero are considered
// to have been muted. The RMS of the frame will be interpreted as -127.
virtual int RMS() = 0;
protected:
virtual ~LevelEstimator() {}
};
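// A worked example of the convention above (illustrative, not upstream text):
// a signal whose RMS amplitude is one tenth of full scale sits at -20 dBFS,
// so RMS() reports 20; a full-scale square wave sits at 0 dBFS and reports 0;
// muted frames are reported as 127.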
// The noise suppression (NS) component attempts to remove noise while
@ -543,8 +875,13 @@ class NoiseSuppression {
virtual int set_level(Level level) = 0;
virtual Level level() const = 0;
// Returns the internally computed prior speech probability of current frame
// averaged over output channels. This is not supported in fixed point, for
// which |kUnsupportedFunctionError| is returned.
virtual float speech_probability() const = 0;
protected:
virtual ~NoiseSuppression() {}
};
// The voice activity detection (VAD) component analyzes the stream to
@ -552,7 +889,7 @@ class NoiseSuppression {
// external VAD decision.
//
// In addition to |stream_has_voice()| the VAD decision is provided through the
// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will be
// modified to reflect the current decision.
class VoiceDetection {
public:
@ -594,8 +931,8 @@ class VoiceDetection {
virtual int frame_size_ms() const = 0;
protected:
virtual ~VoiceDetection() {}
};
} // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_

View File

@ -0,0 +1,381 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Implements core class for intelligibility enhancer.
//
// Details of the model and algorithm can be found in the original paper:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
//
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include <math.h>
#include <stdlib.h>
#include <algorithm>
#include <numeric>
#include "webrtc/base/checks.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/window_generator.h"
namespace webrtc {
namespace {
const size_t kErbResolution = 2;
const int kWindowSizeMs = 2;
const int kChunkSizeMs = 10; // Size provided by APM.
const float kClipFreq = 200.0f;
const float kConfigRho = 0.02f; // Default production and interpretation SNR.
const float kKbdAlpha = 1.5f;
const float kLambdaBot = -1.0f; // Extreme values in bisection
const float kLambdaTop = -10e-18f; // search for lambda.
} // namespace
using std::complex;
using std::max;
using std::min;
using VarianceType = intelligibility::VarianceArray::StepType;
IntelligibilityEnhancer::TransformCallback::TransformCallback(
IntelligibilityEnhancer* parent,
IntelligibilityEnhancer::AudioSource source)
: parent_(parent), source_(source) {
}
void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
const complex<float>* const* in_block,
int in_channels,
size_t frames,
int /* out_channels */,
complex<float>* const* out_block) {
RTC_DCHECK_EQ(parent_->freqs_, frames);
for (int i = 0; i < in_channels; ++i) {
parent_->DispatchAudio(source_, in_block[i], out_block[i]);
}
}
IntelligibilityEnhancer::IntelligibilityEnhancer()
: IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
}
IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
: freqs_(RealFourier::ComplexLength(
RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
chunk_length_(
static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),
bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
sample_rate_hz_(config.sample_rate_hz),
erb_resolution_(kErbResolution),
num_capture_channels_(config.num_capture_channels),
num_render_channels_(config.num_render_channels),
analysis_rate_(config.analysis_rate),
active_(true),
clear_variance_(freqs_,
config.var_type,
config.var_window_size,
config.var_decay_rate),
noise_variance_(freqs_,
config.var_type,
config.var_window_size,
config.var_decay_rate),
filtered_clear_var_(new float[bank_size_]),
filtered_noise_var_(new float[bank_size_]),
filter_bank_(bank_size_),
center_freqs_(new float[bank_size_]),
rho_(new float[bank_size_]),
gains_eq_(new float[bank_size_]),
gain_applier_(freqs_, config.gain_change_limit),
temp_render_out_buffer_(chunk_length_, num_render_channels_),
temp_capture_out_buffer_(chunk_length_, num_capture_channels_),
kbd_window_(new float[window_size_]),
render_callback_(this, AudioSource::kRenderStream),
capture_callback_(this, AudioSource::kCaptureStream),
block_count_(0),
analysis_step_(0) {
RTC_DCHECK_LE(config.rho, 1.0f);
CreateErbBank();
// Assumes all rho equal.
for (size_t i = 0; i < bank_size_; ++i) {
rho_[i] = config.rho * config.rho;
}
float freqs_khz = kClipFreq / 1000.0f;
size_t erb_index = static_cast<size_t>(ceilf(
11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_);
WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
kbd_window_.get());
render_mangler_.reset(new LappedTransform(
num_render_channels_, num_render_channels_, chunk_length_,
kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
capture_mangler_.reset(new LappedTransform(
num_capture_channels_, num_capture_channels_, chunk_length_,
kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));
}
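// A worked example of the |start_freq_| computation in the constructor above
// (illustrative, not upstream text): with kClipFreq = 200 Hz, freqs_khz = 0.2
// and
//   erb_index = ceil(11.17 * ln((0.2 + 0.312) / (0.2 + 14.6575)) + 43.0)
//             = ceil(11.17 * (-3.368) + 43.0) = ceil(5.38) = 6,
// so with kErbResolution = 2, the first start_freq_ = 12 ERB bands are left
// at unity gain in SolveForGainsGivenLambda().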
void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
int sample_rate_hz,
int num_channels) {
RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
RTC_CHECK_EQ(num_render_channels_, num_channels);
if (active_) {
render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
}
if (active_) {
for (int i = 0; i < num_render_channels_; ++i) {
memcpy(audio[i], temp_render_out_buffer_.channels()[i],
chunk_length_ * sizeof(**audio));
}
}
}
void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio,
int sample_rate_hz,
int num_channels) {
RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
RTC_CHECK_EQ(num_capture_channels_, num_channels);
capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels());
}
void IntelligibilityEnhancer::DispatchAudio(
IntelligibilityEnhancer::AudioSource source,
const complex<float>* in_block,
complex<float>* out_block) {
switch (source) {
case kRenderStream:
ProcessClearBlock(in_block, out_block);
break;
case kCaptureStream:
ProcessNoiseBlock(in_block, out_block);
break;
}
}
void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
complex<float>* out_block) {
if (block_count_ < 2) {
memset(out_block, 0, freqs_ * sizeof(*out_block));
++block_count_;
return;
}
// TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary.
if (true) {
clear_variance_.Step(in_block, false);
if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
const float power_target = std::accumulate(
clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f);
AnalyzeClearBlock(power_target);
++analysis_step_;
}
++block_count_;
}
if (active_) {
gain_applier_.Apply(in_block, out_block);
}
}
void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {
FilterVariance(clear_variance_.variance(), filtered_clear_var_.get());
FilterVariance(noise_variance_.variance(), filtered_noise_var_.get());
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
const float power_top =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
const float power_bot =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
if (power_target >= power_bot && power_target <= power_top) {
SolveForLambda(power_target, power_bot, power_top);
UpdateErbGains();
} // Else experiencing variance underflow, so do nothing.
}
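// A note on SolveForLambda() below (editorial summary, not upstream text): as
// coded, the achieved power grows monotonically with |lambda|, so a plain
// bisection applies. Each iteration halves [lambda_bot, lambda_top], keeping
// the half that still brackets |power_target|, and stops once the achieved
// power is within kConvergeThresh of the target or kMaxIters is reached.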
void IntelligibilityEnhancer::SolveForLambda(float power_target,
float power_bot,
float power_top) {
const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
const int kMaxIters = 100; // for these, based on experiments.
const float reciprocal_power_target = 1.f / power_target;
float lambda_bot = kLambdaBot;
float lambda_top = kLambdaTop;
float power_ratio = 2.0f; // Ratio of achieved power to target power.
int iters = 0;
while (std::fabs(power_ratio - 1.0f) > kConvergeThresh &&
iters <= kMaxIters) {
const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
const float power =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
if (power < power_target) {
lambda_bot = lambda;
} else {
lambda_top = lambda;
}
power_ratio = std::fabs(power * reciprocal_power_target);
++iters;
}
}
void IntelligibilityEnhancer::UpdateErbGains() {
// (ERB gain) = filterbank' * (freq gain)
float* gains = gain_applier_.target();
for (size_t i = 0; i < freqs_; ++i) {
gains[i] = 0.0f;
for (size_t j = 0; j < bank_size_; ++j) {
gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);
}
}
}
void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,
complex<float>* /*out_block*/) {
noise_variance_.Step(in_block);
}
size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
size_t erb_resolution) {
float freq_limit = sample_rate / 2000.0f;
size_t erb_scale = static_cast<size_t>(ceilf(
11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));
return erb_scale * erb_resolution;
}
void IntelligibilityEnhancer::CreateErbBank() {
size_t lf = 1, rf = 4;
for (size_t i = 0; i < bank_size_; ++i) {
float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));
center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
center_freqs_[i] -= 14678.49f;
}
float last_center_freq = center_freqs_[bank_size_ - 1];
for (size_t i = 0; i < bank_size_; ++i) {
center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
}
for (size_t i = 0; i < bank_size_; ++i) {
filter_bank_[i].resize(freqs_);
}
for (size_t i = 1; i <= bank_size_; ++i) {
size_t lll, ll, rr, rrr;
static const size_t kOne = 1; // Avoids repeated static_cast<>s below.
lll = static_cast<size_t>(round(
center_freqs_[max(kOne, i - lf) - 1] * freqs_ /
(0.5f * sample_rate_hz_)));
ll = static_cast<size_t>(round(
center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));
lll = min(freqs_, max(lll, kOne)) - 1;
ll = min(freqs_, max(ll, kOne)) - 1;
rrr = static_cast<size_t>(round(
center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
(0.5f * sample_rate_hz_)));
rr = static_cast<size_t>(round(
center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
(0.5f * sample_rate_hz_)));
rrr = min(freqs_, max(rrr, kOne)) - 1;
rr = min(freqs_, max(rr, kOne)) - 1;
float step, element;
step = 1.0f / (ll - lll);
element = 0.0f;
for (size_t j = lll; j <= ll; ++j) {
filter_bank_[i - 1][j] = element;
element += step;
}
step = 1.0f / (rrr - rr);
element = 1.0f;
for (size_t j = rr; j <= rrr; ++j) {
filter_bank_[i - 1][j] = element;
element -= step;
}
for (size_t j = ll; j <= rr; ++j) {
filter_bank_[i - 1][j] = 1.0f;
}
}
float sum;
for (size_t i = 0; i < freqs_; ++i) {
sum = 0.0f;
for (size_t j = 0; j < bank_size_; ++j) {
sum += filter_bank_[j][i];
}
for (size_t j = 0; j < bank_size_; ++j) {
filter_bank_[j][i] /= sum;
}
}
}
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
size_t start_freq,
float* sols) {
bool quadratic = (kConfigRho < 1.0f);
const float* var_x0 = filtered_clear_var_.get();
const float* var_n0 = filtered_noise_var_.get();
for (size_t n = 0; n < start_freq; ++n) {
sols[n] = 1.0f;
}
// Analytic solution for optimal gains. See paper for derivation.
for (size_t n = start_freq - 1; n < bank_size_; ++n) {
float alpha0, beta0, gamma0;
gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +
lambda * var_x0[n] * var_n0[n] * var_n0[n];
beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n];
if (quadratic) {
alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n];
sols[n] =
(-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0);
} else {
sols[n] = -gamma0 / beta0;
}
sols[n] = fmax(0, sols[n]);
}
}
void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
RTC_DCHECK_GT(freqs_, 0u);
for (size_t i = 0; i < bank_size_; ++i) {
result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);
}
}
float IntelligibilityEnhancer::DotProduct(const float* a,
const float* b,
size_t length) {
float ret = 0.0f;
for (size_t i = 0; i < length; ++i) {
ret = fmaf(a[i], b[i], ret);
}
return ret;
}
bool IntelligibilityEnhancer::active() const {
return active_;
}
} // namespace webrtc

View File

@ -0,0 +1,182 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Specifies core class for intelligibility enhancement.
//
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
#include <complex>
#include <vector>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
namespace webrtc {
// Speech intelligibility enhancement module. Reads render and capture
// audio streams and modifies the render stream with a set of gains per
// frequency bin to enhance speech against the noise background.
// Note: assumes speech and noise streams are already separated.
class IntelligibilityEnhancer {
public:
struct Config {
// |var_*| are parameters for the VarianceArray constructor for the
// clear speech stream.
// TODO(bercic): the |var_*|, |*_rate| and |gain_limit| parameters should
// probably go away once fine tuning is done.
Config()
: sample_rate_hz(16000),
num_capture_channels(1),
num_render_channels(1),
var_type(intelligibility::VarianceArray::kStepDecaying),
var_decay_rate(0.9f),
var_window_size(10),
analysis_rate(800),
gain_change_limit(0.1f),
rho(0.02f) {}
int sample_rate_hz;
int num_capture_channels;
int num_render_channels;
intelligibility::VarianceArray::StepType var_type;
float var_decay_rate;
size_t var_window_size;
int analysis_rate;
float gain_change_limit;
float rho;
};
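// Example (illustrative, not upstream code): a 48 kHz single-channel setup
// that keeps the remaining defaults:
//
//   IntelligibilityEnhancer::Config config;
//   config.sample_rate_hz = 48000;
//   IntelligibilityEnhancer enhancer(config);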
explicit IntelligibilityEnhancer(const Config& config);
IntelligibilityEnhancer(); // Initialize with default config.
// Reads and processes chunk of noise stream in time domain.
void AnalyzeCaptureAudio(float* const* audio,
int sample_rate_hz,
int num_channels);
// Reads chunk of speech in time domain and updates with modified signal.
void ProcessRenderAudio(float* const* audio,
int sample_rate_hz,
int num_channels);
bool active() const;
private:
enum AudioSource {
kRenderStream = 0, // Clear speech stream.
kCaptureStream, // Noise stream.
};
// Provides access point to the frequency domain.
class TransformCallback : public LappedTransform::Callback {
public:
TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);
// All in frequency domain, receives input |in_block|, applies
// intelligibility enhancement, and writes result to |out_block|.
void ProcessAudioBlock(const std::complex<float>* const* in_block,
int in_channels,
size_t frames,
int out_channels,
std::complex<float>* const* out_block) override;
private:
IntelligibilityEnhancer* parent_;
AudioSource source_;
};
friend class TransformCallback;
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
// Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.
void DispatchAudio(AudioSource source,
const std::complex<float>* in_block,
std::complex<float>* out_block);
// Updates variance computation and analysis with |in_block|,
// and writes modified speech to |out_block|.
void ProcessClearBlock(const std::complex<float>* in_block,
std::complex<float>* out_block);
// Computes and sets modified gains.
void AnalyzeClearBlock(float power_target);
// Bisection search for optimal |lambda|.
void SolveForLambda(float power_target, float power_bot, float power_top);
// Transforms freq gains to ERB gains.
void UpdateErbGains();
// Updates variance calculation for noise input with |in_block|.
void ProcessNoiseBlock(const std::complex<float>* in_block,
std::complex<float>* out_block);
// Returns number of ERB filters.
static size_t GetBankSize(int sample_rate, size_t erb_resolution);
// Initializes ERB filterbank.
void CreateErbBank();
// Analytically solves quadratic for optimal gains given |lambda|.
// Negative gains are set to 0. Stores the results in |sols|.
void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
// Computes variance across ERB filters from freq variance |var|.
// Stores in |result|.
void FilterVariance(const float* var, float* result);
// Returns dot product of vectors specified by size |length| arrays |a|,|b|.
static float DotProduct(const float* a, const float* b, size_t length);
const size_t freqs_; // Num frequencies in frequency domain.
const size_t window_size_; // Window size in samples; also the block size.
const size_t chunk_length_; // Chunk size in samples.
const size_t bank_size_; // Num ERB filters.
const int sample_rate_hz_;
const int erb_resolution_;
const int num_capture_channels_;
const int num_render_channels_;
const int analysis_rate_; // Num blocks before gains recalculated.
const bool active_; // Whether render gains are being updated.
// TODO(ekm): Add logic for updating |active_|.
intelligibility::VarianceArray clear_variance_;
intelligibility::VarianceArray noise_variance_;
rtc::scoped_ptr<float[]> filtered_clear_var_;
rtc::scoped_ptr<float[]> filtered_noise_var_;
std::vector<std::vector<float>> filter_bank_;
rtc::scoped_ptr<float[]> center_freqs_;
size_t start_freq_;
rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.
// for each ERB band.
rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.
intelligibility::GainApplier gain_applier_;
// Destination buffers used to reassemble blocked chunks before overwriting
// the original input array with modifications.
ChannelBuffer<float> temp_render_out_buffer_;
ChannelBuffer<float> temp_capture_out_buffer_;
rtc::scoped_ptr<float[]> kbd_window_;
TransformCallback render_callback_;
TransformCallback capture_callback_;
rtc::scoped_ptr<LappedTransform> render_mangler_;
rtc::scoped_ptr<LappedTransform> capture_mangler_;
int block_count_;
int analysis_step_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

View File

@ -0,0 +1,314 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Implements helper functions and classes for intelligibility enhancement.
//
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
using std::complex;
using std::min;
namespace webrtc {
namespace intelligibility {
float UpdateFactor(float target, float current, float limit) {
float delta = fabsf(target - current);
float sign = copysign(1.0f, target - current);
return current + sign * fminf(delta, limit);
}
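// For example, UpdateFactor(1.0f, 0.2f, 0.1f) returns 0.3f, while
// UpdateFactor(0.25f, 0.2f, 0.1f) returns 0.25f: |current| moves toward
// |target| by at most |limit| per call.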
float AddDitherIfZero(float value) {
return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value;
}
complex<float> zerofudge(complex<float> c) {
return complex<float>(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag()));
}
complex<float> NewMean(complex<float> mean, complex<float> data, size_t count) {
return mean + (data - mean) / static_cast<float>(count);
}
void AddToMean(complex<float> data, size_t count, complex<float>* mean) {
(*mean) = NewMean(*mean, data, count);
}
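// For example, folding the series {2, 4, 9} into a running mean:
//   NewMean(2, 4, 2) == 2 + (4 - 2) / 2 == 3
//   NewMean(3, 9, 3) == 3 + (9 - 3) / 3 == 5,
// the arithmetic mean of the three values.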
static const size_t kWindowBlockSize = 10;
VarianceArray::VarianceArray(size_t num_freqs,
StepType type,
size_t window_size,
float decay)
: running_mean_(new complex<float>[num_freqs]()),
running_mean_sq_(new complex<float>[num_freqs]()),
sub_running_mean_(new complex<float>[num_freqs]()),
sub_running_mean_sq_(new complex<float>[num_freqs]()),
variance_(new float[num_freqs]()),
conj_sum_(new float[num_freqs]()),
num_freqs_(num_freqs),
window_size_(window_size),
decay_(decay),
history_cursor_(0),
count_(0),
array_mean_(0.0f),
buffer_full_(false) {
history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
for (size_t i = 0; i < num_freqs_; ++i) {
history_[i].reset(new complex<float>[window_size_]());
}
subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
for (size_t i = 0; i < num_freqs_; ++i) {
subhistory_[i].reset(new complex<float>[window_size_]());
}
subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
for (size_t i = 0; i < num_freqs_; ++i) {
subhistory_sq_[i].reset(new complex<float>[window_size_]());
}
switch (type) {
case kStepInfinite:
step_func_ = &VarianceArray::InfiniteStep;
break;
case kStepDecaying:
step_func_ = &VarianceArray::DecayStep;
break;
case kStepWindowed:
step_func_ = &VarianceArray::WindowedStep;
break;
case kStepBlocked:
step_func_ = &VarianceArray::BlockedStep;
break;
case kStepBlockBasedMovingAverage:
step_func_ = &VarianceArray::BlockBasedMovingAverage;
break;
}
}
// Compute the variance with Welford's algorithm, adding some fudge to
// the input in case of all-zeroes.
void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {
array_mean_ = 0.0f;
++count_;
for (size_t i = 0; i < num_freqs_; ++i) {
complex<float> sample = data[i];
if (!skip_fudge) {
sample = zerofudge(sample);
}
if (count_ == 1) {
running_mean_[i] = sample;
variance_[i] = 0.0f;
} else {
float old_sum = conj_sum_[i];
complex<float> old_mean = running_mean_[i];
running_mean_[i] =
old_mean + (sample - old_mean) / static_cast<float>(count_);
conj_sum_[i] =
(old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i]))
.real();
variance_[i] =
conj_sum_[i] / (count_ - 1);
}
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
}
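// For reference, the same recurrence on a plain real-valued stream (an
// illustrative sketch, not upstream code; equivalent to the per-bin complex
// update above):
//
//   float mean = 0.f, m2 = 0.f;  // m2 plays the role of conj_sum_[i].
//   size_t n = 0;
//   for (float x : samples) {
//     ++n;
//     const float old_mean = mean;
//     mean += (x - mean) / n;
//     m2 += (x - old_mean) * (x - mean);
//   }
//   const float variance = (n > 1) ? m2 / (n - 1) : 0.f;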
// Compute the variance from the beginning, with exponential decaying of the
// series data.
void VarianceArray::DecayStep(const complex<float>* data, bool /*dummy*/) {
array_mean_ = 0.0f;
++count_;
for (size_t i = 0; i < num_freqs_; ++i) {
complex<float> sample = data[i];
sample = zerofudge(sample);
if (count_ == 1) {
running_mean_[i] = sample;
running_mean_sq_[i] = sample * std::conj(sample);
variance_[i] = 0.0f;
} else {
complex<float> prev = running_mean_[i];
complex<float> prev2 = running_mean_sq_[i];
running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample;
running_mean_sq_[i] =
decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample);
variance_[i] = (running_mean_sq_[i] -
running_mean_[i] * std::conj(running_mean_[i])).real();
}
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
}
// Windowed variance computation. On each step, the variances for the
// window are recomputed from scratch, using Welford's algorithm.
void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) {
size_t num = min(count_ + 1, window_size_);
array_mean_ = 0.0f;
for (size_t i = 0; i < num_freqs_; ++i) {
complex<float> mean;
float conj_sum = 0.0f;
history_[i][history_cursor_] = data[i];
mean = history_[i][history_cursor_];
variance_[i] = 0.0f;
for (size_t j = 1; j < num; ++j) {
complex<float> sample =
zerofudge(history_[i][(history_cursor_ + j) % window_size_]);
sample = history_[i][(history_cursor_ + j) % window_size_];
float old_sum = conj_sum;
complex<float> old_mean = mean;
mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1);
conj_sum =
(old_sum + std::conj(sample - old_mean) * (sample - mean)).real();
variance_[i] = conj_sum / (j);
}
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
history_cursor_ = (history_cursor_ + 1) % window_size_;
++count_;
}
// Variance with a window of blocks. Within each block, the variances are
// recomputed from scratch at every step, using |Var(X) = E(X^2) - E^2(X)|.
// Once a block is filled with kWindowBlockSize samples, it is added to the
// history window and a new block is started. The variances for the window
// are recomputed from scratch at each of these transitions.
void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
size_t blocks = min(window_size_, history_cursor_ + 1);
for (size_t i = 0; i < num_freqs_; ++i) {
AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);
AddToMean(data[i] * std::conj(data[i]), count_ + 1,
&sub_running_mean_sq_[i]);
subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];
subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];
variance_[i] =
(NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -
NewMean(running_mean_[i], sub_running_mean_[i], blocks) *
std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))
.real();
if (count_ == kWindowBlockSize - 1) {
sub_running_mean_[i] = complex<float>(0.0f, 0.0f);
sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
running_mean_[i] = complex<float>(0.0f, 0.0f);
running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) {
AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);
AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);
}
++history_cursor_;
}
}
++count_;
if (count_ == kWindowBlockSize) {
count_ = 0;
}
}
// Recomputes variances for each window from scratch based on previous window.
void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,
bool /*dummy*/) {
// TODO(ekmeyerson) To mitigate potential divergence, add a counter so that
// every so often the sums are computed from scratch by summing over all
// elements instead of subtracting oldest and adding newest.
for (size_t i = 0; i < num_freqs_; ++i) {
sub_running_mean_[i] += data[i];
sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);
}
++count_;
// TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow
// experimentation with different block size, window size pairs.
if (count_ >= kWindowBlockSize) {
count_ = 0;
for (size_t i = 0; i < num_freqs_; ++i) {
running_mean_[i] -= subhistory_[i][history_cursor_];
running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];
float scale = 1.f / kWindowBlockSize;
subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;
subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;
sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);
sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);
running_mean_[i] += subhistory_[i][history_cursor_];
running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];
scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);
variance_[i] = std::real(running_mean_sq_[i] * scale -
running_mean_[i] * scale *
std::conj(running_mean_[i]) * scale);
}
++history_cursor_;
if (history_cursor_ >= window_size_) {
buffer_full_ = true;
history_cursor_ = 0;
}
}
}
void VarianceArray::Clear() {
memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * num_freqs_);
memset(running_mean_sq_.get(), 0,
sizeof(*running_mean_sq_.get()) * num_freqs_);
memset(variance_.get(), 0, sizeof(*variance_.get()) * num_freqs_);
memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * num_freqs_);
history_cursor_ = 0;
count_ = 0;
array_mean_ = 0.0f;
}
void VarianceArray::ApplyScale(float scale) {
array_mean_ = 0.0f;
for (size_t i = 0; i < num_freqs_; ++i) {
variance_[i] *= scale * scale;
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
}
GainApplier::GainApplier(size_t freqs, float change_limit)
: num_freqs_(freqs),
change_limit_(change_limit),
target_(new float[freqs]()),
current_(new float[freqs]()) {
for (size_t i = 0; i < freqs; ++i) {
target_[i] = 1.0f;
current_[i] = 1.0f;
}
}
void GainApplier::Apply(const complex<float>* in_block,
complex<float>* out_block) {
for (size_t i = 0; i < num_freqs_; ++i) {
float factor = sqrtf(fabsf(current_[i]));
if (!std::isnormal(factor)) {
factor = 1.0f;
}
out_block[i] = factor * in_block[i];
current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);
}
}
} // namespace intelligibility
} // namespace webrtc

View File

@ -0,0 +1,160 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Specifies helper classes for intelligibility enhancement.
//
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
#include <complex>
#include "webrtc/base/scoped_ptr.h"
namespace webrtc {
namespace intelligibility {
// Return |current| changed towards |target|, with the change being at most
// |limit|.
float UpdateFactor(float target, float current, float limit);
// Apply a small fudge to degenerate complex values. The numbers in the array
// were chosen randomly, so that even a series of all zeroes has some small
// variability.
std::complex<float> zerofudge(std::complex<float> c);
// Incremental mean computation. Returns the mean of the series after |data|
// is added to a series that has mean |mean|; |count| is the new length.
std::complex<float> NewMean(std::complex<float> mean,
std::complex<float> data,
size_t count);
// Updates |mean| with added |data|.
void AddToMean(std::complex<float> data,
size_t count,
std::complex<float>* mean);
// Internal helper for computing the variances of a stream of arrays.
// The result is an array of variances per position: the i-th variance
// is the variance of the stream of data on the i-th positions in the
// input arrays.
// There are five methods of computation:
//  * kStepInfinite computes variances from the beginning onwards
//  * kStepDecaying uses a recursive exponential decay formula with a
//    settable forgetting factor
//  * kStepWindowed computes variances within a moving window
//  * kStepBlocked is similar to kStepWindowed, but history is kept
//    as a rolling window of blocks: multiple input elements are used for
//    one block and the history then consists of the variances of these blocks
//    with the same effect as kStepWindowed, but less storage, so the window
//    can be longer
//  * kStepBlockBasedMovingAverage keeps running sums over a rolling window
//    of fixed-size blocks, subtracting the oldest block and adding the
//    newest (see BlockBasedMovingAverage in the .cc file)
class VarianceArray {
public:
enum StepType {
kStepInfinite = 0,
kStepDecaying,
kStepWindowed,
kStepBlocked,
kStepBlockBasedMovingAverage
};
// Construct an instance for the given input array length (|freqs|) and
// computation algorithm (|type|), with the appropriate parameters.
// |window_size| is the number of samples for kStepWindowed and
// the number of blocks for kStepBlocked. |decay| is the forgetting factor
// for kStepDecaying.
VarianceArray(size_t freqs, StepType type, size_t window_size, float decay);
// Add a new data point to the series and compute the new variances.
// TODO(bercic) |skip_fudge| is a flag for kStepWindowed and kStepDecaying,
// whether they should skip adding some small dummy values to the input
// to prevent problems with all-zero inputs. Can probably be removed.
void Step(const std::complex<float>* data, bool skip_fudge = false) {
(this->*step_func_)(data, skip_fudge);
}
// Reset variances to zero and forget all history.
void Clear();
// Scale the input data by |scale|. Effectively multiply variances
// by |scale^2|.
void ApplyScale(float scale);
// The current set of variances.
const float* variance() const { return variance_.get(); }
// The mean value of the current set of variances.
float array_mean() const { return array_mean_; }
private:
void InfiniteStep(const std::complex<float>* data, bool dummy);
void DecayStep(const std::complex<float>* data, bool dummy);
void WindowedStep(const std::complex<float>* data, bool dummy);
void BlockedStep(const std::complex<float>* data, bool dummy);
void BlockBasedMovingAverage(const std::complex<float>* data, bool dummy);
// TODO(ekmeyerson): Switch the following running means
// and histories from rtc::scoped_ptr to std::vector.
// The current average X and X^2.
rtc::scoped_ptr<std::complex<float>[]> running_mean_;
rtc::scoped_ptr<std::complex<float>[]> running_mean_sq_;
// Average X and X^2 for the current block in kStepBlocked.
rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_;
rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_sq_;
// Sample history for the rolling window in kStepWindowed and block-wise
// histories for kStepBlocked.
rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> history_;
rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_;
rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_sq_;
// The current set of variances and sums for Welford's algorithm.
rtc::scoped_ptr<float[]> variance_;
rtc::scoped_ptr<float[]> conj_sum_;
const size_t num_freqs_;
const size_t window_size_;
const float decay_;
size_t history_cursor_;
size_t count_;
float array_mean_;
bool buffer_full_;
void (VarianceArray::*step_func_)(const std::complex<float>*, bool);
};
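// Example (illustrative, not upstream code): tracking per-bin variance of a
// stream of FFT blocks with exponential forgetting, where NextBlock() is a
// hypothetical source of |freqs| complex bins:
//
//   VarianceArray variance(freqs, VarianceArray::kStepDecaying,
//                          10,     // window_size; unused by kStepDecaying.
//                          0.9f);  // decay: forgetting factor.
//   while (const std::complex<float>* block = NextBlock()) {
//     variance.Step(block);
//     const float* var = variance.variance();  // |freqs| variances.
//   }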
// Helper class for smoothing gain changes. On each application step, the
// currently used gains are changed towards a set of settable target gains,
// constrained by a limit on the magnitude of the changes.
class GainApplier {
public:
GainApplier(size_t freqs, float change_limit);
// Copy |in_block| to |out_block|, multiplied by the current set of gains,
// and step the current set of gains towards the target set.
void Apply(const std::complex<float>* in_block,
std::complex<float>* out_block);
// Return the current target gain set. Modify this array to set the targets.
float* target() const { return target_.get(); }
private:
const size_t num_freqs_;
const float change_limit_;
rtc::scoped_ptr<float[]> target_;
rtc::scoped_ptr<float[]> current_;
};
} // namespace intelligibility
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

View File

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,175 +8,79 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "level_estimator_impl.h"
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
#include <cassert>
#include <cstring>
#include "critical_section_wrapper.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
// TODO(ajm): implement the underlying level estimator component.
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/rms_level.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
namespace webrtc {
typedef void Handle;
namespace {
/*int EstimateLevel(AudioBuffer* audio, Handle* my_handle) {
assert(audio->samples_per_split_channel() <= 160);
WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
if (audio->num_channels() > 1) {
audio->CopyAndMixLowPass(1);
mixed_data = audio->mixed_low_pass_data(0);
}
int err = UpdateLvlEst(my_handle,
mixed_data,
audio->samples_per_split_channel());
if (err != AudioProcessing::kNoError) {
return GetHandleError(my_handle);
}
return AudioProcessing::kNoError;
}
int GetMetricsLocal(Handle* my_handle, LevelEstimator::Metrics* metrics) {
level_t levels;
memset(&levels, 0, sizeof(levels));
int err = ExportLevels(my_handle, &levels, 2);
if (err != AudioProcessing::kNoError) {
return err;
}
metrics->signal.instant = levels.instant;
metrics->signal.average = levels.average;
metrics->signal.maximum = levels.max;
metrics->signal.minimum = levels.min;
err = ExportLevels(my_handle, &levels, 1);
if (err != AudioProcessing::kNoError) {
return err;
}
metrics->speech.instant = levels.instant;
metrics->speech.average = levels.average;
metrics->speech.maximum = levels.max;
metrics->speech.minimum = levels.min;
err = ExportLevels(my_handle, &levels, 0);
if (err != AudioProcessing::kNoError) {
return err;
}
metrics->noise.instant = levels.instant;
metrics->noise.average = levels.average;
metrics->noise.maximum = levels.max;
metrics->noise.minimum = levels.min;
return AudioProcessing::kNoError;
}*/
} // namespace
LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
apm_(apm) {}
LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
crit_(crit) {}
LevelEstimatorImpl::~LevelEstimatorImpl() {}
int LevelEstimatorImpl::AnalyzeReverseStream(AudioBuffer* /*audio*/) {
return apm_->kUnsupportedComponentError;
/*if (!is_component_enabled()) {
return apm_->kNoError;
int LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
if (!is_component_enabled()) {
return AudioProcessing::kNoError;
}
return EstimateLevel(audio, static_cast<Handle*>(handle(1)));*/
}
int LevelEstimatorImpl::ProcessCaptureAudio(AudioBuffer* /*audio*/) {
return apm_->kUnsupportedComponentError;
/*if (!is_component_enabled()) {
return apm_->kNoError;
RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
for (int i = 0; i < audio->num_channels(); ++i) {
rms_level->Process(audio->channels_const()[i],
audio->num_frames());
}
return EstimateLevel(audio, static_cast<Handle*>(handle(0)));*/
return AudioProcessing::kNoError;
}
int LevelEstimatorImpl::Enable(bool /*enable*/) {
CriticalSectionScoped crit_scoped(*apm_->crit());
return apm_->kUnsupportedComponentError;
//return EnableComponent(enable);
int LevelEstimatorImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
bool LevelEstimatorImpl::is_enabled() const {
return is_component_enabled();
}
int LevelEstimatorImpl::GetMetrics(LevelEstimator::Metrics* /*metrics*/,
LevelEstimator::Metrics* /*reverse_metrics*/) {
return apm_->kUnsupportedComponentError;
/*if (!is_component_enabled()) {
return apm_->kNotEnabledError;
int LevelEstimatorImpl::RMS() {
if (!is_component_enabled()) {
return AudioProcessing::kNotEnabledError;
}
int err = GetMetricsLocal(static_cast<Handle*>(handle(0)), metrics);
if (err != apm_->kNoError) {
return err;
}
err = GetMetricsLocal(static_cast<Handle*>(handle(1)), reverse_metrics);
if (err != apm_->kNoError) {
return err;
}
return apm_->kNoError;*/
}
int LevelEstimatorImpl::get_version(char* version,
int version_len_bytes) const {
// An empty string is used to indicate no version information.
memset(version, 0, version_len_bytes);
return apm_->kNoError;
RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
return rms_level->RMS();
}
// The ProcessingComponent implementation is pretty weird in this class since
// we have only a single instance of the trivial underlying component.
void* LevelEstimatorImpl::CreateHandle() const {
Handle* handle = NULL;
/*if (CreateLvlEst(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}*/
return handle;
return new RMSLevel;
}
int LevelEstimatorImpl::DestroyHandle(void* /*handle*/) const {
return apm_->kUnsupportedComponentError;
//return FreeLvlEst(static_cast<Handle*>(handle));
void LevelEstimatorImpl::DestroyHandle(void* handle) const {
delete static_cast<RMSLevel*>(handle);
}
int LevelEstimatorImpl::InitializeHandle(void* /*handle*/) const {
return apm_->kUnsupportedComponentError;
/*const double kIntervalSeconds = 1.5;
return InitLvlEst(static_cast<Handle*>(handle),
apm_->sample_rate_hz(),
kIntervalSeconds);*/
int LevelEstimatorImpl::InitializeHandle(void* handle) const {
static_cast<RMSLevel*>(handle)->Reset();
return AudioProcessing::kNoError;
}
int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const {
return apm_->kUnsupportedComponentError;
//return apm_->kNoError;
return AudioProcessing::kNoError;
}
int LevelEstimatorImpl::num_handles_required() const {
return apm_->kUnsupportedComponentError;
//return 2;
return 1;
}
int LevelEstimatorImpl::GetHandleError(void* handle) const {
// The component has no detailed errors.
assert(handle != NULL);
return apm_->kUnspecifiedError;
int LevelEstimatorImpl::GetHandleError(void* /*handle*/) const {
return AudioProcessing::kUnspecifiedError;
}
} // namespace webrtc

View File

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,46 +8,46 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_

#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
#include "webrtc/modules/audio_processing/rms_level.h"

namespace webrtc {

class AudioBuffer;
class CriticalSectionWrapper;

class LevelEstimatorImpl : public LevelEstimator,
                           public ProcessingComponent {
 public:
  LevelEstimatorImpl(const AudioProcessing* apm,
                     CriticalSectionWrapper* crit);
  virtual ~LevelEstimatorImpl();

  int ProcessStream(AudioBuffer* audio);

  // LevelEstimator implementation.
  bool is_enabled() const override;

 private:
  // LevelEstimator implementation.
  int Enable(bool enable) override;
  int RMS() override;

  // ProcessingComponent implementation.
  void* CreateHandle() const override;
  int InitializeHandle(void* handle) const override;
  int ConfigureHandle(void* handle) const override;
  void DestroyHandle(void* handle) const override;
  int num_handles_required() const override;
  int GetHandleError(void* handle) const override;

  CriticalSectionWrapper* crit_;
};

}  // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_


@ -0,0 +1,86 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
#include <stdio.h>
#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
// To enable AEC logging, invoke GYP with -Daec_debug_dump=1.
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Dumps wav data to file.
#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
do { \
rtc_WavWriteSamples(file, data, num_samples); \
} while (0)
// (Re)opens a wav file for writing using the specified sample rate.
#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
sample_rate, wav_file) \
do { \
WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \
wav_file); \
} while (0)
// Closes a wav file.
#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
do { \
rtc_WavClose(wav_file); \
} while (0)
// Dumps raw data to file.
#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
do { \
(void) fwrite(data, data_size, 1, file); \
} while (0)
// Opens a raw data file for writing with an instance-specific filename.
#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
do { \
WebRtcAec_RawFileOpen(name, instance_counter, file); \
} while (0)
// Closes a raw data file.
#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
do { \
fclose(file); \
} while (0)
#else // WEBRTC_AEC_DEBUG_DUMP
#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
do { \
} while (0)
#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
sample_rate, wav_file) \
do { \
} while (0)
#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
do { \
} while (0)
#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
do { \
} while (0)
#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
do { \
} while (0)
#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
do { \
} while (0)
#endif // WEBRTC_AEC_DEBUG_DUMP
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
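A hedged sketch (not upstream code) of how a call site uses these macros; the dump-file handle and function names below are illustrative. When WEBRTC_AEC_DEBUG_DUMP is undefined, both calls compile to empty do/while bodies:

#include "webrtc/modules/audio_processing/logging/aec_logging.h"

static rtc_WavWriter* g_far_wav = NULL;  /* Illustrative dump-file handle. */

static void DumpFarendFrame(const float* far_buf, size_t num_samples,
                            int instance_index, int sample_rate_hz) {
  /* Lazily (re)opens "aec_far<index>-<rate>.wav" when the rate changes. */
  RTC_AEC_DEBUG_WAV_REOPEN("aec_far", instance_index, sample_rate_hz,
                           sample_rate_hz, &g_far_wav);
  RTC_AEC_DEBUG_WAV_WRITE(g_far_wav, far_buf, num_samples);
}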


@ -0,0 +1,57 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
#include <stdint.h>
#include <stdio.h>
#include "webrtc/base/checks.h"
#include "webrtc/base/stringutils.h"
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/typedefs.h"
#ifdef WEBRTC_AEC_DEBUG_DUMP
void WebRtcAec_ReopenWav(const char* name,
int instance_index,
int process_rate,
int sample_rate,
rtc_WavWriter** wav_file) {
if (*wav_file) {
if (rtc_WavSampleRate(*wav_file) == sample_rate)
return;
rtc_WavClose(*wav_file);
}
char filename[64];
int written = rtc::sprintfn(filename, sizeof(filename), "%s%d-%d.wav", name,
instance_index, process_rate);
// Ensure there was no buffer output error.
RTC_DCHECK_GE(written, 0);
// Ensure that the buffer size was sufficient.
RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
*wav_file = rtc_WavOpen(filename, sample_rate, 1);
}
void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) {
char filename[64];
int written = rtc::sprintfn(filename, sizeof(filename), "%s_%d.dat", name,
instance_index);
// Ensure there was no buffer output error.
RTC_DCHECK_GE(written, 0);
// Ensure that the buffer size was sufficient.
RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
*file = fopen(filename, "wb");
}
#endif // WEBRTC_AEC_DEBUG_DUMP


@ -0,0 +1,41 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
#include <stdio.h>
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/typedefs.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Opens a new Wav file for writing. If it was already open with a different
// sample frequency, it closes it first.
void WebRtcAec_ReopenWav(const char* name,
int instance_index,
int process_rate,
int sample_rate,
rtc_WavWriter** wav_file);
// Opens a dump file with an instance-specific filename.
void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file);
#endif // WEBRTC_AEC_DEBUG_DUMP
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,19 +8,18 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "noise_suppression_impl.h"
#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
#include <cassert>
#include <assert.h>
#include "critical_section_wrapper.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#if defined(WEBRTC_NS_FLOAT)
#include "noise_suppression.h"
#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
#elif defined(WEBRTC_NS_FIXED)
#include "noise_suppression_x.h"
#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
#endif
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
namespace webrtc {
@ -41,54 +40,64 @@ int MapSetting(NoiseSuppression::Level level) {
return 2;
case NoiseSuppression::kVeryHigh:
return 3;
default:
return -1;
}
assert(false);
return -1;
}
} // namespace
NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
level_(kModerate) {}
NoiseSuppressionImpl::~NoiseSuppressionImpl() {}
int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
int err = apm_->kNoError;
int NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
#if defined(WEBRTC_NS_FLOAT)
if (!is_component_enabled()) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
for (int i = 0; i < num_handles(); i++) {
for (int i = 0; i < num_handles(); ++i) {
Handle* my_handle = static_cast<Handle*>(handle(i));
WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]);
}
#endif
return apm_->kNoError;
}
int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
if (!is_component_enabled()) {
return apm_->kNoError;
}
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
for (int i = 0; i < num_handles(); ++i) {
Handle* my_handle = static_cast<Handle*>(handle(i));
#if defined(WEBRTC_NS_FLOAT)
err = WebRtcNs_Process(static_cast<Handle*>(handle(i)),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i));
WebRtcNs_Process(my_handle,
audio->split_bands_const_f(i),
audio->num_bands(),
audio->split_bands_f(i));
#elif defined(WEBRTC_NS_FIXED)
err = WebRtcNsx_Process(static_cast<Handle*>(handle(i)),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i));
WebRtcNsx_Process(my_handle,
audio->split_bands_const(i),
audio->num_bands(),
audio->split_bands(i));
#endif
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
}
}
return apm_->kNoError;
}
int NoiseSuppressionImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
@ -97,7 +106,7 @@ bool NoiseSuppressionImpl::is_enabled() const {
}
int NoiseSuppressionImpl::set_level(Level level) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(level) == -1) {
return apm_->kBadParameterError;
}
@ -110,49 +119,43 @@ NoiseSuppression::Level NoiseSuppressionImpl::level() const {
return level_;
}
int NoiseSuppressionImpl::get_version(char* version,
int version_len_bytes) const {
#if defined(WEBRTC_NS_FLOAT)
if (WebRtcNs_get_version(version, version_len_bytes) != 0)
#elif defined(WEBRTC_NS_FIXED)
if (WebRtcNsx_get_version(version, version_len_bytes) != 0)
#endif
{
return apm_->kBadParameterError;
}
return apm_->kNoError;
}
float NoiseSuppressionImpl::speech_probability() const {
#if defined(WEBRTC_NS_FLOAT)
float probability_average = 0.0f;
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
probability_average += WebRtcNs_prior_speech_probability(my_handle);
}
return probability_average / num_handles();
#elif defined(WEBRTC_NS_FIXED)
// Currently not available for the fixed point implementation.
return apm_->kUnsupportedFunctionError;
#endif
}
void* NoiseSuppressionImpl::CreateHandle() const {
Handle* handle = NULL;
#if defined(WEBRTC_NS_FLOAT)
if (WebRtcNs_Create(&handle) != apm_->kNoError)
#elif defined(WEBRTC_NS_FIXED)
if (WebRtcNsx_Create(&handle) != apm_->kNoError)
#endif
{
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
}
void* NoiseSuppressionImpl::CreateHandle() const {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_Create();
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_Create();
#endif
}
int NoiseSuppressionImpl::DestroyHandle(void* handle) const {
void NoiseSuppressionImpl::DestroyHandle(void* handle) const {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_Free(static_cast<Handle*>(handle));
WebRtcNs_Free(static_cast<Handle*>(handle));
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_Free(static_cast<Handle*>(handle));
WebRtcNsx_Free(static_cast<Handle*>(handle));
#endif
}
int NoiseSuppressionImpl::InitializeHandle(void* handle) const {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz());
return WebRtcNs_Init(static_cast<Handle*>(handle),
apm_->proc_sample_rate_hz());
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz());
return WebRtcNsx_Init(static_cast<Handle*>(handle),
apm_->proc_sample_rate_hz());
#endif
}
@ -176,4 +179,3 @@ int NoiseSuppressionImpl::GetHandleError(void* handle) const {
return apm_->kUnspecifiedError;
}
} // namespace webrtc


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,47 +8,50 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class NoiseSuppressionImpl : public NoiseSuppression,
public ProcessingComponent {
public:
explicit NoiseSuppressionImpl(const AudioProcessingImpl* apm);
NoiseSuppressionImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~NoiseSuppressionImpl();
int AnalyzeCaptureAudio(AudioBuffer* audio);
int ProcessCaptureAudio(AudioBuffer* audio);
// NoiseSuppression implementation.
virtual bool is_enabled() const;
// ProcessingComponent implementation.
virtual int get_version(char* version, int version_len_bytes) const;
bool is_enabled() const override;
float speech_probability() const override;
Level level() const override;
private:
// NoiseSuppression implementation.
virtual int Enable(bool enable);
virtual int set_level(Level level);
virtual Level level() const;
int Enable(bool enable) override;
int set_level(Level level) override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
Level level_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_


@ -1,20 +0,0 @@
noinst_LTLIBRARIES = libns.la libns_fix.la
libns_la_SOURCES = interface/noise_suppression.h \
noise_suppression.c \
windows_private.h \
defines.h \
ns_core.c \
ns_core.h
libns_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility
libns_fix_la_SOURCES = interface/noise_suppression_x.h \
noise_suppression_x.c \
nsx_defines.h \
nsx_core.c \
nsx_core.h
libns_fix_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility


@ -11,13 +11,10 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
//#define PROCESS_FLOW_0 // Use the traditional method.
//#define PROCESS_FLOW_1 // Use traditional with DD estimate of prior SNR.
#define PROCESS_FLOW_2 // Use the new method of speech/noise classification.
#define BLOCKL_MAX 160 // max processing block length: 160
#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256
#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1
#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2
#define QUANTILE (float)0.25
@ -27,7 +24,6 @@
#define FACTOR (float)40.0
#define WIDTH (float)0.01
#define SMOOTH (float)0.75 // filter smoothing
// Length of fft work arrays.
#define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2))
#define W_LENGTH (ANAL_BLOCKL_MAX >> 1)
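// Illustrative check (not in the original file): for ANAL_BLOCKL_MAX = 256
// the bound above is ceil(2 + sqrt(128)) = 14, so IP_LENGTH = 128 satisfies
// it with a wide margin.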


@ -0,0 +1,116 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
typedef struct NsHandleT NsHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function creates an instance of the floating point Noise Suppression.
*/
NsHandle* WebRtcNs_Create();
/*
* This function frees the dynamic memory of a specified noise suppression
* instance.
*
* Input:
* - NS_inst : Pointer to NS instance that should be freed
*/
void WebRtcNs_Free(NsHandle* NS_inst);
/*
* This function initializes a NS instance and has to be called before any other
* processing is performed.
*
* Input:
* - NS_inst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - NS_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - NS_inst : Noise suppression instance.
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - NS_inst : Updated instance.
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
/*
* This function estimates the background noise for the inserted speech frame.
* The input signal should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for L band
*
* Output:
* - NS_inst : Updated NS instance
*/
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
/*
* This function does Noise Suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - NS_inst : Updated NS instance
* - outframe : Pointer to output frame for each band
*/
void WebRtcNs_Process(NsHandle* NS_inst,
const float* const* spframe,
size_t num_bands,
float* const* outframe);
/* Returns the internally used prior speech probability of the current frame.
* There is a frequency bin based one as well, with which this should not be
* confused.
*
* Input
* - handle : Noise suppression instance.
*
* Return value : Prior speech probability in interval [0.0, 1.0].
* -1 - NULL pointer or uninitialized instance.
*/
float WebRtcNs_prior_speech_probability(NsHandle* handle);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
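A hedged usage sketch (not part of the header): the typical call sequence for one 10 ms, single-band frame at 16 kHz. In real use the instance is created once and reused across frames; buffer names are illustrative and error handling is elided:

#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"

void SuppressFrame(const float* near_band0, float* out_band0) {
  NsHandle* ns = WebRtcNs_Create();
  WebRtcNs_Init(ns, 16000);          /* 16 kHz -> 160-sample 10 ms frames. */
  WebRtcNs_set_policy(ns, 2);        /* 0: Mild, 1: Medium, 2: Aggressive. */
  WebRtcNs_Analyze(ns, near_band0);  /* Update the noise estimate. */
  const float* in_bands[1] = { near_band0 };
  float* out_bands[1] = { out_band0 };
  WebRtcNs_Process(ns, in_bands, 1, out_bands);
  float prior = WebRtcNs_prior_speech_probability(ns);
  (void)prior;  /* In [0.0, 1.0] once initialized. */
  WebRtcNs_Free(ns);
}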


@ -0,0 +1,88 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
#include "webrtc/typedefs.h"
typedef struct NsxHandleT NsxHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function creates an instance of the fixed point Noise Suppression.
*/
NsxHandle* WebRtcNsx_Create();
/*
* This function frees the dynamic memory of a specified Noise Suppression
* instance.
*
* Input:
* - nsxInst : Pointer to NS instance that should be freed
*/
void WebRtcNsx_Free(NsxHandle* nsxInst);
/*
* This function initializes a NS instance
*
* Input:
* - nsxInst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - nsxInst : Instance that should be initialized
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
/*
* This function does noise suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - nsxInst : NSx instance. Needs to be initialized before the call.
* - speechFrame : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - nsxInst : Updated NSx instance
* - outFrame : Pointer to output frame for each band
*/
void WebRtcNsx_Process(NsxHandle* nsxInst,
const short* const* speechFrame,
int num_bands,
short* const* outFrame);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
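The fixed-point counterpart, as a hedged sketch under the same assumptions (one reusable instance, illustrative buffers):

#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"

void SuppressFrameFixed(const short* near_band0, short* out_band0) {
  NsxHandle* nsx = WebRtcNsx_Create();
  WebRtcNsx_Init(nsx, 16000);
  WebRtcNsx_set_policy(nsx, 1);
  const short* in_bands[1] = { near_band0 };
  short* out_bands[1] = { out_band0 };
  WebRtcNsx_Process(nsx, in_bands, 1, out_bands);
  WebRtcNsx_Free(nsx);
}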


@ -1,124 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_
#include "typedefs.h"
typedef struct NsHandleT NsHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function returns the version number of the code.
*
* Input:
* - version : Pointer to a character array where the version
* info is stored.
* - length : Length of version.
*
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNs_get_version(char* version, short length);
/*
* This function creates an instance of the noise reduction structure
*
* Input:
* - NS_inst : Pointer to noise reduction instance that should be
* created
*
* Output:
* - NS_inst : Pointer to created noise reduction instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Create(NsHandle** NS_inst);
/*
* This function frees the dynamic memory of a specified Noise Reduction
* instance.
*
* Input:
* - NS_inst : Pointer to NS instance that should be freed
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Free(NsHandle* NS_inst);
/*
* This function initializes a NS instance
*
* Input:
* - NS_inst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - NS_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - NS_inst : Instance that should be initialized
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - NS_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
/*
* This function does Noise Suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : NS instance. Needs to be initialized before the call.
* - spframe : Pointer to speech frame buffer for L band
* - spframe_H : Pointer to speech frame buffer for H band
* - fs : sampling frequency
*
* Output:
* - NS_inst : Updated NS instance
* - outframe : Pointer to output frame for L band
* - outframe_H : Pointer to output frame for H band
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNs_Process(NsHandle* NS_inst,
short* spframe,
short* spframe_H,
short* outframe,
short* outframe_H);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_


@ -1,123 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#include "typedefs.h"
typedef struct NsxHandleT NsxHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function returns the version number of the code.
*
* Input:
* - version : Pointer to a character array where the version
* info is stored.
* - length : Length of version.
*
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNsx_get_version(char* version, short length);
/*
* This function creates an instance of the noise reduction structure
*
* Input:
* - nsxInst : Pointer to noise reduction instance that should be
* created
*
* Output:
* - nsxInst : Pointer to created noise reduction instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Create(NsxHandle** nsxInst);
/*
* This function frees the dynamic memory of a specified Noise Suppression
* instance.
*
* Input:
* - nsxInst : Pointer to NS instance that should be freed
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Free(NsxHandle* nsxInst);
/*
* This function initializes a NS instance
*
* Input:
* - nsxInst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - nsxInst : Instance that should be initialized
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
/*
* This function does noise suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - nsxInst : NSx instance. Needs to be initialized before the call.
* - speechFrame : Pointer to speech frame buffer for L band
* - speechFrameHB : Pointer to speech frame buffer for H band
* - fs : sampling frequency
*
* Output:
* - nsxInst : Updated NSx instance
* - outFrame : Pointer to output frame for L band
* - outFrameHB : Pointer to output frame for H band
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNsx_Process(NsxHandle* nsxInst,
short* speechFrame,
short* speechFrameHB,
short* outFrame,
short* outFrameHB);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,58 +8,52 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
#include <stdlib.h>
#include <string.h>
#include "noise_suppression.h"
#include "ns_core.h"
#include "defines.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/ns/defines.h"
#include "webrtc/modules/audio_processing/ns/ns_core.h"
int WebRtcNs_get_version(char* versionStr, short length) {
const char version[] = "NS 2.2.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL) {
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
return 0;
NsHandle* WebRtcNs_Create() {
NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC));
self->initFlag = 0;
return (NsHandle*)self;
}
int WebRtcNs_Create(NsHandle** NS_inst) {
*NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
if (*NS_inst != NULL) {
(*(NSinst_t**)NS_inst)->initFlag = 0;
return 0;
} else {
return -1;
}
}
int WebRtcNs_Free(NsHandle* NS_inst) {
void WebRtcNs_Free(NsHandle* NS_inst) {
free(NS_inst);
return 0;
}
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) {
return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs);
}
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode);
}
int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
short* outframe, short* outframe_H) {
return WebRtcNs_ProcessCore(
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe);
}
void WebRtcNs_Process(NsHandle* NS_inst,
const float* const* spframe,
size_t num_bands,
float* const* outframe) {
WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands,
outframe);
}
float WebRtcNs_prior_speech_probability(NsHandle* handle) {
NoiseSuppressionC* self = (NoiseSuppressionC*)handle;
if (handle == NULL) {
return -1;
}
if (self->initFlag == 0) {
return -1;
}
return self->priorSpeechProb;
}
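For orientation, a hedged before/after sketch of the creation idiom this rewrite implies (comment form only; error handling elided):

/* Old API (removed above): the handle came back through an out-parameter.
 *   NsHandle* ns = NULL;
 *   if (WebRtcNs_Create(&ns) != 0) { ... }
 * New API: the handle is returned directly and WebRtcNs_Free() returns void.
 *   NsHandle* ns = WebRtcNs_Create();
 *   WebRtcNs_Free(ns);
 */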


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,58 +8,39 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
#include <stdlib.h>
#include <string.h>
#include "noise_suppression_x.h"
#include "nsx_core.h"
#include "nsx_defines.h"
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
#include "webrtc/modules/audio_processing/ns/nsx_core.h"
#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
int WebRtcNsx_get_version(char* versionStr, short length) {
const char version[] = "NS\t3.1.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL) {
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
return 0;
NsxHandle* WebRtcNsx_Create() {
NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC));
WebRtcSpl_Init();
self->real_fft = NULL;
self->initFlag = 0;
return (NsxHandle*)self;
}
int WebRtcNsx_Create(NsxHandle** nsxInst) {
*nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t));
if (*nsxInst != NULL) {
(*(NsxInst_t**)nsxInst)->initFlag = 0;
return 0;
} else {
return -1;
}
}
int WebRtcNsx_Free(NsxHandle* nsxInst) {
void WebRtcNsx_Free(NsxHandle* nsxInst) {
WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft);
free(nsxInst);
return 0;
}
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs) {
return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs);
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) {
return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs);
}
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode);
}
int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
short* speechFrameHB, short* outFrame,
short* outFrameHB) {
return WebRtcNsx_ProcessCore(
(NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
void WebRtcNsx_Process(NsxHandle* nsxInst,
const short* const* speechFrame,
int num_bands,
short* const* outFrame) {
WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame,
num_bands, outFrame);
}


@ -1,58 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'ns',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'apm_util'
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/noise_suppression.h',
'noise_suppression.c',
'windows_private.h',
'defines.h',
'ns_core.c',
'ns_core.h',
],
},
{
'target_name': 'ns_fix',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/noise_suppression_x.h',
'noise_suppression_x.c',
'nsx_defines.h',
'nsx_core.c',
'nsx_core.h',
],
},
],
}

File diff suppressed because it is too large.


@ -8,105 +8,110 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#include "defines.h"
#include "webrtc/modules/audio_processing/ns/defines.h"
typedef struct NSParaExtract_t_ {
//bin size of histogram
typedef struct NSParaExtract_ {
// Bin size of histogram.
float binSizeLrt;
float binSizeSpecFlat;
float binSizeSpecDiff;
//range of histogram over which lrt threshold is computed
// Range of histogram over which LRT threshold is computed.
float rangeAvgHistLrt;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
//thresholds for prior model
float factor1ModelPars; //for lrt and spectral difference
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
//peak limit for spectral flatness (varies between 0 and 1)
// Scale parameters: multiply dominant peaks of the histograms by scale factor
// to obtain thresholds for prior model.
float factor1ModelPars; // For LRT and spectral difference.
float factor2ModelPars; // For spectral_flatness: used when noise is flatter
// than speech.
// Peak limit for spectral flatness (varies between 0 and 1).
float thresPosSpecFlat;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
// Limit on spacing of two highest peaks in histogram: spacing determined by
// bin size.
float limitPeakSpacingSpecFlat;
float limitPeakSpacingSpecDiff;
//limit on relevance of second peak:
// Limit on relevance of second peak.
float limitPeakWeightsSpecFlat;
float limitPeakWeightsSpecDiff;
//limit on fluctuation of lrt feature
// Limit on fluctuation of LRT feature.
float thresFluctLrt;
//limit on the max and min values for the feature thresholds
// Limit on the max and min values for the feature thresholds.
float maxLrt;
float minLrt;
float maxSpecFlat;
float minSpecFlat;
float maxSpecDiff;
float minSpecDiff;
//criteria of weight of histogram peak to accept/reject feature
// Criteria of weight of histogram peak to accept/reject feature.
int thresWeightSpecFlat;
int thresWeightSpecDiff;
} NSParaExtract_t;
} NSParaExtract;
typedef struct NSinst_t_ {
typedef struct NoiseSuppressionC_ {
uint32_t fs;
size_t blockLen;
size_t windShift;
size_t anaLen;
size_t magnLen;
int aggrMode;
const float* window;
float analyzeBuf[ANAL_BLOCKL_MAX];
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
WebRtc_UWord32 fs;
int blockLen;
int blockLen10ms;
int windShift;
int outLen;
int anaLen;
int magnLen;
int aggrMode;
const float* window;
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
float outBuf[3 * BLOCKL_MAX];
int initFlag;
// Parameters for quantile noise estimation.
float density[SIMULT * HALF_ANAL_BLOCKL];
float lquantile[SIMULT * HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// Parameters for Wiener filter.
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// FFT work arrays.
size_t ip[IP_LENGTH];
float wfft[W_LENGTH];
int initFlag;
// parameters for quantile noise estimation
float density[SIMULT* HALF_ANAL_BLOCKL];
float lquantile[SIMULT* HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// parameters for Wiener filter
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// fft work arrays.
int ip[IP_LENGTH];
float wfft[W_LENGTH];
// parameters for new method: some not needed, will reduce/cleanup later
WebRtc_Word32 blockInd; //frame index counter
int modelUpdatePars[4]; //parameters for updating or estimating
// thresholds/weights for prior model
float priorModelPars[7]; //parameters for prior model
float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
float priorSpeechProb; //prior speech/noise probability
float featureData[7]; //data for features
float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
float signalEnergy; //energy of magn
float sumMagn; //sum of magn
float whiteNoiseLevel; //initial noise estimate
float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
float pinkNoiseNumerator; //pink noise parameter: numerator
float pinkNoiseExp; //pink noise parameter: power of freq
NSParaExtract_t featureExtractionParams; //parameters for feature extraction
//histograms for parameter estimation
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB
} NSinst_t;
// Parameters for new method: some not needed, will reduce/cleanup later.
int32_t blockInd; // Frame index counter.
int modelUpdatePars[4]; // Parameters for updating or estimating.
// Thresholds/weights for prior model.
float priorModelPars[7]; // Parameters for prior model.
float noise[HALF_ANAL_BLOCKL]; // Noise spectrum from current frame.
float noisePrev[HALF_ANAL_BLOCKL]; // Noise spectrum from previous frame.
// Magnitude spectrum of previous analyze frame.
float magnPrevAnalyze[HALF_ANAL_BLOCKL];
// Magnitude spectrum of previous process frame.
float magnPrevProcess[HALF_ANAL_BLOCKL];
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; // Log LRT factor with time-smoothing.
float priorSpeechProb; // Prior speech/noise probability.
float featureData[7];
// Conservative noise spectrum estimate.
float magnAvgPause[HALF_ANAL_BLOCKL];
float signalEnergy; // Energy of |magn|.
float sumMagn;
float whiteNoiseLevel; // Initial noise estimate.
float initMagnEst[HALF_ANAL_BLOCKL]; // Initial magnitude spectrum estimate.
float pinkNoiseNumerator; // Pink noise parameter: numerator.
float pinkNoiseExp; // Pink noise parameter: power of frequencies.
float parametricNoise[HALF_ANAL_BLOCKL];
// Parameters for feature extraction.
NSParaExtract featureExtractionParams;
// Histograms for parameter estimation.
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
// Quantities for high band estimate.
float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT.
// Buffering data for HB.
float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
} NoiseSuppressionC;
#ifdef __cplusplus
extern "C" {
@ -118,16 +123,16 @@ extern "C" {
* This function initializes a noise suppression instance
*
* Input:
* - inst : Instance that should be initialized
* - self : Instance that should be initialized
* - fs : Sampling frequency
*
* Output:
* - inst : Initialized instance
* - self : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs);
int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);
/****************************************************************************
* WebRtcNs_set_policy_core(...)
@ -135,16 +140,30 @@ int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs);
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
* - self : Instance that should be initialized
* - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
*
* Output:
* - NS_inst : Initialized instance
* - self : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy_core(NSinst_t* inst, int mode);
int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);
/****************************************************************************
* WebRtcNs_AnalyzeCore
*
* Estimate the background noise.
*
* Input:
* - self : Instance that should be initialized
* - speechFrame : Input speech frame for lower band
*
* Output:
* - self : Updated instance
*/
void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);
/****************************************************************************
* WebRtcNs_ProcessCore
@ -152,28 +171,20 @@ int WebRtcNs_set_policy_core(NSinst_t* inst, int mode);
* Do noise suppression.
*
* Input:
* - inst : Instance that should be initialized
* - inFrameLow : Input speech frame for lower band
* - inFrameHigh : Input speech frame for higher band
* - self : Instance that should be initialized
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - inst : Updated instance
* - outFrameLow : Output speech frame for lower band
* - outFrameHigh : Output speech frame for higher band
*
* Return value : 0 - OK
* -1 - Error
* - self : Updated instance
* - outFrame : Output speech frame for each band
*/
int WebRtcNs_ProcessCore(NSinst_t* inst,
short* inFrameLow,
short* inFrameHigh,
short* outFrameLow,
short* outFrameHigh);
void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
const float* const* inFrame,
size_t num_bands,
float* const* outFrame);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_

File diff suppressed because it is too large.


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -11,95 +11,103 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
#include "typedefs.h"
#include "signal_processing_library.h"
#include "nsx_defines.h"
#ifdef NS_FILEDEBUG
#include <stdio.h>
#endif
typedef struct NsxInst_t_ {
WebRtc_UWord32 fs;
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
#include "webrtc/typedefs.h"
const WebRtc_Word16* window;
WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX];
WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX];
WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL];
WebRtc_UWord16 overdrive; /* Q8 */
WebRtc_UWord16 denoiseBound; /* Q14 */
const WebRtc_Word16* factor2Table;
WebRtc_Word16 noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstCounter[SIMULT];
WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL];
typedef struct NoiseSuppressionFixedC_ {
uint32_t fs;
WebRtc_Word16 anaLen;
int anaLen2;
int magnLen;
const int16_t* window;
int16_t analysisBuffer[ANAL_BLOCKL_MAX];
int16_t synthesisBuffer[ANAL_BLOCKL_MAX];
uint16_t noiseSupFilter[HALF_ANAL_BLOCKL];
uint16_t overdrive; /* Q8 */
uint16_t denoiseBound; /* Q14 */
const int16_t* factor2Table;
int16_t noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
int16_t noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
int16_t noiseEstCounter[SIMULT];
int16_t noiseEstQuantile[HALF_ANAL_BLOCKL];
size_t anaLen;
size_t anaLen2;
size_t magnLen;
int aggrMode;
int stages;
int initFlag;
int gainMap;
WebRtc_Word32 maxLrt;
WebRtc_Word32 minLrt;
WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8
WebRtc_Word32 featureLogLrt;
WebRtc_Word32 thresholdLogLrt;
WebRtc_Word16 weightLogLrt;
int32_t maxLrt;
int32_t minLrt;
// Log LRT factor with time-smoothing in Q8.
int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
int32_t featureLogLrt;
int32_t thresholdLogLrt;
int16_t weightLogLrt;
WebRtc_UWord32 featureSpecDiff;
WebRtc_UWord32 thresholdSpecDiff;
WebRtc_Word16 weightSpecDiff;
uint32_t featureSpecDiff;
uint32_t thresholdSpecDiff;
int16_t weightSpecDiff;
WebRtc_UWord32 featureSpecFlat;
WebRtc_UWord32 thresholdSpecFlat;
WebRtc_Word16 weightSpecFlat;
uint32_t featureSpecFlat;
uint32_t thresholdSpecFlat;
int16_t weightSpecFlat;
WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum
WebRtc_UWord32 magnEnergy;
WebRtc_UWord32 sumMagn;
WebRtc_UWord32 curAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergyTmp;
// Conservative estimate of noise spectrum.
int32_t avgMagnPause[HALF_ANAL_BLOCKL];
uint32_t magnEnergy;
uint32_t sumMagn;
uint32_t curAvgMagnEnergy;
uint32_t timeAvgMagnEnergy;
uint32_t timeAvgMagnEnergyTmp;
WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate
WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate
WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator
WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq
int minNorm; //smallest normalization factor
int zeroInputSignal; //zero input signal flag
uint32_t whiteNoiseLevel; // Initial noise estimate.
// Initial magnitude spectrum estimate.
uint32_t initMagnEst[HALF_ANAL_BLOCKL];
// Pink noise parameters:
int32_t pinkNoiseNumerator; // Numerator.
int32_t pinkNoiseExp; // Power of freq.
int minNorm; // Smallest normalization factor.
int zeroInputSignal; // Zero input signal flag.
WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame
WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14
// Noise spectrum from previous frame.
uint32_t prevNoiseU32[HALF_ANAL_BLOCKL];
// Magnitude spectrum from previous frame.
uint16_t prevMagnU16[HALF_ANAL_BLOCKL];
// Prior speech/noise probability in Q14.
int16_t priorNonSpeechProb;
int blockIndex; //frame index counter
int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model
int blockIndex; // Frame index counter.
// Parameter for updating or estimating thresholds/weights for prior model.
int modelUpdate;
int cntThresUpdate;
//histograms for parameter estimation
WebRtc_Word16 histLrt[HIST_PAR_EST];
WebRtc_Word16 histSpecFlat[HIST_PAR_EST];
WebRtc_Word16 histSpecDiff[HIST_PAR_EST];
// Histograms for parameter estimation.
int16_t histLrt[HIST_PAR_EST];
int16_t histSpecFlat[HIST_PAR_EST];
int16_t histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */
// Quantities for high band estimate.
int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
int qNoise;
int prevQNoise;
int prevQMagn;
int blockLen10ms;
size_t blockLen10ms;
WebRtc_Word16 real[ANAL_BLOCKL_MAX];
WebRtc_Word16 imag[ANAL_BLOCKL_MAX];
WebRtc_Word32 energyIn;
int16_t real[ANAL_BLOCKL_MAX];
int16_t imag[ANAL_BLOCKL_MAX];
int32_t energyIn;
int scaleEnergyIn;
int normData;
} NsxInst_t;
struct RealFFT* real_fft;
} NoiseSuppressionFixedC;
#ifdef __cplusplus
extern "C"
@ -121,7 +129,7 @@ extern "C"
* Return value : 0 - Ok
* -1 - Error
*/
WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);
/****************************************************************************
* WebRtcNsx_set_policy_core(...)
@ -129,16 +137,16 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
*
* Output:
* - NS_inst : Initialized instance
* - inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);
/****************************************************************************
* WebRtcNsx_ProcessCore
@ -147,34 +155,109 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
*
* Input:
* - inst : Instance that should be initialized
* - inFrameLow : Input speech frame for lower band
* - inFrameHigh : Input speech frame for higher band
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - inst : Updated instance
* - outFrameLow : Output speech frame for lower band
* - outFrameHigh : Output speech frame for higher band
*
* Return value : 0 - OK
* -1 - Error
* - outFrame : Output speech frame for each band
*/
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh,
short* outFrameLow, short* outFrameHigh);
void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
const short* const* inFrame,
int num_bands,
short* const* outFrame);
/****************************************************************************
* Internal functions and variable declarations shared with optimized code.
* Some function pointers, for internal functions shared by ARM NEON and
* generic C code.
*/
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset);
// Noise Estimation.
typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise);
extern NoiseEstimation WebRtcNsx_NoiseEstimation;
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise);
// Filter the data in the frequency domain, and create spectrum.
typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201];
// For the noise suppression process: synthesis, read out the fully processed
// segment, and update the synthesis buffer.
typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;
// Update analysis buffer for lower band, and window data before FFT.
typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
// Denormalize the real-valued signal |in|, the output from inverse FFT.
typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
int16_t* in,
int factor);
extern Denormalize WebRtcNsx_Denormalize;
// Normalize the real-valued signal |in|, the input to forward FFT.
typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
const int16_t* in,
int16_t* out);
extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
// Compute speech/noise probability.
// Intended to be private.
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
uint16_t* nonSpeechProbFinal,
uint32_t* priorLocSnr,
uint32_t* postLocSnr);
#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
// are declared below and defined in file nsx_core_neon.c.
void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise);
void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
#endif
#if defined(MIPS32_LE)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for MIPS platforms
// are declared below and defined in file nsx_core_mips.c.
void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
const int16_t* in,
int16_t* out);
#if defined(MIPS_DSP_R1_LE)
void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
int16_t* in,
int factor);
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
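A hedged sketch (not the actual nsx_core.c code) of the dispatch pattern these declarations support: the generic C routine is installed first and an optimized variant overrides it where available. NoiseEstimationC and InitFunctionPointers are illustrative names:

#include "webrtc/modules/audio_processing/ns/nsx_core.h"

static void NoiseEstimationC(NoiseSuppressionFixedC* inst, uint16_t* magn,
                             uint32_t* noise, int16_t* q_noise) {
  /* Stand-in for the generic implementation, normally static in nsx_core.c. */
  (void)inst; (void)magn; (void)noise; (void)q_noise;
}

static void InitFunctionPointers(void) {
  WebRtcNsx_NoiseEstimation = NoiseEstimationC;
#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
  /* Override with the NEON variant declared above. */
  WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
#endif
}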

Some files were not shown because too many files have changed in this diff.