Update to current webrtc library

This update is based on upstream library commit
3326535126e435f1ba647885ce43a8f0f3d317eb, which corresponds to Chromium
88.0.4290.1.
This commit is contained in:
Arun Raghavan
2020-10-12 18:08:02 -04:00
parent b1b02581d3
commit bcec8b0b21
859 changed files with 76187 additions and 49580 deletions

View File

@ -6,281 +6,606 @@
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("//build/config/arm.gni")
import("//third_party/protobuf/proto_library.gni")
import("../../build/webrtc.gni")
declare_args() {
# Outputs some low-level debug files.
aec_debug_dump = false
# Disables the usual mode where we trust the reported system delay
# values the AEC receives. The corresponding define is set appropriately
# in the code, but it can be force-enabled here for testing.
aec_untrusted_delay_for_testing = false
import("../../webrtc.gni")
if (rtc_enable_protobuf) {
import("//third_party/protobuf/proto_library.gni")
}
source_set("audio_processing") {
# Translates the `apm_debug_dump` GN arg into the WEBRTC_APM_DEBUG_DUMP
# preprocessor define (1 when enabled, 0 when disabled) for targets that
# add this config.
config("apm_debug_dump") {
if (apm_debug_dump) {
defines = [ "WEBRTC_APM_DEBUG_DUMP=1" ]
} else {
defines = [ "WEBRTC_APM_DEBUG_DUMP=0" ]
}
}
# webrtc::Config support library. Visibility is restricted to targets in
# this BUILD file (":*"), so it is an internal implementation detail of
# the audio_processing module.
rtc_library("config") {
visibility = [ ":*" ]
sources = [
"include/config.cc",
"include/config.h",
]
deps = [ "../../rtc_base/system:rtc_export" ]
}
rtc_library("api") {
visibility = [ "*" ]
sources = [
"include/audio_processing.cc",
"include/audio_processing.h",
]
deps = [
":audio_frame_view",
":audio_processing_statistics",
":config",
"../../api:array_view",
"../../api:scoped_refptr",
"../../api/audio:aec3_config",
"../../api/audio:audio_frame_api",
"../../api/audio:echo_control",
"../../rtc_base:deprecation",
"../../rtc_base:rtc_base_approved",
"../../rtc_base/system:arch",
"../../rtc_base/system:file_wrapper",
"../../rtc_base/system:rtc_export",
"agc:gain_control_interface",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
rtc_library("audio_frame_proxies") {
visibility = [ "*" ]
sources = [
"include/audio_frame_proxies.cc",
"include/audio_frame_proxies.h",
]
deps = [
":api",
":audio_frame_view",
"../../api/audio:audio_frame_api",
]
}
rtc_library("audio_buffer") {
visibility = [ "*" ]
configs += [ ":apm_debug_dump" ]
sources = [
"aec/aec_core.c",
"aec/aec_core.h",
"aec/aec_core_internal.h",
"aec/aec_rdft.c",
"aec/aec_rdft.h",
"aec/aec_resampler.c",
"aec/aec_resampler.h",
"aec/echo_cancellation.c",
"aec/echo_cancellation_internal.h",
"aec/include/echo_cancellation.h",
"aecm/aecm_core.c",
"aecm/aecm_core.h",
"aecm/echo_control_mobile.c",
"aecm/include/echo_control_mobile.h",
"agc/agc.cc",
"agc/agc.h",
"agc/agc_manager_direct.cc",
"agc/agc_manager_direct.h",
"agc/gain_map_internal.h",
"agc/histogram.cc",
"agc/histogram.h",
"agc/legacy/analog_agc.c",
"agc/legacy/analog_agc.h",
"agc/legacy/digital_agc.c",
"agc/legacy/digital_agc.h",
"agc/legacy/gain_control.h",
"agc/utility.cc",
"agc/utility.h",
"audio_buffer.cc",
"audio_buffer.h",
"audio_processing_impl.cc",
"audio_processing_impl.h",
"beamformer/array_util.cc",
"beamformer/array_util.h",
"beamformer/beamformer.h",
"beamformer/complex_matrix.h",
"beamformer/covariance_matrix_generator.cc",
"beamformer/covariance_matrix_generator.h",
"beamformer/matrix.h",
"beamformer/nonlinear_beamformer.cc",
"beamformer/nonlinear_beamformer.h",
"common.h",
"echo_cancellation_impl.cc",
"echo_cancellation_impl.h",
"echo_control_mobile_impl.cc",
"echo_control_mobile_impl.h",
"gain_control_impl.cc",
"gain_control_impl.h",
"high_pass_filter_impl.cc",
"high_pass_filter_impl.h",
"include/audio_processing.h",
"intelligibility/intelligibility_enhancer.cc",
"intelligibility/intelligibility_enhancer.h",
"intelligibility/intelligibility_utils.cc",
"intelligibility/intelligibility_utils.h",
"level_estimator_impl.cc",
"level_estimator_impl.h",
"logging/aec_logging.h",
"logging/aec_logging_file_handling.cc",
"logging/aec_logging_file_handling.h",
"noise_suppression_impl.cc",
"noise_suppression_impl.h",
"processing_component.cc",
"processing_component.h",
"rms_level.cc",
"rms_level.h",
"splitting_filter.cc",
"splitting_filter.h",
"three_band_filter_bank.cc",
"three_band_filter_bank.h",
"transient/common.h",
"transient/daubechies_8_wavelet_coeffs.h",
"transient/dyadic_decimator.h",
"transient/moving_moments.cc",
"transient/moving_moments.h",
"transient/transient_detector.cc",
"transient/transient_detector.h",
"transient/transient_suppressor.cc",
"transient/transient_suppressor.h",
"transient/wpd_node.cc",
"transient/wpd_node.h",
"transient/wpd_tree.cc",
"transient/wpd_tree.h",
"typing_detection.cc",
"typing_detection.h",
"utility/delay_estimator.c",
"utility/delay_estimator.h",
"utility/delay_estimator_internal.h",
"utility/delay_estimator_wrapper.c",
"utility/delay_estimator_wrapper.h",
"vad/common.h",
"vad/gmm.cc",
"vad/gmm.h",
"vad/noise_gmm_tables.h",
"vad/pitch_based_vad.cc",
"vad/pitch_based_vad.h",
"vad/pitch_internal.cc",
"vad/pitch_internal.h",
"vad/pole_zero_filter.cc",
"vad/pole_zero_filter.h",
"vad/standalone_vad.cc",
"vad/standalone_vad.h",
"vad/vad_audio_proc.cc",
"vad/vad_audio_proc.h",
"vad/vad_audio_proc_internal.h",
"vad/vad_circular_buffer.cc",
"vad/vad_circular_buffer.h",
"vad/voice_activity_detector.cc",
"vad/voice_activity_detector.h",
"vad/voice_gmm_tables.h",
"voice_detection_impl.cc",
"voice_detection_impl.h",
]
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
defines = []
deps = [
":api",
"../../api:array_view",
"../../common_audio",
"../../common_audio:common_audio_c",
"../../rtc_base:checks",
]
}
# Standalone high-pass filter submodule. Depends on :audio_buffer, so it
# presumably operates directly on AudioBuffer data — confirm in
# high_pass_filter.cc.
rtc_library("high_pass_filter") {
visibility = [ "*" ]
sources = [
"high_pass_filter.cc",
"high_pass_filter.h",
]
defines = []
deps = [
":audio_buffer",
"../../api:array_view",
"../../rtc_base:checks",
"utility:cascaded_biquad_filter",
]
}
rtc_source_set("aec_dump_interface") {
visibility = [ "*" ]
sources = [
"include/aec_dump.cc",
"include/aec_dump.h",
]
deps = [
":api",
":audio_frame_view",
"../../rtc_base:deprecation",
]
}
rtc_library("audio_processing") {
visibility = [ "*" ]
configs += [ ":apm_debug_dump" ]
sources = [
"audio_processing_builder_impl.cc",
"audio_processing_impl.cc",
"audio_processing_impl.h",
"common.h",
"echo_control_mobile_impl.cc",
"echo_control_mobile_impl.h",
"echo_detector/circular_buffer.cc",
"echo_detector/circular_buffer.h",
"echo_detector/mean_variance_estimator.cc",
"echo_detector/mean_variance_estimator.h",
"echo_detector/moving_max.cc",
"echo_detector/moving_max.h",
"echo_detector/normalized_covariance_estimator.cc",
"echo_detector/normalized_covariance_estimator.h",
"gain_control_impl.cc",
"gain_control_impl.h",
"gain_controller2.cc",
"gain_controller2.h",
"level_estimator.cc",
"level_estimator.h",
"render_queue_item_verifier.h",
"residual_echo_detector.cc",
"residual_echo_detector.h",
"typing_detection.cc",
"typing_detection.h",
]
defines = []
deps = [
"../..:webrtc_common",
"../audio_coding:isac",
":aec_dump_interface",
":api",
":apm_logging",
":audio_buffer",
":audio_frame_proxies",
":audio_frame_view",
":audio_processing_statistics",
":config",
":high_pass_filter",
":optionally_built_submodule_creators",
":rms_level",
":voice_detection",
"../../api:array_view",
"../../api:function_view",
"../../api/audio:aec3_config",
"../../api/audio:audio_frame_api",
"../../api/audio:echo_control",
"../../audio/utility:audio_frame_operations",
"../../common_audio:common_audio_c",
"../../common_audio/third_party/ooura:fft_size_256",
"../../rtc_base:checks",
"../../rtc_base:deprecation",
"../../rtc_base:gtest_prod",
"../../rtc_base:ignore_wundef",
"../../rtc_base:refcount",
"../../rtc_base:safe_minmax",
"../../rtc_base:sanitizer",
"../../rtc_base/synchronization:mutex",
"../../rtc_base/system:rtc_export",
"../../system_wrappers",
"../../system_wrappers:field_trial",
"../../system_wrappers:metrics",
"aec3",
"aec_dump:aec_dump",
"aecm:aecm_core",
"agc",
"agc:gain_control_interface",
"agc:legacy_agc",
"agc2:adaptive_digital",
"agc2:fixed_digital",
"agc2:gain_applier",
"ns",
"transient:transient_suppressor_api",
"vad",
]
if (aec_debug_dump) {
defines += [ "WEBRTC_AEC_DEBUG_DUMP" ]
}
if (aec_untrusted_delay_for_testing) {
defines += [ "WEBRTC_UNTRUSTED_DELAY" ]
}
if (rtc_enable_protobuf) {
defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ]
deps += [ ":audioproc_debug_proto" ]
}
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_NS_FIXED" ]
sources += [
"ns/include/noise_suppression_x.h",
"ns/noise_suppression_x.c",
"ns/nsx_core.c",
"ns/nsx_core.h",
"ns/nsx_defines.h",
]
if (current_cpu == "mipsel") {
sources += [ "ns/nsx_core_mips.c" ]
} else {
sources += [ "ns/nsx_core_c.c" ]
}
} else {
defines += [ "WEBRTC_NS_FLOAT" ]
sources += [
"ns/defines.h",
"ns/include/noise_suppression.h",
"ns/noise_suppression.c",
"ns/ns_core.c",
"ns/ns_core.h",
"ns/windows_private.h",
]
}
if (current_cpu == "x86" || current_cpu == "x64") {
deps += [ ":audio_processing_sse2" ]
}
if (rtc_build_with_neon) {
deps += [ ":audio_processing_neon" ]
}
if (current_cpu == "mipsel") {
sources += [ "aecm/aecm_core_mips.c" ]
if (mips_float_abi == "hard") {
sources += [
"aec/aec_core_mips.c",
"aec/aec_rdft_mips.c",
]
}
} else {
sources += [ "aecm/aecm_core_c.c" ]
}
if (is_win) {
cflags = [
# TODO(jschuh): Bug 1348: fix this warning.
"/wd4267", # size_t to int truncations
]
}
if (is_clang) {
# Suppress warnings from Chrome's Clang plugins.
# See http://code.google.com/p/webrtc/issues/detail?id=163 for details.
configs -= [ "//build/config/clang:find_bad_constructs" ]
}
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
deps += [
"../../base:rtc_base_approved",
"../../common_audio",
"../../common_audio:fir_filter",
"../../common_audio:fir_filter_factory",
"../../rtc_base:rtc_base_approved",
"../../system_wrappers",
]
if (rtc_enable_protobuf) {
deps += [ "aec_dump:aec_dump_impl" ]
} else {
deps += [ "aec_dump:null_aec_dump_factory" ]
}
}
# Voice-activity detection submodule; wraps the C VAD from
# common_audio_c — presumably, given the dependency; confirm in
# voice_detection.cc.
rtc_library("voice_detection") {
sources = [
"voice_detection.cc",
"voice_detection.h",
]
deps = [
":api",
":audio_buffer",
"../../api/audio:audio_frame_api",
"../../common_audio:common_audio_c",
"../../rtc_base:checks",
]
}
# Factory helpers for submodules that can be compiled out of the build;
# currently covers the transient suppressor (API + implementation deps).
rtc_library("optionally_built_submodule_creators") {
sources = [
"optionally_built_submodule_creators.cc",
"optionally_built_submodule_creators.h",
]
deps = [
"transient:transient_suppressor_api",
"transient:transient_suppressor_impl",
]
}
# RMS level computation used for audio level reporting (rms_level.h).
rtc_source_set("rms_level") {
visibility = [ "*" ]
sources = [
"rms_level.cc",
"rms_level.h",
]
deps = [
"../../api:array_view",
"../../rtc_base:checks",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
# Statistics structs exposed through the public audio-processing API
# (include/audio_processing_statistics.h).
rtc_library("audio_processing_statistics") {
visibility = [ "*" ]
sources = [
"include/audio_processing_statistics.cc",
"include/audio_processing_statistics.h",
]
deps = [ "../../rtc_base/system:rtc_export" ]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
# Header-only non-owning view over audio frame data.
rtc_source_set("audio_frame_view") {
sources = [ "include/audio_frame_view.h" ]
deps = [ "../../api:array_view" ]
}
if (rtc_enable_protobuf) {
  # Protocol buffer describing the audio-processing debug-dump format.
  # NOTE: the previous text assigned `sources` and `proto_out_dir` twice
  # (a stale value immediately followed by the current one). GN rejects
  # replacing a nonempty list, so only the current values are kept.
  proto_library("audioproc_debug_proto") {
    sources = [ "debug.proto" ]
    proto_out_dir = "modules/audio_processing"
  }
}
if (current_cpu == "x86" || current_cpu == "x64") {
source_set("audio_processing_sse2") {
sources = [
"aec/aec_core_sse2.c",
"aec/aec_rdft_sse2.c",
]
if (is_posix) {
cflags = [ "-msse2" ]
}
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
}
rtc_library("apm_logging") {
configs += [ ":apm_debug_dump" ]
sources = [
"logging/apm_data_dumper.cc",
"logging/apm_data_dumper.h",
]
deps = [
"../../api:array_view",
"../../common_audio",
"../../rtc_base:checks",
"../../rtc_base:rtc_base_approved",
]
defines = []
}
if (rtc_build_with_neon) {
source_set("audio_processing_neon") {
sources = [
"aec/aec_core_neon.c",
"aec/aec_rdft_neon.c",
"aecm/aecm_core_neon.c",
"ns/nsx_core_neon.c",
if (rtc_include_tests) {
rtc_source_set("mocks") {
testonly = true
sources = [ "include/mock_audio_processing.h" ]
deps = [
":aec_dump_interface",
":api",
":audio_buffer",
":audio_processing",
":audio_processing_statistics",
"../../test:test_support",
]
}
group("audio_processing_tests") {
testonly = true
deps = [
":audioproc_test_utils",
"transient:click_annotate",
"transient:transient_suppression_test",
]
if (current_cpu != "arm64") {
# Enable compilation for the NEON instruction set. This is needed
# since //build/config/arm.gni only enables NEON for iOS, not Android.
# This provides the same functionality as webrtc/build/arm_neon.gypi.
configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
cflags = [ "-mfpu=neon" ]
if (rtc_enable_protobuf) {
deps += [
":audioproc_unittest_proto",
"aec_dump:aec_dump_unittests",
"test/conversational_speech",
"test/py_quality_assessment",
]
}
}
rtc_library("audio_processing_unittests") {
testonly = true
configs += [ ":apm_debug_dump" ]
sources = [
"audio_buffer_unittest.cc",
"audio_frame_view_unittest.cc",
"config_unittest.cc",
"echo_control_mobile_unittest.cc",
"gain_controller2_unittest.cc",
"splitting_filter_unittest.cc",
"test/fake_recording_device_unittest.cc",
]
deps = [
":analog_mic_simulation",
":api",
":apm_logging",
":audio_buffer",
":audio_frame_view",
":audio_processing",
":audioproc_test_utils",
":config",
":high_pass_filter",
":mocks",
":voice_detection",
"../../api:array_view",
"../../api:scoped_refptr",
"../../api/audio:aec3_config",
"../../api/audio:aec3_factory",
"../../common_audio",
"../../common_audio:common_audio_c",
"../../rtc_base",
"../../rtc_base:checks",
"../../rtc_base:gtest_prod",
"../../rtc_base:ignore_wundef",
"../../rtc_base:protobuf_utils",
"../../rtc_base:rtc_base_approved",
"../../rtc_base:rtc_base_tests_utils",
"../../rtc_base:safe_minmax",
"../../rtc_base:task_queue_for_test",
"../../rtc_base/synchronization:mutex",
"../../rtc_base/system:arch",
"../../rtc_base/system:file_wrapper",
"../../system_wrappers",
"../../test:fileutils",
"../../test:rtc_expect_death",
"../../test:test_support",
"../audio_coding:neteq_input_audio_tools",
"aec_dump:mock_aec_dump_unittests",
"agc:agc_unittests",
"agc2:adaptive_digital_unittests",
"agc2:biquad_filter_unittests",
"agc2:fixed_digital_unittests",
"agc2:noise_estimator_unittests",
"agc2:rnn_vad_with_level_unittests",
"agc2:test_utils",
"agc2/rnn_vad:unittests",
"test/conversational_speech:unittest",
"transient:transient_suppression_unittests",
"utility:legacy_delay_estimator_unittest",
"utility:pffft_wrapper_unittest",
"vad:vad_unittests",
"//testing/gtest",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
defines = []
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ]
} else {
defines += [ "WEBRTC_AUDIOPROC_FLOAT_PROFILE" ]
}
# Disable LTO on NEON targets due to compiler bug.
# TODO(fdegans): Enable this. See crbug.com/408997.
if (rtc_use_lto) {
cflags -= [
"-flto",
"-ffat-lto-objects",
if (rtc_enable_protobuf) {
defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ]
deps += [
":audioproc_debug_proto",
":audioproc_protobuf_utils",
":audioproc_test_utils",
":audioproc_unittest_proto",
":optionally_built_submodule_creators",
":rms_level",
":runtime_settings_protobuf_utils",
"../../api/audio:audio_frame_api",
"../../api/audio:echo_control",
"../../rtc_base:rtc_base_tests_utils",
"../../rtc_base:rtc_task_queue",
"aec_dump",
"aec_dump:aec_dump_unittests",
]
absl_deps += [ "//third_party/abseil-cpp/absl/flags:flag" ]
sources += [
"audio_processing_impl_locking_unittest.cc",
"audio_processing_impl_unittest.cc",
"audio_processing_unittest.cc",
"echo_control_mobile_bit_exact_unittest.cc",
"echo_detector/circular_buffer_unittest.cc",
"echo_detector/mean_variance_estimator_unittest.cc",
"echo_detector/moving_max_unittest.cc",
"echo_detector/normalized_covariance_estimator_unittest.cc",
"gain_control_unittest.cc",
"high_pass_filter_unittest.cc",
"level_estimator_unittest.cc",
"residual_echo_detector_unittest.cc",
"rms_level_unittest.cc",
"test/debug_dump_replayer.cc",
"test/debug_dump_replayer.h",
"test/debug_dump_test.cc",
"test/echo_canceller_test_tools.cc",
"test/echo_canceller_test_tools.h",
"test/echo_canceller_test_tools_unittest.cc",
"test/echo_control_mock.h",
"test/test_utils.h",
"voice_detection_unittest.cc",
]
}
}
rtc_library("audio_processing_perf_tests") {
testonly = true
configs += [ ":apm_debug_dump" ]
sources = [ "audio_processing_performance_unittest.cc" ]
deps = [
":audio_processing",
":audioproc_test_utils",
"../../api:array_view",
"../../rtc_base:protobuf_utils",
"../../rtc_base:rtc_base_approved",
"../../system_wrappers",
"../../test:perf_test",
"../../test:test_support",
]
}
# Test helper that simulates an analog microphone / fake recording
# device, including the analog gain map from agc:gain_map.
rtc_library("analog_mic_simulation") {
sources = [
"test/fake_recording_device.cc",
"test/fake_recording_device.h",
]
deps = [
"../../api:array_view",
"../../api/audio:audio_frame_api",
"../../common_audio",
"../../rtc_base:checks",
"../../rtc_base:rtc_base_approved",
"../../rtc_base:safe_minmax",
"agc:gain_map",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
if (rtc_enable_protobuf) {
rtc_library("audioproc_f_impl") {
testonly = true
configs += [ ":apm_debug_dump" ]
sources = [
"test/aec_dump_based_simulator.cc",
"test/aec_dump_based_simulator.h",
"test/api_call_statistics.cc",
"test/api_call_statistics.h",
"test/audio_processing_simulator.cc",
"test/audio_processing_simulator.h",
"test/audioproc_float_impl.cc",
"test/audioproc_float_impl.h",
"test/wav_based_simulator.cc",
"test/wav_based_simulator.h",
]
deps = [
":analog_mic_simulation",
":api",
":apm_logging",
":audio_processing",
":audioproc_debug_proto",
":audioproc_protobuf_utils",
":audioproc_test_utils",
":runtime_settings_protobuf_utils",
"../../api/audio:aec3_config_json",
"../../api/audio:aec3_factory",
"../../common_audio",
"../../rtc_base:checks",
"../../rtc_base:ignore_wundef",
"../../rtc_base:protobuf_utils",
"../../rtc_base:rtc_base_approved",
"../../rtc_base:rtc_json",
"../../rtc_base:task_queue_for_test",
"../../rtc_base/system:file_wrapper",
"../../system_wrappers",
"../../system_wrappers:field_trial",
"../../test:test_support",
"aec_dump",
"aec_dump:aec_dump_impl",
"//testing/gtest",
]
absl_deps = [
"//third_party/abseil-cpp/absl/flags:flag",
"//third_party/abseil-cpp/absl/flags:parse",
"//third_party/abseil-cpp/absl/strings",
"//third_party/abseil-cpp/absl/types:optional",
]
} # audioproc_f_impl
}
if (rtc_enable_protobuf) {
proto_library("audioproc_unittest_proto") {
sources = [ "test/unittest.proto" ]
proto_out_dir = "modules/audio_processing/test"
}
rtc_library("audioproc_protobuf_utils") {
sources = [
"test/protobuf_utils.cc",
"test/protobuf_utils.h",
]
deps = [
":audioproc_debug_proto",
"../../rtc_base:checks",
"../../rtc_base:ignore_wundef",
"../../rtc_base:protobuf_utils",
"../../rtc_base:rtc_base_approved",
"../../rtc_base/system:arch",
]
}
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
rtc_library("runtime_settings_protobuf_utils") {
testonly = true
sources = [
"test/runtime_setting_util.cc",
"test/runtime_setting_util.h",
]
deps = [
"../../common_audio",
]
deps = [
":api",
":audioproc_debug_proto",
":audioproc_protobuf_utils",
"../../rtc_base:checks",
]
}
}
}
# Shared test utilities for audio-processing tests: buffer tools,
# bit-exactness helpers, performance timers, simulator buffers, and a
# test-only AudioProcessing builder.
rtc_library("audioproc_test_utils") {
visibility = [ "*" ]
testonly = true
sources = [
"test/audio_buffer_tools.cc",
"test/audio_buffer_tools.h",
"test/audio_processing_builder_for_testing.cc",
"test/audio_processing_builder_for_testing.h",
"test/bitexactness_tools.cc",
"test/bitexactness_tools.h",
"test/performance_timer.cc",
"test/performance_timer.h",
"test/simulator_buffers.cc",
"test/simulator_buffers.h",
"test/test_utils.cc",
"test/test_utils.h",
]
configs += [ ":apm_debug_dump" ]
deps = [
":api",
":audio_buffer",
":audio_processing",
"../../api:array_view",
"../../api/audio:audio_frame_api",
"../../common_audio",
"../../rtc_base:checks",
"../../rtc_base:rtc_base_approved",
"../../rtc_base/system:arch",
"../../system_wrappers",
"../../test:fileutils",
"../../test:test_support",
"../audio_coding:neteq_input_audio_tools",
"//testing/gtest",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}

View File

@ -1,32 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
#include "webrtc/typedefs.h"
// 16-byte alignment markers for SIMD-friendly tables: MSVC places the
// attribute before the declaration, gcc/icc place it after, so both
// macros are emitted around each declaration and one expands to nothing.
#ifdef _MSC_VER /* visual c++ */
#define ALIGN16_BEG __declspec(align(16))
#define ALIGN16_END
#else /* gcc or icc */
#define ALIGN16_BEG
#define ALIGN16_END __attribute__((aligned(16)))
#endif
// Constant tables shared by the scalar and SIMD AEC implementations
// (65 = PART_LEN1 entries each).
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];
// Smoothing coefficients for extended-filter vs. normal AEC modes.
extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
// Presumably a floor applied to the far-end PSD — confirm in aec_core.c.
extern const float WebRtcAec_kMinFarendPSD;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_

File diff suppressed because it is too large Load Diff

View File

@ -1,129 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* Specifies the interface for the AEC core.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
#define FRAME_LEN 80
#define PART_LEN 64 // Length of partition
#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
#define NUM_HIGH_BANDS_MAX 2 // Max number of high bands
// A complex number stored as {real, imaginary}.
typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
// as long arrays of float, all the real parts followed by all the imaginary
// ones (complex_t[SIZE] -> float[2][SIZE]). This allows SIMD optimizations and
// is better than two arrays (one for the real parts and one for the imaginary
// parts) as this other way would require two pointers instead of one and cause
// extra register spilling. This also allows the offsets to be calculated at
// compile time.
// Metrics
enum {
kOffsetLevel = -100
};
// Running statistics for one echo metric. The sum/counter fields are
// presumably the accumulators behind the averages — confirm in aec_core.c.
typedef struct Stats {
float instant;
float average;
float min;
float max;
float sum;
float hisum;
float himean;
int counter;
int hicounter;
} Stats;
// Opaque handle; the full definition lives in aec_core_internal.h.
typedef struct AecCore AecCore;
AecCore* WebRtcAec_CreateAec(); // Returns NULL on error.
// Releases an instance created by WebRtcAec_CreateAec().
void WebRtcAec_FreeAec(AecCore* aec);
// (Re)initializes the instance for the given sample frequency.
int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
// Architecture-specific initializers, compiled in per-platform builds.
void WebRtcAec_InitAec_SSE2(void);
#if defined(MIPS_FPU_LE)
void WebRtcAec_InitAec_mips(void);
#endif
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void WebRtcAec_InitAec_neon(void);
#endif
// Buffers one partition of far-end data.
void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
// Processes near-end audio (|num_bands| bands of |num_samples| samples),
// writing the echo-cancelled result to |out|.
void WebRtcAec_ProcessFrames(AecCore* aec,
const float* const* nearend,
size_t num_bands,
size_t num_samples,
int knownDelay,
float* const* out);
// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
// Returns the number of elements moved, and adjusts |system_delay| by the
// corresponding amount in ms.
int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
// Calculates the median, standard deviation and amount of poor values among the
// delay estimates aggregated up to the first call to the function. After that
// first call the metrics are aggregated and updated every second. With poor
// values we mean values that most likely will cause the AEC to perform poorly.
// TODO(bjornv): Consider changing tests and tools to handle constant
// constant aggregation window throughout the session instead.
int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
float* fraction_poor_delays);
// Returns the echo state (1: echo, 0: no echo).
int WebRtcAec_echo_state(AecCore* self);
// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
void WebRtcAec_GetEchoStats(AecCore* self,
Stats* erl,
Stats* erle,
Stats* a_nlp);
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Debug-only accessor for the far-end time-domain ring buffer.
void* WebRtcAec_far_time_buf(AecCore* self);
#endif
// Sets local configuration modes.
void WebRtcAec_SetConfigCore(AecCore* self,
int nlp_mode,
int metrics_mode,
int delay_logging);
// Non-zero enables, zero disables.
void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
// enabled and zero if disabled.
int WebRtcAec_delay_agnostic_enabled(AecCore* self);
// Enables or disables extended filter mode. Non-zero enables, zero disables.
void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
// Returns non-zero if extended filter mode is enabled and zero if disabled.
int WebRtcAec_extended_filter_enabled(AecCore* self);
// Returns the current |system_delay|, i.e., the buffered difference between
// far-end and near-end.
int WebRtcAec_system_delay(AecCore* self);
// Sets the |system_delay| to |value|. Note that if the value is changed
// improperly, there can be a performance regression. So it should be used with
// care.
void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_

View File

@ -1,202 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core.h"
#include "webrtc/typedefs.h"
// Number of partitions for the extended filter mode. The first one is an enum
// to be used in array declarations, as it represents the maximum filter length.
enum {
kExtendedNumPartitions = 32
};
static const int kNormalNumPartitions = 12;
// Delay estimator constants, used for logging and delay compensation if
// if reported delays are disabled.
enum {
kLookaheadBlocks = 15
};
enum {
// 500 ms for 16 kHz which is equivalent with the limit of reported delays.
kHistorySizeBlocks = 125
};
// Extended filter adaptation parameters.
// TODO(ajm): No narrowband tuning yet.
static const float kExtendedMu = 0.4f;
static const float kExtendedErrorThreshold = 1.0e-6f;
// Power-level bookkeeping for one signal path. Field prefixes appear to
// be sfr = subframe and fr = frame — presumed from the abbreviated
// names; confirm against the metrics code in aec_core.c.
typedef struct PowerLevel {
float sfrsum;
int sfrcounter;
float framelevel;
float frsum;
int frcounter;
float minlevel;
float averagelevel;
} PowerLevel;
// Full state of one AEC instance (opaque to users of aec_core.h).
struct AecCore {
int farBufWritePos, farBufReadPos;
int knownDelay;
int inSamples, outSamples;
int delayEstCtr;
RingBuffer* nearFrBuf;
RingBuffer* outFrBuf;
RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
float dBuf[PART_LEN2]; // nearend
float eBuf[PART_LEN2]; // error
float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend
float xPow[PART_LEN1];
float dPow[PART_LEN1];
float dMinPow[PART_LEN1];
float dInitMinPow[PART_LEN1];
float* noisePow;
float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
// Farend windowed fft buffer.
complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd
float hNs[PART_LEN1];
float hNlFbMin, hNlFbLocalMin;
float hNlXdAvgMin;
int hNlNewMin, hNlMinCtr;
float overDrive, overDriveSm;
int nlp_mode;
float outBuf[PART_LEN];
int delayIdx;
short stNearState, echoState;
short divergeState;
int xfBufBlockPos;
RingBuffer* far_buf;
RingBuffer* far_buf_windowed;
int system_delay; // Current system delay buffered in AEC.
int mult; // sampling frequency multiple
int sampFreq;
size_t num_bands;
uint32_t seed;
float normal_mu; // stepsize
float normal_error_threshold; // error threshold
int noiseEstCtr;
PowerLevel farlevel;
PowerLevel nearlevel;
PowerLevel linoutlevel;
PowerLevel nlpoutlevel;
int metricsMode;
int stateCounter;
Stats erl;
Stats erle;
Stats aNlp;
Stats rerl;
// Quantities to control H band scaling for SWB input
int freq_avg_ic; // initial bin for averaging nlp gain
int flag_Hband_cn; // for comfort noise
float cn_scale_Hband; // scale for comfort noise in H band
int delay_metrics_delivered;
int delay_histogram[kHistorySizeBlocks];
int num_delay_values;
int delay_median;
int delay_std;
float fraction_poor_delays;
int delay_logging_enabled;
void* delay_estimator_farend;
void* delay_estimator;
// Variables associated with delay correction through signal based delay
// estimation feedback.
int signal_delay_correction;
int previous_delay;
int delay_correction_count;
int shift_offset;
float delay_quality_threshold;
int frame_count;
// 0 = delay agnostic mode (signal based delay correction) disabled.
// Otherwise enabled.
int delay_agnostic_enabled;
// 1 = extended filter mode enabled, 0 = disabled.
int extended_filter_enabled;
// Runtime selection of number of filter partitions.
int num_partitions;
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Sequence number of this AEC instance, so that different instances can
// choose different dump file names.
int instance_index;
// Number of times we've restarted dumping; used to pick new dump file names
// each time.
int debug_dump_count;
RingBuffer* far_time_buf;
rtc_WavWriter* farFile;
rtc_WavWriter* nearFile;
rtc_WavWriter* outFile;
rtc_WavWriter* outLinearFile;
FILE* e_fft_file;
#endif
};
// Dispatch points for architecture-optimized kernels. These globals are
// presumably set to scalar defaults by WebRtcAec_InitAec() and replaced
// by the WebRtcAec_InitAec_SSE2/mips/neon initializers declared in
// aec_core.h — confirm in aec_core.c.
typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
extern WebRtcAecFilterFar WebRtcAec_FilterFar;
typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]);
extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]);
extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
float efw[2][PART_LEN1],
complex_t* comfortNoiseHband,
const float* noisePow,
const float* lambda);
extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
float efw[2][PART_LEN1],
float xfw[2][PART_LEN1],
float* fft,
float* cohde,
float* cohxd);
extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_

View File

@ -1,774 +0,0 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The core AEC algorithm, which is presented with time-aligned signals.
*/
#include "webrtc/modules/audio_processing/aec/aec_core.h"
#include <math.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
// Enables comfort-noise synthesis for the high band at 32/48 kHz (checked in
// WebRtcAec_ComfortNoise_mips below).
static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
// Suppression weighting and over-drive curves; defined in aec_common/aec_core.
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];
// MIPS-optimized comfort-noise generator.
//
// Fills the error spectrum |efw| with randomly-phased noise shaped by
// |noisePow| wherever the suppression gain |lambda| is below 1, and — when
// the sample rate is 32 or 48 kHz and flagHbandCn is set — synthesizes an
// averaged comfort-noise spectrum for the high band in |comfortNoiseHband|.
// The hot loops are hand-written MIPS FPU assembly; MIPS32R2 builds take the
// fused madd.s/nmsub.s paths selected by the MIPS32_R2_LE conditionals.
void WebRtcAec_ComfortNoise_mips(AecCore* aec,
                                 float efw[2][PART_LEN1],
                                 complex_t* comfortNoiseHband,
                                 const float* noisePow,
                                 const float* lambda) {
  int i, num;
  float rand[PART_LEN];
  float noise, noiseAvg, tmp, tmpAvg;
  int16_t randW16[PART_LEN];
  complex_t u[PART_LEN1];
  const float pi2 = 6.28318530717959f;
  // Scale factor mapping an int16 random value to a phase in [0, 2*pi).
  const float pi2t = pi2 / 32768;
  // Generate a uniform random array on [0 1]
  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
  int16_t* randWptr = randW16;
  float randTemp, randTemp2, randTemp3, randTemp4;
  int32_t tmp1s, tmp2s, tmp3s, tmp4s;
  // Convert four random int16 values per iteration to phases (int -> float,
  // multiply by pi2t), then build unit phasors u[i] = (cos, sin) in C.
  // Writes start at u[1]; u[0] (DC) is zeroed below.
  for (i = 0; i < PART_LEN; i+=4) {
    __asm __volatile (
      ".set push                                        \n\t"
      ".set noreorder                                   \n\t"
      "lh %[tmp1s], 0(%[randWptr])                      \n\t"
      "lh %[tmp2s], 2(%[randWptr])                      \n\t"
      "lh %[tmp3s], 4(%[randWptr])                      \n\t"
      "lh %[tmp4s], 6(%[randWptr])                      \n\t"
      "mtc1 %[tmp1s], %[randTemp]                       \n\t"
      "mtc1 %[tmp2s], %[randTemp2]                      \n\t"
      "mtc1 %[tmp3s], %[randTemp3]                      \n\t"
      "mtc1 %[tmp4s], %[randTemp4]                      \n\t"
      "cvt.s.w %[randTemp], %[randTemp]                 \n\t"
      "cvt.s.w %[randTemp2], %[randTemp2]               \n\t"
      "cvt.s.w %[randTemp3], %[randTemp3]               \n\t"
      "cvt.s.w %[randTemp4], %[randTemp4]               \n\t"
      "addiu %[randWptr], %[randWptr], 8                \n\t"
      "mul.s %[randTemp], %[randTemp], %[pi2t]          \n\t"
      "mul.s %[randTemp2], %[randTemp2], %[pi2t]        \n\t"
      "mul.s %[randTemp3], %[randTemp3], %[pi2t]        \n\t"
      "mul.s %[randTemp4], %[randTemp4], %[pi2t]        \n\t"
      ".set pop                                         \n\t"
      : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
        [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
        [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
        [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
        [tmp4s] "=&r" (tmp4s)
      : [pi2t] "f" (pi2t)
      : "memory"
    );
    u[i+1][0] = cosf(randTemp);
    u[i+1][1] = sinf(randTemp);
    u[i+2][0] = cosf(randTemp2);
    u[i+2][1] = sinf(randTemp2);
    u[i+3][0] = cosf(randTemp3);
    u[i+3][1] = sinf(randTemp3);
    u[i+4][0] = cosf(randTemp4);
    u[i+4][1] = sinf(randTemp4);
  }
  // Reject LF noise
  float* u_ptr = &u[1][0];
  float noise2, noise3, noise4;
  float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
  u[0][0] = 0;
  u[0][1] = 0;
  // Scale each phasor u[1..PART_LEN] by sqrt(noisePow) for its bin, four
  // complex bins per iteration; the imaginary parts are negated (neg.s).
  for (i = 1; i < PART_LEN1; i+=4) {
    __asm __volatile (
      ".set push                                     \n\t"
      ".set noreorder                                \n\t"
      "lwc1 %[noise], 4(%[noisePow])                 \n\t"
      "lwc1 %[noise2], 8(%[noisePow])                \n\t"
      "lwc1 %[noise3], 12(%[noisePow])               \n\t"
      "lwc1 %[noise4], 16(%[noisePow])               \n\t"
      "sqrt.s %[noise], %[noise]                     \n\t"
      "sqrt.s %[noise2], %[noise2]                   \n\t"
      "sqrt.s %[noise3], %[noise3]                   \n\t"
      "sqrt.s %[noise4], %[noise4]                   \n\t"
      "lwc1 %[tmp1f], 0(%[u_ptr])                    \n\t"
      "lwc1 %[tmp2f], 4(%[u_ptr])                    \n\t"
      "lwc1 %[tmp3f], 8(%[u_ptr])                    \n\t"
      "lwc1 %[tmp4f], 12(%[u_ptr])                   \n\t"
      "lwc1 %[tmp5f], 16(%[u_ptr])                   \n\t"
      "lwc1 %[tmp6f], 20(%[u_ptr])                   \n\t"
      "lwc1 %[tmp7f], 24(%[u_ptr])                   \n\t"
      "lwc1 %[tmp8f], 28(%[u_ptr])                   \n\t"
      "addiu %[noisePow], %[noisePow], 16            \n\t"
      "mul.s %[tmp1f], %[tmp1f], %[noise]            \n\t"
      "mul.s %[tmp2f], %[tmp2f], %[noise]            \n\t"
      "mul.s %[tmp3f], %[tmp3f], %[noise2]           \n\t"
      "mul.s %[tmp4f], %[tmp4f], %[noise2]           \n\t"
      "mul.s %[tmp5f], %[tmp5f], %[noise3]           \n\t"
      "mul.s %[tmp6f], %[tmp6f], %[noise3]           \n\t"
      "swc1 %[tmp1f], 0(%[u_ptr])                    \n\t"
      "swc1 %[tmp3f], 8(%[u_ptr])                    \n\t"
      "mul.s %[tmp8f], %[tmp8f], %[noise4]           \n\t"
      "mul.s %[tmp7f], %[tmp7f], %[noise4]           \n\t"
      "neg.s %[tmp2f]                                \n\t"
      "neg.s %[tmp4f]                                \n\t"
      "neg.s %[tmp6f]                                \n\t"
      "neg.s %[tmp8f]                                \n\t"
      "swc1 %[tmp5f], 16(%[u_ptr])                   \n\t"
      "swc1 %[tmp7f], 24(%[u_ptr])                   \n\t"
      "swc1 %[tmp2f], 4(%[u_ptr])                    \n\t"
      "swc1 %[tmp4f], 12(%[u_ptr])                   \n\t"
      "swc1 %[tmp6f], 20(%[u_ptr])                   \n\t"
      "swc1 %[tmp8f], 28(%[u_ptr])                   \n\t"
      "addiu %[u_ptr], %[u_ptr], 32                  \n\t"
      ".set pop                                      \n\t"
      : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
        [noise] "=&f" (noise), [noise2] "=&f" (noise2),
        [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
        [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
        [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
        [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
        [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
      :
      : "memory"
    );
  }
  u[PART_LEN][1] = 0;
  // The asm loop above advanced noisePow; rewind it to its original base.
  noisePow -= PART_LEN;
  u_ptr = &u[0][0];
  float* u_ptr_end = &u[PART_LEN][0];
  float* efw_ptr_0 = &efw[0][0];
  float* efw_ptr_1 = &efw[1][0];
  float tmp9f, tmp10f;
  const float tmp1c = 1.0;
  // Mix the shaped noise into efw: for each pair of bins whose lambda is
  // below 1, add sqrt(1 - lambda^2) * u to both real and imaginary parts.
  // Labels 2/3/4 handle the cases (both bins, first bin only, second bin
  // only); label 5 advances the pointers.
  __asm __volatile (
    ".set push                                        \n\t"
    ".set noreorder                                   \n\t"
    "1:                                               \n\t"
    "lwc1 %[tmp1f], 0(%[lambda])                      \n\t"
    "lwc1 %[tmp6f], 4(%[lambda])                      \n\t"
    "addiu %[lambda], %[lambda], 8                    \n\t"
    "c.lt.s %[tmp1f], %[tmp1c]                        \n\t"
    "bc1f 4f                                          \n\t"
    " nop                                             \n\t"
    "c.lt.s %[tmp6f], %[tmp1c]                        \n\t"
    "bc1f 3f                                          \n\t"
    " nop                                             \n\t"
    "2:                                               \n\t"
    "mul.s %[tmp1f], %[tmp1f], %[tmp1f]               \n\t"
    "mul.s %[tmp6f], %[tmp6f], %[tmp6f]               \n\t"
    "sub.s %[tmp1f], %[tmp1c], %[tmp1f]               \n\t"
    "sub.s %[tmp6f], %[tmp1c], %[tmp6f]               \n\t"
    "sqrt.s %[tmp1f], %[tmp1f]                        \n\t"
    "sqrt.s %[tmp6f], %[tmp6f]                        \n\t"
    "lwc1 %[tmp2f], 0(%[efw_ptr_0])                   \n\t"
    "lwc1 %[tmp3f], 0(%[u_ptr])                       \n\t"
    "lwc1 %[tmp7f], 4(%[efw_ptr_0])                   \n\t"
    "lwc1 %[tmp8f], 8(%[u_ptr])                       \n\t"
    "lwc1 %[tmp4f], 0(%[efw_ptr_1])                   \n\t"
    "lwc1 %[tmp5f], 4(%[u_ptr])                       \n\t"
    "lwc1 %[tmp9f], 4(%[efw_ptr_1])                   \n\t"
    "lwc1 %[tmp10f], 12(%[u_ptr])                     \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[tmp3f], %[tmp1f], %[tmp3f]               \n\t"
    "add.s %[tmp2f], %[tmp2f], %[tmp3f]               \n\t"
    "mul.s %[tmp3f], %[tmp1f], %[tmp5f]               \n\t"
    "add.s %[tmp4f], %[tmp4f], %[tmp3f]               \n\t"
    "mul.s %[tmp3f], %[tmp6f], %[tmp8f]               \n\t"
    "add.s %[tmp7f], %[tmp7f], %[tmp3f]               \n\t"
    "mul.s %[tmp3f], %[tmp6f], %[tmp10f]              \n\t"
    "add.s %[tmp9f], %[tmp9f], %[tmp3f]               \n\t"
#else  // #if !defined(MIPS32_R2_LE)
    "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f]    \n\t"
    "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f]    \n\t"
    "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f]    \n\t"
    "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f]   \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
    "swc1 %[tmp2f], 0(%[efw_ptr_0])                   \n\t"
    "swc1 %[tmp4f], 0(%[efw_ptr_1])                   \n\t"
    "swc1 %[tmp7f], 4(%[efw_ptr_0])                   \n\t"
    "b 5f                                             \n\t"
    " swc1 %[tmp9f], 4(%[efw_ptr_1])                  \n\t"
    "3:                                               \n\t"
    "mul.s %[tmp1f], %[tmp1f], %[tmp1f]               \n\t"
    "sub.s %[tmp1f], %[tmp1c], %[tmp1f]               \n\t"
    "sqrt.s %[tmp1f], %[tmp1f]                        \n\t"
    "lwc1 %[tmp2f], 0(%[efw_ptr_0])                   \n\t"
    "lwc1 %[tmp3f], 0(%[u_ptr])                       \n\t"
    "lwc1 %[tmp4f], 0(%[efw_ptr_1])                   \n\t"
    "lwc1 %[tmp5f], 4(%[u_ptr])                       \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[tmp3f], %[tmp1f], %[tmp3f]               \n\t"
    "add.s %[tmp2f], %[tmp2f], %[tmp3f]               \n\t"
    "mul.s %[tmp3f], %[tmp1f], %[tmp5f]               \n\t"
    "add.s %[tmp4f], %[tmp4f], %[tmp3f]               \n\t"
#else  // #if !defined(MIPS32_R2_LE)
    "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f]    \n\t"
    "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f]    \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
    "swc1 %[tmp2f], 0(%[efw_ptr_0])                   \n\t"
    "b 5f                                             \n\t"
    " swc1 %[tmp4f], 0(%[efw_ptr_1])                  \n\t"
    "4:                                               \n\t"
    "c.lt.s %[tmp6f], %[tmp1c]                        \n\t"
    "bc1f 5f                                          \n\t"
    " nop                                             \n\t"
    "mul.s %[tmp6f], %[tmp6f], %[tmp6f]               \n\t"
    "sub.s %[tmp6f], %[tmp1c], %[tmp6f]               \n\t"
    "sqrt.s %[tmp6f], %[tmp6f]                        \n\t"
    "lwc1 %[tmp7f], 4(%[efw_ptr_0])                   \n\t"
    "lwc1 %[tmp8f], 8(%[u_ptr])                       \n\t"
    "lwc1 %[tmp9f], 4(%[efw_ptr_1])                   \n\t"
    "lwc1 %[tmp10f], 12(%[u_ptr])                     \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[tmp3f], %[tmp6f], %[tmp8f]               \n\t"
    "add.s %[tmp7f], %[tmp7f], %[tmp3f]               \n\t"
    "mul.s %[tmp3f], %[tmp6f], %[tmp10f]              \n\t"
    "add.s %[tmp9f], %[tmp9f], %[tmp3f]               \n\t"
#else  // #if !defined(MIPS32_R2_LE)
    "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f]    \n\t"
    "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f]   \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
    "swc1 %[tmp7f], 4(%[efw_ptr_0])                   \n\t"
    "swc1 %[tmp9f], 4(%[efw_ptr_1])                   \n\t"
    "5:                                               \n\t"
    "addiu %[u_ptr], %[u_ptr], 16                     \n\t"
    "addiu %[efw_ptr_0], %[efw_ptr_0], 8              \n\t"
    "bne %[u_ptr], %[u_ptr_end], 1b                   \n\t"
    " addiu %[efw_ptr_1], %[efw_ptr_1], 8             \n\t"
    ".set pop                                         \n\t"
    : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
      [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
      [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
      [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
      [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
      [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
    : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
    : "memory"
  );
  // Rewind lambda (advanced by the asm loop) and handle the last bin in C.
  lambda -= PART_LEN;
  tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
  //tmp = 1 - lambda[i];
  efw[0][PART_LEN] += tmp * u[PART_LEN][0];
  efw[1][PART_LEN] += tmp * u[PART_LEN][1];
  // For H band comfort noise
  // TODO: don't compute noise and "tmp" twice. Use the previous results.
  noiseAvg = 0.0;
  tmpAvg = 0.0;
  num = 0;
  if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
    for (i = 0; i < PART_LEN; i++) {
      rand[i] = ((float)randW16[i]) / 32768;
    }
    // average noise scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      noiseAvg += sqrtf(noisePow[i]);
    }
    noiseAvg /= (float)num;
    // average nlp scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    num = 0;
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
    }
    tmpAvg /= (float)num;
    // Use average noise for H band
    // TODO: we should probably have a new random vector here.
    // Reject LF noise
    u[0][0] = 0;
    u[0][1] = 0;
    for (i = 1; i < PART_LEN1; i++) {
      tmp = pi2 * rand[i - 1];
      // Use average noise for H band
      u[i][0] = noiseAvg * (float)cos(tmp);
      u[i][1] = -noiseAvg * (float)sin(tmp);
    }
    u[PART_LEN][1] = 0;
    for (i = 0; i < PART_LEN1; i++) {
      // Use average NLP weight for H band
      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
    }
  }
}
// MIPS-optimized far-signal filtering.
//
// Accumulates into |yf| the complex product of each stored far-end spectrum
// partition (aec->xfBuf) with the matching adaptive-filter partition
// (aec->wfBuf), handling the circular wrap of xfBufBlockPos.  The asm loop
// processes two complex bins per iteration; the straight-line asm after the
// loop handles the final (odd, PART_LEN1 = 65) bin.
void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
  int i;
  for (i = 0; i < aec->num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
      xPos -= aec->num_partitions * (PART_LEN1);
    }
    float* yf0 = yf[0];
    float* yf1 = yf[1];
    float* aRe = aec->xfBuf[0] + xPos;
    float* aIm = aec->xfBuf[1] + xPos;
    float* bRe = aec->wfBuf[0] + pos;
    float* bIm = aec->wfBuf[1] + pos;
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
    int len = PART_LEN1 >> 1;
    // Per pair of bins: yf += (aRe + i*aIm) * (bRe + i*bIm), i.e.
    // re += aRe*bRe - aIm*bIm and im += aRe*bIm + aIm*bRe.  R2 builds fuse
    // the multiply-accumulate with madd.s/nmsub.s.
    __asm __volatile (
      ".set push                                                \n\t"
      ".set noreorder                                           \n\t"
      "1:                                                       \n\t"
      "lwc1 %[f0], 0(%[aRe])                                    \n\t"
      "lwc1 %[f1], 0(%[bRe])                                    \n\t"
      "lwc1 %[f2], 0(%[bIm])                                    \n\t"
      "lwc1 %[f3], 0(%[aIm])                                    \n\t"
      "lwc1 %[f4], 4(%[aRe])                                    \n\t"
      "lwc1 %[f5], 4(%[bRe])                                    \n\t"
      "lwc1 %[f6], 4(%[bIm])                                    \n\t"
      "mul.s %[f8], %[f0], %[f1]                                \n\t"
      "mul.s %[f0], %[f0], %[f2]                                \n\t"
      "mul.s %[f9], %[f4], %[f5]                                \n\t"
      "mul.s %[f4], %[f4], %[f6]                                \n\t"
      "lwc1 %[f7], 4(%[aIm])                                    \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f12], %[f2], %[f3]                               \n\t"
      "mul.s %[f1], %[f3], %[f1]                                \n\t"
      "mul.s %[f11], %[f6], %[f7]                               \n\t"
      "addiu %[aRe], %[aRe], 8                                  \n\t"
      "addiu %[aIm], %[aIm], 8                                  \n\t"
      "addiu %[len], %[len], -1                                 \n\t"
      "sub.s %[f8], %[f8], %[f12]                               \n\t"
      "mul.s %[f12], %[f7], %[f5]                                \n\t"
      "lwc1 %[f2], 0(%[yf0])                                    \n\t"
      "add.s %[f1], %[f0], %[f1]                                \n\t"
      "lwc1 %[f3], 0(%[yf1])                                    \n\t"
      "sub.s %[f9], %[f9], %[f11]                               \n\t"
      "lwc1 %[f6], 4(%[yf0])                                    \n\t"
      "add.s %[f4], %[f4], %[f12]                               \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "addiu %[aRe], %[aRe], 8                                  \n\t"
      "addiu %[aIm], %[aIm], 8                                  \n\t"
      "addiu %[len], %[len], -1                                 \n\t"
      "nmsub.s %[f8], %[f8], %[f2], %[f3]                       \n\t"
      "lwc1 %[f2], 0(%[yf0])                                    \n\t"
      "madd.s %[f1], %[f0], %[f3], %[f1]                        \n\t"
      "lwc1 %[f3], 0(%[yf1])                                    \n\t"
      "nmsub.s %[f9], %[f9], %[f6], %[f7]                       \n\t"
      "lwc1 %[f6], 4(%[yf0])                                    \n\t"
      "madd.s %[f4], %[f4], %[f7], %[f5]                        \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "lwc1 %[f5], 4(%[yf1])                                    \n\t"
      "add.s %[f2], %[f2], %[f8]                                \n\t"
      "addiu %[bRe], %[bRe], 8                                  \n\t"
      "addiu %[bIm], %[bIm], 8                                  \n\t"
      "add.s %[f3], %[f3], %[f1]                                \n\t"
      "add.s %[f6], %[f6], %[f9]                                \n\t"
      "add.s %[f5], %[f5], %[f4]                                \n\t"
      "swc1 %[f2], 0(%[yf0])                                    \n\t"
      "swc1 %[f3], 0(%[yf1])                                    \n\t"
      "swc1 %[f6], 4(%[yf0])                                    \n\t"
      "swc1 %[f5], 4(%[yf1])                                    \n\t"
      "addiu %[yf0], %[yf0], 8                                  \n\t"
      "bgtz %[len], 1b                                          \n\t"
      " addiu %[yf1], %[yf1], 8                                 \n\t"
      "lwc1 %[f0], 0(%[aRe])                                    \n\t"
      "lwc1 %[f1], 0(%[bRe])                                    \n\t"
      "lwc1 %[f2], 0(%[bIm])                                    \n\t"
      "lwc1 %[f3], 0(%[aIm])                                    \n\t"
      "mul.s %[f8], %[f0], %[f1]                                \n\t"
      "mul.s %[f0], %[f0], %[f2]                                \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f12], %[f2], %[f3]                               \n\t"
      "mul.s %[f1], %[f3], %[f1]                                \n\t"
      "sub.s %[f8], %[f8], %[f12]                               \n\t"
      "lwc1 %[f2], 0(%[yf0])                                    \n\t"
      "add.s %[f1], %[f0], %[f1]                                \n\t"
      "lwc1 %[f3], 0(%[yf1])                                    \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "nmsub.s %[f8], %[f8], %[f2], %[f3]                       \n\t"
      "lwc1 %[f2], 0(%[yf0])                                    \n\t"
      "madd.s %[f1], %[f0], %[f3], %[f1]                        \n\t"
      "lwc1 %[f3], 0(%[yf1])                                    \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "add.s %[f2], %[f2], %[f8]                                \n\t"
      "add.s %[f3], %[f3], %[f1]                                \n\t"
      "swc1 %[f2], 0(%[yf0])                                    \n\t"
      "swc1 %[f3], 0(%[yf1])                                    \n\t"
      ".set pop                                                 \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
        [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
        [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
        [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
      :
      : "memory"
    );
  }
}
// MIPS-optimized adaptive-filter weight update.  For each partition:
//   1. fft <- conj(xfBuf) * ef, interleaved (re, im), via the first asm loop
//      (the final real-only bin is written to fft[1] after the loop);
//   2. inverse FFT, zero the second half of the time-domain buffer, scale by
//      2/PART_LEN2, forward FFT;
//   3. accumulate the spectrum into the weights wfBuf (last asm block),
//      where the first asm stanza handles bin 0 and the packed real value at
//      offset 256 (wfBuf[0] + PART_LEN) — matching the rdft packing.
void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
                                     float* fft,
                                     float ef[2][PART_LEN1]) {
  int i;
  for (i = 0; i < aec->num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
    int pos;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
      xPos -= aec->num_partitions * PART_LEN1;
    }
    pos = i * PART_LEN1;
    float* aRe = aec->xfBuf[0] + xPos;
    float* aIm = aec->xfBuf[1] + xPos;
    float* bRe = ef[0];
    float* bIm = ef[1];
    float* fft_tmp;
    float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
    int len = PART_LEN >> 1;
    // conj(a) * b per bin: re = aRe*bRe + aIm*bIm, im = aRe*bIm - aIm*bRe;
    // two bins per iteration, results stored interleaved into fft.
    __asm __volatile (
      ".set push                                        \n\t"
      ".set noreorder                                   \n\t"
      "addiu %[fft_tmp], %[fft], 0                      \n\t"
      "1:                                               \n\t"
      "lwc1 %[f0], 0(%[aRe])                            \n\t"
      "lwc1 %[f1], 0(%[bRe])                            \n\t"
      "lwc1 %[f2], 0(%[bIm])                            \n\t"
      "lwc1 %[f4], 4(%[aRe])                            \n\t"
      "lwc1 %[f5], 4(%[bRe])                            \n\t"
      "lwc1 %[f6], 4(%[bIm])                            \n\t"
      "addiu %[aRe], %[aRe], 8                          \n\t"
      "addiu %[bRe], %[bRe], 8                          \n\t"
      "mul.s %[f8], %[f0], %[f1]                        \n\t"
      "mul.s %[f0], %[f0], %[f2]                        \n\t"
      "lwc1 %[f3], 0(%[aIm])                            \n\t"
      "mul.s %[f9], %[f4], %[f5]                        \n\t"
      "lwc1 %[f7], 4(%[aIm])                            \n\t"
      "mul.s %[f4], %[f4], %[f6]                        \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f10], %[f3], %[f2]                       \n\t"
      "mul.s %[f1], %[f3], %[f1]                        \n\t"
      "mul.s %[f11], %[f7], %[f6]                       \n\t"
      "mul.s %[f5], %[f7], %[f5]                        \n\t"
      "addiu %[aIm], %[aIm], 8                          \n\t"
      "addiu %[bIm], %[bIm], 8                          \n\t"
      "addiu %[len], %[len], -1                         \n\t"
      "add.s %[f8], %[f8], %[f10]                       \n\t"
      "sub.s %[f1], %[f0], %[f1]                        \n\t"
      "add.s %[f9], %[f9], %[f11]                       \n\t"
      "sub.s %[f5], %[f4], %[f5]                        \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "addiu %[aIm], %[aIm], 8                          \n\t"
      "addiu %[bIm], %[bIm], 8                          \n\t"
      "addiu %[len], %[len], -1                         \n\t"
      "madd.s %[f8], %[f8], %[f3], %[f2]                \n\t"
      "nmsub.s %[f1], %[f0], %[f3], %[f1]               \n\t"
      "madd.s %[f9], %[f9], %[f7], %[f6]                \n\t"
      "nmsub.s %[f5], %[f4], %[f7], %[f5]               \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "swc1 %[f8], 0(%[fft_tmp])                        \n\t"
      "swc1 %[f1], 4(%[fft_tmp])                        \n\t"
      "swc1 %[f9], 8(%[fft_tmp])                        \n\t"
      "swc1 %[f5], 12(%[fft_tmp])                       \n\t"
      "bgtz %[len], 1b                                  \n\t"
      " addiu %[fft_tmp], %[fft_tmp], 16                \n\t"
      "lwc1 %[f0], 0(%[aRe])                            \n\t"
      "lwc1 %[f1], 0(%[bRe])                            \n\t"
      "lwc1 %[f2], 0(%[bIm])                            \n\t"
      "lwc1 %[f3], 0(%[aIm])                            \n\t"
      "mul.s %[f8], %[f0], %[f1]                        \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f10], %[f3], %[f2]                       \n\t"
      "add.s %[f8], %[f8], %[f10]                       \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "madd.s %[f8], %[f8], %[f3], %[f2]                \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "swc1 %[f8], 4(%[fft])                            \n\t"
      ".set pop                                         \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
        [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
        [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
        [len] "+r" (len)
      : [fft] "r" (fft)
      : "memory"
    );
    aec_rdft_inverse_128(fft);
    // Zero the second half of the time-domain response before transforming
    // back (keeps the impulse response confined to PART_LEN samples).
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
    // fft scaling
    {
      float scale = 2.0f / PART_LEN2;
      // Multiply all 64 samples by the scale, 8 per iteration.
      __asm __volatile (
        ".set push                                    \n\t"
        ".set noreorder                               \n\t"
        "addiu %[fft_tmp], %[fft], 0                  \n\t"
        "addiu %[len], $zero, 8                       \n\t"
        "1:                                           \n\t"
        "addiu %[len], %[len], -1                     \n\t"
        "lwc1 %[f0], 0(%[fft_tmp])                    \n\t"
        "lwc1 %[f1], 4(%[fft_tmp])                    \n\t"
        "lwc1 %[f2], 8(%[fft_tmp])                    \n\t"
        "lwc1 %[f3], 12(%[fft_tmp])                   \n\t"
        "mul.s %[f0], %[f0], %[scale]                 \n\t"
        "mul.s %[f1], %[f1], %[scale]                 \n\t"
        "mul.s %[f2], %[f2], %[scale]                 \n\t"
        "mul.s %[f3], %[f3], %[scale]                 \n\t"
        "lwc1 %[f4], 16(%[fft_tmp])                   \n\t"
        "lwc1 %[f5], 20(%[fft_tmp])                   \n\t"
        "lwc1 %[f6], 24(%[fft_tmp])                   \n\t"
        "lwc1 %[f7], 28(%[fft_tmp])                   \n\t"
        "mul.s %[f4], %[f4], %[scale]                 \n\t"
        "mul.s %[f5], %[f5], %[scale]                 \n\t"
        "mul.s %[f6], %[f6], %[scale]                 \n\t"
        "mul.s %[f7], %[f7], %[scale]                 \n\t"
        "swc1 %[f0], 0(%[fft_tmp])                    \n\t"
        "swc1 %[f1], 4(%[fft_tmp])                    \n\t"
        "swc1 %[f2], 8(%[fft_tmp])                    \n\t"
        "swc1 %[f3], 12(%[fft_tmp])                   \n\t"
        "swc1 %[f4], 16(%[fft_tmp])                   \n\t"
        "swc1 %[f5], 20(%[fft_tmp])                   \n\t"
        "swc1 %[f6], 24(%[fft_tmp])                   \n\t"
        "swc1 %[f7], 28(%[fft_tmp])                   \n\t"
        "bgtz %[len], 1b                              \n\t"
        " addiu %[fft_tmp], %[fft_tmp], 32            \n\t"
        ".set pop                                     \n\t"
        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
          [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
          [fft_tmp] "=&r" (fft_tmp)
        : [scale] "f" (scale), [fft] "r" (fft)
        : "memory"
      );
    }
    aec_rdft_forward_128(fft);
    aRe = aec->wfBuf[0] + pos;
    aIm = aec->wfBuf[1] + pos;
    // Accumulate the constrained gradient spectrum into the weights.  The
    // first stanza handles bin 0 plus the packed value at byte offset 256
    // (i.e. wfBuf[0][pos + PART_LEN]); the loop covers the remaining bins.
    __asm __volatile (
      ".set push                                      \n\t"
      ".set noreorder                                 \n\t"
      "addiu %[fft_tmp], %[fft], 0                    \n\t"
      "addiu %[len], $zero, 31                        \n\t"
      "lwc1 %[f0], 0(%[aRe])                          \n\t"
      "lwc1 %[f1], 0(%[fft_tmp])                      \n\t"
      "lwc1 %[f2], 256(%[aRe])                        \n\t"
      "lwc1 %[f3], 4(%[fft_tmp])                      \n\t"
      "lwc1 %[f4], 4(%[aRe])                          \n\t"
      "lwc1 %[f5], 8(%[fft_tmp])                      \n\t"
      "lwc1 %[f6], 4(%[aIm])                          \n\t"
      "lwc1 %[f7], 12(%[fft_tmp])                     \n\t"
      "add.s %[f0], %[f0], %[f1]                      \n\t"
      "add.s %[f2], %[f2], %[f3]                      \n\t"
      "add.s %[f4], %[f4], %[f5]                      \n\t"
      "add.s %[f6], %[f6], %[f7]                      \n\t"
      "addiu %[fft_tmp], %[fft_tmp], 16               \n\t"
      "swc1 %[f0], 0(%[aRe])                          \n\t"
      "swc1 %[f2], 256(%[aRe])                        \n\t"
      "swc1 %[f4], 4(%[aRe])                          \n\t"
      "addiu %[aRe], %[aRe], 8                        \n\t"
      "swc1 %[f6], 4(%[aIm])                          \n\t"
      "addiu %[aIm], %[aIm], 8                        \n\t"
      "1:                                             \n\t"
      "lwc1 %[f0], 0(%[aRe])                          \n\t"
      "lwc1 %[f1], 0(%[fft_tmp])                      \n\t"
      "lwc1 %[f2], 0(%[aIm])                          \n\t"
      "lwc1 %[f3], 4(%[fft_tmp])                      \n\t"
      "lwc1 %[f4], 4(%[aRe])                          \n\t"
      "lwc1 %[f5], 8(%[fft_tmp])                      \n\t"
      "lwc1 %[f6], 4(%[aIm])                          \n\t"
      "lwc1 %[f7], 12(%[fft_tmp])                     \n\t"
      "add.s %[f0], %[f0], %[f1]                      \n\t"
      "add.s %[f2], %[f2], %[f3]                      \n\t"
      "add.s %[f4], %[f4], %[f5]                      \n\t"
      "add.s %[f6], %[f6], %[f7]                      \n\t"
      "addiu %[len], %[len], -1                       \n\t"
      "addiu %[fft_tmp], %[fft_tmp], 16               \n\t"
      "swc1 %[f0], 0(%[aRe])                          \n\t"
      "swc1 %[f2], 0(%[aIm])                          \n\t"
      "swc1 %[f4], 4(%[aRe])                          \n\t"
      "addiu %[aRe], %[aRe], 8                        \n\t"
      "swc1 %[f6], 4(%[aIm])                          \n\t"
      "bgtz %[len], 1b                                \n\t"
      " addiu %[aIm], %[aIm], 8                       \n\t"
      ".set pop                                       \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
        [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
      : [fft] "r" (fft)
      : "memory"
    );
  }
}
// MIPS-optimized over-drive and suppression.
//
// Per bin i: if hNl[i] exceeds the cross-band minimum |hNlFb|, blend it
// toward hNlFb using WebRtcAec_weightCurve (first asm fragment); then raise
// it to the overDriveSm * overDriveCurve[i] power (plain C powf, which the
// FPU cannot do); finally scale the output spectrum |efw| by the gain,
// negating the imaginary part (second asm fragment).
// NOTE(review): the imaginary-part negation mirrors the scalar
// OverdriveAndSuppress — confirm against aec_core.c if touching this.
void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
                                         float hNl[PART_LEN1],
                                         const float hNlFb,
                                         float efw[2][PART_LEN1]) {
  int i;
  const float one = 1.0;
  float* p_hNl;
  float* p_efw0;
  float* p_efw1;
  float* p_WebRtcAec_wC;
  float temp1, temp2, temp3, temp4;
  p_hNl = &hNl[0];
  p_efw0 = &efw[0][0];
  p_efw1 = &efw[1][0];
  p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
  for (i = 0; i < PART_LEN1; i++) {
    // Weight subbands
    // hNl[i] = wC*hNlFb + (1-wC)*hNl[i], applied only when hNlFb < hNl[i]
    // (the bc1f skips the store when the condition fails).
    __asm __volatile (
      ".set push                                            \n\t"
      ".set noreorder                                       \n\t"
      "lwc1 %[temp1], 0(%[p_hNl])                           \n\t"
      "lwc1 %[temp2], 0(%[p_wC])                            \n\t"
      "c.lt.s %[hNlFb], %[temp1]                            \n\t"
      "bc1f 1f                                              \n\t"
      " mul.s %[temp3], %[temp2], %[hNlFb]                  \n\t"
      "sub.s %[temp4], %[one], %[temp2]                     \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[temp1], %[temp1], %[temp4]                   \n\t"
      "add.s %[temp1], %[temp3], %[temp1]                   \n\t"
#else  // #if !defined(MIPS32_R2_LE)
      "madd.s %[temp1], %[temp3], %[temp1], %[temp4]        \n\t"
#endif  // #if !defined(MIPS32_R2_LE)
      "swc1 %[temp1], 0(%[p_hNl])                           \n\t"
      "1:                                                   \n\t"
      "addiu %[p_wC], %[p_wC], 4                            \n\t"
      ".set pop                                             \n\t"
      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
        [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
      : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
      : "memory"
    );
    // Over-drive: sharpen the suppression curve.
    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
    // Suppress: efw[0][i] *= hNl[i]; efw[1][i] = -efw[1][i] * hNl[i].
    __asm __volatile (
      "lwc1 %[temp1], 0(%[p_hNl])                           \n\t"
      "lwc1 %[temp3], 0(%[p_efw1])                          \n\t"
      "lwc1 %[temp2], 0(%[p_efw0])                          \n\t"
      "addiu %[p_hNl], %[p_hNl], 4                          \n\t"
      "mul.s %[temp3], %[temp3], %[temp1]                   \n\t"
      "mul.s %[temp2], %[temp2], %[temp1]                   \n\t"
      "addiu %[p_efw0], %[p_efw0], 4                        \n\t"
      "addiu %[p_efw1], %[p_efw1], 4                        \n\t"
      "neg.s %[temp4], %[temp3]                             \n\t"
      "swc1 %[temp2], -4(%[p_efw0])                         \n\t"
      "swc1 %[temp4], -4(%[p_efw1])                         \n\t"
      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
        [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
        [p_hNl] "+r" (p_hNl)
      :
      : "memory"
    );
  }
}
// MIPS-optimized error-signal scaling.
//
// Per bin: divides both components of |ef| by the regularized far-end power
// (aec->xPow + 1e-10), clamps the complex magnitude to |error_threshold|
// (the branch to label 2 skips the rescale when |ef|^2 <= threshold^2), and
// finally multiplies by the step size |mu|.  Thresholds/step size switch to
// the extended-filter constants when extended_filter_enabled is set.
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
  const float error_threshold = aec->extended_filter_enabled
                                    ? kExtendedErrorThreshold
                                    : aec->normal_error_threshold;
  int len = (PART_LEN1);
  float* ef0 = ef[0];
  float* ef1 = ef[1];
  float* xPow = aec->xPow;
  // Regularizer avoiding division by zero.
  float fac1 = 1e-10f;
  // Compare squared magnitudes to avoid a sqrt on the common (small) path.
  float err_th2 = error_threshold * error_threshold;
  float f0, f1, f2;
#if !defined(MIPS32_R2_LE)
  float f3;
#endif
  __asm __volatile (
    ".set push                                   \n\t"
    ".set noreorder                              \n\t"
    "1:                                          \n\t"
    "lwc1 %[f0], 0(%[xPow])                      \n\t"
    "lwc1 %[f1], 0(%[ef0])                       \n\t"
    "lwc1 %[f2], 0(%[ef1])                       \n\t"
    "add.s %[f0], %[f0], %[fac1]                 \n\t"
    "div.s %[f1], %[f1], %[f0]                   \n\t"
    "div.s %[f2], %[f2], %[f0]                   \n\t"
    "mul.s %[f0], %[f1], %[f1]                   \n\t"
#if defined(MIPS32_R2_LE)
    "madd.s %[f0], %[f0], %[f2], %[f2]           \n\t"
#else
    "mul.s %[f3], %[f2], %[f2]                   \n\t"
    "add.s %[f0], %[f0], %[f3]                   \n\t"
#endif
    "c.le.s %[f0], %[err_th2]                    \n\t"
    "nop                                         \n\t"
    "bc1t 2f                                     \n\t"
    " nop                                        \n\t"
    "sqrt.s %[f0], %[f0]                         \n\t"
    "add.s %[f0], %[f0], %[fac1]                 \n\t"
    "div.s %[f0], %[err_th], %[f0]               \n\t"
    "mul.s %[f1], %[f1], %[f0]                   \n\t"
    "mul.s %[f2], %[f2], %[f0]                   \n\t"
    "2:                                          \n\t"
    "mul.s %[f1], %[f1], %[mu]                   \n\t"
    "mul.s %[f2], %[f2], %[mu]                   \n\t"
    "swc1 %[f1], 0(%[ef0])                       \n\t"
    "swc1 %[f2], 0(%[ef1])                       \n\t"
    "addiu %[len], %[len], -1                    \n\t"
    "addiu %[xPow], %[xPow], 4                   \n\t"
    "addiu %[ef0], %[ef0], 4                     \n\t"
    "bgtz %[len], 1b                             \n\t"
    " addiu %[ef1], %[ef1], 4                    \n\t"
    ".set pop                                    \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
#if !defined(MIPS32_R2_LE)
      [f3] "=&f" (f3),
#endif
      [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
      [len] "+r" (len)
    : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
      [err_th] "f" (error_threshold)
    : "memory"
  );
}
// Rebinds the speed-critical AEC kernel pointers to the MIPS-optimized
// implementations defined in this file.  Invoked once during AEC setup on
// MIPS targets; each assignment is independent of the others.
void WebRtcAec_InitAec_mips(void) {
  WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
  WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
  WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
  WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
  WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
}

View File

@ -1,736 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The core AEC algorithm, neon version of speed-critical functions.
*
* Based on aec_core_sse2.c.
*/
#include <arm_neon.h>
#include <math.h>
#include <string.h> // memset
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
// Shift count used in vpowq_f32 to move the IEEE-754 exponent field into
// the top of the mantissa when extracting n from x = y * 2^n.
enum { kShiftExponentIntoTopMantissa = 8 };
// Bit position of the exponent field in an IEEE-754 single-precision float.
enum { kFloatExponentShift = 23 };
// Returns the real part of the complex product (aRe + i*aIm) * (bRe + i*bIm).
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
  const float real_product = aRe * bRe;
  const float imag_product = aIm * bIm;
  return real_product - imag_product;
}
// Returns the imaginary part of the complex product
// (aRe + i*aIm) * (bRe + i*bIm).
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
  const float cross1 = aRe * bIm;
  const float cross2 = aIm * bRe;
  return cross1 + cross2;
}
// NEON far-signal filtering: accumulates the complex product of each far-end
// spectrum partition (xfBuf) with the adaptive-filter partition (wfBuf) into
// |yf|, four bins per iteration, with a scalar tail for the final bin
// (PART_LEN1 = 65 is not a multiple of four).
static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
  int i;
  const int num_partitions = aec->num_partitions;
  for (i = 0; i < num_partitions; i++) {
    int j;
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= num_partitions) {
      xPos -= num_partitions * PART_LEN1;
    }
    // vectorized code (four at once)
    for (j = 0; j + 3 < PART_LEN1; j += 4) {
      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
      const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
      const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
      const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
      const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
      // e = re(x*w) = xr*wr - xi*wi;  f = im(x*w) = xr*wi + xi*wr.
      const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
      const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
      const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
      const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
      const float32x4_t g = vaddq_f32(yf_re, e);
      const float32x4_t h = vaddq_f32(yf_im, f);
      vst1q_f32(&yf[0][j], g);
      vst1q_f32(&yf[1][j], h);
    }
    // scalar code for the remaining items.
    for (; j < PART_LEN1; j++) {
      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
                        aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j],
                        aec->wfBuf[1][pos + j]);
      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
                        aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j],
                        aec->wfBuf[1][pos + j]);
    }
  }
}
// ARM64's arm_neon.h has already defined vdivq_f32 vsqrtq_f32.
#if !defined (WEBRTC_ARCH_ARM64)
// Vector float division a/b for ARMv7 NEON, which lacks a hardware divide.
// Seeds with VRECPE and applies Newton-Raphson refinement:
//   x[n+1] = x[n] * (2 - d * x[n])
// which converges to 1/d; the quotient is then a * (1/b).
static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
  int i;
  float32x4_t x = vrecpeq_f32(b);
  // from arm documentation
  // The Newton-Raphson iteration:
  // x[n+1] = x[n] * (2 - d * x[n])
  // converges to (1/d) if x0 is the result of VRECPE applied to d.
  //
  // Note: The precision did not improve after 2 iterations.
  for (i = 0; i < 2; i++) {
    x = vmulq_f32(vrecpsq_f32(b, x), x);
  }
  // a/b = a*(1/b)
  return vmulq_f32(a, x);
}

// Vector square root for ARMv7 NEON via the reciprocal-sqrt estimate:
// sqrt(s) = s * (1/sqrt(s)), with two Newton-Raphson refinement steps.
// sqrt(0) is handled explicitly because VRSQRTE(0) yields +infinity.
static float32x4_t vsqrtq_f32(float32x4_t s) {
  int i;
  float32x4_t x = vrsqrteq_f32(s);

  // Code to handle sqrt(0).
  // If the input to sqrtf() is zero, a zero will be returned.
  // If the input to vrsqrteq_f32() is zero, positive infinity is returned.
  const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
  // check for divide by zero
  const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
  // zero out the positive infinity results
  x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
                                      vreinterpretq_u32_f32(x)));
  // from arm documentation
  // The Newton-Raphson iteration:
  // x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2)
  // converges to (1/sqrt(d)) if x0 is the result of VRSQRTE applied to d.
  //
  // Note: The precision did not improve after 2 iterations.
  for (i = 0; i < 2; i++) {
    x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
  }
  // sqrt(s) = s * 1/sqrt(s)
  return vmulq_f32(s, x);  // Fixed: removed stray duplicate semicolon.
}
#endif  // WEBRTC_ARCH_ARM64
// NEON error-signal scaling: normalizes each bin of |ef| by the regularized
// far-end power, clamps the complex magnitude to |error_threshold| using a
// branchless mask-select (vcgtq/vandq/vorrq), and applies the step size |mu|.
// A scalar tail mirrors the same logic for the final bin.
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
  const float error_threshold = aec->extended_filter_enabled ?
      kExtendedErrorThreshold : aec->normal_error_threshold;
  const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
  const float32x4_t kMu = vmovq_n_f32(mu);
  const float32x4_t kThresh = vmovq_n_f32(error_threshold);
  int i;
  // vectorized code (four at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
    const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
    const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
    const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
    float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
    float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
    const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
    const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
    const float32x4_t absEf = vsqrtq_f32(ef_sum2);
    // Lanes where |ef| exceeds the threshold get the rescaled value; the
    // others keep the original — selected via bitwise masks, no branches.
    const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
    const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
    const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
    uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
    uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
    uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
                                     vreinterpretq_u32_f32(ef_re));
    uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
                                     vreinterpretq_u32_f32(ef_im));
    ef_re_if = vandq_u32(bigger, ef_re_if);
    ef_im_if = vandq_u32(bigger, ef_im_if);
    ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
    ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
    ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
    ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
    vst1q_f32(&ef[0][i], ef_re);
    vst1q_f32(&ef[1][i], ef_im);
  }
  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    float abs_ef;
    ef[0][i] /= (aec->xPow[i] + 1e-10f);
    ef[1][i] /= (aec->xPow[i] + 1e-10f);
    abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
    if (abs_ef > error_threshold) {
      abs_ef = error_threshold / (abs_ef + 1e-10f);
      ef[0][i] *= abs_ef;
      ef[1][i] *= abs_ef;
    }
    // Stepsize factor
    ef[0][i] *= mu;
    ef[1][i] *= mu;
  }
}
// NEON adaptive-filter weight update.  For each partition:
//   1. fft <- conj(xfBuf) * ef, with vzipq interleaving (re, im) pairs; the
//      odd first-imaginary slot fft[1] is fixed up with the last bin's
//      real-only product;
//   2. inverse FFT, zero the second half of the time response, scale by
//      2/PART_LEN2, forward FFT;
//   3. de-interleave with vuzpq and accumulate into wfBuf.  wfBuf[1][pos]
//      is saved (wt1) and restored because fft[1] actually carries
//      wfBuf[0][pos + PART_LEN] in the rdft packing.
static void FilterAdaptationNEON(AecCore* aec,
                                 float* fft,
                                 float ef[2][PART_LEN1]) {
  int i;
  const int num_partitions = aec->num_partitions;
  for (i = 0; i < num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    int j;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= num_partitions) {
      xPos -= num_partitions * PART_LEN1;
    }
    // Process the whole array...
    for (j = 0; j < PART_LEN; j += 4) {
      // Load xfBuf and ef.
      const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
      const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
      const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
      const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
      // Calculate the product of conjugate(xfBuf) by ef.
      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
      //   im(conjugate(a) * b)= aRe * bIm - aIm * bRe
      const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
      const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
      const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
      const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
      // Interleave real and imaginary parts.
      const float32x4x2_t g_n_h = vzipq_f32(e, f);
      // Store
      vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
      vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
    }
    // ... and fixup the first imaginary entry.
    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
                   -aec->xfBuf[1][xPos + PART_LEN],
                   ef[0][PART_LEN],
                   ef[1][PART_LEN]);
    aec_rdft_inverse_128(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
    // fft scaling
    {
      const float scale = 2.0f / PART_LEN2;
      const float32x4_t scale_ps = vmovq_n_f32(scale);
      for (j = 0; j < PART_LEN; j += 4) {
        const float32x4_t fft_ps = vld1q_f32(&fft[j]);
        const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
        vst1q_f32(&fft[j], fft_scale);
      }
    }
    aec_rdft_forward_128(fft);
    {
      // fft[1] holds the packed last real bin; route it to
      // wfBuf[0][pos + PART_LEN] and keep wfBuf[1][pos] unchanged.
      const float wt1 = aec->wfBuf[1][pos];
      aec->wfBuf[0][pos + PART_LEN] += fft[1];
      for (j = 0; j < PART_LEN; j += 4) {
        float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
        float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
        const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
        const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
        const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
        wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
        wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
        vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
        vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
      }
      aec->wfBuf[1][pos] = wt1;
    }
  }
}
// Vectorized approximation of powf(): computes a^b independently in each of
// the four lanes. Only meaningful for positive, normal values of a (the
// bit-level log2 decomposition below assumes a positive exponent/mantissa
// encoding). Used by OverdriveAndSuppressNEON for the overdrive curve.
static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
  // a^b = exp2(b * log2(a))
  // exp2(x) and log2(x) are calculated using polynomial approximations.
  float32x4_t log2_a, b_log2_a, a_exp_b;

  // Calculate log2(x), x = a.
  {
    // To calculate log2(x), we decompose x like this:
    //   x = y * 2^n
    //     n is an integer
    //     y is in the [1.0, 2.0) range
    //
    //   log2(x) = log2(y) + n
    //     n       can be evaluated by playing with float representation.
    //     log2(y) in a small range can be approximated, this code uses an
    //             order five polynomial approximation. The coefficients have
    //             been estimated with the Remez algorithm and the resulting
    //             polynomial has a maximum relative error of 0.00086%.

    // Compute n.
    //    This is done by masking the exponent, shifting it into the top bit of
    //    the mantissa, putting eight into the biased exponent (to shift/
    //    compensate the fact that the exponent has been shifted in the top/
    //    fractional part and finally getting rid of the implicit leading one
    //    from the mantissa by subtracting it out.
    const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
    const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
    const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
    const uint32x4_t two_n =
        vandq_u32(vreinterpretq_u32_f32(a), vec_float_exponent_mask);
    const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
    const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
    const float32x4_t n =
        vsubq_f32(vreinterpretq_f32_u32(n_0),
                  vreinterpretq_f32_u32(vec_implicit_leading_one));

    // Compute y.
    // y is obtained by keeping the mantissa of a and forcing its biased
    // exponent to zero (i.e. the value 1.0), yielding a float in [1.0, 2.0).
    const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF);
    const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000);
    const uint32x4_t mantissa =
        vandq_u32(vreinterpretq_u32_f32(a), vec_mantissa_mask);
    const float32x4_t y =
        vreinterpretq_f32_u32(vorrq_u32(mantissa,
                                        vec_zero_biased_exponent_is_one));

    // Approximate log2(y) ~= (y - 1) * pol5(y).
    //    pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
    // Evaluated below with Horner's scheme via fused multiply-add.
    const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f);
    const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f);
    const float32x4_t C3 = vdupq_n_f32(-1.2315303f);
    const float32x4_t C2 = vdupq_n_f32(2.5988452f);
    const float32x4_t C1 = vdupq_n_f32(-3.3241990f);
    const float32x4_t C0 = vdupq_n_f32(3.1157899f);
    float32x4_t pol5_y = C5;
    pol5_y = vmlaq_f32(C4, y, pol5_y);
    pol5_y = vmlaq_f32(C3, y, pol5_y);
    pol5_y = vmlaq_f32(C2, y, pol5_y);
    pol5_y = vmlaq_f32(C1, y, pol5_y);
    pol5_y = vmlaq_f32(C0, y, pol5_y);
    const float32x4_t y_minus_one =
        vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one));
    const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y);

    // Combine parts.
    log2_a = vaddq_f32(n, log2_y);
  }

  // b * log2(a)
  b_log2_a = vmulq_f32(b, log2_a);

  // Calculate exp2(x), x = b * log2(a).
  {
    // To calculate 2^x, we decompose x like this:
    //   x = n + y
    //     n is an integer, the value of x - 0.5 rounded down, therefore
    //     y is in the [0.5, 1.5) range
    //
    //   2^x = 2^n * 2^y
    //     2^n can be evaluated by playing with float representation.
    //     2^y in a small range can be approximated, this code uses an order
    //         two polynomial approximation. The coefficients have been
    //         estimated with the Remez algorithm and the resulting polynomial
    //         has a maximum relative error of 0.17%.
    // To avoid over/underflow, we reduce the range of input to ]-127, 129].
    const float32x4_t max_input = vdupq_n_f32(129.f);
    const float32x4_t min_input = vdupq_n_f32(-126.99999f);
    const float32x4_t x_min = vminq_f32(b_log2_a, max_input);
    const float32x4_t x_max = vmaxq_f32(x_min, min_input);
    // Compute n: truncation of x - 0.5 (rounds toward zero; inputs here are
    // already range-limited above).
    const float32x4_t half = vdupq_n_f32(0.5f);
    const float32x4_t x_minus_half = vsubq_f32(x_max, half);
    const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half);

    // Compute 2^n by building the float bit pattern directly: add the
    // exponent bias and shift into the exponent field.
    const int32x4_t float_exponent_bias = vdupq_n_s32(127);
    const int32x4_t two_n_exponent =
        vaddq_s32(x_minus_half_floor, float_exponent_bias);
    const float32x4_t two_n =
        vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift));
    // Compute y.
    const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor));

    // Approximate 2^y ~= C2 * y^2 + C1 * y + C0 (Horner's scheme).
    const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f);
    const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f);
    const float32x4_t C0 = vdupq_n_f32(1.0017247f);
    float32x4_t exp2_y = C2;
    exp2_y = vmlaq_f32(C1, y, exp2_y);
    exp2_y = vmlaq_f32(C0, y, exp2_y);

    // Combine parts.
    a_exp_b = vmulq_f32(exp2_y, two_n);
  }
  return a_exp_b;
}
// Applies echo suppression to the error spectrum |efw|:
//  1. Each suppression gain hNl[i] above the feedback level hNlFb is pulled
//     toward hNlFb using the per-bin weight curve.
//  2. The gain is raised to the power overDriveSm * overDriveCurve[i]
//     (overdrive), then written back to hNl.
//  3. efw is scaled by the resulting gain; the imaginary part is negated
//     (see the Ooura FFT sign note below).
static void OverdriveAndSuppressNEON(AecCore* aec,
                                     float hNl[PART_LEN1],
                                     const float hNlFb,
                                     float efw[2][PART_LEN1]) {
  int i;
  const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
  const float32x4_t vec_one = vdupq_n_f32(1.0f);
  const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
  const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);

  // vectorized code (four at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    // Weight subbands.
    // branchless select: hNl = bigger ? weightCurve * hNlFb +
    //                                   (1 - weightCurve) * hNl
    //                                 : hNl
    float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
    const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
    const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
    const float32x4_t vec_weightCurve_hNlFb =
        vmulq_f32(vec_weightCurve, vec_hNlFb);
    const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
    const float32x4_t vec_one_weightCurve_hNl =
        vmulq_f32(vec_one_weightCurve, vec_hNl);
    const uint32x4_t vec_if0 =
        vandq_u32(vmvnq_u32(bigger), vreinterpretq_u32_f32(vec_hNl));
    const float32x4_t vec_one_weightCurve_add =
        vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
    const uint32x4_t vec_if1 =
        vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));

    vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));

    {
      // Overdrive: hNl = hNl ^ (overDriveSm * overDriveCurve).
      const float32x4_t vec_overDriveCurve =
          vld1q_f32(&WebRtcAec_overDriveCurve[i]);
      const float32x4_t vec_overDriveSm_overDriveCurve =
          vmulq_f32(vec_overDriveSm, vec_overDriveCurve);
      vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
      vst1q_f32(&hNl[i], vec_hNl);
    }

    // Suppress error signal
    {
      float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
      float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
      vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
      vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);

      // Ooura fft returns incorrect sign on imaginary component. It matters
      // here because we are making an additive change with comfort noise.
      vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
      vst1q_f32(&efw[0][i], vec_efw_re);
      vst1q_f32(&efw[1][i], vec_efw_im);
    }
  }

  // scalar code for the remaining items (mirrors the vector path above).
  for (; i < PART_LEN1; i++) {
    // Weight subbands
    if (hNl[i] > hNlFb) {
      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
    }

    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);

    // Suppress error signal
    efw[0][i] *= hNl[i];
    efw[1][i] *= hNl[i];

    // Ooura fft returns incorrect sign on imaginary component. It matters
    // here because we are making an additive change with comfort noise.
    efw[1][i] *= -1;
  }
}
// Measures the energy in each filter partition of wfBuf and returns the
// index of the partition with the highest energy, used as the delay
// estimate for selecting the far-end spectrum in SubbandCoherenceNEON.
static int PartitionDelay(const AecCore* aec) {
  // Measures the energy in each filter partition and returns the partition
  // with highest energy.
  // TODO(bjornv): Spread computational cost by computing one partition per
  // block?
  float wfEnMax = 0;
  int i;
  int delay = 0;

  for (i = 0; i < aec->num_partitions; i++) {
    int j;
    int pos = i * PART_LEN1;
    float wfEn = 0;
    float32x4_t vec_wfEn = vdupq_n_f32(0.0f);
    // vectorized code (four at once): accumulate |wfBuf|^2 lane-wise.
    for (j = 0; j + 3 < PART_LEN1; j += 4) {
      const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
      const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
      vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
      vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
    }

    {
      // Horizontal add of the four partial sums via pairwise adds.
      float32x2_t vec_total;
      // A B C D
      vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn));
      // A+B C+D
      vec_total = vpadd_f32(vec_total, vec_total);
      // A+B+C+D A+B+C+D
      wfEn = vget_lane_f32(vec_total, 0);
    }

    // scalar code for the remaining items.
    for (; j < PART_LEN1; j++) {
      wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
              aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
    }

    if (wfEn > wfEnMax) {
      wfEnMax = wfEn;
      delay = i;
    }
  }
  return delay;
}
// Updates the following smoothed Power Spectral Densities (PSD):
// - sd : near-end
// - se : residual echo
// - sx : far-end
// - sde : cross-PSD of near-end and residual echo
// - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined, and corrective actions are taken based on it.
// Recursively smooths the PSD and cross-PSD estimates stored on |aec| with
// new data from the near-end (dfw), error (efw) and far-end (xfw) spectra,
// using the update s = gCoh[0] * s + gCoh[1] * |new|^2 (and the analogous
// complex products for the cross spectra). Also updates aec->divergeState
// and may overwrite efw with dfw / reset wfBuf when divergence is detected.
static void SmoothedPSD(AecCore* aec,
                        float efw[2][PART_LEN1],
                        float dfw[2][PART_LEN1],
                        float xfw[2][PART_LEN1]) {
  // Power estimate smoothing coefficients.
  const float* ptrGCoh = aec->extended_filter_enabled
      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
  int i;
  float sdSum = 0, seSum = 0;
  // Far-end PSD floor; guards the coherence computation against a zero
  // far-end signal.
  const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
  float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
  float32x4_t vec_seSum = vdupq_n_f32(0.0f);

  // vectorized code (four bins at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
    const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
    const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
    const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
    const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
    const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
    float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
    float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
    float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
    // Squared magnitudes |dfw|^2, |efw|^2, |xfw|^2.
    float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
    float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
    float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
    vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
    vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
    vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
    // Floor the far-end PSD (see vec_15 above).
    vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
    vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
    vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
    vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);

    vst1q_f32(&aec->sd[i], vec_sd);
    vst1q_f32(&aec->se[i], vec_se);
    vst1q_f32(&aec->sx[i], vec_sx);

    {
      // Cross-PSD of near-end and error: sde += gCoh[1] * (dfw * conj(efw)).
      // sde is stored interleaved (re, im), hence vld2q/vst2q.
      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
      float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
      float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
      vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
      vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
      vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
      vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
      vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
      vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
      vst2q_f32(&aec->sde[i][0], vec_sde);
    }

    {
      // Cross-PSD of near-end and far-end, same structure as sde above.
      float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
      float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
      float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
      vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
      vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
      vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
      vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
      vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
      vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
      vst2q_f32(&aec->sxd[i][0], vec_sxd);
    }

    vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
    vec_seSum = vaddq_f32(vec_seSum, vec_se);
  }

  {
    // Horizontal add of the lane-wise sd/se sums via pairwise adds.
    float32x2_t vec_sdSum_total;
    float32x2_t vec_seSum_total;
    // A B C D
    vec_sdSum_total =
        vpadd_f32(vget_low_f32(vec_sdSum), vget_high_f32(vec_sdSum));
    vec_seSum_total =
        vpadd_f32(vget_low_f32(vec_seSum), vget_high_f32(vec_seSum));
    // A+B C+D
    vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
    vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
    // A+B+C+D A+B+C+D
    sdSum = vget_lane_f32(vec_sdSum_total, 0);
    seSum = vget_lane_f32(vec_seSum_total, 0);
  }

  // scalar code for the remaining items (mirrors the vector path above).
  for (; i < PART_LEN1; i++) {
    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    aec->se[i] = ptrGCoh[0] * aec->se[i] +
                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO(bjornv): investigate further why this is so sensitive.
    aec->sx[i] =
        ptrGCoh[0] * aec->sx[i] +
        ptrGCoh[1] * WEBRTC_SPL_MAX(
            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
            WebRtcAec_kMinFarendPSD);

    aec->sde[i][0] =
        ptrGCoh[0] * aec->sde[i][0] +
        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    aec->sde[i][1] =
        ptrGCoh[0] * aec->sde[i][1] +
        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    aec->sxd[i][0] =
        ptrGCoh[0] * aec->sxd[i][0] +
        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    aec->sxd[i][1] =
        ptrGCoh[0] * aec->sxd[i][1] +
        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += aec->sd[i];
    seSum += aec->se[i];
  }

  // Divergent filter safeguard: hysteresis via the 1.05 factor when already
  // in the diverged state. When diverged, pass the near-end through as the
  // error signal.
  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;

  if (aec->divergeState)
    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);

  // Reset if error is significantly larger than nearend (13 dB).
  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
}
// Window time domain data to be used by the fft.
// Applies a square-root Hanning window to 2 * PART_LEN samples of |x|: the
// first half uses the window table forward, the second half uses it in
// reverse (the table is loaded backwards and lane-reversed). Assumes
// PART_LEN is a multiple of 4; there is no scalar tail loop.
__inline static void WindowData(float* x_windowed, const float* x) {
  int i;
  for (i = 0; i < PART_LEN; i += 4) {
    const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
    const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]);
    const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]);
    // Reverse the window lanes: load ending at index PART_LEN - i, then
    // swap within and across 64-bit halves.
    // A B C D
    float32x4_t vec_sqrtHanning_rev =
        vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
    // B A D C
    vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev);
    // D C B A
    vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev),
                                       vget_low_f32(vec_sqrtHanning_rev));
    vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning));
    vst1q_f32(&x_windowed[PART_LEN + i],
              vmulq_f32(vec_Buf2, vec_sqrtHanning_rev));
  }
}
// Puts fft output data into a complex valued array.
// De-interleaves the packed (re, im) pairs produced by aec_rdft_forward_128
// into split real/imaginary arrays, then fixes up the packed DC/Nyquist
// elements: data[0] is the DC real part and data[1] the Nyquist real part.
__inline static void StoreAsComplex(const float* data,
                                    float data_complex[2][PART_LEN1]) {
  int i;
  for (i = 0; i < PART_LEN; i += 4) {
    const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
    vst1q_f32(&data_complex[0][i], vec_data.val[0]);
    vst1q_f32(&data_complex[1][i], vec_data.val[1]);
  }
  // fix beginning/end values
  data_complex[1][0] = 0;
  data_complex[1][PART_LEN] = 0;
  data_complex[0][0] = data[0];
  data_complex[0][PART_LEN] = data[1];
}
// Computes the per-subband coherence between near-end and error (cohde) and
// between near-end and far-end (cohxd):
//   cohde[i] = |sde[i]|^2 / (sd[i] * se[i] + 1e-10)
//   cohxd[i] = |sxd[i]|^2 / (sx[i] * sd[i] + 1e-10)
// after refreshing the smoothed (cross-)PSDs via SmoothedPSD(). |fft| is
// scratch space for the windowed FFTs. |xfw| is filled with the delayed
// far-end spectrum selected by PartitionDelay().
// NOTE(review): vdivq_f32 is only available on AArch64 NEON; ARMv7 builds
// would need a reciprocal-estimate sequence instead -- confirm build targets.
static void SubbandCoherenceNEON(AecCore* aec,
                                 float efw[2][PART_LEN1],
                                 float xfw[2][PART_LEN1],
                                 float* fft,
                                 float* cohde,
                                 float* cohxd) {
  float dfw[2][PART_LEN1];
  int i;

  if (aec->delayEstCtr == 0)
    aec->delayIdx = PartitionDelay(aec);

  // Use delayed far.
  memcpy(xfw,
         aec->xfwBuf + aec->delayIdx * PART_LEN1,
         sizeof(xfw[0][0]) * 2 * PART_LEN1);

  // Windowed near fft
  WindowData(fft, aec->dBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, dfw);

  // Windowed error fft
  WindowData(fft, aec->eBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, efw);

  SmoothedPSD(aec, efw, dfw, xfw);

  {
    const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);

    // Subband coherence, four bins at a time; sde/sxd are interleaved
    // (re, im), hence vld2q.
    for (i = 0; i + 3 < PART_LEN1; i += 4) {
      const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
      const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
      const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
      const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
      const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
      float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
      float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
      float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
      float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
      vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
      vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
      vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
      vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);

      vst1q_f32(&cohde[i], vec_cohde);
      vst1q_f32(&cohxd[i], vec_cohxd);
    }
  }
  // scalar code for the remaining items.
  for (; i < PART_LEN1; i++) {
    cohde[i] =
        (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
        (aec->sd[i] * aec->se[i] + 1e-10f);
    cohxd[i] =
        (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
        (aec->sx[i] * aec->sd[i] + 1e-10f);
  }
}
// Installs the NEON implementations of the speed-critical AEC kernels by
// overriding the generic function pointers.
void WebRtcAec_InitAec_neon(void) {
  WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
  WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
  WebRtcAec_FilterFar = FilterFarNEON;
}

View File

@ -1,731 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The core AEC algorithm, SSE2 version of speed-critical functions.
*/
#include <emmintrin.h>
#include <math.h>
#include <string.h> // memset
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
// Returns the real part of the complex product (aRe + i*aIm) * (bRe + i*bIm).
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
  const float real_part = aRe * bRe - aIm * bIm;
  return real_part;
}
// Returns the imaginary part of the complex product
// (aRe + i*aIm) * (bRe + i*bIm).
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
  const float imag_part = aRe * bIm + aIm * bRe;
  return imag_part;
}
// Accumulates the frequency-domain adaptive-filter output into |yf|: for
// every partition, the element-wise complex product of the far-end spectrum
// (xfBuf) and the filter coefficients (wfBuf) is added to yf.
static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
  int i;
  const int num_partitions = aec->num_partitions;
  for (i = 0; i < num_partitions; i++) {
    int j;
    // xfBuf is a circular buffer of partitions: map the partition index to a
    // buffer position and wrap when it runs off the end.
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    const int pos = i * PART_LEN1;
    if (i + aec->xfBufBlockPos >= num_partitions) {
      xPos -= num_partitions * (PART_LEN1);
    }

    // Four complex bins per iteration.
    for (j = 0; j + 3 < PART_LEN1; j += 4) {
      const __m128 x_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
      const __m128 x_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
      const __m128 w_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
      const __m128 w_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
      const __m128 y_re = _mm_loadu_ps(&yf[0][j]);
      const __m128 y_im = _mm_loadu_ps(&yf[1][j]);
      // Complex multiply: re = xr*wr - xi*wi, im = xr*wi + xi*wr.
      const __m128 mul_rr = _mm_mul_ps(x_re, w_re);
      const __m128 mul_ii = _mm_mul_ps(x_im, w_im);
      const __m128 mul_ri = _mm_mul_ps(x_re, w_im);
      const __m128 mul_ir = _mm_mul_ps(x_im, w_re);
      const __m128 prod_re = _mm_sub_ps(mul_rr, mul_ii);
      const __m128 prod_im = _mm_add_ps(mul_ri, mul_ir);
      const __m128 sum_re = _mm_add_ps(y_re, prod_re);
      const __m128 sum_im = _mm_add_ps(y_im, prod_im);
      _mm_storeu_ps(&yf[0][j], sum_re);
      _mm_storeu_ps(&yf[1][j], sum_im);
    }
    // Scalar code for the remaining bins.
    for (; j < PART_LEN1; j++) {
      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
                        aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j],
                        aec->wfBuf[1][pos + j]);
      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
                        aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j],
                        aec->wfBuf[1][pos + j]);
    }
  }
}
// NLMS error scaling: normalizes the error spectrum |ef| by the far-end
// power (xPow), clamps its magnitude to the error threshold, and applies the
// step size mu. The vector path implements the clamp branch-free with SSE
// compare/and/andnot masks; the scalar tail mirrors it with an explicit if.
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
  const __m128 k1e_10f = _mm_set1_ps(1e-10f);
  const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
                                                  : _mm_set1_ps(aec->normal_mu);
  const __m128 kThresh = aec->extended_filter_enabled
                             ? _mm_set1_ps(kExtendedErrorThreshold)
                             : _mm_set1_ps(aec->normal_error_threshold);

  int i;
  // vectorized code (four at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    // Normalize by far-end power (plus a small constant against div-by-0).
    const __m128 xPow = _mm_loadu_ps(&aec->xPow[i]);
    const __m128 ef_re_base = _mm_loadu_ps(&ef[0][i]);
    const __m128 ef_im_base = _mm_loadu_ps(&ef[1][i]);

    const __m128 xPowPlus = _mm_add_ps(xPow, k1e_10f);
    __m128 ef_re = _mm_div_ps(ef_re_base, xPowPlus);
    __m128 ef_im = _mm_div_ps(ef_im_base, xPowPlus);
    // |ef| per bin.
    const __m128 ef_re2 = _mm_mul_ps(ef_re, ef_re);
    const __m128 ef_im2 = _mm_mul_ps(ef_im, ef_im);
    const __m128 ef_sum2 = _mm_add_ps(ef_re2, ef_im2);
    const __m128 absEf = _mm_sqrt_ps(ef_sum2);
    // Branch-free select: where |ef| > thresh, rescale ef to magnitude
    // thresh; elsewhere keep ef.
    const __m128 bigger = _mm_cmpgt_ps(absEf, kThresh);
    __m128 absEfPlus = _mm_add_ps(absEf, k1e_10f);
    const __m128 absEfInv = _mm_div_ps(kThresh, absEfPlus);
    __m128 ef_re_if = _mm_mul_ps(ef_re, absEfInv);
    __m128 ef_im_if = _mm_mul_ps(ef_im, absEfInv);

    ef_re_if = _mm_and_ps(bigger, ef_re_if);
    ef_im_if = _mm_and_ps(bigger, ef_im_if);
    ef_re = _mm_andnot_ps(bigger, ef_re);
    ef_im = _mm_andnot_ps(bigger, ef_im);
    ef_re = _mm_or_ps(ef_re, ef_re_if);
    ef_im = _mm_or_ps(ef_im, ef_im_if);
    // Apply step size.
    ef_re = _mm_mul_ps(ef_re, kMu);
    ef_im = _mm_mul_ps(ef_im, kMu);

    _mm_storeu_ps(&ef[0][i], ef_re);
    _mm_storeu_ps(&ef[1][i], ef_im);
  }
  // scalar code for the remaining items.
  {
    const float mu =
        aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
    const float error_threshold = aec->extended_filter_enabled
                                      ? kExtendedErrorThreshold
                                      : aec->normal_error_threshold;
    for (; i < (PART_LEN1); i++) {
      float abs_ef;
      ef[0][i] /= (aec->xPow[i] + 1e-10f);
      ef[1][i] /= (aec->xPow[i] + 1e-10f);
      abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);

      if (abs_ef > error_threshold) {
        abs_ef = error_threshold / (abs_ef + 1e-10f);
        ef[0][i] *= abs_ef;
        ef[1][i] *= abs_ef;
      }

      // Stepsize factor
      ef[0][i] *= mu;
      ef[1][i] *= mu;
    }
  }
}
// Adapts the filter coefficients wfBuf: for each partition, forms the
// gradient conjugate(xfBuf) * ef, takes it to the time domain
// (aec_rdft_inverse_128), zeroes the second half of the impulse response,
// scales by 2 / PART_LEN2, transforms back (aec_rdft_forward_128) and adds
// the result to wfBuf. |fft| is a scratch buffer of 2 * PART_LEN floats.
static void FilterAdaptationSSE2(AecCore* aec,
                                 float* fft,
                                 float ef[2][PART_LEN1]) {
  int i, j;
  const int num_partitions = aec->num_partitions;
  for (i = 0; i < num_partitions; i++) {
    // Map the partition index into the circular far-end buffer, wrapping
    // when needed.
    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= num_partitions) {
      xPos -= num_partitions * PART_LEN1;
    }

    // Process the whole array...
    for (j = 0; j < PART_LEN; j += 4) {
      // Load xfBuf and ef.
      const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
      const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
      const __m128 ef_re = _mm_loadu_ps(&ef[0][j]);
      const __m128 ef_im = _mm_loadu_ps(&ef[1][j]);
      // Calculate the product of conjugate(xfBuf) by ef.
      //   re(conjugate(a) * b) = aRe * bRe + aIm * bIm
      //   im(conjugate(a) * b)=  aRe * bIm - aIm * bRe
      const __m128 a = _mm_mul_ps(xfBuf_re, ef_re);
      const __m128 b = _mm_mul_ps(xfBuf_im, ef_im);
      const __m128 c = _mm_mul_ps(xfBuf_re, ef_im);
      const __m128 d = _mm_mul_ps(xfBuf_im, ef_re);
      const __m128 e = _mm_add_ps(a, b);
      const __m128 f = _mm_sub_ps(c, d);
      // Interleave real and imaginary parts (the rdft routines expect
      // packed (re, im) pairs).
      const __m128 g = _mm_unpacklo_ps(e, f);
      const __m128 h = _mm_unpackhi_ps(e, f);
      // Store
      _mm_storeu_ps(&fft[2 * j + 0], g);
      _mm_storeu_ps(&fft[2 * j + 4], h);
    }
    // ... and fixup the first imaginary entry (fft[1] packs the real part
    // of the Nyquist bin).
    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
                   -aec->xfBuf[1][xPos + PART_LEN],
                   ef[0][PART_LEN],
                   ef[1][PART_LEN]);

    aec_rdft_inverse_128(fft);
    // Constrain the impulse response: zero its second half.
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling
    {
      float scale = 2.0f / PART_LEN2;
      const __m128 scale_ps = _mm_load_ps1(&scale);
      for (j = 0; j < PART_LEN; j += 4) {
        const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
        const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
        _mm_storeu_ps(&fft[j], fft_scale);
      }
    }
    aec_rdft_forward_128(fft);

    {
      // Save wfBuf[1][pos] before the loop: the j == 0 iteration writes
      // fft[1] (the packed Nyquist value) into it, so it is restored below
      // while the Nyquist contribution is added separately here.
      float wt1 = aec->wfBuf[1][pos];
      aec->wfBuf[0][pos + PART_LEN] += fft[1];
      for (j = 0; j < PART_LEN; j += 4) {
        __m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
        __m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
        const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
        const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
        // De-interleave (re, im) pairs back into split arrays.
        const __m128 fft_re =
            _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
        const __m128 fft_im =
            _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
        wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
        wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
        _mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
        _mm_storeu_ps(&aec->wfBuf[1][pos + j], wtBuf_im);
      }
      aec->wfBuf[1][pos] = wt1;
    }
  }
}
// Vectorized approximation of powf(): computes a^b independently in each of
// the four lanes. Only meaningful for positive, normal values of a (the
// bit-level log2 decomposition below assumes a positive exponent/mantissa
// encoding). Used by OverdriveAndSuppressSSE2 for the overdrive curve.
static __m128 mm_pow_ps(__m128 a, __m128 b) {
  // a^b = exp2(b * log2(a))
  //   exp2(x) and log2(x) are calculated using polynomial approximations.
  __m128 log2_a, b_log2_a, a_exp_b;

  // Calculate log2(x), x = a.
  {
    // To calculate log2(x), we decompose x like this:
    //   x = y * 2^n
    //     n is an integer
    //     y is in the [1.0, 2.0) range
    //
    //   log2(x) = log2(y) + n
    //     n       can be evaluated by playing with float representation.
    //     log2(y) in a small range can be approximated, this code uses an
    //             order five polynomial approximation. The coefficients have
    //             been estimated with the Remez algorithm and the resulting
    //             polynomial has a maximum relative error of 0.00086%.

    // Compute n.
    //    This is done by masking the exponent, shifting it into the top bit
    //    of the mantissa, putting eight into the biased exponent (to shift/
    //    compensate the fact that the exponent has been shifted in the top/
    //    fractional part and finally getting rid of the implicit leading one
    //    from the mantissa by subtracting it out.
    static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {
        0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
    static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {
        0x43800000, 0x43800000, 0x43800000, 0x43800000};
    static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {
        0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
    static const int shift_exponent_into_top_mantissa = 8;
    const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask));
    const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(
        _mm_castps_si128(two_n), shift_exponent_into_top_mantissa));
    const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent));
    const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one));

    // Compute y: keep the mantissa of a and force its biased exponent to
    // zero, yielding a float in [1.0, 2.0).
    static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {
        0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
    static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {
        0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
    const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask));
    const __m128 y =
        _mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one));

    // Approximate log2(y) ~= (y - 1) * pol5(y).
    //    pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
    // Evaluated below with Horner's scheme.
    static const ALIGN16_BEG float ALIGN16_END C5[4] = {
        -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
    static const ALIGN16_BEG float ALIGN16_END
        C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
    static const ALIGN16_BEG float ALIGN16_END
        C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
    static const ALIGN16_BEG float ALIGN16_END
        C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
    static const ALIGN16_BEG float ALIGN16_END
        C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
    static const ALIGN16_BEG float ALIGN16_END
        C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
    const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5));
    const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4));
    const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);
    const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3));
    const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);
    const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2));
    const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);
    const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1));
    const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);
    const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0));
    const __m128 y_minus_one =
        _mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one));
    const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);

    // Combine parts.
    log2_a = _mm_add_ps(n, log2_y);
  }

  // b * log2(a)
  b_log2_a = _mm_mul_ps(b, log2_a);

  // Calculate exp2(x), x = b * log2(a).
  {
    // To calculate 2^x, we decompose x like this:
    //   x = n + y
    //     n is an integer, the value of x - 0.5 rounded down, therefore
    //     y is in the [0.5, 1.5) range
    //
    //   2^x = 2^n * 2^y
    //     2^n can be evaluated by playing with float representation.
    //     2^y in a small range can be approximated, this code uses an order
    //         two polynomial approximation. The coefficients have been
    //         estimated with the Remez algorithm and the resulting
    //         polynomial has a maximum relative error of 0.17%.

    // To avoid over/underflow, we reduce the range of input to ]-127, 129].
    static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,
                                                               129.f, 129.f};
    static const ALIGN16_BEG float min_input[4] ALIGN16_END = {
        -126.99999f, -126.99999f, -126.99999f, -126.99999f};
    const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input));
    const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input));
    // Compute n: truncation of x - 0.5 (inputs are range-limited above).
    static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f,
                                                          0.5f, 0.5f};
    const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half));
    const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);
    // Compute 2^n by building the float bit pattern directly: add the
    // exponent bias and shift into the exponent field.
    static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {
        127, 127, 127, 127};
    static const int float_exponent_shift = 23;
    const __m128i two_n_exponent =
        _mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias));
    const __m128 two_n =
        _mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));
    // Compute y.
    const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));

    // Approximate 2^y ~= C2 * y^2 + C1 * y + C0 (Horner's scheme).
    static const ALIGN16_BEG float C2[4] ALIGN16_END = {
        3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
    static const ALIGN16_BEG float C1[4] ALIGN16_END = {
        6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
    static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,
                                                        1.0017247f, 1.0017247f};
    const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2));
    const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1));
    const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
    const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0));

    // Combine parts.
    a_exp_b = _mm_mul_ps(exp2_y, two_n);
  }
  return a_exp_b;
}
// Applies echo suppression to the error spectrum |efw| (SSE2 twin of the
// NEON version):
//  1. Each suppression gain hNl[i] above the feedback level hNlFb is pulled
//     toward hNlFb using the per-bin weight curve.
//  2. The gain is raised to the power overDriveSm * overDriveCurve[i]
//     (overdrive), then written back to hNl.
//  3. efw is scaled by the resulting gain; the imaginary part is negated
//     (see the Ooura FFT sign note below).
static void OverdriveAndSuppressSSE2(AecCore* aec,
                                     float hNl[PART_LEN1],
                                     const float hNlFb,
                                     float efw[2][PART_LEN1]) {
  int i;
  const __m128 vec_hNlFb = _mm_set1_ps(hNlFb);
  const __m128 vec_one = _mm_set1_ps(1.0f);
  const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
  const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
  // vectorized code (four at once)
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    // Weight subbands.
    // Branch-free select: hNl = bigger ? weightCurve * hNlFb +
    //                                    (1 - weightCurve) * hNl
    //                                  : hNl
    __m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
    const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
    const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
    const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
    const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
    const __m128 vec_one_weightCurve_hNl =
        _mm_mul_ps(vec_one_weightCurve, vec_hNl);
    const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
    const __m128 vec_if1 = _mm_and_ps(
        bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
    vec_hNl = _mm_or_ps(vec_if0, vec_if1);

    {
      // Overdrive: hNl = hNl ^ (overDriveSm * overDriveCurve).
      const __m128 vec_overDriveCurve =
          _mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
      const __m128 vec_overDriveSm_overDriveCurve =
          _mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
      vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
      _mm_storeu_ps(&hNl[i], vec_hNl);
    }

    // Suppress error signal
    {
      __m128 vec_efw_re = _mm_loadu_ps(&efw[0][i]);
      __m128 vec_efw_im = _mm_loadu_ps(&efw[1][i]);
      vec_efw_re = _mm_mul_ps(vec_efw_re, vec_hNl);
      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_hNl);

      // Ooura fft returns incorrect sign on imaginary component. It matters
      // here because we are making an additive change with comfort noise.
      vec_efw_im = _mm_mul_ps(vec_efw_im, vec_minus_one);
      _mm_storeu_ps(&efw[0][i], vec_efw_re);
      _mm_storeu_ps(&efw[1][i], vec_efw_im);
    }
  }
  // scalar code for the remaining items (mirrors the vector path above).
  for (; i < PART_LEN1; i++) {
    // Weight subbands
    if (hNl[i] > hNlFb) {
      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
    }

    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);

    // Suppress error signal
    efw[0][i] *= hNl[i];
    efw[1][i] *= hNl[i];

    // Ooura fft returns incorrect sign on imaginary component. It matters
    // here because we are making an additive change with comfort noise.
    efw[1][i] *= -1;
  }
}
// Horizontally adds the four float lanes of |sum| and stores the scalar
// result at |dst|.
__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
  // Fold the upper pair onto the lower pair: lanes become {A+C, B+D, x, x}.
  const __m128 folded_pairs =
      _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2)));
  // Add lane 1 into lane 0: lane 0 becomes (A+C)+(B+D).
  const __m128 total = _mm_add_ps(
      folded_pairs,
      _mm_shuffle_ps(folded_pairs, folded_pairs, _MM_SHUFFLE(1, 1, 1, 1)));
  _mm_store_ss(dst, total);
}
// Returns the index of the adaptive-filter partition holding the highest
// energy, where a partition's energy is the sum of squares of its real and
// imaginary filter coefficients in aec->wfBuf.
// TODO(bjornv): Spread computational cost by computing one partition per
// block?
static int PartitionDelay(const AecCore* aec) {
  int best_partition = 0;
  float best_energy = 0;
  int partition;
  for (partition = 0; partition < aec->num_partitions; partition++) {
    const int base = partition * PART_LEN1;
    float energy = 0;
    __m128 energy_v = _mm_set1_ps(0.0f);
    int bin;
    // Vectorized accumulation, four bins at a time.
    for (bin = 0; bin + 3 < PART_LEN1; bin += 4) {
      const __m128 re = _mm_loadu_ps(&aec->wfBuf[0][base + bin]);
      const __m128 im = _mm_loadu_ps(&aec->wfBuf[1][base + bin]);
      energy_v = _mm_add_ps(energy_v, _mm_mul_ps(re, re));
      energy_v = _mm_add_ps(energy_v, _mm_mul_ps(im, im));
    }
    // Horizontal sum of the four partial sums.
    _mm_add_ps_4x1(energy_v, &energy);
    // Scalar tail for the bins not covered by the vector loop.
    for (; bin < PART_LEN1; bin++) {
      energy += aec->wfBuf[0][base + bin] * aec->wfBuf[0][base + bin] +
                aec->wfBuf[1][base + bin] * aec->wfBuf[1][base + bin];
    }
    if (energy > best_energy) {
      best_energy = energy;
      best_partition = partition;
    }
  }
  return best_partition;
}
// Updates the following smoothed Power Spectral Densities (PSD):
//  - sd  : near-end
//  - se  : residual echo
//  - sx  : far-end
//  - sde : cross-PSD of near-end and residual echo
//  - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, also the filter diverge state is
// determined upon actions are taken.
static void SmoothedPSD(AecCore* aec,
                        float efw[2][PART_LEN1],
                        float dfw[2][PART_LEN1],
                        float xfw[2][PART_LEN1]) {
  // Power estimate smoothing coefficients: new PSD = GCoh0 * old + GCoh1 *
  // instantaneous power.
  const float* ptrGCoh = aec->extended_filter_enabled
      ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
      : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
  int i;
  float sdSum = 0, seSum = 0;
  // Far-end PSD floor broadcast to all four lanes (see the scalar tail loop
  // below for why the threshold exists).
  const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
  const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
  const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
  __m128 vec_sdSum = _mm_set1_ps(0.0f);
  __m128 vec_seSum = _mm_set1_ps(0.0f);

  // Vectorized loop: four frequency bins per iteration.
  for (i = 0; i + 3 < PART_LEN1; i += 4) {
    const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
    const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
    const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
    const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
    const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
    const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
    // Decay the previous smoothed auto-PSDs.
    __m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
    __m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
    __m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
    // Instantaneous powers |dfw|^2, |efw|^2, |xfw|^2.
    __m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
    __m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
    __m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
    vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
    vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
    vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
    // Floor the far-end power (vectorized counterpart of the scalar
    // WEBRTC_SPL_MAX below).
    vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
    vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
    vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
    vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
    _mm_storeu_ps(&aec->sd[i], vec_sd);
    _mm_storeu_ps(&aec->se[i], vec_se);
    _mm_storeu_ps(&aec->sx[i], vec_sx);

    {
      // aec->sde stores interleaved (re, im) pairs.  Gather the real parts
      // into vec_a and the imaginary parts into vec_b, update both, then
      // re-interleave on store.
      const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
      const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
      __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
                                    _MM_SHUFFLE(2, 0, 2, 0));
      __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
                                    _MM_SHUFFLE(3, 1, 3, 1));
      // Complex product dfw * conj(efw): re and im accumulators.
      __m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
      __m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1);
      vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
      vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
      vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011,
                                  _mm_mul_ps(vec_dfw1, vec_efw1));
      vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110,
                                  _mm_mul_ps(vec_dfw1, vec_efw0));
      vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
      vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
      _mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
      _mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
    }

    {
      // Same de-interleave/update/re-interleave scheme for the near-end /
      // far-end cross-PSD aec->sxd.
      const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
      const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
      __m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
                                    _MM_SHUFFLE(2, 0, 2, 0));
      __m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
                                    _MM_SHUFFLE(3, 1, 3, 1));
      __m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0);
      __m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1);
      vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
      vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
      vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011,
                                  _mm_mul_ps(vec_dfw1, vec_xfw1));
      vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110,
                                  _mm_mul_ps(vec_dfw1, vec_xfw0));
      vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
      vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
      _mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
      _mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
    }

    vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
    vec_seSum = _mm_add_ps(vec_seSum, vec_se);
  }

  _mm_add_ps_4x1(vec_sdSum, &sdSum);
  _mm_add_ps_4x1(vec_seSum, &seSum);

  // Scalar tail for the remaining bins.
  for (; i < PART_LEN1; i++) {
    aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
                 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    aec->se[i] = ptrGCoh[0] * aec->se[i] +
                 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero farend.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO(bjornv): investigate further why this is so sensitive.
    aec->sx[i] =
        ptrGCoh[0] * aec->sx[i] +
        ptrGCoh[1] * WEBRTC_SPL_MAX(
            xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
            WebRtcAec_kMinFarendPSD);
    aec->sde[i][0] =
        ptrGCoh[0] * aec->sde[i][0] +
        ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    aec->sde[i][1] =
        ptrGCoh[0] * aec->sde[i][1] +
        ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
    aec->sxd[i][0] =
        ptrGCoh[0] * aec->sxd[i][0] +
        ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    aec->sxd[i][1] =
        ptrGCoh[0] * aec->sxd[i][1] +
        ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
    sdSum += aec->sd[i];
    seSum += aec->se[i];
  }

  // Divergent filter safeguard: if the (hysteresis-weighted) error energy
  // exceeds the near-end energy, fall back to using the near-end spectrum as
  // the error spectrum.
  aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
  if (aec->divergeState)
    memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);

  // Reset if error is significantly larger than nearend (13 dB).
  if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
}
// Window time domain data to be used by the fft.
__inline static void WindowData(float* x_windowed, const float* x) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]);
const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]);
// A B C D
__m128 vec_sqrtHanning_rev =
_mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
// D C B A
vec_sqrtHanning_rev =
_mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev,
_MM_SHUFFLE(0, 1, 2, 3));
_mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning));
_mm_storeu_ps(&x_windowed[PART_LEN + i],
_mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev));
}
}
// De-interleaves the packed real-FFT output |data| (re0, im0, re1, im1, ...)
// into separate real and imaginary arrays, then fixes up the edge bins:
// Ooura's rdft packs the purely-real DC and Nyquist values into data[0] and
// data[1].
__inline static void StoreAsComplex(const float* data,
                                    float data_complex[2][PART_LEN1]) {
  int k;
  for (k = 0; k < PART_LEN; k += 4) {
    const __m128 lo = _mm_loadu_ps(&data[2 * k]);
    const __m128 hi = _mm_loadu_ps(&data[2 * k + 4]);
    // Even lanes are real parts, odd lanes are imaginary parts.
    const __m128 re = _mm_shuffle_ps(lo, hi, _MM_SHUFFLE(2, 0, 2, 0));
    const __m128 im = _mm_shuffle_ps(lo, hi, _MM_SHUFFLE(3, 1, 3, 1));
    _mm_storeu_ps(&data_complex[0][k], re);
    _mm_storeu_ps(&data_complex[1][k], im);
  }
  // DC and Nyquist bins are purely real.
  data_complex[1][0] = 0;
  data_complex[1][PART_LEN] = 0;
  data_complex[0][0] = data[0];
  data_complex[0][PART_LEN] = data[1];
}
// Computes, per frequency bin, the magnitude-squared coherence between the
// near-end and the error signal (cohde) and between the near-end and the
// far-end signal (cohxd):
//   cohde[i] = |sde[i]|^2 / (sd[i] * se[i] + 1e-10)
//   cohxd[i] = |sxd[i]|^2 / (sx[i] * sd[i] + 1e-10)
// As side effects it refreshes the smoothed PSDs, fills efw with the
// windowed error spectrum, overwrites xfw with the delay-aligned far-end
// spectrum, and uses |fft| as scratch space.
static void SubbandCoherenceSSE2(AecCore* aec,
                                 float efw[2][PART_LEN1],
                                 float xfw[2][PART_LEN1],
                                 float* fft,
                                 float* cohde,
                                 float* cohxd) {
  float dfw[2][PART_LEN1];
  int i;

  // Refresh the filter-partition delay estimate only when the countdown has
  // expired.
  if (aec->delayEstCtr == 0)
    aec->delayIdx = PartitionDelay(aec);

  // Use delayed far.
  memcpy(xfw,
         aec->xfwBuf + aec->delayIdx * PART_LEN1,
         sizeof(xfw[0][0]) * 2 * PART_LEN1);

  // Windowed near fft
  WindowData(fft, aec->dBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, dfw);

  // Windowed error fft
  WindowData(fft, aec->eBuf);
  aec_rdft_forward_128(fft);
  StoreAsComplex(fft, efw);

  SmoothedPSD(aec, efw, dfw, xfw);

  {
    // Regularizer keeping the divisions well-defined when a PSD is zero.
    const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f);

    // Subband coherence
    for (i = 0; i + 3 < PART_LEN1; i += 4) {
      const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
      const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
      const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
      const __m128 vec_sdse = _mm_add_ps(vec_1eminus10,
                                         _mm_mul_ps(vec_sd, vec_se));
      const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10,
                                         _mm_mul_ps(vec_sd, vec_sx));
      // sde/sxd store interleaved (re, im) pairs; split into real (*_0) and
      // imaginary (*_1) vectors.
      const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);
      const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
      const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
      const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
      const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
                                              _MM_SHUFFLE(2, 0, 2, 0));
      const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
                                              _MM_SHUFFLE(3, 1, 3, 1));
      const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
                                              _MM_SHUFFLE(2, 0, 2, 0));
      const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
                                              _MM_SHUFFLE(3, 1, 3, 1));
      // |sde|^2 / (sd*se + eps) and |sxd|^2 / (sd*sx + eps).
      __m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0);
      __m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0);
      vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1));
      vec_cohde = _mm_div_ps(vec_cohde, vec_sdse);
      vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1));
      vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx);
      _mm_storeu_ps(&cohde[i], vec_cohde);
      _mm_storeu_ps(&cohxd[i], vec_cohxd);
    }

    // scalar code for the remaining items.
    for (; i < PART_LEN1; i++) {
      cohde[i] =
          (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
          (aec->sd[i] * aec->se[i] + 1e-10f);
      cohxd[i] =
          (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
          (aec->sx[i] * aec->sd[i] + 1e-10f);
    }
  }
}
// Installs the SSE2 implementations of the AEC's speed-critical kernels
// into the global dispatch pointers.  The assignments are independent, so
// their order does not matter.
void WebRtcAec_InitAec_SSE2(void) {
  WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
  WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
  WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
  WebRtcAec_FilterFar = FilterFarSSE2;
}

View File

@ -1,589 +0,0 @@
/*
* http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
* Copyright Takuya OOURA, 1996-2001
*
* You may use, copy, modify and distribute this code for any purpose (include
* commercial use) and without fee. Please refer to this package when you modify
* this code.
*
* Changes by the WebRTC authors:
* - Trivial type modifications.
* - Minimal code subset to do rdft of length 128.
* - Optimizations because of known length.
*
* All changes are covered by the WebRTC license and IP grant:
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include <math.h>
#include "webrtc/system_wrappers/include/cpu_features_wrapper.h"
#include "webrtc/typedefs.h"
// These tables used to be computed at run-time. For example, refer to:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
// to see the initialization code.

// Twiddle factors shared by all code paths (C, SSE2, NEON).
const float rdft_w[64] = {
    1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
    0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
    0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
    0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
    0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
    0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
    0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
    0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
    0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
    0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
    0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
    0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
    0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
    0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
    0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
    0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};
// Third-twiddle (wk3) tables consumed by the scalar C butterflies.
const float rdft_wk3ri_first[16] = {
    1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
    0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
    0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
    0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
};
const float rdft_wk3ri_second[16] = {
    -0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
    -0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
    -0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
    -0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
};
// Duplicated (per-lane) twiddle tables for the SIMD (SSE2/NEON) paths; 16-byte
// aligned so they can be loaded with aligned vector loads.
ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
    1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
    0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
    0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
    0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
    0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
    0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
    0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
    0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
    1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
    0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
    0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
    0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
    0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
    0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
    0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
    1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
    0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
    0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
    -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
    0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
    0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
    0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
    -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
    -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
    -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
    -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
    -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
    -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
    -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
    -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
    -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
    -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
    -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
    -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
    -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
    -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
    -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
    -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
    -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
    -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
    -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
    -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
    -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
    -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
    -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
};
// Single constant (cos(pi/4), with one negated lane) used by the SIMD
// cftmdl kernels.
ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
    0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
};
// In-place bit-reversal permutation of the 64 complex (128 float) values in
// |a|, performed before/after the butterfly stages.  Each inner-loop
// iteration swaps four complex pairs; the offsets are hand-unrolled.
static void bitrv2_128_C(float* a) {
  /*
      Following things have been attempted but are no faster:
      (a) Storing the swap indexes in a LUT (index calculations are done
          for 'free' while waiting on memory/L1).
      (b) Consolidate the load/store of two consecutive floats by a 64 bit
          integer (execution is memory/L1 bound).
      (c) Do a mix of floats and 64 bit integer to maximize register
          utilization (execution is memory/L1 bound).
      (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
      (e) Hard-coding of the offsets to completely eliminates index
          calculations.
  */
  unsigned int j, j1, k, k1;
  float xr, xi, yr, yi;
  // Base offsets of the four bit-reversal groups.
  static const int ip[4] = {0, 64, 32, 96};

  for (k = 0; k < 4; k++) {
    for (j = 0; j < k; j++) {
      j1 = 2 * j + ip[k];
      k1 = 2 * k + ip[j];
      // Swap complex pair a[j1] <-> a[k1] (real, imaginary).
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 += 16;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 -= 8;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
      j1 += 8;
      k1 += 16;
      xr = a[j1 + 0];
      xi = a[j1 + 1];
      yr = a[k1 + 0];
      yi = a[k1 + 1];
      a[j1 + 0] = yr;
      a[j1 + 1] = yi;
      a[k1 + 0] = xr;
      a[k1 + 1] = xi;
    }
    // One extra swap per group that the inner loop does not cover.
    j1 = 2 * k + 8 + ip[k];
    k1 = j1 + 8;
    xr = a[j1 + 0];
    xi = a[j1 + 1];
    yr = a[k1 + 0];
    yi = a[k1 + 1];
    a[j1 + 0] = yr;
    a[j1 + 1] = yi;
    a[k1 + 0] = xr;
    a[k1 + 1] = xi;
  }
}
// First radix-4 butterfly stage of the 128-float (64 complex) FFT.  Values
// are interleaved (re, im) pairs; each group of 8 complex values is combined
// with twiddle factors wk1/wk2/wk3 taken from rdft_w and rdft_wk3ri_*.
static void cft1st_128_C(float* a) {
  const int n = 128;
  int j, k1, k2;
  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  // The processing of the first set of elements was simplified in C to avoid
  // some operations (multiplication by zero or one, addition of two elements
  // multiplied by the same weight, ...).
  x0r = a[0] + a[2];
  x0i = a[1] + a[3];
  x1r = a[0] - a[2];
  x1i = a[1] - a[3];
  x2r = a[4] + a[6];
  x2i = a[5] + a[7];
  x3r = a[4] - a[6];
  x3i = a[5] - a[7];
  a[0] = x0r + x2r;
  a[1] = x0i + x2i;
  a[4] = x0r - x2r;
  a[5] = x0i - x2i;
  a[2] = x1r - x3i;
  a[3] = x1i + x3r;
  a[6] = x1r + x3i;
  a[7] = x1i - x3r;
  // Second group: the only non-trivial twiddle is wk1r = cos(pi/4).
  wk1r = rdft_w[2];
  x0r = a[8] + a[10];
  x0i = a[9] + a[11];
  x1r = a[8] - a[10];
  x1i = a[9] - a[11];
  x2r = a[12] + a[14];
  x2i = a[13] + a[15];
  x3r = a[12] - a[14];
  x3i = a[13] - a[15];
  a[8] = x0r + x2r;
  a[9] = x0i + x2i;
  a[12] = x2i - x0i;
  a[13] = x0r - x2r;
  x0r = x1r - x3i;
  x0i = x1i + x3r;
  a[10] = wk1r * (x0r - x0i);
  a[11] = wk1r * (x0r + x0i);
  x0r = x3i + x1r;
  x0i = x3r - x1i;
  a[14] = wk1r * (x0i - x0r);
  a[15] = wk1r * (x0i + x0r);
  // Remaining groups: full complex twiddles.
  k1 = 0;
  for (j = 16; j < n; j += 16) {
    k1 += 2;
    k2 = 2 * k1;
    wk2r = rdft_w[k1 + 0];
    wk2i = rdft_w[k1 + 1];
    wk1r = rdft_w[k2 + 0];
    wk1i = rdft_w[k2 + 1];
    wk3r = rdft_wk3ri_first[k1 + 0];
    wk3i = rdft_wk3ri_first[k1 + 1];
    x0r = a[j + 0] + a[j + 2];
    x0i = a[j + 1] + a[j + 3];
    x1r = a[j + 0] - a[j + 2];
    x1i = a[j + 1] - a[j + 3];
    x2r = a[j + 4] + a[j + 6];
    x2i = a[j + 5] + a[j + 7];
    x3r = a[j + 4] - a[j + 6];
    x3i = a[j + 5] - a[j + 7];
    a[j + 0] = x0r + x2r;
    a[j + 1] = x0i + x2i;
    x0r -= x2r;
    x0i -= x2i;
    a[j + 4] = wk2r * x0r - wk2i * x0i;
    a[j + 5] = wk2r * x0i + wk2i * x0r;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j + 2] = wk1r * x0r - wk1i * x0i;
    a[j + 3] = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j + 6] = wk3r * x0r - wk3i * x0i;
    a[j + 7] = wk3r * x0i + wk3i * x0r;
    // Second half of the group uses the "second" twiddle set.
    wk1r = rdft_w[k2 + 2];
    wk1i = rdft_w[k2 + 3];
    wk3r = rdft_wk3ri_second[k1 + 0];
    wk3i = rdft_wk3ri_second[k1 + 1];
    x0r = a[j + 8] + a[j + 10];
    x0i = a[j + 9] + a[j + 11];
    x1r = a[j + 8] - a[j + 10];
    x1i = a[j + 9] - a[j + 11];
    x2r = a[j + 12] + a[j + 14];
    x2i = a[j + 13] + a[j + 15];
    x3r = a[j + 12] - a[j + 14];
    x3i = a[j + 13] - a[j + 15];
    a[j + 8] = x0r + x2r;
    a[j + 9] = x0i + x2i;
    x0r -= x2r;
    x0i -= x2i;
    a[j + 12] = -wk2i * x0r - wk2r * x0i;
    a[j + 13] = -wk2i * x0i + wk2r * x0r;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j + 10] = wk1r * x0r - wk1i * x0i;
    a[j + 11] = wk1r * x0i + wk1i * x0r;
    x0r = x1r + x3i;
    x0i = x1i - x3r;
    a[j + 14] = wk3r * x0r - wk3i * x0i;
    a[j + 15] = wk3r * x0i + wk3i * x0r;
  }
}
// Middle radix-4 butterfly stage of the 128-float FFT: combines interleaved
// (re, im) values that are 8 floats apart within sub-blocks of 32 floats.
// The first two sub-blocks use fixed twiddles; the remaining ones use
// twiddle factors from rdft_w / rdft_wk3ri_*.
static void cftmdl_128_C(float* a) {
  const int l = 8;
  const int n = 128;
  const int m = 32;
  int j0, j1, j2, j3, k, k1, k2, m2;
  float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  // First sub-block: trivial twiddles (all ones).
  for (j0 = 0; j0 < l; j0 += 2) {
    j1 = j0 + 8;
    j2 = j0 + 16;
    j3 = j0 + 24;
    x0r = a[j0 + 0] + a[j1 + 0];
    x0i = a[j0 + 1] + a[j1 + 1];
    x1r = a[j0 + 0] - a[j1 + 0];
    x1i = a[j0 + 1] - a[j1 + 1];
    x2r = a[j2 + 0] + a[j3 + 0];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2 + 0] - a[j3 + 0];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j0 + 0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j2 + 0] = x0r - x2r;
    a[j2 + 1] = x0i - x2i;
    a[j1 + 0] = x1r - x3i;
    a[j1 + 1] = x1i + x3r;
    a[j3 + 0] = x1r + x3i;
    a[j3 + 1] = x1i - x3r;
  }
  // Second sub-block: the only non-trivial twiddle is wk1r = cos(pi/4).
  wk1r = rdft_w[2];
  for (j0 = m; j0 < l + m; j0 += 2) {
    j1 = j0 + 8;
    j2 = j0 + 16;
    j3 = j0 + 24;
    x0r = a[j0 + 0] + a[j1 + 0];
    x0i = a[j0 + 1] + a[j1 + 1];
    x1r = a[j0 + 0] - a[j1 + 0];
    x1i = a[j0 + 1] - a[j1 + 1];
    x2r = a[j2 + 0] + a[j3 + 0];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2 + 0] - a[j3 + 0];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j0 + 0] = x0r + x2r;
    a[j0 + 1] = x0i + x2i;
    a[j2 + 0] = x2i - x0i;
    a[j2 + 1] = x0r - x2r;
    x0r = x1r - x3i;
    x0i = x1i + x3r;
    a[j1 + 0] = wk1r * (x0r - x0i);
    a[j1 + 1] = wk1r * (x0r + x0i);
    x0r = x3i + x1r;
    x0i = x3r - x1i;
    a[j3 + 0] = wk1r * (x0i - x0r);
    a[j3 + 1] = wk1r * (x0i + x0r);
  }
  // Remaining sub-blocks: full complex twiddles, two twiddle sets per
  // iteration (offsets k and k + m).
  k1 = 0;
  m2 = 2 * m;
  for (k = m2; k < n; k += m2) {
    k1 += 2;
    k2 = 2 * k1;
    wk2r = rdft_w[k1 + 0];
    wk2i = rdft_w[k1 + 1];
    wk1r = rdft_w[k2 + 0];
    wk1i = rdft_w[k2 + 1];
    wk3r = rdft_wk3ri_first[k1 + 0];
    wk3i = rdft_wk3ri_first[k1 + 1];
    for (j0 = k; j0 < l + k; j0 += 2) {
      j1 = j0 + 8;
      j2 = j0 + 16;
      j3 = j0 + 24;
      x0r = a[j0 + 0] + a[j1 + 0];
      x0i = a[j0 + 1] + a[j1 + 1];
      x1r = a[j0 + 0] - a[j1 + 0];
      x1i = a[j0 + 1] - a[j1 + 1];
      x2r = a[j2 + 0] + a[j3 + 0];
      x2i = a[j2 + 1] + a[j3 + 1];
      x3r = a[j2 + 0] - a[j3 + 0];
      x3i = a[j2 + 1] - a[j3 + 1];
      a[j0 + 0] = x0r + x2r;
      a[j0 + 1] = x0i + x2i;
      x0r -= x2r;
      x0i -= x2i;
      a[j2 + 0] = wk2r * x0r - wk2i * x0i;
      a[j2 + 1] = wk2r * x0i + wk2i * x0r;
      x0r = x1r - x3i;
      x0i = x1i + x3r;
      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
      x0r = x1r + x3i;
      x0i = x1i - x3r;
      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    }
    wk1r = rdft_w[k2 + 2];
    wk1i = rdft_w[k2 + 3];
    wk3r = rdft_wk3ri_second[k1 + 0];
    wk3i = rdft_wk3ri_second[k1 + 1];
    for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
      j1 = j0 + 8;
      j2 = j0 + 16;
      j3 = j0 + 24;
      x0r = a[j0 + 0] + a[j1 + 0];
      x0i = a[j0 + 1] + a[j1 + 1];
      x1r = a[j0 + 0] - a[j1 + 0];
      x1i = a[j0 + 1] - a[j1 + 1];
      x2r = a[j2 + 0] + a[j3 + 0];
      x2i = a[j2 + 1] + a[j3 + 1];
      x3r = a[j2 + 0] - a[j3 + 0];
      x3i = a[j2 + 1] - a[j3 + 1];
      a[j0 + 0] = x0r + x2r;
      a[j0 + 1] = x0i + x2i;
      x0r -= x2r;
      x0i -= x2i;
      a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
      a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
      x0r = x1r - x3i;
      x0i = x1i + x3r;
      a[j1 + 0] = wk1r * x0r - wk1i * x0i;
      a[j1 + 1] = wk1r * x0i + wk1i * x0r;
      x0r = x1r + x3i;
      x0i = x1i - x3r;
      a[j3 + 0] = wk3r * x0r - wk3i * x0i;
      a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    }
  }
}
// Forward complex FFT over the 64 complex (128 float) values in |a|: runs
// the first two butterfly stages via the dispatch pointers, then applies
// the final twiddle-free radix-4 stage (butterflies between elements 32
// floats apart).
static void cftfsub_128_C(float* a) {
  int i0;
  cft1st_128(a);
  cftmdl_128(a);
  for (i0 = 0; i0 < 32; i0 += 2) {
    const int i1 = i0 + 32;
    const int i2 = i1 + 32;
    const int i3 = i2 + 32;
    // Sums and differences of the two complex input pairs.
    const float s0r = a[i0] + a[i1];
    const float s0i = a[i0 + 1] + a[i1 + 1];
    const float d0r = a[i0] - a[i1];
    const float d0i = a[i0 + 1] - a[i1 + 1];
    const float s1r = a[i2] + a[i3];
    const float s1i = a[i2 + 1] + a[i3 + 1];
    const float d1r = a[i2] - a[i3];
    const float d1i = a[i2 + 1] - a[i3 + 1];
    a[i0] = s0r + s1r;
    a[i0 + 1] = s0i + s1i;
    a[i2] = s0r - s1r;
    a[i2 + 1] = s0i - s1i;
    a[i1] = d0r - d1i;
    a[i1 + 1] = d0i + d1r;
    a[i3] = d0r + d1i;
    a[i3 + 1] = d0i - d1r;
  }
}
// Backward counterpart of cftfsub_128_C: same final radix-4 stage, but the
// imaginary parts are negated (conjugation) while the butterflies are
// applied.
static void cftbsub_128_C(float* a) {
  int j, j1, j2, j3, l;
  float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;

  cft1st_128(a);
  cftmdl_128(a);
  l = 32;

  for (j = 0; j < l; j += 2) {
    j1 = j + l;
    j2 = j1 + l;
    j3 = j2 + l;
    x0r = a[j] + a[j1];
    // Note the negated imaginary parts relative to the forward transform.
    x0i = -a[j + 1] - a[j1 + 1];
    x1r = a[j] - a[j1];
    x1i = -a[j + 1] + a[j1 + 1];
    x2r = a[j2] + a[j3];
    x2i = a[j2 + 1] + a[j3 + 1];
    x3r = a[j2] - a[j3];
    x3i = a[j2 + 1] - a[j3 + 1];
    a[j] = x0r + x2r;
    a[j + 1] = x0i - x2i;
    a[j2] = x0r - x2r;
    a[j2 + 1] = x0i + x2i;
    a[j1] = x1r - x3i;
    a[j1 + 1] = x1i - x3r;
    a[j3] = x1r + x3i;
    a[j3 + 1] = x1i + x3r;
  }
}
// Post-processing stage of the forward real FFT: combines the
// conjugate-symmetric bin pairs of the length-64 complex FFT output with
// twiddle factors from rdft_w to obtain the spectrum of the length-128
// real input.
static void rftfsub_128_C(float* a) {
  const float* c = rdft_w + 32;
  int k1;
  for (k1 = 1; k1 < 32; k1++) {
    const int j2 = 2 * k1;    // Interleaved (re, im) index of the lower bin.
    const int k2 = 128 - j2;  // Mirrored upper bin.
    const float wkr = 0.5f - c[32 - k1];
    const float wki = c[k1];
    const float xr = a[j2 + 0] - a[k2 + 0];
    const float xi = a[j2 + 1] + a[k2 + 1];
    const float yr = wkr * xr - wki * xi;
    const float yi = wkr * xi + wki * xr;
    a[j2 + 0] -= yr;
    a[j2 + 1] -= yi;
    a[k2 + 0] += yr;
    a[k2 + 1] -= yi;
  }
}
// Pre-processing stage of the backward real FFT: recombines the
// conjugate-symmetric spectrum pairs with twiddle factors from rdft_w
// before the backward complex FFT runs.  a[1] and a[65] are negated outside
// the loop (the two bins the pairwise loop does not touch symmetrically).
static void rftbsub_128_C(float* a) {
  const float* c = rdft_w + 32;
  int j1, j2, k1, k2;
  float wkr, wki, xr, xi, yr, yi;

  a[1] = -a[1];
  for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
    k2 = 128 - j2;
    k1 = 32 - j1;
    wkr = 0.5f - c[k1];
    wki = c[j1];
    xr = a[j2 + 0] - a[k2 + 0];
    xi = a[j2 + 1] + a[k2 + 1];
    // Note the sign convention differs from rftfsub_128_C (conjugated
    // twiddle).
    yr = wkr * xr + wki * xi;
    yi = wkr * xi - wki * xr;
    a[j2 + 0] = a[j2 + 0] - yr;
    a[j2 + 1] = yi - a[j2 + 1];
    a[k2 + 0] = yr + a[k2 + 0];
    a[k2 + 1] = yi - a[k2 + 1];
  }
  a[65] = -a[65];
}
// In-place forward rdft of length 128: bit-reversal permutation, complex
// FFT stages, then real-FFT post-processing.  Finally the DC and Nyquist
// values are packed together into a[0] and a[1].
void aec_rdft_forward_128(float* a) {
  bitrv2_128(a);
  cftfsub_128(a);
  rftfsub_128(a);
  {
    // Compute both packed values from the pre-update a[0]/a[1].
    const float dc = a[0] + a[1];
    const float nyquist = a[0] - a[1];
    a[0] = dc;
    a[1] = nyquist;
  }
}
// In-place inverse rdft of length 128.  First unpacks the combined DC and
// Nyquist values from a[0] and a[1] (inverting the packing done at the end
// of aec_rdft_forward_128), then runs the real-FFT pre-processing followed
// by the backward complex FFT.  NOTE(review): nothing here divides by the
// transform length, so the round trip presumably carries Ooura's usual
// scale factor -- confirm against callers.
void aec_rdft_inverse_128(float* a) {
  a[1] = 0.5f * (a[0] - a[1]);
  a[0] -= a[1];
  rftbsub_128(a);
  bitrv2_128(a);
  cftbsub_128(a);
}
// code path selection
// Dispatch pointers for each transform step; aec_rdft_init() installs the C
// implementations and the per-architecture init functions may overwrite
// them with SIMD versions.
RftSub128 cft1st_128;
RftSub128 cftmdl_128;
RftSub128 rftfsub_128;
RftSub128 rftbsub_128;
RftSub128 cftfsub_128;
RftSub128 cftbsub_128;
RftSub128 bitrv2_128;
// Installs the generic C implementations of all transform steps, then lets
// platform-specific init routines replace them with SIMD versions where the
// build configuration and runtime CPU features allow it.
void aec_rdft_init(void) {
  cft1st_128 = cft1st_128_C;
  cftmdl_128 = cftmdl_128_C;
  rftfsub_128 = rftfsub_128_C;
  rftbsub_128 = rftbsub_128_C;
  cftfsub_128 = cftfsub_128_C;
  cftbsub_128 = cftbsub_128_C;
  bitrv2_128 = bitrv2_128_C;
#if defined(WEBRTC_ARCH_X86_FAMILY)
  // SSE2 availability is checked at runtime on x86.
  if (WebRtc_GetCPUInfo(kSSE2)) {
    aec_rdft_init_sse2();
  }
#endif
#if defined(MIPS_FPU_LE)
  aec_rdft_init_mips();
#endif
  // With WEBRTC_HAS_NEON the support is known at compile time, so there is
  // no runtime feature check; WEBRTC_DETECT_NEON probes the CPU instead.
#if defined(WEBRTC_HAS_NEON)
  aec_rdft_init_neon();
#elif defined(WEBRTC_DETECT_NEON)
  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
    aec_rdft_init_neon();
  }
#endif
}

View File

@ -1,61 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_

#include "webrtc/modules/audio_processing/aec/aec_common.h"

// These intrinsics were unavailable before VS 2008.
// TODO(andrew): move to a common file.
#if defined(_MSC_VER) && _MSC_VER < 1500
#include <emmintrin.h>
static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
#endif

// Constants shared by all paths (C, SSE2, NEON).
extern const float rdft_w[64];

// Constants used by the C path.
extern const float rdft_wk3ri_first[16];
extern const float rdft_wk3ri_second[16];

// Constants used by SSE2 and NEON but initialized in the C path.
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];

// code path selection function pointers
// Each pointer is one in-place step of the length-128 rdft; they are set by
// aec_rdft_init() (and possibly overwritten by the per-architecture init
// functions below).
typedef void (*RftSub128)(float* a);
extern RftSub128 rftfsub_128;
extern RftSub128 rftbsub_128;
extern RftSub128 cft1st_128;
extern RftSub128 cftmdl_128;
extern RftSub128 cftfsub_128;
extern RftSub128 cftbsub_128;
extern RftSub128 bitrv2_128;

// entry points
// aec_rdft_init() must run before the forward/inverse transforms are used,
// since it installs the function pointers above.
void aec_rdft_init(void);
void aec_rdft_init_sse2(void);
void aec_rdft_forward_128(float* a);
void aec_rdft_inverse_128(float* a);

#if defined(MIPS_FPU_LE)
void aec_rdft_init_mips(void);
#endif
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void aec_rdft_init_neon(void);
#endif

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_

File diff suppressed because it is too large Load Diff

View File

@ -1,355 +0,0 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The rdft AEC algorithm, neon version of speed-critical functions.
*
* Based on the sse2 version.
*/
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include <arm_neon.h>
// Sign mask {-1, +1, -1, +1}: multiplying a vrev64q-swapped (im, re) vector
// by this and accumulating implements the add/subtract-with-swap pattern of
// the radix-4 butterflies (see cft1st_128_neon).
static const ALIGN16_BEG float ALIGN16_END
    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
// NEON version of cft1st_128: first radix-4 butterfly stage, processing one
// group of 8 complex (16 float) interleaved values per loop iteration using
// the duplicated per-lane twiddle tables rdft_wk*.
static void cft1st_128_neon(float* a) {
  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
  int j, k2;

  for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
    float32x4_t a00v = vld1q_f32(&a[j + 0]);
    float32x4_t a04v = vld1q_f32(&a[j + 4]);
    float32x4_t a08v = vld1q_f32(&a[j + 8]);
    float32x4_t a12v = vld1q_f32(&a[j + 12]);
    // Regroup into pairs of complex values: {a0, a1}, {a2, a3}, ...
    float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
    float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
    float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
    float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
    const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
    // Butterfly sums/differences.
    float32x4_t x0v = vaddq_f32(a01v, a23v);
    const float32x4_t x1v = vsubq_f32(a01v, a23v);
    const float32x4_t x2v = vaddq_f32(a45v, a67v);
    const float32x4_t x3v = vsubq_f32(a45v, a67v);
    // vrev64q swaps (re, im) within each complex value.
    const float32x4_t x3w = vrev64q_f32(x3v);
    float32x4_t x0w;
    a01v = vaddq_f32(x0v, x2v);
    x0v = vsubq_f32(x0v, x2v);
    x0w = vrev64q_f32(x0v);
    // Complex multiplications by wk2, wk1 and wk3 via the swap/sign trick.
    a45v = vmulq_f32(wk2rv, x0v);
    a45v = vmlaq_f32(a45v, wk2iv, x0w);
    x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
    x0w = vrev64q_f32(x0v);
    a23v = vmulq_f32(wk1rv, x0v);
    a23v = vmlaq_f32(a23v, wk1iv, x0w);
    x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
    x0w = vrev64q_f32(x0v);
    a67v = vmulq_f32(wk3rv, x0v);
    a67v = vmlaq_f32(a67v, wk3iv, x0w);
    // Undo the regrouping and store.
    a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
    a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
    a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
    a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
    vst1q_f32(&a[j + 0], a00v);
    vst1q_f32(&a[j + 4], a04v);
    vst1q_f32(&a[j + 8], a08v);
    vst1q_f32(&a[j + 12], a12v);
  }
}
// Middle butterfly pass of the 128-point complex FFT, NEON version.
// Processes |a| in place in two sub-passes over 8-element groups: the
// first uses a fixed wk1r twiddle (cftmdl_wk1r), the second the table
// twiddles at offset k2 = 4. Variable names encode lane contents, e.g.
// x0r0_0i0_0r1_x0i1 holds {x0.re[0], x0.im[0], x0.re[1], x0.im[1]}.
static void cftmdl_128_neon(float* a) {
  int j;
  const int l = 8;
  const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
  float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
  for (j = 0; j < l; j += 2) {
    const float32x2_t a_00 = vld1_f32(&a[j + 0]);
    const float32x2_t a_08 = vld1_f32(&a[j + 8]);
    const float32x2_t a_32 = vld1_f32(&a[j + 32]);
    const float32x2_t a_40 = vld1_f32(&a[j + 40]);
    const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
    const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
    const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
    const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
    const float32x2_t a_16 = vld1_f32(&a[j + 16]);
    const float32x2_t a_24 = vld1_f32(&a[j + 24]);
    const float32x2_t a_48 = vld1_f32(&a[j + 48]);
    const float32x2_t a_56 = vld1_f32(&a[j + 56]);
    const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
    const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
    const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
    const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
    const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
    const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
    // vrev64q swaps re/im so the signed mul-accumulate below forms the
    // +/-i * x3 terms of the butterfly.
    const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
    const float32x4_t x1_x3_add =
        vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
    const float32x4_t x1_x3_sub =
        vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
    const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0);
    const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0);
    const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
    const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1);
    const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1);
    const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
    const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as);
    const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
    const float32x4_t xx1_rev = vrev64q_f32(xx1);
    const float32x4_t yy4_rev = vrev64q_f32(yy4);
    vst1_f32(&a[j + 0], vget_low_f32(xx0));
    vst1_f32(&a[j + 32], vget_high_f32(xx0));
    vst1_f32(&a[j + 16], vget_low_f32(xx1));
    vst1_f32(&a[j + 48], vget_high_f32(xx1_rev));
    // The store above writes the swapped pair; only the real part of the
    // conjugate must change sign.
    a[j + 48] = -a[j + 48];
    vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
    vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
    vst1_f32(&a[j + 40], vget_low_f32(yy4));
    vst1_f32(&a[j + 56], vget_high_f32(yy4_rev));
  }
  {
    const int k = 64;
    const int k1 = 2;
    const int k2 = 2 * k1;
    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
    wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
    for (j = k; j < l + k; j += 2) {
      const float32x2_t a_00 = vld1_f32(&a[j + 0]);
      const float32x2_t a_08 = vld1_f32(&a[j + 8]);
      const float32x2_t a_32 = vld1_f32(&a[j + 32]);
      const float32x2_t a_40 = vld1_f32(&a[j + 40]);
      const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
      const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
      const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
      const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
      const float32x2_t a_16 = vld1_f32(&a[j + 16]);
      const float32x2_t a_24 = vld1_f32(&a[j + 24]);
      const float32x2_t a_48 = vld1_f32(&a[j + 48]);
      const float32x2_t a_56 = vld1_f32(&a[j + 56]);
      const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
      const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
      const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
      const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
      const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
      const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
      const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
      const float32x4_t x1_x3_add =
          vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
      const float32x4_t x1_x3_sub =
          vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
      // Complex multiplies by the wk2/wk1/wk3 twiddles.
      float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
      float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
      float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
      xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1));
      xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add));
      xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub));
      vst1_f32(&a[j + 0], vget_low_f32(xx));
      vst1_f32(&a[j + 32], vget_high_f32(xx));
      vst1_f32(&a[j + 16], vget_low_f32(xx4));
      vst1_f32(&a[j + 48], vget_high_f32(xx4));
      vst1_f32(&a[j + 8], vget_low_f32(xx12));
      vst1_f32(&a[j + 40], vget_high_f32(xx12));
      vst1_f32(&a[j + 24], vget_low_f32(xx22));
      vst1_f32(&a[j + 56], vget_high_f32(xx22));
    }
  }
}
// Reverses the four lanes of |in|: A B C D -> D C B A.
__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
  // A B C D -> C D A B (swap the 64-bit halves).
  const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in));
  // C D A B -> D C B A (swap within each 64-bit half).
  return vrev64q_f32(rev);
}
// Post-processing step of the forward 128-point real FFT, NEON version.
// Combines conjugate-symmetric spectrum bins of |a| in place using the
// weight table rdft_w: four bin pairs per vector iteration, then a scalar
// loop for the remainder.
static void rftfsub_128_neon(float* a) {
  const float* c = rdft_w + 32;
  int j1, j2;
  const float32x4_t mm_half = vdupq_n_f32(0.5f);

  // Vectorized code (four at once).
  // Note: commented number are indexes for the first iteration of the loop.
  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
    // Load 'wk'.
    const float32x4_t c_j1 = vld1q_f32(&c[j1]);       //  1,  2,  3,  4,
    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);  // 28, 29, 30, 31,
    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);  // 28, 29, 30, 31,
    const float32x4_t wkr_ = reverse_order_f32x4(wkrt);  // 31, 30, 29, 28,
    const float32x4_t wki_ = c_j1;  //  1,  2,  3,  4,
    // Load and shuffle 'a'.
    //   2,   4,   6,   8,   3,   5,   7,   9
    float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
    // 120, 122, 124, 126, 121, 123, 125, 127,
    const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
    // 126, 124, 122, 120
    const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
    // 127, 125, 123, 121
    const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
    // Calculate 'x'.
    const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
    const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
    // Calculate product into 'y'.
    //    yr = wkr * xr - wki * xi;
    //    yi = wkr * xi + wki * xr;
    const float32x4_t a_ = vmulq_f32(wkr_, xr_);
    const float32x4_t b_ = vmulq_f32(wki_, xi_);
    const float32x4_t c_ = vmulq_f32(wkr_, xi_);
    const float32x4_t d_ = vmulq_f32(wki_, xr_);
    const float32x4_t yr_ = vsubq_f32(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
    const float32x4_t yi_ = vaddq_f32(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
    // Update 'a'.
    //    a[j2 + 0] -= yr;
    //    a[j2 + 1] -= yi;
    //    a[k2 + 0] += yr;
    //    a[k2 + 1] -= yi;
    // 126, 124, 122, 120,
    const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
    // 127, 125, 123, 121,
    const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
    // Shuffle in right order and store.
    const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
    const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
    // 124, 125, 126, 127, 120, 121, 122, 123
    const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
    //   2,   4,   6,   8,
    a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
    //   3,   5,   7,   9,
    a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
    //   2,   3,   4,   5,   6,   7,   8,   9,
    vst2q_f32(&a[0 + j2], a_j2_p);
    vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
    vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
  }
  // Scalar code for the remaining items.
  for (; j2 < 64; j1 += 1, j2 += 2) {
    const int k2 = 128 - j2;
    const int k1 = 32 - j1;
    const float wkr = 0.5f - c[k1];
    const float wki = c[j1];
    const float xr = a[j2 + 0] - a[k2 + 0];
    const float xi = a[j2 + 1] + a[k2 + 1];
    const float yr = wkr * xr - wki * xi;
    const float yi = wkr * xi + wki * xr;
    a[j2 + 0] -= yr;
    a[j2 + 1] -= yi;
    a[k2 + 0] += yr;
    a[k2 + 1] -= yi;
  }
}
// Pre-processing step of the inverse 128-point real FFT, NEON version.
// Mirror of rftfsub_128_neon with the product signs flipped
// (yr = wkr*xr + wki*xi, yi = wkr*xi - wki*xr) and the imaginary parts
// negated via a[1] and a[65].
static void rftbsub_128_neon(float* a) {
  const float* c = rdft_w + 32;
  int j1, j2;
  const float32x4_t mm_half = vdupq_n_f32(0.5f);

  a[1] = -a[1];
  // Vectorized code (four at once).
  // Note: commented number are indexes for the first iteration of the loop.
  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
    // Load 'wk'.
    const float32x4_t c_j1 = vld1q_f32(&c[j1]);       //  1,  2,  3,  4,
    const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]);  // 28, 29, 30, 31,
    const float32x4_t wkrt = vsubq_f32(mm_half, c_k1);  // 28, 29, 30, 31,
    const float32x4_t wkr_ = reverse_order_f32x4(wkrt);  // 31, 30, 29, 28,
    const float32x4_t wki_ = c_j1;  //  1,  2,  3,  4,
    // Load and shuffle 'a'.
    //   2,   4,   6,   8,   3,   5,   7,   9
    float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
    // 120, 122, 124, 126, 121, 123, 125, 127,
    const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
    // 126, 124, 122, 120
    const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
    // 127, 125, 123, 121
    const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
    // Calculate 'x'.
    const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
    const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
    // Calculate product into 'y'.
    //    yr = wkr * xr + wki * xi;
    //    yi = wkr * xi - wki * xr;
    const float32x4_t a_ = vmulq_f32(wkr_, xr_);
    const float32x4_t b_ = vmulq_f32(wki_, xi_);
    const float32x4_t c_ = vmulq_f32(wkr_, xi_);
    const float32x4_t d_ = vmulq_f32(wki_, xr_);
    const float32x4_t yr_ = vaddq_f32(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
    const float32x4_t yi_ = vsubq_f32(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
    // Update 'a'.
    //    a[j2 + 0] -= yr;
    //    a[j2 + 1] -= yi;
    //    a[k2 + 0] += yr;
    //    a[k2 + 1] -= yi;
    // 126, 124, 122, 120,
    const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
    // 127, 125, 123, 121,
    const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
    // Shuffle in right order and store.
    const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
    const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
    // 124, 125, 126, 127, 120, 121, 122, 123
    const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
    //   2,   4,   6,   8,
    a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
    //   3,   5,   7,   9,
    a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
    //   2,   3,   4,   5,   6,   7,   8,   9,
    vst2q_f32(&a[0 + j2], a_j2_p);
    vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
    vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
  }
  // Scalar code for the remaining items.
  for (; j2 < 64; j1 += 1, j2 += 2) {
    const int k2 = 128 - j2;
    const int k1 = 32 - j1;
    const float wkr = 0.5f - c[k1];
    const float wki = c[j1];
    const float xr = a[j2 + 0] - a[k2 + 0];
    const float xi = a[j2 + 1] + a[k2 + 1];
    const float yr = wkr * xr + wki * xi;
    const float yi = wkr * xi - wki * xr;
    a[j2 + 0] = a[j2 + 0] - yr;
    a[j2 + 1] = yi - a[j2 + 1];
    a[k2 + 0] = yr + a[k2 + 0];
    a[k2 + 1] = yi - a[k2 + 1];
  }
  a[65] = -a[65];
}
// Installs the NEON implementations of the 128-point FFT kernels into the
// AEC RDFT dispatch pointers. Called once when NEON support is detected.
void aec_rdft_init_neon(void) {
  rftbsub_128 = rftbsub_128_neon;
  rftfsub_128 = rftfsub_128_neon;
  cftmdl_128 = cftmdl_128_neon;
  cft1st_128 = cft1st_128_neon;
}

View File

@ -1,427 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include <emmintrin.h>
// Alternating sign pattern, combined with re/im-swapped vectors in the
// complex butterflies below. 16-byte aligned for _mm_load_ps.
static const ALIGN16_BEG float ALIGN16_END
    k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
// First butterfly pass of the 128-point complex FFT, SSE2 version.
// In-place on |a| (interleaved re/im floats); four complex inputs per
// iteration, using the twiddle tables rdft_wk{1,2,3}{r,i}.
static void cft1st_128_SSE2(float* a) {
  const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
  int j, k2;

  for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
    __m128 a00v = _mm_loadu_ps(&a[j + 0]);
    __m128 a04v = _mm_loadu_ps(&a[j + 4]);
    __m128 a08v = _mm_loadu_ps(&a[j + 8]);
    __m128 a12v = _mm_loadu_ps(&a[j + 12]);
    // Regroup into complex pairs {0,1}/{2,3}/{4,5}/{6,7}.
    __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
    __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
    __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));
    __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));
    const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
    const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
    const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
    const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
    const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
    const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
    // Butterfly sums/differences.
    __m128 x0v = _mm_add_ps(a01v, a23v);
    const __m128 x1v = _mm_sub_ps(a01v, a23v);
    const __m128 x2v = _mm_add_ps(a45v, a67v);
    const __m128 x3v = _mm_sub_ps(a45v, a67v);
    __m128 x0w;
    a01v = _mm_add_ps(x0v, x2v);
    x0v = _mm_sub_ps(x0v, x2v);
    x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
    {
      // Complex multiply by the wk2 twiddles.
      const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
      const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
      a45v = _mm_add_ps(a45_0v, a45_1v);
    }
    {
      // Complex multiply by the wk1 twiddles; also prepares x0v/x0w for
      // the wk3 block below.
      __m128 a23_0v, a23_1v;
      const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
      const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
      x0v = _mm_add_ps(x1v, x3s);
      x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
      a23_0v = _mm_mul_ps(wk1rv, x0v);
      a23_1v = _mm_mul_ps(wk1iv, x0w);
      a23v = _mm_add_ps(a23_0v, a23_1v);
      x0v = _mm_sub_ps(x1v, x3s);
      x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
    }
    {
      // Complex multiply by the wk3 twiddles.
      const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
      const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
      a67v = _mm_add_ps(a67_0v, a67_1v);
    }
    // Interleave back into the original memory layout and store.
    a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));
    a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
    a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
    a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
    _mm_storeu_ps(&a[j + 0], a00v);
    _mm_storeu_ps(&a[j + 4], a04v);
    _mm_storeu_ps(&a[j + 8], a08v);
    _mm_storeu_ps(&a[j + 12], a12v);
  }
}
// Middle butterfly pass of the 128-point complex FFT, SSE2 version.
// Two sub-passes over 8-element groups: the first with the fixed wk1r
// twiddle (cftmdl_wk1r), the second with table twiddles at k2 = 4.
// _mm_loadl_epi64/_mm_storel_epi64 move one complex value (two floats)
// through the low half of an XMM register.
static void cftmdl_128_SSE2(float* a) {
  const int l = 8;
  const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
  int j0;

  __m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
  for (j0 = 0; j0 < l; j0 += 2) {
    const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
    const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
    const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
    const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
    const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
                                          _mm_castsi128_ps(a_32),
                                          _MM_SHUFFLE(1, 0, 1, 0));
    const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
                                          _mm_castsi128_ps(a_40),
                                          _MM_SHUFFLE(1, 0, 1, 0));
    __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
    const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
    const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
    const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
    const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
    const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
    const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
                                          _mm_castsi128_ps(a_48),
                                          _MM_SHUFFLE(1, 0, 1, 0));
    const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
                                          _mm_castsi128_ps(a_56),
                                          _MM_SHUFFLE(1, 0, 1, 0));
    const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
    const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
    const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
    const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
    // Swap re/im and apply the alternating sign to form the +/-i * x3 terms.
    const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
        _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
    const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
    const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
    const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
    const __m128 yy0 =
        _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));
    const __m128 yy1 =
        _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));
    const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
    const __m128 yy3 = _mm_add_ps(yy0, yy2);
    const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
    _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
    _mm_storel_epi64(
        (__m128i*)&a[j0 + 32],
        _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));
    _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
    _mm_storel_epi64(
        (__m128i*)&a[j0 + 48],
        _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));
    // The store above wrote the swapped pair; only the real part of the
    // conjugate needs its sign flipped.
    a[j0 + 48] = -a[j0 + 48];
    _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
    _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
    _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
    _mm_storel_epi64(
        (__m128i*)&a[j0 + 56],
        _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));
  }
  {
    int k = 64;
    int k1 = 2;
    int k2 = 2 * k1;
    const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
    const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
    const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
    const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
    const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
    wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
    for (j0 = k; j0 < l + k; j0 += 2) {
      const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
      const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
      const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
      const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
      const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
                                            _mm_castsi128_ps(a_32),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
                                            _mm_castsi128_ps(a_40),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
      const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
      const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
      const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
      const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
      const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
      const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
                                            _mm_castsi128_ps(a_48),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
                                            _mm_castsi128_ps(a_56),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
      const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
      const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
      const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
      // Complex multiply by the wk2 twiddles.
      const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
      const __m128 xx3 =
          _mm_mul_ps(wk2iv,
                     _mm_castsi128_ps(_mm_shuffle_epi32(
                         _mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
      const __m128 xx4 = _mm_add_ps(xx2, xx3);
      const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
          _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
      const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
      const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
      const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
      // Complex multiply by the wk1 twiddles.
      const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
      const __m128 xx11 = _mm_mul_ps(
          wk1iv,
          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
                                             _MM_SHUFFLE(2, 3, 0, 1))));
      const __m128 xx12 = _mm_add_ps(xx10, xx11);
      // Complex multiply by the wk3 twiddles.
      const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
      const __m128 xx21 = _mm_mul_ps(
          wk3iv,
          _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
                                             _MM_SHUFFLE(2, 3, 0, 1))));
      const __m128 xx22 = _mm_add_ps(xx20, xx21);
      _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
      _mm_storel_epi64(
          (__m128i*)&a[j0 + 32],
          _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
      _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
      _mm_storel_epi64(
          (__m128i*)&a[j0 + 48],
          _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
      _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
      _mm_storel_epi64(
          (__m128i*)&a[j0 + 40],
          _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
      _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
      _mm_storel_epi64(
          (__m128i*)&a[j0 + 56],
          _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
    }
  }
}
// Post-processing step of the forward 128-point real FFT, SSE2 version.
// Combines conjugate-symmetric spectrum bins of |a| in place using the
// weight table rdft_w: four bin pairs per vector iteration, then a scalar
// loop for the remainder.
static void rftfsub_128_SSE2(float* a) {
  const float* c = rdft_w + 32;
  int j1, j2, k1, k2;
  float wkr, wki, xr, xi, yr, yi;

  static const ALIGN16_BEG float ALIGN16_END
      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
  const __m128 mm_half = _mm_load_ps(k_half);

  // Vectorized code (four at once).
  // Note: commented number are indexes for the first iteration of the loop.
  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
    // Load 'wk'.
    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       //  1,  2,  3,  4,
    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
    const __m128 wkr_ =
        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
    const __m128 wki_ = c_j1;  //  1,  2,  3,  4,
    // Load and shuffle 'a'.
    const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]);    //   2,   3,   4,   5,
    const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]);    //   6,   7,   8,   9,
    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
    const __m128 a_j2_p0 = _mm_shuffle_ps(
        a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0));  //   2,   4,   6,   8,
    const __m128 a_j2_p1 = _mm_shuffle_ps(
        a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1));  //   3,   5,   7,   9,
    const __m128 a_k2_p0 = _mm_shuffle_ps(
        a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2));  // 126, 124, 122, 120,
    const __m128 a_k2_p1 = _mm_shuffle_ps(
        a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3));  // 127, 125, 123, 121,
    // Calculate 'x'.
    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
    // Calculate product into 'y'.
    //    yr = wkr * xr - wki * xi;
    //    yi = wkr * xi + wki * xr;
    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
    const __m128 b_ = _mm_mul_ps(wki_, xi_);
    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
    const __m128 d_ = _mm_mul_ps(wki_, xr_);
    const __m128 yr_ = _mm_sub_ps(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
    const __m128 yi_ = _mm_add_ps(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
    // Update 'a'.
    //    a[j2 + 0] -= yr;
    //    a[j2 + 1] -= yi;
    //    a[k2 + 0] += yr;
    //    a[k2 + 1] -= yi;
    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
    const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_);  //   3,   5,   7,   9,
    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
    const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_);  // 127, 125, 123, 121,
    // Shuffle in right order and store.
    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
    //   2,   3,   4,   5,
    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
    //   6,   7,   8,   9,
    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
    // 122, 123, 120, 121,
    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
    // 126, 127, 124, 125,
    const __m128 a_k2_0n = _mm_shuffle_ps(
        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
    const __m128 a_k2_4n = _mm_shuffle_ps(
        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
  }
  // Scalar code for the remaining items.
  for (; j2 < 64; j1 += 1, j2 += 2) {
    k2 = 128 - j2;
    k1 = 32 - j1;
    wkr = 0.5f - c[k1];
    wki = c[j1];
    xr = a[j2 + 0] - a[k2 + 0];
    xi = a[j2 + 1] + a[k2 + 1];
    yr = wkr * xr - wki * xi;
    yi = wkr * xi + wki * xr;
    a[j2 + 0] -= yr;
    a[j2 + 1] -= yi;
    a[k2 + 0] += yr;
    a[k2 + 1] -= yi;
  }
}
// Pre-processing step of the inverse 128-point real FFT, SSE2 version.
// Mirror of rftfsub_128_SSE2 with the product signs flipped
// (yr = wkr*xr + wki*xi, yi = wkr*xi - wki*xr) and the imaginary parts
// negated via a[1] and a[65].
static void rftbsub_128_SSE2(float* a) {
  const float* c = rdft_w + 32;
  int j1, j2, k1, k2;
  float wkr, wki, xr, xi, yr, yi;

  static const ALIGN16_BEG float ALIGN16_END
      k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
  const __m128 mm_half = _mm_load_ps(k_half);

  a[1] = -a[1];
  // Vectorized code (four at once).
  // Note: commented number are indexes for the first iteration of the loop.
  for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
    // Load 'wk'.
    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       //  1,  2,  3,  4,
    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
    const __m128 wkr_ =
        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
    const __m128 wki_ = c_j1;  //  1,  2,  3,  4,
    // Load and shuffle 'a'.
    const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]);    //   2,   3,   4,   5,
    const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]);    //   6,   7,   8,   9,
    const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]);  // 120, 121, 122, 123,
    const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]);  // 124, 125, 126, 127,
    const __m128 a_j2_p0 = _mm_shuffle_ps(
        a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0));  //   2,   4,   6,   8,
    const __m128 a_j2_p1 = _mm_shuffle_ps(
        a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1));  //   3,   5,   7,   9,
    const __m128 a_k2_p0 = _mm_shuffle_ps(
        a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2));  // 126, 124, 122, 120,
    const __m128 a_k2_p1 = _mm_shuffle_ps(
        a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3));  // 127, 125, 123, 121,
    // Calculate 'x'.
    const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
    const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
    // Calculate product into 'y'.
    //    yr = wkr * xr + wki * xi;
    //    yi = wkr * xi - wki * xr;
    const __m128 a_ = _mm_mul_ps(wkr_, xr_);
    const __m128 b_ = _mm_mul_ps(wki_, xi_);
    const __m128 c_ = _mm_mul_ps(wkr_, xi_);
    const __m128 d_ = _mm_mul_ps(wki_, xr_);
    const __m128 yr_ = _mm_add_ps(a_, b_);  // 2-126, 4-124, 6-122, 8-120,
    const __m128 yi_ = _mm_sub_ps(c_, d_);  // 3-127, 5-125, 7-123, 9-121,
    // Update 'a'.
    //    a[j2 + 0] = a[j2 + 0] - yr;
    //    a[j2 + 1] = yi - a[j2 + 1];
    //    a[k2 + 0] = yr + a[k2 + 0];
    //    a[k2 + 1] = yi - a[k2 + 1];
    const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_);  //   2,   4,   6,   8,
    const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1);  //   3,   5,   7,   9,
    const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_);  // 126, 124, 122, 120,
    const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1);  // 127, 125, 123, 121,
    // Shuffle in right order and store.
    const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
    //   2,   3,   4,   5,
    const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
    //   6,   7,   8,   9,
    const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
    // 122, 123, 120, 121,
    const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
    // 126, 127, 124, 125,
    const __m128 a_k2_0n = _mm_shuffle_ps(
        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
    const __m128 a_k2_4n = _mm_shuffle_ps(
        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
    _mm_storeu_ps(&a[122 - j2], a_k2_0n);
    _mm_storeu_ps(&a[126 - j2], a_k2_4n);
  }
  // Scalar code for the remaining items.
  for (; j2 < 64; j1 += 1, j2 += 2) {
    k2 = 128 - j2;
    k1 = 32 - j1;
    wkr = 0.5f - c[k1];
    wki = c[j1];
    xr = a[j2 + 0] - a[k2 + 0];
    xi = a[j2 + 1] + a[k2 + 1];
    yr = wkr * xr + wki * xi;
    yi = wkr * xi - wki * xr;
    a[j2 + 0] = a[j2 + 0] - yr;
    a[j2 + 1] = yi - a[j2 + 1];
    a[k2 + 0] = yr + a[k2 + 0];
    a[k2 + 1] = yi - a[k2 + 1];
  }
  a[65] = -a[65];
}
// Installs the SSE2 implementations of the 128-point FFT kernels into the
// AEC RDFT dispatch pointers. Called once when SSE2 support is detected.
void aec_rdft_init_sse2(void) {
  rftbsub_128 = rftbsub_128_SSE2;
  rftfsub_128 = rftfsub_128_SSE2;
  cftmdl_128 = cftmdl_128_SSE2;
  cft1st_128 = cft1st_128_SSE2;
}

View File

@ -1,209 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
* clock skew by resampling the farend signal.
*/
#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "webrtc/modules/audio_processing/aec/aec_core.h"
// Number of raw skew measurements collected before an estimate is computed.
enum {
  kEstimateLengthFrames = 400
};

typedef struct {
  float buffer[kResamplerBufferSize];   // Farend sample FIFO (with lookahead).
  float position;                       // Fractional read position in |buffer|.
  int deviceSampleRateHz;               // Used to derive skew rejection limits.
  int skewData[kEstimateLengthFrames];  // Collected raw skew measurements.
  int skewDataIndex;                    // Next write index into |skewData|.
  float skewEstimate;                   // Cached result of EstimateSkew().
} AecResampler;
static int EstimateSkew(const int* rawSkew,
int size,
int absLimit,
float* skewEst);
// Allocates an uninitialized resampler instance; the caller must call
// WebRtcAec_InitResampler() before use. Returns NULL if allocation fails.
void* WebRtcAec_CreateResampler() {
  return malloc(sizeof(AecResampler));
}
// Resets a resampler instance to its initial state: empty sample FIFO, no
// accumulated fractional position, and an empty skew-measurement history.
// Always returns 0.
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
  AecResampler* self = (AecResampler*)resampInst;
  // Clear the sample FIFO and the raw skew history.
  memset(self->buffer, 0, sizeof(self->buffer));
  memset(self->skewData, 0, sizeof(self->skewData));
  // Reset the interpolation position and the skew estimator state.
  self->position = 0.0;
  self->skewDataIndex = 0;
  self->skewEstimate = 0.0;
  self->deviceSampleRateHz = deviceSampleRateHz;
  return 0;
}
void WebRtcAec_FreeResampler(void* resampInst) {
AecResampler* obj = (AecResampler*)resampInst;
free(obj);
}
// Resamples |inspeech| by a factor of (1 + skew) using linear interpolation,
// writing the output to |outspeech| and the number of produced samples to
// |size_out|. The instance keeps kResamplingDelay frames of lookahead in its
// internal buffer, which is shifted by |size| samples at the end of each call.
void WebRtcAec_ResampleLinear(void* resampInst,
                              const float* inspeech,
                              size_t size,
                              float skew,
                              float* outspeech,
                              size_t* size_out) {
  AecResampler* obj = (AecResampler*)resampInst;

  float* y;
  float be, tnew;
  size_t tn, mm;

  assert(size <= 2 * FRAME_LEN);
  assert(resampInst != NULL);
  assert(inspeech != NULL);
  assert(outspeech != NULL);
  assert(size_out != NULL);

  // Add new frame data in lookahead.
  memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
         inspeech,
         size * sizeof(inspeech[0]));

  // Sample rate ratio.
  be = 1 + skew;

  // Loop over input frame.
  mm = 0;
  y = &obj->buffer[FRAME_LEN];  // Point at current frame.

  tnew = be * mm + obj->position;
  tn = (size_t)tnew;

  while (tn < size) {
    // Interpolation.
    outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
    mm++;

    tnew = be * mm + obj->position;
    // Use size_t consistently; this previously truncated through (int),
    // which disagreed with the identical computation above the loop.
    tn = (size_t)tnew;
  }

  *size_out = mm;
  obj->position += (*size_out) * be - size;

  // Shift buffer.
  memmove(obj->buffer,
          &obj->buffer[size],
          (kResamplerBufferSize - size) * sizeof(obj->buffer[0]));
}
// Feeds one raw skew measurement into the estimator and returns the current
// estimate through |skewEst|. Measurements are collected until
// kEstimateLengthFrames have been seen; the estimate is then computed once
// and cached for all later calls. Returns 0 on success, or the error code
// from EstimateSkew() on the call that computes the estimate.
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
  AecResampler* self = (AecResampler*)resampInst;
  int err = 0;

  if (self->skewDataIndex < kEstimateLengthFrames) {
    // Collection phase: just record the measurement.
    self->skewData[self->skewDataIndex] = rawSkew;
    self->skewDataIndex++;
  } else if (self->skewDataIndex == kEstimateLengthFrames) {
    // Exactly enough data: compute the estimate once and cache it.
    err = EstimateSkew(self->skewData, kEstimateLengthFrames,
                       self->deviceSampleRateHz, skewEst);
    self->skewEstimate = *skewEst;
    self->skewDataIndex++;
  } else {
    // Estimation already done: serve the cached value.
    *skewEst = self->skewEstimate;
  }
  return err;
}
// Estimates the clock-skew slope from |size| raw skew measurements.
// Three passes: (1) mean of the samples inside a coarse outer gate,
// (2) mean absolute deviation to derive tighter bounds, (3) least-squares
// slope of the cumulative sum of the accepted samples against their order
// of acceptance. Writes the slope to |skewEst| and returns 0, or returns
// -1 (with *skewEst == 0) when no sample survives the gating.
int EstimateSkew(const int* rawSkew,
                 int size,
                 int deviceSampleRateHz,
                 float* skewEst) {
  // Gates in samples, derived from the device rate: the outer gate rejects
  // gross outliers up front; the inner gate always admits small values in
  // the regression pass.
  const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
  const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
  int idx;
  int count = 0;
  float mean = 0;
  float deviation = 0;
  float meanAbsDev = 0;
  int hiBound = 0;
  int loBound = 0;
  float runningSum = 0;
  float sumX = 0;
  float sumXX = 0;
  float sumY = 0;
  float sumXY = 0;
  float avgX = 0;
  float denom = 0;
  float slope = 0;

  *skewEst = 0;  // Set in case of error below.

  // Pass 1: mean of all measurements inside the outer gate.
  for (idx = 0; idx < size; idx++) {
    if (rawSkew[idx] < absLimitOuter && rawSkew[idx] > -absLimitOuter) {
      count++;
      mean += rawSkew[idx];
    }
  }
  if (count == 0) {
    return -1;
  }
  assert(count > 0);
  mean /= count;

  // Pass 2: mean absolute deviation of the same samples.
  for (idx = 0; idx < size; idx++) {
    if (rawSkew[idx] < absLimitOuter && rawSkew[idx] > -absLimitOuter) {
      deviation = rawSkew[idx] - mean;
      meanAbsDev += (deviation < 0) ? -deviation : deviation;
    }
  }
  assert(count > 0);
  meanAbsDev /= count;
  hiBound = (int)(mean + 5 * meanAbsDev + 1);  // +1 for ceiling.
  loBound = (int)(mean - 5 * meanAbsDev - 1);  // -1 for floor.

  // Pass 3: regression of the cumulative sum of accepted samples against
  // their 1-based order of acceptance.
  count = 0;
  for (idx = 0; idx < size; idx++) {
    if ((rawSkew[idx] < absLimitInner && rawSkew[idx] > -absLimitInner) ||
        (rawSkew[idx] < hiBound && rawSkew[idx] > loBound)) {
      count++;
      runningSum += rawSkew[idx];
      sumX += count;
      sumXX += count * count;
      sumY += runningSum;
      sumXY += count * runningSum;
    }
  }
  if (count == 0) {
    return -1;
  }
  assert(count > 0);
  avgX = sumX / count;
  denom = sumXX - avgX * sumX;
  if (denom != 0) {
    slope = (sumXY - avgX * sumY) / denom;
  }
  *skewEst = slope;
  return 0;
}

View File

@ -1,39 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
#include "webrtc/modules/audio_processing/aec/aec_core.h"
// Delay (in frames) that the resampler adds to the signal path; subtracted
// from the delay estimates in echo_cancellation.c.
enum {
  kResamplingDelay = 1
};
// Capacity (in samples) of the resampler's internal buffer: four frames.
enum {
  kResamplerBufferSize = FRAME_LEN * 4
};
// Unless otherwise specified, functions return 0 on success and -1 on error.
void* WebRtcAec_CreateResampler(); // Returns NULL on error.
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);
void WebRtcAec_FreeResampler(void* resampInst);
// Estimates skew from raw measurement.
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);
// Resamples input using linear interpolation.
void WebRtcAec_ResampleLinear(void* resampInst,
const float* inspeech,
size_t size,
float skew,
float* outspeech,
size_t* size_out);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_

View File

@ -1,923 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* Contains the API functions for the AEC.
*/
#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
#include <math.h>
#ifdef WEBRTC_AEC_DEBUG_DUMP
#include <stdio.h>
#endif
#include <stdlib.h>
#include <string.h>
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_core.h"
#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
#include "webrtc/modules/audio_processing/aec/echo_cancellation_internal.h"
#include "webrtc/typedefs.h"
// Measured delays [ms]
// Device Chrome GTP
// MacBook Air 10
// MacBook Retina 10 100
// MacPro 30?
//
// Win7 Desktop 70 80?
// Win7 T430s 110
// Win8 T420s 70
//
// Daisy 50
// Pixel (w/ preproc?) 240
// Pixel (w/o preproc?) 110 110
// The extended filter mode gives us the flexibility to ignore the system's
// reported delays. We do this for platforms which we believe provide results
// which are incompatible with the AEC's expectations. Based on measurements
// (some provided above) we set a conservative (i.e. lower than measured)
// fixed delay.
//
// WEBRTC_UNTRUSTED_DELAY will only have an impact when |extended_filter_mode|
// is enabled. See the note along with |DelayCorrection| in
// echo_cancellation_impl.h for more details on the mode.
//
// Justification:
// Chromium/Mac: Here, the true latency is so low (~10-20 ms), that it plays
// havoc with the AEC's buffering. To avoid this, we set a fixed delay of 20 ms
// and then compensate by rewinding by 10 ms (in wideband) through
// kDelayDiffOffsetSamples. This trick does not seem to work for larger rewind
// values, but fortunately this is sufficient.
//
// Chromium/Linux(ChromeOS): The values we get on this platform don't correspond
// well to reality. The variance doesn't match the AEC's buffer changes, and the
// bulk values tend to be too low. However, the range across different hardware
// appears to be too large to choose a single value.
//
// GTP/Linux(ChromeOS): TBD, but for the moment we will trust the values.
// Force the untrusted-delay path for Chromium/Mac (see justification above).
#if defined(WEBRTC_CHROMIUM_BUILD) && defined(WEBRTC_MAC)
#define WEBRTC_UNTRUSTED_DELAY
#endif
// Manual rewind (in samples) applied on top of |knownDelay| in
// ProcessExtended(); -160 samples rewinds by 10 ms in wideband (see above).
#if defined(WEBRTC_UNTRUSTED_DELAY) && defined(WEBRTC_MAC)
static const int kDelayDiffOffsetSamples = -160;
#else
// Not enabled for now.
static const int kDelayDiffOffsetSamples = 0;
#endif
// Fixed delay used in place of (or as fallback for) the reported delay.
#if defined(WEBRTC_MAC)
static const int kFixedDelayMs = 20;
#else
static const int kFixedDelayMs = 50;
#endif
// Lower clamp applied to reported delays in the trusted-delay path.
#if !defined(WEBRTC_UNTRUSTED_DELAY)
static const int kMinTrustedDelayMs = 20;
#endif
// Reported delays at or above this value are treated as bogus.
static const int kMaxTrustedDelayMs = 500;
// Maximum length of resampled signal. Must be an integer multiple of frames
// (ceil(1/(1 + MIN_SKEW)*2) + 1)*FRAME_LEN
// The factor of 2 handles wb, and the + 1 is as a safety margin
// TODO(bjornv): Replace with kResamplerBufferSize
#define MAX_RESAMP_LEN (5 * FRAME_LEN)
static const int kMaxBufSizeStart = 62; // In partitions
static const int sampMsNb = 8; // samples per ms in nb
static const int initCheck = 42; // Magic value marking a completed Init().
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Global counter used to give each instance's dump files unique names.
int webrtc_aec_instance_count = 0;
#endif
// Estimates delay to set the position of the far-end buffer read pointer
// (controlled by knownDelay)
static void EstBufDelayNormal(Aec* aecInst);
static void EstBufDelayExtended(Aec* aecInst);
static int ProcessNormal(Aec* self,
const float* const* near,
size_t num_bands,
float* const* out,
size_t num_samples,
int16_t reported_delay_ms,
int32_t skew);
static void ProcessExtended(Aec* self,
const float* const* near,
size_t num_bands,
float* const* out,
size_t num_samples,
int16_t reported_delay_ms,
int32_t skew);
// Allocates a new AEC instance together with its sub-components (core,
// resampler, far-end pre-buffer). Returns NULL on any allocation failure.
//
// NOTE(review): the error paths release exactly the components created so
// far instead of calling WebRtcAec_Free(), which would read uninitialized
// members (|far_pre_buf| and, in dump builds, the debug file handles) of
// the partially initialized struct.
void* WebRtcAec_Create() {
  Aec* aecpc = malloc(sizeof(Aec));
  if (!aecpc) {
    return NULL;
  }
  aecpc->aec = WebRtcAec_CreateAec();
  if (!aecpc->aec) {
    free(aecpc);
    return NULL;
  }
  aecpc->resampler = WebRtcAec_CreateResampler();
  if (!aecpc->resampler) {
    WebRtcAec_FreeAec(aecpc->aec);
    free(aecpc);
    return NULL;
  }
  // Create far-end pre-buffer. The buffer size has to be large enough for
  // largest possible drift compensation (kResamplerBufferSize) + "almost" an
  // FFT buffer (PART_LEN2 - 1).
  aecpc->far_pre_buf =
      WebRtc_CreateBuffer(PART_LEN2 + kResamplerBufferSize, sizeof(float));
  if (!aecpc->far_pre_buf) {
    WebRtcAec_FreeResampler(aecpc->resampler);
    WebRtcAec_FreeAec(aecpc->aec);
    free(aecpc);
    return NULL;
  }
  aecpc->initFlag = 0;
  aecpc->lastError = 0;
#ifdef WEBRTC_AEC_DEBUG_DUMP
  {
    char filename[64];
    sprintf(filename, "aec_buf%d.dat", webrtc_aec_instance_count);
    aecpc->bufFile = fopen(filename, "wb");
    sprintf(filename, "aec_skew%d.dat", webrtc_aec_instance_count);
    aecpc->skewFile = fopen(filename, "wb");
    sprintf(filename, "aec_delay%d.dat", webrtc_aec_instance_count);
    aecpc->delayFile = fopen(filename, "wb");
    webrtc_aec_instance_count++;
  }
#endif
  return aecpc;
}
void WebRtcAec_Free(void* aecInst) {
Aec* aecpc = aecInst;
if (aecpc == NULL) {
return;
}
WebRtc_FreeBuffer(aecpc->far_pre_buf);
#ifdef WEBRTC_AEC_DEBUG_DUMP
fclose(aecpc->bufFile);
fclose(aecpc->skewFile);
fclose(aecpc->delayFile);
#endif
WebRtcAec_FreeAec(aecpc->aec);
WebRtcAec_FreeResampler(aecpc->resampler);
free(aecpc);
}
// Initializes an allocated AEC instance for the given sample rates and
// resets all adaptation state.
//
// Inputs:
//  - aecInst    : instance returned by WebRtcAec_Create().
//  - sampFreq   : nearend/farend sampling frequency; must be 8000, 16000,
//                 32000 or 48000 Hz.
//  - scSampFreq : soundcard sampling frequency, in [1, 96000] Hz.
//
// Returns 0 on success, -1 on error (lastError is set).
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq) {
  Aec* aecpc = aecInst;
  AecConfig aecConfig;
  if (sampFreq != 8000 &&
      sampFreq != 16000 &&
      sampFreq != 32000 &&
      sampFreq != 48000) {
    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
    return -1;
  }
  aecpc->sampFreq = sampFreq;
  if (scSampFreq < 1 || scSampFreq > 96000) {
    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
    return -1;
  }
  aecpc->scSampFreq = scSampFreq;
  // Initialize echo canceller core.
  if (WebRtcAec_InitAec(aecpc->aec, aecpc->sampFreq) == -1) {
    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
    return -1;
  }
  if (WebRtcAec_InitResampler(aecpc->resampler, aecpc->scSampFreq) == -1) {
    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
    return -1;
  }
  WebRtc_InitBuffer(aecpc->far_pre_buf);
  WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN); // Start overlap.
  aecpc->initFlag = initCheck; // indicates that initialization has been done
  // 32/48 kHz input is band-split; the core processes the 16 kHz lower band.
  if (aecpc->sampFreq == 32000 || aecpc->sampFreq == 48000) {
    aecpc->splitSampFreq = 16000;
  } else {
    aecpc->splitSampFreq = sampFreq;
  }
  aecpc->delayCtr = 0;
  aecpc->sampFactor = (aecpc->scSampFreq * 1.0f) / aecpc->splitSampFreq;
  // Sampling frequency multiplier (SWB is processed as 160 frame size).
  aecpc->rate_factor = aecpc->splitSampFreq / 8000;
  // Reset the far-end buffer-size averaging state (see ProcessNormal()).
  aecpc->sum = 0;
  aecpc->counter = 0;
  aecpc->checkBuffSize = 1;
  aecpc->firstVal = 0;
  // We skip the startup_phase completely (setting to 0) if DA-AEC is enabled,
  // but not extended_filter mode.
  aecpc->startup_phase = WebRtcAec_extended_filter_enabled(aecpc->aec) ||
                         !WebRtcAec_delay_agnostic_enabled(aecpc->aec);
  aecpc->bufSizeStart = 0;
  aecpc->checkBufSizeCtr = 0;
  aecpc->msInSndCardBuf = 0;
  aecpc->filtDelay = -1; // -1 marks "no filtered delay estimate yet".
  aecpc->timeForDelayChange = 0;
  aecpc->knownDelay = 0;
  aecpc->lastDelayDiff = 0;
  // Reset skew (clock drift) estimation state.
  aecpc->skewFrCtr = 0;
  aecpc->resample = kAecFalse;
  aecpc->highSkewCtr = 0;
  aecpc->skew = 0;
  aecpc->farend_started = 0;
  // Default settings.
  aecConfig.nlpMode = kAecNlpModerate;
  aecConfig.skewMode = kAecFalse;
  aecConfig.metricsMode = kAecFalse;
  aecConfig.delay_logging = kAecFalse;
  if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
    aecpc->lastError = AEC_UNSPECIFIED_ERROR;
    return -1;
  }
  return 0;
}
// only buffer L band for farend
// Buffers one frame (80 or 160 samples) of lower-band farend data. If skew
// compensation is active the frame is first resampled; the data is then
// staged in |far_pre_buf| and handed to the core in overlapping PART_LEN2
// chunks. Returns 0 on success, -1 on error (lastError is set).
int32_t WebRtcAec_BufferFarend(void* aecInst,
                               const float* farend,
                               size_t nrOfSamples) {
  Aec* aecpc = aecInst;
  size_t newNrOfSamples = nrOfSamples;
  float new_farend[MAX_RESAMP_LEN];
  const float* farend_ptr = farend;
  if (farend == NULL) {
    aecpc->lastError = AEC_NULL_POINTER_ERROR;
    return -1;
  }
  if (aecpc->initFlag != initCheck) {
    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
    return -1;
  }
  // number of samples == 160 for SWB input
  if (nrOfSamples != 80 && nrOfSamples != 160) {
    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
    return -1;
  }
  if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
    // Resample and get a new number of samples.
    WebRtcAec_ResampleLinear(aecpc->resampler,
                             farend,
                             nrOfSamples,
                             aecpc->skew,
                             new_farend,
                             &newNrOfSamples);
    farend_ptr = new_farend;
  }
  aecpc->farend_started = 1;
  // Account for the new samples in the core's system-delay bookkeeping.
  WebRtcAec_SetSystemDelay(
      aecpc->aec, WebRtcAec_system_delay(aecpc->aec) + (int)newNrOfSamples);
  // Write the time-domain data to |far_pre_buf|.
  WebRtc_WriteBuffer(aecpc->far_pre_buf, farend_ptr, newNrOfSamples);
  // Transform to frequency domain if we have enough data.
  while (WebRtc_available_read(aecpc->far_pre_buf) >= PART_LEN2) {
    // We have enough data to pass to the FFT, hence read PART_LEN2 samples.
    {
      float* ptmp = NULL;
      float tmp[PART_LEN2];
      WebRtc_ReadBuffer(aecpc->far_pre_buf, (void**)&ptmp, tmp, PART_LEN2);
      WebRtcAec_BufferFarendPartition(aecpc->aec, ptmp);
#ifdef WEBRTC_AEC_DEBUG_DUMP
      WebRtc_WriteBuffer(
          WebRtcAec_far_time_buf(aecpc->aec), &ptmp[PART_LEN], 1);
#endif
    }
    // Rewind |far_pre_buf| PART_LEN samples for overlap before continuing.
    WebRtc_MoveReadPtr(aecpc->far_pre_buf, -PART_LEN);
  }
  return 0;
}
// Runs echo cancellation on one frame (80 or 160 samples per band) of
// nearend data. |msInSndCardBuf| is the caller-reported capture+render
// buffering delay in ms; |skew| is the playout/record sample-count
// difference used for drift compensation. Dispatches to ProcessExtended()
// or ProcessNormal() depending on the extended-filter mode.
// Returns 0 on success, -1 on error or warning (lastError is set).
int32_t WebRtcAec_Process(void* aecInst,
                          const float* const* nearend,
                          size_t num_bands,
                          float* const* out,
                          size_t nrOfSamples,
                          int16_t msInSndCardBuf,
                          int32_t skew) {
  Aec* aecpc = aecInst;
  int32_t retVal = 0;
  if (out == NULL) {
    aecpc->lastError = AEC_NULL_POINTER_ERROR;
    return -1;
  }
  if (aecpc->initFlag != initCheck) {
    aecpc->lastError = AEC_UNINITIALIZED_ERROR;
    return -1;
  }
  // number of samples == 160 for SWB input
  if (nrOfSamples != 80 && nrOfSamples != 160) {
    aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
    return -1;
  }
  // Out-of-range delays are flagged as warnings but processing continues.
  if (msInSndCardBuf < 0) {
    msInSndCardBuf = 0;
    aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
    retVal = -1;
  } else if (msInSndCardBuf > kMaxTrustedDelayMs) {
    // The clamping is now done in ProcessExtended/Normal().
    aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
    retVal = -1;
  }
  // This returns the value of aec->extended_filter_enabled.
  if (WebRtcAec_extended_filter_enabled(aecpc->aec)) {
    ProcessExtended(aecpc,
                    nearend,
                    num_bands,
                    out,
                    nrOfSamples,
                    msInSndCardBuf,
                    skew);
  } else {
    if (ProcessNormal(aecpc,
                      nearend,
                      num_bands,
                      out,
                      nrOfSamples,
                      msInSndCardBuf,
                      skew) != 0) {
      retVal = -1;
    }
  }
#ifdef WEBRTC_AEC_DEBUG_DUMP
  {
    int16_t far_buf_size_ms = (int16_t)(WebRtcAec_system_delay(aecpc->aec) /
                                        (sampMsNb * aecpc->rate_factor));
    (void)fwrite(&far_buf_size_ms, 2, 1, aecpc->bufFile);
    (void)fwrite(
        &aecpc->knownDelay, sizeof(aecpc->knownDelay), 1, aecpc->delayFile);
  }
#endif
  return retVal;
}
// Applies runtime-settable AEC configuration. Each field is validated; on
// an invalid value the function records AEC_BAD_PARAMETER_ERROR and
// returns -1, otherwise the settings are forwarded to the core and 0 is
// returned. Note: |skewMode| is committed before the remaining fields are
// validated, so a later validation failure leaves the new skew mode in
// effect.
int WebRtcAec_set_config(void* handle, AecConfig config) {
  Aec* self = (Aec*)handle;
  if (self->initFlag != initCheck) {
    self->lastError = AEC_UNINITIALIZED_ERROR;
    return -1;
  }
  if (config.skewMode != kAecFalse && config.skewMode != kAecTrue) {
    self->lastError = AEC_BAD_PARAMETER_ERROR;
    return -1;
  }
  self->skewMode = config.skewMode;
  switch (config.nlpMode) {
    case kAecNlpConservative:
    case kAecNlpModerate:
    case kAecNlpAggressive:
      break;
    default:
      self->lastError = AEC_BAD_PARAMETER_ERROR;
      return -1;
  }
  if ((config.metricsMode != kAecFalse && config.metricsMode != kAecTrue) ||
      (config.delay_logging != kAecFalse &&
       config.delay_logging != kAecTrue)) {
    self->lastError = AEC_BAD_PARAMETER_ERROR;
    return -1;
  }
  WebRtcAec_SetConfigCore(
      self->aec, config.nlpMode, config.metricsMode, config.delay_logging);
  return 0;
}
int WebRtcAec_get_echo_status(void* handle, int* status) {
Aec* self = (Aec*)handle;
if (status == NULL) {
self->lastError = AEC_NULL_POINTER_ERROR;
return -1;
}
if (self->initFlag != initCheck) {
self->lastError = AEC_UNINITIALIZED_ERROR;
return -1;
}
*status = WebRtcAec_echo_state(self->aec);
return 0;
}
int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics) {
const float kUpWeight = 0.7f;
float dtmp;
int stmp;
Aec* self = (Aec*)handle;
Stats erl;
Stats erle;
Stats a_nlp;
if (handle == NULL) {
return -1;
}
if (metrics == NULL) {
self->lastError = AEC_NULL_POINTER_ERROR;
return -1;
}
if (self->initFlag != initCheck) {
self->lastError = AEC_UNINITIALIZED_ERROR;
return -1;
}
WebRtcAec_GetEchoStats(self->aec, &erl, &erle, &a_nlp);
// ERL
metrics->erl.instant = (int)erl.instant;
if ((erl.himean > kOffsetLevel) && (erl.average > kOffsetLevel)) {
// Use a mix between regular average and upper part average.
dtmp = kUpWeight * erl.himean + (1 - kUpWeight) * erl.average;
metrics->erl.average = (int)dtmp;
} else {
metrics->erl.average = kOffsetLevel;
}
metrics->erl.max = (int)erl.max;
if (erl.min < (kOffsetLevel * (-1))) {
metrics->erl.min = (int)erl.min;
} else {
metrics->erl.min = kOffsetLevel;
}
// ERLE
metrics->erle.instant = (int)erle.instant;
if ((erle.himean > kOffsetLevel) && (erle.average > kOffsetLevel)) {
// Use a mix between regular average and upper part average.
dtmp = kUpWeight * erle.himean + (1 - kUpWeight) * erle.average;
metrics->erle.average = (int)dtmp;
} else {
metrics->erle.average = kOffsetLevel;
}
metrics->erle.max = (int)erle.max;
if (erle.min < (kOffsetLevel * (-1))) {
metrics->erle.min = (int)erle.min;
} else {
metrics->erle.min = kOffsetLevel;
}
// RERL
if ((metrics->erl.average > kOffsetLevel) &&
(metrics->erle.average > kOffsetLevel)) {
stmp = metrics->erl.average + metrics->erle.average;
} else {
stmp = kOffsetLevel;
}
metrics->rerl.average = stmp;
// No other statistics needed, but returned for completeness.
metrics->rerl.instant = stmp;
metrics->rerl.max = stmp;
metrics->rerl.min = stmp;
// A_NLP
metrics->aNlp.instant = (int)a_nlp.instant;
if ((a_nlp.himean > kOffsetLevel) && (a_nlp.average > kOffsetLevel)) {
// Use a mix between regular average and upper part average.
dtmp = kUpWeight * a_nlp.himean + (1 - kUpWeight) * a_nlp.average;
metrics->aNlp.average = (int)dtmp;
} else {
metrics->aNlp.average = kOffsetLevel;
}
metrics->aNlp.max = (int)a_nlp.max;
if (a_nlp.min < (kOffsetLevel * (-1))) {
metrics->aNlp.min = (int)a_nlp.min;
} else {
metrics->aNlp.min = kOffsetLevel;
}
return 0;
}
// Retrieves delay statistics from the core's delay-logging facility.
// Outputs the median and standard deviation of the delay estimates and the
// fraction of estimates that may make the AEC perform poorly. Returns 0 on
// success, -1 on error (lastError is set when an instance exists).
int WebRtcAec_GetDelayMetrics(void* handle,
                              int* median,
                              int* std,
                              float* fraction_poor_delays) {
  Aec* self = handle;
  if (handle == NULL) {
    // Consistent with WebRtcAec_GetMetrics(): no instance to record an
    // error code on, so just fail instead of dereferencing NULL.
    return -1;
  }
  if (median == NULL) {
    self->lastError = AEC_NULL_POINTER_ERROR;
    return -1;
  }
  if (std == NULL) {
    self->lastError = AEC_NULL_POINTER_ERROR;
    return -1;
  }
  if (self->initFlag != initCheck) {
    self->lastError = AEC_UNINITIALIZED_ERROR;
    return -1;
  }
  if (WebRtcAec_GetDelayMetricsCore(self->aec, median, std,
                                    fraction_poor_delays) ==
      -1) {
    // Logging disabled.
    self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
    return -1;
  }
  return 0;
}
int32_t WebRtcAec_get_error_code(void* aecInst) {
Aec* aecpc = aecInst;
return aecpc->lastError;
}
AecCore* WebRtcAec_aec_core(void* handle) {
if (!handle) {
return NULL;
}
return ((Aec*)handle)->aec;
}
// Normal (non-extended) processing of one nearend frame. In order:
//  1) clamp and offset the reported system delay,
//  2) optionally estimate skew (clock drift) to decide on resampling,
//  3) during startup, pass the nearend through while establishing a stable
//     far-end buffer size,
//  4) in steady state, update the delay estimate (EstBufDelayNormal) and
//     run the AEC core.
// Returns 0 on success, -1 if a warning was recorded in lastError.
static int ProcessNormal(Aec* aecpc,
                         const float* const* nearend,
                         size_t num_bands,
                         float* const* out,
                         size_t nrOfSamples,
                         int16_t msInSndCardBuf,
                         int32_t skew) {
  int retVal = 0;
  size_t i;
  size_t nBlocks10ms;
  // Limit resampling to doubling/halving of signal
  const float minSkewEst = -0.5f;
  const float maxSkewEst = 1.0f;
  msInSndCardBuf =
      msInSndCardBuf > kMaxTrustedDelayMs ? kMaxTrustedDelayMs : msInSndCardBuf;
  // TODO(andrew): we need to investigate if this +10 is really wanted.
  msInSndCardBuf += 10;
  aecpc->msInSndCardBuf = msInSndCardBuf;
  if (aecpc->skewMode == kAecTrue) {
    if (aecpc->skewFrCtr < 25) {
      // Burn-in: skip the first 25 frames before trusting skew input.
      aecpc->skewFrCtr++;
    } else {
      retVal = WebRtcAec_GetSkew(aecpc->resampler, skew, &aecpc->skew);
      if (retVal == -1) {
        aecpc->skew = 0;
        aecpc->lastError = AEC_BAD_PARAMETER_WARNING;
      }
      // Normalize to a per-sample rate and decide whether it is large
      // enough to warrant resampling the farend.
      aecpc->skew /= aecpc->sampFactor * nrOfSamples;
      if (aecpc->skew < 1.0e-3 && aecpc->skew > -1.0e-3) {
        aecpc->resample = kAecFalse;
      } else {
        aecpc->resample = kAecTrue;
      }
      if (aecpc->skew < minSkewEst) {
        aecpc->skew = minSkewEst;
      } else if (aecpc->skew > maxSkewEst) {
        aecpc->skew = maxSkewEst;
      }
#ifdef WEBRTC_AEC_DEBUG_DUMP
      (void)fwrite(&aecpc->skew, sizeof(aecpc->skew), 1, aecpc->skewFile);
#endif
    }
  }
  nBlocks10ms = nrOfSamples / (FRAME_LEN * aecpc->rate_factor);
  if (aecpc->startup_phase) {
    // During startup the nearend is passed through unprocessed.
    for (i = 0; i < num_bands; ++i) {
      // Only needed if they don't already point to the same place.
      if (nearend[i] != out[i]) {
        memcpy(out[i], nearend[i], sizeof(nearend[i][0]) * nrOfSamples);
      }
    }
    // The AEC is in the start up mode
    // AEC is disabled until the system delay is OK
    // Mechanism to ensure that the system delay is reasonably stable.
    if (aecpc->checkBuffSize) {
      aecpc->checkBufSizeCtr++;
      // Before we fill up the far-end buffer we require the system delay
      // to be stable (+/-8 ms) compared to the first value. This
      // comparison is made during the following 6 consecutive 10 ms
      // blocks. If it seems to be stable then we start to fill up the
      // far-end buffer.
      if (aecpc->counter == 0) {
        aecpc->firstVal = aecpc->msInSndCardBuf;
        aecpc->sum = 0;
      }
      if (abs(aecpc->firstVal - aecpc->msInSndCardBuf) <
          WEBRTC_SPL_MAX(0.2 * aecpc->msInSndCardBuf, sampMsNb)) {
        aecpc->sum += aecpc->msInSndCardBuf;
        aecpc->counter++;
      } else {
        aecpc->counter = 0;
      }
      if (aecpc->counter * nBlocks10ms >= 6) {
        // The far-end buffer size is determined in partitions of
        // PART_LEN samples. Use 75% of the average value of the system
        // delay as buffer size to start with.
        aecpc->bufSizeStart =
            WEBRTC_SPL_MIN((3 * aecpc->sum * aecpc->rate_factor * 8) /
                               (4 * aecpc->counter * PART_LEN),
                           kMaxBufSizeStart);
        // Buffer size has now been determined.
        aecpc->checkBuffSize = 0;
      }
      if (aecpc->checkBufSizeCtr * nBlocks10ms > 50) {
        // For really bad systems, don't disable the echo canceller for
        // more than 0.5 sec.
        aecpc->bufSizeStart = WEBRTC_SPL_MIN(
            (aecpc->msInSndCardBuf * aecpc->rate_factor * 3) / 40,
            kMaxBufSizeStart);
        aecpc->checkBuffSize = 0;
      }
    }
    // If |checkBuffSize| changed in the if-statement above.
    if (!aecpc->checkBuffSize) {
      // The system delay is now reasonably stable (or has been unstable
      // for too long). When the far-end buffer is filled with
      // approximately the same amount of data as reported by the system
      // we end the startup phase.
      int overhead_elements =
          WebRtcAec_system_delay(aecpc->aec) / PART_LEN - aecpc->bufSizeStart;
      if (overhead_elements == 0) {
        // Enable the AEC
        aecpc->startup_phase = 0;
      } else if (overhead_elements > 0) {
        // TODO(bjornv): Do we need a check on how much we actually
        // moved the read pointer? It should always be possible to move
        // the pointer |overhead_elements| since we have only added data
        // to the buffer and no delay compensation nor AEC processing
        // has been done.
        WebRtcAec_MoveFarReadPtr(aecpc->aec, overhead_elements);
        // Enable the AEC
        aecpc->startup_phase = 0;
      }
    }
  } else {
    // AEC is enabled.
    EstBufDelayNormal(aecpc);
    // Call the AEC.
    // TODO(bjornv): Re-structure such that we don't have to pass
    // |aecpc->knownDelay| as input. Change name to something like
    // |system_buffer_diff|.
    WebRtcAec_ProcessFrames(aecpc->aec,
                            nearend,
                            num_bands,
                            nrOfSamples,
                            aecpc->knownDelay,
                            out);
  }
  return retVal;
}
// Extended-filter-mode processing of one nearend frame. Unlike
// ProcessNormal() there is no multi-block startup phase: the far read
// pointer is aligned once on the first processed frame, after which
// EstBufDelayExtended() tracks the buffering delay. Until farend data has
// been buffered, the nearend is passed through unprocessed.
static void ProcessExtended(Aec* self,
                            const float* const* near,
                            size_t num_bands,
                            float* const* out,
                            size_t num_samples,
                            int16_t reported_delay_ms,
                            int32_t skew) {
  size_t i;
  const int delay_diff_offset = kDelayDiffOffsetSamples;
#if defined(WEBRTC_UNTRUSTED_DELAY)
  reported_delay_ms = kFixedDelayMs;
#else
  // This is the usual mode where we trust the reported system delay values.
  // Due to the longer filter, we no longer add 10 ms to the reported delay
  // to reduce chance of non-causality. Instead we apply a minimum here to avoid
  // issues with the read pointer jumping around needlessly.
  reported_delay_ms = reported_delay_ms < kMinTrustedDelayMs
                          ? kMinTrustedDelayMs
                          : reported_delay_ms;
  // If the reported delay appears to be bogus, we attempt to recover by using
  // the measured fixed delay values. We use >= here because higher layers
  // may already clamp to this maximum value, and we would otherwise not
  // detect it here.
  reported_delay_ms = reported_delay_ms >= kMaxTrustedDelayMs
                          ? kFixedDelayMs
                          : reported_delay_ms;
#endif
  self->msInSndCardBuf = reported_delay_ms;
  if (!self->farend_started) {
    // No farend data yet: pass the nearend through untouched.
    for (i = 0; i < num_bands; ++i) {
      // Only needed if they don't already point to the same place.
      if (near[i] != out[i]) {
        memcpy(out[i], near[i], sizeof(near[i][0]) * num_samples);
      }
    }
    return;
  }
  if (self->startup_phase) {
    // In the extended mode, there isn't a startup "phase", just a special
    // action on the first frame. In the trusted delay case, we'll take the
    // current reported delay, unless it's less then our conservative
    // measurement.
    int startup_size_ms =
        reported_delay_ms < kFixedDelayMs ? kFixedDelayMs : reported_delay_ms;
#if defined(WEBRTC_ANDROID)
    int target_delay = startup_size_ms * self->rate_factor * 8;
#else
    // To avoid putting the AEC in a non-causal state we're being slightly
    // conservative and scale by 2. On Android we use a fixed delay and
    // therefore there is no need to scale the target_delay.
    int target_delay = startup_size_ms * self->rate_factor * 8 / 2;
#endif
    int overhead_elements =
        (WebRtcAec_system_delay(self->aec) - target_delay) / PART_LEN;
    WebRtcAec_MoveFarReadPtr(self->aec, overhead_elements);
    self->startup_phase = 0;
  }
  EstBufDelayExtended(self);
  {
    // |delay_diff_offset| gives us the option to manually rewind the delay on
    // very low delay platforms which can't be expressed purely through
    // |reported_delay_ms|.
    const int adjusted_known_delay =
        WEBRTC_SPL_MAX(0, self->knownDelay + delay_diff_offset);
    WebRtcAec_ProcessFrames(self->aec,
                            near,
                            num_bands,
                            num_samples,
                            adjusted_known_delay,
                            out);
  }
}
// Updates |aecpc->knownDelay| (normal mode): smooths the difference between
// the reported soundcard delay and the core's internal system delay, and
// commits a new known delay only after the smoothed estimate has stayed
// outside the [96, 224]-sample hysteresis band for more than 25 frames.
static void EstBufDelayNormal(Aec* aecpc) {
  int nSampSndCard = aecpc->msInSndCardBuf * sampMsNb * aecpc->rate_factor;
  int current_delay = nSampSndCard - WebRtcAec_system_delay(aecpc->aec);
  int delay_difference = 0;
  // Before we proceed with the delay estimate filtering we:
  // 1) Compensate for the frame that will be read.
  // 2) Compensate for drift resampling.
  // 3) Compensate for non-causality if needed, since the estimated delay can't
  //    be negative.
  // 1) Compensating for the frame(s) that will be read/processed.
  current_delay += FRAME_LEN * aecpc->rate_factor;
  // 2) Account for resampling frame delay.
  if (aecpc->skewMode == kAecTrue && aecpc->resample == kAecTrue) {
    current_delay -= kResamplingDelay;
  }
  // 3) Compensate for non-causality, if needed, by flushing one block.
  if (current_delay < PART_LEN) {
    current_delay += WebRtcAec_MoveFarReadPtr(aecpc->aec, 1) * PART_LEN;
  }
  // We use -1 to signal an initialized state in the "extended" implementation;
  // compensate for that.
  aecpc->filtDelay = aecpc->filtDelay < 0 ? 0 : aecpc->filtDelay;
  // First-order IIR smoothing of the delay estimate (80/20 mix).
  aecpc->filtDelay =
      WEBRTC_SPL_MAX(0, (short)(0.8 * aecpc->filtDelay + 0.2 * current_delay));
  delay_difference = aecpc->filtDelay - aecpc->knownDelay;
  if (delay_difference > 224) {
    if (aecpc->lastDelayDiff < 96) {
      aecpc->timeForDelayChange = 0;
    } else {
      aecpc->timeForDelayChange++;
    }
  } else if (delay_difference < 96 && aecpc->knownDelay > 0) {
    if (aecpc->lastDelayDiff > 224) {
      aecpc->timeForDelayChange = 0;
    } else {
      aecpc->timeForDelayChange++;
    }
  } else {
    aecpc->timeForDelayChange = 0;
  }
  aecpc->lastDelayDiff = delay_difference;
  if (aecpc->timeForDelayChange > 25) {
    // Commit, keeping a 160-sample safety margin below the estimate.
    aecpc->knownDelay = WEBRTC_SPL_MAX((int)aecpc->filtDelay - 160, 0);
  }
}
// Updates |self->knownDelay| (extended mode). Same structure as
// EstBufDelayNormal() but with slower smoothing (95/5), a wider
// [128, 384]-sample hysteresis band, a two-block non-causality flush and a
// 256-sample safety margin.
static void EstBufDelayExtended(Aec* self) {
  int reported_delay = self->msInSndCardBuf * sampMsNb * self->rate_factor;
  int current_delay = reported_delay - WebRtcAec_system_delay(self->aec);
  int delay_difference = 0;
  // Before we proceed with the delay estimate filtering we:
  // 1) Compensate for the frame that will be read.
  // 2) Compensate for drift resampling.
  // 3) Compensate for non-causality if needed, since the estimated delay can't
  //    be negative.
  // 1) Compensating for the frame(s) that will be read/processed.
  current_delay += FRAME_LEN * self->rate_factor;
  // 2) Account for resampling frame delay.
  if (self->skewMode == kAecTrue && self->resample == kAecTrue) {
    current_delay -= kResamplingDelay;
  }
  // 3) Compensate for non-causality, if needed, by flushing two blocks.
  if (current_delay < PART_LEN) {
    current_delay += WebRtcAec_MoveFarReadPtr(self->aec, 2) * PART_LEN;
  }
  if (self->filtDelay == -1) {
    // First estimate: seed the filter with half the current delay.
    self->filtDelay = WEBRTC_SPL_MAX(0, 0.5 * current_delay);
  } else {
    self->filtDelay = WEBRTC_SPL_MAX(
        0, (short)(0.95 * self->filtDelay + 0.05 * current_delay));
  }
  delay_difference = self->filtDelay - self->knownDelay;
  if (delay_difference > 384) {
    if (self->lastDelayDiff < 128) {
      self->timeForDelayChange = 0;
    } else {
      self->timeForDelayChange++;
    }
  } else if (delay_difference < 128 && self->knownDelay > 0) {
    if (self->lastDelayDiff > 384) {
      self->timeForDelayChange = 0;
    } else {
      self->timeForDelayChange++;
    }
  } else {
    self->timeForDelayChange = 0;
  }
  self->lastDelayDiff = delay_difference;
  if (self->timeForDelayChange > 25) {
    self->knownDelay = WEBRTC_SPL_MAX((int)self->filtDelay - 256, 0);
  }
}

View File

@ -1,67 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/modules/audio_processing/aec/aec_core.h"
// Per-instance state of the AEC wrapper layer (see echo_cancellation.c).
typedef struct {
  int delayCtr;
  int sampFreq;       // Nearend/farend sampling frequency (Hz).
  int splitSampFreq;  // Band-split rate actually processed (<= 16000 Hz).
  int scSampFreq;     // Soundcard sampling frequency (Hz).
  float sampFactor; // scSampRate / sampFreq
  short skewMode;     // kAecTrue when skew (drift) compensation is enabled.
  int bufSizeStart;   // Startup far-end buffer size, in PART_LEN partitions.
  int knownDelay;     // Buffering-delay estimate passed to the core.
  int rate_factor;    // splitSampFreq / 8000.
  short initFlag; // indicates if AEC has been initialized
  // Variables used for averaging far end buffer size
  short counter;
  int sum;
  short firstVal;
  short checkBufSizeCtr;
  // Variables used for delay shifts
  short msInSndCardBuf;  // Latest reported system delay (ms).
  short filtDelay; // Filtered delay estimate; -1 means "no estimate yet".
  int timeForDelayChange;
  int startup_phase;
  int checkBuffSize;
  short lastDelayDiff;
#ifdef WEBRTC_AEC_DEBUG_DUMP
  FILE* bufFile;
  FILE* delayFile;
  FILE* skewFile;
#endif
  // Structures
  void* resampler;
  int skewFrCtr;
  int resample; // if the skew is small enough we don't resample
  int highSkewCtr;
  float skew;
  RingBuffer* far_pre_buf; // Time domain far-end pre-buffer.
  int lastError;   // Last error/warning code (see echo_cancellation.h).
  int farend_started;
  AecCore* aec;
} Aec;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_

View File

@ -1,245 +0,0 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
// Errors
#define AEC_UNSPECIFIED_ERROR 12000
#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
#define AEC_UNINITIALIZED_ERROR 12002
#define AEC_NULL_POINTER_ERROR 12003
#define AEC_BAD_PARAMETER_ERROR 12004
// Warnings
#define AEC_BAD_PARAMETER_WARNING 12050
// Nonlinear-processing (echo suppression) aggressiveness levels.
enum {
  kAecNlpConservative = 0,
  kAecNlpModerate,
  kAecNlpAggressive
};
// Boolean toggle values used throughout the AEC configuration.
enum {
  kAecFalse = 0,
  kAecTrue
};
// Runtime-settable configuration, applied via WebRtcAec_set_config().
typedef struct {
  int16_t nlpMode; // default kAecNlpModerate
  int16_t skewMode; // default kAecFalse
  int16_t metricsMode; // default kAecFalse
  int delay_logging; // default kAecFalse
  // float realSkew;
} AecConfig;
// One reported metric in instantaneous / average / max / min form.
typedef struct {
  int instant;
  int average;
  int max;
  int min;
} AecLevel;
// Set of echo metrics returned by WebRtcAec_GetMetrics().
typedef struct {
  AecLevel rerl;
  AecLevel erl;
  AecLevel erle;
  AecLevel aNlp;
} AecMetrics;
struct AecCore;
#ifdef __cplusplus
extern "C" {
#endif
/*
* Allocates the memory needed by the AEC. The memory needs to be initialized
* separately using the WebRtcAec_Init() function. Returns a pointer to the
* object or NULL on error.
*/
void* WebRtcAec_Create();
/*
* This function releases the memory allocated by WebRtcAec_Create().
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
*/
void WebRtcAec_Free(void* aecInst);
/*
* Initializes an AEC instance.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* int32_t sampFreq Sampling frequency of data
* int32_t scSampFreq Soundcard sampling frequency
*
* Outputs Description
* -------------------------------------------------------------------
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
/*
* Inserts an 80 or 160 sample block of data into the farend buffer.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* const float* farend In buffer containing one frame of
* farend signal for L band
* int16_t nrOfSamples Number of samples in farend buffer
*
* Outputs Description
* -------------------------------------------------------------------
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_BufferFarend(void* aecInst,
const float* farend,
size_t nrOfSamples);
/*
* Runs the echo canceller on an 80 or 160 sample blocks of data.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* float* const* nearend In buffer containing one frame of
* nearend+echo signal for each band
* int num_bands Number of bands in nearend buffer
* int16_t nrOfSamples Number of samples in nearend buffer
* int16_t msInSndCardBuf Delay estimate for sound card and
* system buffers
* int16_t skew Difference between number of samples played
* and recorded at the soundcard (for clock skew
* compensation)
*
* Outputs Description
* -------------------------------------------------------------------
* float* const* out Out buffer, one frame of processed nearend
* for each band
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_Process(void* aecInst,
const float* const* nearend,
size_t num_bands,
float* const* out,
size_t nrOfSamples,
int16_t msInSndCardBuf,
int32_t skew);
/*
* This function enables the user to set certain parameters on-the-fly.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
* AecConfig config Config instance that contains all
* properties to be set
*
* Outputs Description
* -------------------------------------------------------------------
* int return 0: OK
* -1: error
*/
int WebRtcAec_set_config(void* handle, AecConfig config);
/*
 * Gets the current echo status of the nearend signal.
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
 * void* handle                 Pointer to the AEC instance
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
 * int* status                  0: Almost certainly nearend single-talk
 *                              1: Might not be nearend single-talk
 * int return                   0: OK
 *                              -1: error
 */
int WebRtcAec_get_echo_status(void* handle, int* status);
/*
 * Gets the current echo metrics for the session.
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
 * void* handle                 Pointer to the AEC instance
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
 * AecMetrics* metrics          Struct which will be filled out with the
 *                              current echo metrics.
 * int return                   0: OK
 *                              -1: error
 */
int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
/*
 * Gets the current delay metrics for the session.
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
 * void* handle                 Pointer to the AEC instance
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
 * int* median                  Delay median value.
 * int* std                     Delay standard deviation.
 * float* fraction_poor_delays  Fraction of the delay estimates that may
 *                              cause the AEC to perform poorly.
 *
 * int return                   0: OK
 *                              -1: error
 */
int WebRtcAec_GetDelayMetrics(void* handle,
                              int* median,
                              int* std,
                              float* fraction_poor_delays);
/*
 * Gets the last error code.
 *
 * Inputs                       Description
 * -------------------------------------------------------------------
 * void* aecInst                Pointer to the AEC instance
 *
 * Outputs                      Description
 * -------------------------------------------------------------------
 * int32_t return               11000-11100: error code
 */
int32_t WebRtcAec_get_error_code(void* aecInst);
// Returns a pointer to the low level AEC handle.
//
// Input:
//  - handle                    : Pointer to the AEC instance.
//
// Return value:
//  - AecCore pointer           : NULL for error.
//
struct AecCore* WebRtcAec_aec_core(void* handle);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_

View File

@ -0,0 +1,367 @@
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("../../../webrtc.gni")
# Core AEC3 (echo canceller 3) library: block-based echo removal, delay
# estimation, residual echo estimation and suppression.
rtc_library("aec3") {
  visibility = [ "*" ]
  configs += [ "..:apm_debug_dump" ]
  sources = [
    "adaptive_fir_filter.cc",
    "adaptive_fir_filter_erl.cc",
    "aec3_common.cc",
    "aec3_fft.cc",
    "aec_state.cc",
    "aec_state.h",
    "alignment_mixer.cc",
    "alignment_mixer.h",
    "api_call_jitter_metrics.cc",
    "api_call_jitter_metrics.h",
    "block_buffer.cc",
    "block_delay_buffer.cc",
    "block_delay_buffer.h",
    "block_framer.cc",
    "block_framer.h",
    "block_processor.cc",
    "block_processor.h",
    "block_processor_metrics.cc",
    "block_processor_metrics.h",
    "clockdrift_detector.cc",
    "clockdrift_detector.h",
    "coarse_filter_update_gain.cc",
    "coarse_filter_update_gain.h",
    "comfort_noise_generator.cc",
    "comfort_noise_generator.h",
    "decimator.cc",
    "decimator.h",
    "delay_estimate.h",
    "dominant_nearend_detector.cc",
    "dominant_nearend_detector.h",
    "downsampled_render_buffer.cc",
    "downsampled_render_buffer.h",
    "echo_audibility.cc",
    "echo_audibility.h",
    "echo_canceller3.cc",
    "echo_canceller3.h",
    "echo_path_delay_estimator.cc",
    "echo_path_delay_estimator.h",
    "echo_path_variability.cc",
    "echo_path_variability.h",
    "echo_remover.cc",
    "echo_remover.h",
    "echo_remover_metrics.cc",
    "echo_remover_metrics.h",
    "erl_estimator.cc",
    "erl_estimator.h",
    "erle_estimator.cc",
    "erle_estimator.h",
    "fft_buffer.cc",
    "filter_analyzer.cc",
    "filter_analyzer.h",
    "frame_blocker.cc",
    "frame_blocker.h",
    "fullband_erle_estimator.cc",
    "fullband_erle_estimator.h",
    "matched_filter.cc",
    "matched_filter_lag_aggregator.cc",
    "matched_filter_lag_aggregator.h",
    "moving_average.cc",
    "moving_average.h",
    "nearend_detector.h",
    "refined_filter_update_gain.cc",
    "refined_filter_update_gain.h",
    "render_buffer.cc",
    "render_delay_buffer.cc",
    "render_delay_buffer.h",
    "render_delay_controller.cc",
    "render_delay_controller.h",
    "render_delay_controller_metrics.cc",
    "render_delay_controller_metrics.h",
    "render_signal_analyzer.cc",
    "render_signal_analyzer.h",
    "residual_echo_estimator.cc",
    "residual_echo_estimator.h",
    "reverb_decay_estimator.cc",
    "reverb_decay_estimator.h",
    "reverb_frequency_response.cc",
    "reverb_frequency_response.h",
    "reverb_model.cc",
    "reverb_model.h",
    "reverb_model_estimator.cc",
    "reverb_model_estimator.h",
    "signal_dependent_erle_estimator.cc",
    "signal_dependent_erle_estimator.h",
    "spectrum_buffer.cc",
    "stationarity_estimator.cc",
    "stationarity_estimator.h",
    "subband_erle_estimator.cc",
    "subband_erle_estimator.h",
    "subband_nearend_detector.cc",
    "subband_nearend_detector.h",
    "subtractor.cc",
    "subtractor.h",
    "subtractor_output.cc",
    "subtractor_output.h",
    "subtractor_output_analyzer.cc",
    "subtractor_output_analyzer.h",
    "suppression_filter.cc",
    "suppression_filter.h",
    "suppression_gain.cc",
    "suppression_gain.h",
    "transparent_mode.cc",
    "transparent_mode.h",
  ]

  defines = []
  # On 32-bit ARM builds with NEON requested, drop the default FPU config and
  # force -mfpu=neon so the NEON intrinsics in this target compile.
  if (rtc_build_with_neon && current_cpu != "arm64") {
    suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
    cflags = [ "-mfpu=neon" ]
  }

  deps = [
    ":adaptive_fir_filter",
    ":adaptive_fir_filter_erl",
    ":aec3_common",
    ":aec3_fft",
    ":fft_data",
    ":matched_filter",
    ":render_buffer",
    ":vector_math",
    "..:apm_logging",
    "..:audio_buffer",
    "..:high_pass_filter",
    "../../../api:array_view",
    "../../../api/audio:aec3_config",
    "../../../api/audio:echo_control",
    "../../../common_audio:common_audio_c",
    "../../../rtc_base:checks",
    "../../../rtc_base:rtc_base_approved",
    "../../../rtc_base:safe_minmax",
    "../../../rtc_base/experiments:field_trial_parser",
    "../../../rtc_base/system:arch",
    "../../../system_wrappers",
    "../../../system_wrappers:field_trial",
    "../../../system_wrappers:metrics",
    "../utility:cascaded_biquad_filter",
  ]
  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
  # The AVX2 code paths are built in a separate target with AVX2 cflags and
  # only linked in on x86/x64.
  if (current_cpu == "x86" || current_cpu == "x64") {
    deps += [ ":aec3_avx2" ]
  }
}
# Header-only target with shared AEC3 constants and enums.
rtc_source_set("aec3_common") {
  sources = [ "aec3_common.h" ]
}
# FFT wrapper interface used by the AEC3 filter code (128-point ooura FFT).
rtc_source_set("aec3_fft") {
  sources = [ "aec3_fft.h" ]
  deps = [
    ":aec3_common",
    ":fft_data",
    "../../../api:array_view",
    "../../../common_audio/third_party/ooura:fft_size_128",
    "../../../rtc_base:checks",
    "../../../rtc_base:rtc_base_approved",
    "../../../rtc_base/system:arch",
  ]
}
# Render-signal ring buffer interfaces (time, FFT and spectrum domains).
rtc_source_set("render_buffer") {
  sources = [
    "block_buffer.h",
    "fft_buffer.h",
    "render_buffer.h",
    "spectrum_buffer.h",
  ]
  deps = [
    ":aec3_common",
    ":fft_data",
    "../../../api:array_view",
    "../../../rtc_base:checks",
    "../../../rtc_base:rtc_base_approved",
    "../../../rtc_base/system:arch",
  ]
}
# Interface of the partitioned adaptive FIR filter.
rtc_source_set("adaptive_fir_filter") {
  sources = [ "adaptive_fir_filter.h" ]
  deps = [
    ":aec3_common",
    ":aec3_fft",
    ":fft_data",
    ":render_buffer",
    "..:apm_logging",
    "../../../api:array_view",
    "../../../rtc_base/system:arch",
  ]
}
# ERL (echo return loss) computation helpers for the adaptive FIR filter.
rtc_source_set("adaptive_fir_filter_erl") {
  sources = [ "adaptive_fir_filter_erl.h" ]
  deps = [
    ":aec3_common",
    "../../../api:array_view",
    "../../../rtc_base/system:arch",
  ]
}
# Matched-filter interface used for echo path delay estimation.
rtc_source_set("matched_filter") {
  sources = [ "matched_filter.h" ]
  deps = [
    ":aec3_common",
    "../../../api:array_view",
    "../../../rtc_base:rtc_base_approved",
    "../../../rtc_base/system:arch",
  ]
}
# SIMD-accelerated vector math helpers (header-only).
rtc_source_set("vector_math") {
  sources = [ "vector_math.h" ]
  deps = [
    ":aec3_common",
    "../../../api:array_view",
    "../../../rtc_base:checks",
    "../../../rtc_base/system:arch",
  ]
}
# FftData struct: real/imaginary spectra of one FFT block (header-only).
rtc_source_set("fft_data") {
  sources = [ "fft_data.h" ]
  deps = [
    ":aec3_common",
    "../../../api:array_view",
    "../../../rtc_base/system:arch",
  ]
}
# AVX2 variants of the hot AEC3 kernels, compiled in their own target so the
# AVX2 flags do not leak into code that must run on non-AVX2 CPUs.
if (current_cpu == "x86" || current_cpu == "x64") {
  rtc_library("aec3_avx2") {
    configs += [ "..:apm_debug_dump" ]
    sources = [
      "adaptive_fir_filter_avx2.cc",
      "adaptive_fir_filter_erl_avx2.cc",
      "fft_data_avx2.cc",
      "matched_filter_avx2.cc",
      "vector_math_avx2.cc",
    ]

    # MSVC and gcc/clang spell the AVX2 code generation flags differently.
    if (is_win) {
      cflags = [ "/arch:AVX2" ]
    } else {
      cflags = [
        "-mavx2",
        "-mfma",
      ]
    }

    deps = [
      ":adaptive_fir_filter",
      ":adaptive_fir_filter_erl",
      ":fft_data",
      ":matched_filter",
      ":vector_math",
      "../../../api:array_view",
      "../../../rtc_base:checks",
    ]
  }
}
# AEC3 unit tests. The mocks are always built; the actual test sources are
# only added when protobuf support is available (the APM test fixtures
# require it).
if (rtc_include_tests) {
  rtc_library("aec3_unittests") {
    testonly = true
    configs += [ "..:apm_debug_dump" ]
    sources = [
      "mock/mock_block_processor.cc",
      "mock/mock_block_processor.h",
      "mock/mock_echo_remover.cc",
      "mock/mock_echo_remover.h",
      "mock/mock_render_delay_buffer.cc",
      "mock/mock_render_delay_buffer.h",
      "mock/mock_render_delay_controller.cc",
      "mock/mock_render_delay_controller.h",
    ]
    deps = [
      ":adaptive_fir_filter",
      ":adaptive_fir_filter_erl",
      ":aec3",
      ":aec3_common",
      ":aec3_fft",
      ":fft_data",
      ":matched_filter",
      ":render_buffer",
      ":vector_math",
      "..:apm_logging",
      "..:audio_buffer",
      "..:audio_processing",
      "..:audio_processing_unittests",
      "..:high_pass_filter",
      "../../../api:array_view",
      "../../../api/audio:aec3_config",
      "../../../rtc_base:checks",
      "../../../rtc_base:rtc_base_approved",
      "../../../rtc_base:safe_minmax",
      "../../../rtc_base/system:arch",
      "../../../system_wrappers",
      "../../../test:field_trial",
      "../../../test:test_support",
      "../utility:cascaded_biquad_filter",
    ]
    absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
    defines = []

    if (rtc_enable_protobuf) {
      sources += [
        "adaptive_fir_filter_erl_unittest.cc",
        "adaptive_fir_filter_unittest.cc",
        "aec3_fft_unittest.cc",
        "aec_state_unittest.cc",
        "alignment_mixer_unittest.cc",
        "api_call_jitter_metrics_unittest.cc",
        "block_delay_buffer_unittest.cc",
        "block_framer_unittest.cc",
        "block_processor_metrics_unittest.cc",
        "block_processor_unittest.cc",
        "clockdrift_detector_unittest.cc",
        "coarse_filter_update_gain_unittest.cc",
        "comfort_noise_generator_unittest.cc",
        "decimator_unittest.cc",
        "echo_canceller3_unittest.cc",
        "echo_path_delay_estimator_unittest.cc",
        "echo_path_variability_unittest.cc",
        "echo_remover_metrics_unittest.cc",
        "echo_remover_unittest.cc",
        "erl_estimator_unittest.cc",
        "erle_estimator_unittest.cc",
        "fft_data_unittest.cc",
        "filter_analyzer_unittest.cc",
        "frame_blocker_unittest.cc",
        "matched_filter_lag_aggregator_unittest.cc",
        "matched_filter_unittest.cc",
        "moving_average_unittest.cc",
        "refined_filter_update_gain_unittest.cc",
        "render_buffer_unittest.cc",
        "render_delay_buffer_unittest.cc",
        "render_delay_controller_metrics_unittest.cc",
        "render_delay_controller_unittest.cc",
        "render_signal_analyzer_unittest.cc",
        "residual_echo_estimator_unittest.cc",
        "reverb_model_estimator_unittest.cc",
        "signal_dependent_erle_estimator_unittest.cc",
        "subtractor_unittest.cc",
        "suppression_filter_unittest.cc",
        "suppression_gain_unittest.cc",
        "vector_math_unittest.cc",
      ]
    }
  }
}

View File

@ -0,0 +1,740 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_HAS_NEON)
#include <arm_neon.h>
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <math.h>
#include <algorithm>
#include <functional>
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace aec3 {
// Computes and stores the frequency response of the filter.
// For each partition p and frequency bin j, (*H2)[p][j] receives the maximum
// over all render channels of the squared magnitude |H[p][ch][j]|^2. Only the
// first num_partitions entries of *H2 are filled with filter data; all
// entries are zeroed first. H2's capacity must equal H.size().
void ComputeFrequencyResponse(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
  // Reset the output before taking per-bin maxima.
  for (auto& H2_ch : *H2) {
    H2_ch.fill(0.f);
  }

  const size_t num_render_channels = H[0].size();
  RTC_DCHECK_EQ(H.size(), H2->capacity());
  for (size_t p = 0; p < num_partitions; ++p) {
    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
        // Squared magnitude of the coefficient for this bin.
        float tmp =
            H[p][ch].re[j] * H[p][ch].re[j] + H[p][ch].im[j] * H[p][ch].im[j];
        (*H2)[p][j] = std::max((*H2)[p][j], tmp);
      }
    }
  }
}
#if defined(WEBRTC_HAS_NEON)
// Computes and stores the frequency response of the filter (NEON variant).
// Identical contract to ComputeFrequencyResponse: per-bin maximum over render
// channels of the squared filter magnitude. Bins [0, kFftLengthBy2) are
// processed four at a time; the final (Nyquist) bin is handled in scalar
// code since kFftLengthBy2Plus1 is not a multiple of four.
void ComputeFrequencyResponse_Neon(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
  for (auto& H2_ch : *H2) {
    H2_ch.fill(0.f);
  }

  const size_t num_render_channels = H[0].size();
  RTC_DCHECK_EQ(H.size(), H2->capacity());
  for (size_t p = 0; p < num_partitions; ++p) {
    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      for (size_t j = 0; j < kFftLengthBy2; j += 4) {
        // H2_new = re^2 + im^2 for four bins at once.
        const float32x4_t re = vld1q_f32(&H[p][ch].re[j]);
        const float32x4_t im = vld1q_f32(&H[p][ch].im[j]);
        float32x4_t H2_new = vmulq_f32(re, re);
        H2_new = vmlaq_f32(H2_new, im, im);
        float32x4_t H2_p_j = vld1q_f32(&(*H2)[p][j]);
        H2_p_j = vmaxq_f32(H2_p_j, H2_new);
        vst1q_f32(&(*H2)[p][j], H2_p_j);
      }
      // Scalar handling of the Nyquist bin.
      float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
                     H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
      (*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
    }
  }
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Computes and stores the frequency response of the filter (SSE2 variant).
// Identical contract to ComputeFrequencyResponse: per-bin maximum over render
// channels of the squared filter magnitude. Bins [0, kFftLengthBy2) are
// processed four at a time; the final (Nyquist) bin is handled in scalar
// code since kFftLengthBy2Plus1 is not a multiple of four.
void ComputeFrequencyResponse_Sse2(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
  for (auto& H2_ch : *H2) {
    H2_ch.fill(0.f);
  }

  const size_t num_render_channels = H[0].size();
  RTC_DCHECK_EQ(H.size(), H2->capacity());
  for (size_t p = 0; p < num_partitions; ++p) {
    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      for (size_t j = 0; j < kFftLengthBy2; j += 4) {
        // H2_new = re^2 + im^2 for four bins at once.
        const __m128 re = _mm_loadu_ps(&H[p][ch].re[j]);
        const __m128 re2 = _mm_mul_ps(re, re);
        const __m128 im = _mm_loadu_ps(&H[p][ch].im[j]);
        const __m128 im2 = _mm_mul_ps(im, im);
        const __m128 H2_new = _mm_add_ps(re2, im2);
        __m128 H2_k_j = _mm_loadu_ps(&(*H2)[p][j]);
        H2_k_j = _mm_max_ps(H2_k_j, H2_new);
        _mm_storeu_ps(&(*H2)[p][j], H2_k_j);
      }
      // Scalar handling of the Nyquist bin.
      float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
                     H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
      (*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
    }
  }
}
#endif
// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
// Walks the render FFT ring buffer starting at its current position and
// wraps back to index 0 at the end of the buffer, so partition p is updated
// with the render spectrum that is p blocks old.
void AdaptPartitions(const RenderBuffer& render_buffer,
                     const FftData& G,
                     size_t num_partitions,
                     std::vector<std::vector<FftData>>* H) {
  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  size_t index = render_buffer.Position();
  const size_t num_render_channels = render_buffer_data[index].size();
  for (size_t p = 0; p < num_partitions; ++p) {
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      const FftData& X_p_ch = render_buffer_data[index][ch];
      FftData& H_p_ch = (*H)[p][ch];
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        // Complex multiply-accumulate: H += G * conj(X).
        H_p_ch.re[k] += X_p_ch.re[k] * G.re[k] + X_p_ch.im[k] * G.im[k];
        H_p_ch.im[k] += X_p_ch.re[k] * G.im[k] - X_p_ch.im[k] * G.re[k];
      }
    }
    // Advance cyclically through the ring buffer.
    index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
  }
}
#if defined(WEBRTC_HAS_NEON)
// Adapts the filter partitions. (Neon variant)
// Same contract as AdaptPartitions: H(t+1)=H(t)+G(t)*conj(X(t)). The ring
// buffer walk is split into two segments: lim1 partitions before the buffer
// wraps at its end, then the remainder from index 0. Bins [0, kFftLengthBy2)
// are processed four at a time in a first pass; a second, scalar pass
// updates the Nyquist bin for all partitions.
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
                          const FftData& G,
                          size_t num_partitions,
                          std::vector<std::vector<FftData>>* H) {
  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // Number of partitions available before the ring buffer wraps.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;

  size_t X_partition = render_buffer.Position();
  size_t limit = lim1;
  size_t p = 0;
  // Vectorized pass over bins [0, kFftLengthBy2).
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];

        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
          const float32x4_t G_re = vld1q_f32(&G.re[k]);
          const float32x4_t G_im = vld1q_f32(&G.im[k]);
          const float32x4_t X_re = vld1q_f32(&X.re[k]);
          const float32x4_t X_im = vld1q_f32(&X.im[k]);
          const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
          const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
          // e = X_re*G_re + X_im*G_im; f = X_re*G_im - X_im*G_re.
          const float32x4_t a = vmulq_f32(X_re, G_re);
          const float32x4_t e = vmlaq_f32(a, X_im, G_im);
          const float32x4_t c = vmulq_f32(X_re, G_im);
          const float32x4_t f = vmlsq_f32(c, X_im, G_re);
          // H += (e, f).
          const float32x4_t g = vaddq_f32(H_re, e);
          const float32x4_t h = vaddq_f32(H_im, f);
          vst1q_f32(&H_p_ch.re[k], g);
          vst1q_f32(&H_p_ch.im[k], h);
        }
      }
    }
    // Wrap to the start of the ring buffer for the remaining partitions.
    X_partition = 0;
    limit = lim2;
  } while (p < lim2);

  // Scalar pass over the Nyquist bin for all partitions.
  X_partition = render_buffer.Position();
  limit = lim1;
  p = 0;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];

        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
      }
    }

    X_partition = 0;
    limit = lim2;
  } while (p < lim2);
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Adapts the filter partitions. (SSE2 variant)
// Same contract as AdaptPartitions: H(t+1)=H(t)+G(t)*conj(X(t)). The ring
// buffer walk is split into two segments: lim1 partitions before the buffer
// wraps at its end, then the remainder from index 0. Bins [0, kFftLengthBy2)
// are processed four at a time in a first pass; a second, scalar pass
// updates the Nyquist bin for all partitions.
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
                          const FftData& G,
                          size_t num_partitions,
                          std::vector<std::vector<FftData>>* H) {
  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // Number of partitions available before the ring buffer wraps.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;

  size_t X_partition = render_buffer.Position();
  size_t limit = lim1;
  size_t p = 0;
  // Vectorized pass over bins [0, kFftLengthBy2).
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];

        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
          const __m128 G_re = _mm_loadu_ps(&G.re[k]);
          const __m128 G_im = _mm_loadu_ps(&G.im[k]);
          const __m128 X_re = _mm_loadu_ps(&X.re[k]);
          const __m128 X_im = _mm_loadu_ps(&X.im[k]);
          const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
          const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
          // e = X_re*G_re + X_im*G_im; f = X_re*G_im - X_im*G_re.
          const __m128 a = _mm_mul_ps(X_re, G_re);
          const __m128 b = _mm_mul_ps(X_im, G_im);
          const __m128 c = _mm_mul_ps(X_re, G_im);
          const __m128 d = _mm_mul_ps(X_im, G_re);
          const __m128 e = _mm_add_ps(a, b);
          const __m128 f = _mm_sub_ps(c, d);
          // H += (e, f).
          const __m128 g = _mm_add_ps(H_re, e);
          const __m128 h = _mm_add_ps(H_im, f);
          _mm_storeu_ps(&H_p_ch.re[k], g);
          _mm_storeu_ps(&H_p_ch.im[k], h);
        }
      }
    }
    // Wrap to the start of the ring buffer for the remaining partitions.
    X_partition = 0;
    limit = lim2;
  } while (p < lim2);

  // Scalar pass over the Nyquist bin for all partitions.
  X_partition = render_buffer.Position();
  limit = lim1;
  p = 0;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];

        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
      }
    }

    X_partition = 0;
    limit = lim2;
  } while (p < lim2);
}
#endif
// Produces the filter output.
// Computes S = sum over partitions and render channels of X * H (complex
// multiplication), walking the render FFT ring buffer from its current
// position and wrapping at the end.
void ApplyFilter(const RenderBuffer& render_buffer,
                 size_t num_partitions,
                 const std::vector<std::vector<FftData>>& H,
                 FftData* S) {
  S->re.fill(0.f);
  S->im.fill(0.f);

  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  size_t index = render_buffer.Position();
  const size_t num_render_channels = render_buffer_data[index].size();
  for (size_t p = 0; p < num_partitions; ++p) {
    RTC_DCHECK_EQ(num_render_channels, H[p].size());
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      const FftData& X_p_ch = render_buffer_data[index][ch];
      const FftData& H_p_ch = H[p][ch];
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        // Complex multiply-accumulate: S += X * H.
        S->re[k] += X_p_ch.re[k] * H_p_ch.re[k] - X_p_ch.im[k] * H_p_ch.im[k];
        S->im[k] += X_p_ch.re[k] * H_p_ch.im[k] + X_p_ch.im[k] * H_p_ch.re[k];
      }
    }
    // Advance cyclically through the ring buffer.
    index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
  }
}
#if defined(WEBRTC_HAS_NEON)
// Produces the filter output (Neon variant).
// Computes S = sum over partitions and render channels of X * H (complex
// multiplication), walking the render FFT ring buffer from its current
// position. Bins [0, kFftLengthBy2) are handled four at a time in a first
// pass; a second, scalar pass accumulates the Nyquist bin.
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S) {
  // The filter must have at least one partition.
  RTC_DCHECK_GE(H.size(), 1);
  S->Clear();

  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // Number of partitions available before the ring buffer wraps.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;

  size_t X_partition = render_buffer.Position();
  size_t p = 0;
  size_t limit = lim1;
  // Vectorized pass over bins [0, kFftLengthBy2).
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
          const float32x4_t X_re = vld1q_f32(&X.re[k]);
          const float32x4_t X_im = vld1q_f32(&X.im[k]);
          const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
          const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
          const float32x4_t S_re = vld1q_f32(&S->re[k]);
          const float32x4_t S_im = vld1q_f32(&S->im[k]);
          // e = X_re*H_re - X_im*H_im; f = X_re*H_im + X_im*H_re.
          const float32x4_t a = vmulq_f32(X_re, H_re);
          const float32x4_t e = vmlsq_f32(a, X_im, H_im);
          const float32x4_t c = vmulq_f32(X_re, H_im);
          const float32x4_t f = vmlaq_f32(c, X_im, H_re);
          // S += (e, f).
          const float32x4_t g = vaddq_f32(S_re, e);
          const float32x4_t h = vaddq_f32(S_im, f);
          vst1q_f32(&S->re[k], g);
          vst1q_f32(&S->im[k], h);
        }
      }
    }
    // Wrap to the start of the ring buffer for the remaining partitions.
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);

  // Scalar pass accumulating the Nyquist bin.
  X_partition = render_buffer.Position();
  p = 0;
  limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Produces the filter output (SSE2 variant).
// Computes S = sum over partitions and render channels of X * H (complex
// multiplication), walking the render FFT ring buffer from its current
// position. Bins [0, kFftLengthBy2) are handled four at a time in a first
// pass; a second, scalar pass accumulates the Nyquist bin.
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S) {
  // The filter must have at least one partition.
  RTC_DCHECK_GE(H.size(), 1);
  S->re.fill(0.f);
  S->im.fill(0.f);

  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // Number of partitions available before the ring buffer wraps.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;

  size_t X_partition = render_buffer.Position();
  size_t p = 0;
  size_t limit = lim1;
  // Vectorized pass over bins [0, kFftLengthBy2).
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
          const __m128 X_re = _mm_loadu_ps(&X.re[k]);
          const __m128 X_im = _mm_loadu_ps(&X.im[k]);
          const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
          const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
          const __m128 S_re = _mm_loadu_ps(&S->re[k]);
          const __m128 S_im = _mm_loadu_ps(&S->im[k]);
          // e = X_re*H_re - X_im*H_im; f = X_re*H_im + X_im*H_re.
          const __m128 a = _mm_mul_ps(X_re, H_re);
          const __m128 b = _mm_mul_ps(X_im, H_im);
          const __m128 c = _mm_mul_ps(X_re, H_im);
          const __m128 d = _mm_mul_ps(X_im, H_re);
          const __m128 e = _mm_sub_ps(a, b);
          const __m128 f = _mm_add_ps(c, d);
          // S += (e, f).
          const __m128 g = _mm_add_ps(S_re, e);
          const __m128 h = _mm_add_ps(S_im, f);
          _mm_storeu_ps(&S->re[k], g);
          _mm_storeu_ps(&S->im[k], h);
        }
      }
    }
    // Wrap to the start of the ring buffer for the remaining partitions.
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);

  // Scalar pass accumulating the Nyquist bin.
  X_partition = render_buffer.Position();
  p = 0;
  limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);
}
#endif
} // namespace aec3
namespace {

// Ensures that the newly added filter partitions after a size increase are
// set to zero.
void ZeroFilter(size_t old_size,
                size_t new_size,
                std::vector<std::vector<FftData>>* H) {
  RTC_DCHECK_GE(H->size(), old_size);
  RTC_DCHECK_GE(H->size(), new_size);

  // Clear every channel of every partition in the range [old_size, new_size).
  for (size_t partition = old_size; partition < new_size; ++partition) {
    RTC_DCHECK_EQ((*H)[partition].size(), (*H)[0].size());
    for (auto& channel_filter : (*H)[partition]) {
      channel_filter.Clear();
    }
  }
}

}  // namespace
// Constructs the filter with storage preallocated for max_size_partitions
// partitions (one FftData per render channel per partition). The active size
// starts at initial_size_partitions; later size changes are ramped over
// size_change_duration_blocks blocks.
AdaptiveFirFilter::AdaptiveFirFilter(size_t max_size_partitions,
                                     size_t initial_size_partitions,
                                     size_t size_change_duration_blocks,
                                     size_t num_render_channels,
                                     Aec3Optimization optimization,
                                     ApmDataDumper* data_dumper)
    : data_dumper_(data_dumper),
      fft_(),
      optimization_(optimization),
      num_render_channels_(num_render_channels),
      max_size_partitions_(max_size_partitions),
      size_change_duration_blocks_(
          static_cast<int>(size_change_duration_blocks)),
      current_size_partitions_(initial_size_partitions),
      target_size_partitions_(initial_size_partitions),
      old_target_size_partitions_(initial_size_partitions),
      H_(max_size_partitions_, std::vector<FftData>(num_render_channels_)) {
  RTC_DCHECK(data_dumper_);
  RTC_DCHECK_GE(max_size_partitions, initial_size_partitions);

  RTC_DCHECK_LT(0, size_change_duration_blocks_);
  // Precomputed reciprocal used when interpolating the size during a ramp.
  one_by_size_change_duration_blocks_ = 1.f / size_change_duration_blocks_;

  ZeroFilter(0, max_size_partitions_, &H_);

  SetSizePartitions(current_size_partitions_, true);
}

AdaptiveFirFilter::~AdaptiveFirFilter() = default;
// Resets the inactive part of the filter after an echo path change by
// zeroing the partitions beyond the currently active size.
void AdaptiveFirFilter::HandleEchoPathChange() {
  // TODO(peah): Check the value and purpose of the code below.
  ZeroFilter(current_size_partitions_, max_size_partitions_, &H_);
}
// Sets the target number of active filter partitions. With immediate_effect
// the new size takes effect at once; otherwise the size is ramped over
// size_change_duration_blocks_ subsequent calls to UpdateSize().
void AdaptiveFirFilter::SetSizePartitions(size_t size, bool immediate_effect) {
  RTC_DCHECK_EQ(max_size_partitions_, H_.capacity());
  RTC_DCHECK_LE(size, max_size_partitions_);

  target_size_partitions_ = std::min(max_size_partitions_, size);
  if (immediate_effect) {
    size_t old_size_partitions_ = current_size_partitions_;
    current_size_partitions_ = old_target_size_partitions_ =
        target_size_partitions_;
    // Zero any partitions that have just become active.
    ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);

    // Keep the cyclic constraining index within the new active size.
    partition_to_constrain_ =
        std::min(partition_to_constrain_, current_size_partitions_ - 1);
    size_change_counter_ = 0;
  } else {
    size_change_counter_ = size_change_duration_blocks_;
  }
}
// Moves current_size_partitions_ towards target_size_partitions_ while a
// gradual size change is in progress, zeroing newly activated partitions.
void AdaptiveFirFilter::UpdateSize() {
  RTC_DCHECK_GE(size_change_duration_blocks_, size_change_counter_);
  size_t old_size_partitions_ = current_size_partitions_;
  if (size_change_counter_ > 0) {
    --size_change_counter_;

    auto average = [](float from, float to, float from_weight) {
      return from * from_weight + to * (1.f - from_weight);
    };

    float change_factor =
        size_change_counter_ * one_by_size_change_duration_blocks_;

    // Linear interpolation between the old and new target sizes; the float
    // result is truncated on assignment to the size_t member.
    current_size_partitions_ = average(old_target_size_partitions_,
                                       target_size_partitions_, change_factor);

    // Keep the cyclic constraining index within the active size.
    partition_to_constrain_ =
        std::min(partition_to_constrain_, current_size_partitions_ - 1);
  } else {
    current_size_partitions_ = old_target_size_partitions_ =
        target_size_partitions_;
  }
  ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
  RTC_DCHECK_LE(0, size_change_counter_);
}
// Produces the filter output S by dispatching to the variant matching the
// detected CPU optimization. The Avx2 variants are presumably defined in a
// separately compiled translation unit built with AVX2 flags (see the
// aec3_avx2 build target).
void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
                               FftData* S) const {
  RTC_DCHECK(S);
  switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S);
      break;
    case Aec3Optimization::kAvx2:
      aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::ApplyFilter_Neon(render_buffer, current_size_partitions_, H_, S);
      break;
#endif
    default:
      aec3::ApplyFilter(render_buffer, current_size_partitions_, H_, S);
  }
}
// Adapts the filter without maintaining an external impulse response
// estimate.
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
                              const FftData& G) {
  // Adapt the filter and update the filter size.
  AdaptAndUpdateSize(render_buffer, G);

  // Constrain the filter partitions in a cyclic manner.
  Constrain();
}
// Adapts the filter and keeps the externally stored impulse response
// estimate in sync with the constrained partition.
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
                              const FftData& G,
                              std::vector<float>* impulse_response) {
  // Adapt the filter and update the filter size.
  AdaptAndUpdateSize(render_buffer, G);

  // Constrain the filter partitions in a cyclic manner.
  ConstrainAndUpdateImpulseResponse(impulse_response);
}
// Computes the frequency response of the currently active filter partitions
// into H2, resizing it to the current partition count and dispatching to the
// variant matching the detected CPU optimization.
void AdaptiveFirFilter::ComputeFrequencyResponse(
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const {
  RTC_DCHECK_GE(max_size_partitions_, H2->capacity());

  H2->resize(current_size_partitions_);

  switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2);
      break;
    case Aec3Optimization::kAvx2:
      aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::ComputeFrequencyResponse_Neon(current_size_partitions_, H_, H2);
      break;
#endif
    default:
      aec3::ComputeFrequencyResponse(current_size_partitions_, H_, H2);
  }
}
// Applies any pending filter size change, then adapts the active partitions
// using the gain G, dispatching to the variant matching the detected CPU
// optimization.
void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer,
                                           const FftData& G) {
  // Update the filter size if needed.
  UpdateSize();

  // Adapt the filter.
  switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_,
                                 &H_);
      break;
    case Aec3Optimization::kAvx2:
      aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
                                 &H_);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::AdaptPartitions_Neon(render_buffer, G, current_size_partitions_,
                                 &H_);
      break;
#endif
    default:
      aec3::AdaptPartitions(render_buffer, G, current_size_partitions_, &H_);
  }
}
// Constrains the partition of the frequency domain filter to be limited in
// time via setting the relevant time-domain coefficients to zero and updates
// the corresponding values in an externally stored impulse response estimate.
void AdaptiveFirFilter::ConstrainAndUpdateImpulseResponse(
    std::vector<float>* impulse_response) {
  RTC_DCHECK_EQ(GetTimeDomainLength(max_size_partitions_),
                impulse_response->capacity());
  // Resize to the active filter length; the capacity is preallocated for the
  // maximum size, so no reallocation occurs. (A duplicated resize call was
  // removed here.)
  impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
  std::array<float, kFftLength> h;
  // Zero the section of the impulse response corresponding to the partition
  // about to be constrained before taking per-channel maxima below.
  std::fill(
      impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2,
      impulse_response->begin() + (partition_to_constrain_ + 1) * kFftLengthBy2,
      0.f);

  for (size_t ch = 0; ch < num_render_channels_; ++ch) {
    // Transform the partition to the time domain and rescale.
    fft_.Ifft(H_[partition_to_constrain_][ch], &h);

    static constexpr float kScale = 1.0f / kFftLengthBy2;
    std::for_each(h.begin(), h.begin() + kFftLengthBy2,
                  [](float& a) { a *= kScale; });
    // Enforce the time-domain constraint: zero the second half of h.
    std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);

    // Store the coefficient with the largest magnitude across channels in the
    // external impulse response estimate.
    if (ch == 0) {
      std::copy(
          h.begin(), h.begin() + kFftLengthBy2,
          impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2);
    } else {
      for (size_t k = 0, j = partition_to_constrain_ * kFftLengthBy2;
           k < kFftLengthBy2; ++k, ++j) {
        if (fabsf((*impulse_response)[j]) < fabsf(h[k])) {
          (*impulse_response)[j] = h[k];
        }
      }
    }

    // Transform the constrained partition back to the frequency domain.
    fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
  }

  // Advance cyclically to the next partition to constrain.
  partition_to_constrain_ =
      partition_to_constrain_ < (current_size_partitions_ - 1)
          ? partition_to_constrain_ + 1
          : 0;
}
// Constrains the a partiton of the frequency domain filter to be limited in
// time via setting the relevant time-domain coefficients to zero.
void AdaptiveFirFilter::Constrain() {
std::array<float, kFftLength> h;
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
fft_.Ifft(H_[partition_to_constrain_][ch], &h);
static constexpr float kScale = 1.0f / kFftLengthBy2;
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
[](float& a) { a *= kScale; });
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
}
partition_to_constrain_ =
partition_to_constrain_ < (current_size_partitions_ - 1)
? partition_to_constrain_ + 1
: 0;
}
void AdaptiveFirFilter::ScaleFilter(float factor) {
for (auto& H_p : H_) {
for (auto& H_p_ch : H_p) {
for (auto& re : H_p_ch.re) {
re *= factor;
}
for (auto& im : H_p_ch.im) {
im *= factor;
}
}
}
}
// Overwrites the filter coefficients with those in |H| for as many partitions
// as the current filter and |H| have in common.
void AdaptiveFirFilter::SetFilter(size_t num_partitions,
                                  const std::vector<std::vector<FftData>>& H) {
  const size_t partitions_to_copy =
      std::min(current_size_partitions_, num_partitions);
  for (size_t p = 0; p < partitions_to_copy; ++p) {
    RTC_DCHECK_EQ(H_[p].size(), H[p].size());
    RTC_DCHECK_EQ(num_render_channels_, H_[p].size());
    for (size_t ch = 0; ch < num_render_channels_; ++ch) {
      // Array assignment copies the full re/im spectra of the partition.
      H_[p][ch].re = H[p][ch].re;
      H_[p][ch].im = H[p][ch].im;
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,191 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
#include <stddef.h>
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec3_fft.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace aec3 {
// Computes and stores the frequency response of the filter.
void ComputeFrequencyResponse(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#if defined(WEBRTC_HAS_NEON)
void ComputeFrequencyResponse_Neon(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ComputeFrequencyResponse_Sse2(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
void ComputeFrequencyResponse_Avx2(
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
#endif
// Adapts the filter partitions.
void AdaptPartitions(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
#if defined(WEBRTC_HAS_NEON)
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
const FftData& G,
size_t num_partitions,
std::vector<std::vector<FftData>>* H);
#endif
// Produces the filter output.
void ApplyFilter(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
#if defined(WEBRTC_HAS_NEON)
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
size_t num_partitions,
const std::vector<std::vector<FftData>>& H,
FftData* S);
#endif
} // namespace aec3
// Provides a frequency domain adaptive filter functionality.
class AdaptiveFirFilter {
 public:
  // Constructs a filter with at most |max_size_partitions| partitions whose
  // initial length is |initial_size_partitions| partitions; size changes are
  // applied gradually over |size_change_duration_blocks| blocks.
  AdaptiveFirFilter(size_t max_size_partitions,
                    size_t initial_size_partitions,
                    size_t size_change_duration_blocks,
                    size_t num_render_channels,
                    Aec3Optimization optimization,
                    ApmDataDumper* data_dumper);
  ~AdaptiveFirFilter();
  AdaptiveFirFilter(const AdaptiveFirFilter&) = delete;
  AdaptiveFirFilter& operator=(const AdaptiveFirFilter&) = delete;
  // Produces the output of the filter.
  void Filter(const RenderBuffer& render_buffer, FftData* S) const;
  // Adapts the filter and updates an externally stored impulse response
  // estimate.
  void Adapt(const RenderBuffer& render_buffer,
             const FftData& G,
             std::vector<float>* impulse_response);
  // Adapts the filter.
  void Adapt(const RenderBuffer& render_buffer, const FftData& G);
  // Receives reports that known echo path changes have occured and adjusts
  // the filter adaptation accordingly.
  void HandleEchoPathChange();
  // Returns the filter size (in partitions).
  size_t SizePartitions() const { return current_size_partitions_; }
  // Sets the filter size.
  void SetSizePartitions(size_t size, bool immediate_effect);
  // Computes the frequency responses for the filter partitions.
  void ComputeFrequencyResponse(
      std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const;
  // Returns the maximum number of partitions for the filter.
  size_t max_filter_size_partitions() const { return max_size_partitions_; }
  // Dumps the first render channel of every partition via the data dumper.
  // Note that the real and imaginary parts are written under the same name.
  void DumpFilter(const char* name_frequency_domain) {
    for (size_t p = 0; p < max_size_partitions_; ++p) {
      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].re);
      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].im);
    }
  }
  // Scale the filter impulse response and spectrum by a factor.
  void ScaleFilter(float factor);
  // Set the filter coefficients.
  void SetFilter(size_t num_partitions,
                 const std::vector<std::vector<FftData>>& H);
  // Gets the filter coefficients.
  const std::vector<std::vector<FftData>>& GetFilter() const { return H_; }
 private:
  // Adapts the filter and updates the filter size.
  void AdaptAndUpdateSize(const RenderBuffer& render_buffer, const FftData& G);
  // Constrain the filter partitions in a cyclic manner.
  void Constrain();
  // Constrains the filter in a cyclic manner and updates the corresponding
  // values in the supplied impulse response.
  void ConstrainAndUpdateImpulseResponse(std::vector<float>* impulse_response);
  // Gradually Updates the current filter size towards the target size.
  void UpdateSize();
  ApmDataDumper* const data_dumper_;
  const Aec3Fft fft_;
  const Aec3Optimization optimization_;
  const size_t num_render_channels_;
  const size_t max_size_partitions_;
  const int size_change_duration_blocks_;
  float one_by_size_change_duration_blocks_;
  size_t current_size_partitions_;
  size_t target_size_partitions_;
  size_t old_target_size_partitions_;
  int size_change_counter_ = 0;
  // Frequency-domain filter coefficients, indexed as H_[partition][channel].
  std::vector<std::vector<FftData>> H_;
  // Index of the partition that the next Constrain*() call will process.
  size_t partition_to_constrain_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_

View File

@ -0,0 +1,187 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
#include <immintrin.h>
#include "rtc_base/checks.h"
namespace webrtc {
namespace aec3 {
// Computes and stores the frequency response of the filter: for each
// partition, the per-bin power response maximized over all render channels.
void ComputeFrequencyResponse_Avx2(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
  for (auto& H2_ch : *H2) {
    H2_ch.fill(0.f);
  }
  const size_t num_render_channels = H[0].size();
  // NOTE(review): compares against capacity(), not size() — presumably H2 is
  // pre-reserved to the maximum filter length; confirm against the caller.
  RTC_DCHECK_EQ(H.size(), H2->capacity());
  for (size_t p = 0; p < num_partitions; ++p) {
    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      // Eight bins per iteration: re*re + im*im (fused multiply-add), then
      // keep the per-bin maximum over the channels processed so far.
      for (size_t j = 0; j < kFftLengthBy2; j += 8) {
        __m256 re = _mm256_loadu_ps(&H[p][ch].re[j]);
        __m256 re2 = _mm256_mul_ps(re, re);
        __m256 im = _mm256_loadu_ps(&H[p][ch].im[j]);
        re2 = _mm256_fmadd_ps(im, im, re2);
        __m256 H2_k_j = _mm256_loadu_ps(&(*H2)[p][j]);
        H2_k_j = _mm256_max_ps(H2_k_j, re2);
        _mm256_storeu_ps(&(*H2)[p][j], H2_k_j);
      }
      // The last bin does not fit the 8-wide loop and is handled in scalar.
      float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
                     H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
      (*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
    }
  }
}
// Adapts the filter partitions (AVX2 variant): H += conj(X) * G for every
// partition and render channel.
void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
                          const FftData& G,
                          size_t num_partitions,
                          std::vector<std::vector<FftData>>* H) {
  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // The render FFT buffer is circular: lim1 partitions can be read from
  // Position() to the end of the buffer, after which reading wraps to index 0
  // until all num_partitions (lim2) partitions have been visited.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;
  size_t X_partition = render_buffer.Position();
  size_t limit = lim1;
  size_t p = 0;
  // First pass: bins [0, kFftLengthBy2), eight at a time.
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        // Complex multiply with the conjugate of X expanded into real
        // arithmetic: re(H) += re(X)*re(G) + im(X)*im(G),
        //             im(H) += re(X)*im(G) - im(X)*re(G).
        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
          const __m256 G_re = _mm256_loadu_ps(&G.re[k]);
          const __m256 G_im = _mm256_loadu_ps(&G.im[k]);
          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
          const __m256 a = _mm256_mul_ps(X_re, G_re);
          const __m256 b = _mm256_mul_ps(X_im, G_im);
          const __m256 c = _mm256_mul_ps(X_re, G_im);
          const __m256 d = _mm256_mul_ps(X_im, G_re);
          const __m256 e = _mm256_add_ps(a, b);
          const __m256 f = _mm256_sub_ps(c, d);
          const __m256 g = _mm256_add_ps(H_re, e);
          const __m256 h = _mm256_add_ps(H_im, f);
          _mm256_storeu_ps(&H_p_ch.re[k], g);
          _mm256_storeu_ps(&H_p_ch.im[k], h);
        }
      }
    }
    X_partition = 0;
    limit = lim2;
  } while (p < lim2);
  // Second pass: the last bin, which does not fit the 8-wide loop.
  X_partition = render_buffer.Position();
  limit = lim1;
  p = 0;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
      }
    }
    X_partition = 0;
    limit = lim2;
  } while (p < lim2);
}
// Produces the filter output (AVX2 variant): S = sum over partitions and
// channels of X * H, accumulated bin by bin.
void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S) {
  // The filter must hold at least num_partitions partitions. (The previous
  // check compared H.size() against H.size() - 1, which is always true.)
  RTC_DCHECK_GE(H.size(), num_partitions);
  S->re.fill(0.f);
  S->im.fill(0.f);

  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // The render FFT buffer is circular: lim1 partitions can be read from
  // Position() to the end of the buffer, after which reading wraps to index 0
  // until all num_partitions (lim2) partitions have been visited.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;

  size_t X_partition = render_buffer.Position();
  size_t p = 0;
  size_t limit = lim1;
  // First pass: bins [0, kFftLengthBy2), eight at a time.
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        // Complex multiply expanded into real arithmetic:
        // re(S) += re(X)*re(H) - im(X)*im(H),
        // im(S) += re(X)*im(H) + im(X)*re(H).
        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
          const __m256 S_re = _mm256_loadu_ps(&S->re[k]);
          const __m256 S_im = _mm256_loadu_ps(&S->im[k]);
          const __m256 a = _mm256_mul_ps(X_re, H_re);
          const __m256 b = _mm256_mul_ps(X_im, H_im);
          const __m256 c = _mm256_mul_ps(X_re, H_im);
          const __m256 d = _mm256_mul_ps(X_im, H_re);
          const __m256 e = _mm256_sub_ps(a, b);
          const __m256 f = _mm256_add_ps(c, d);
          const __m256 g = _mm256_add_ps(S_re, e);
          const __m256 h = _mm256_add_ps(S_im, f);
          _mm256_storeu_ps(&S->re[k], g);
          _mm256_storeu_ps(&S->im[k], h);
        }
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);

  // Second pass: the last bin, which does not fit the 8-wide loop.
  X_partition = render_buffer.Position();
  p = 0;
  limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);
}
} // namespace aec3
} // namespace webrtc

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
#include <algorithm>
#include <functional>
#if defined(WEBRTC_HAS_NEON)
#include <arm_neon.h>
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
namespace webrtc {
namespace aec3 {
// Computes and stores the echo return loss estimate of the filter, which is
// the element-wise sum of the partition frequency responses.
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
                 rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (const auto& partition_response : H2) {
    size_t k = 0;
    for (float& erl_k : erl) {
      erl_k += partition_response[k];
      ++k;
    }
  }
}
#if defined(WEBRTC_HAS_NEON)
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses. (NEON variant.)
void ErlComputer_NEON(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (auto& H2_j : H2) {
    // Accumulate four bins per iteration with NEON.
    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
      const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]);
      float32x4_t erl_k = vld1q_f32(&erl[k]);
      erl_k = vaddq_f32(erl_k, H2_j_k);
      vst1q_f32(&erl[k], erl_k);
    }
    // The last bin does not fit the 4-wide loop and is handled in scalar.
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses. (SSE2 variant.)
void ErlComputer_SSE2(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (auto& H2_j : H2) {
    // Accumulate four bins per iteration with SSE2.
    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
      const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]);
      __m128 erl_k = _mm_loadu_ps(&erl[k]);
      erl_k = _mm_add_ps(erl_k, H2_j_k);
      _mm_storeu_ps(&erl[k], erl_k);
    }
    // The last bin does not fit the 4-wide loop and is handled in scalar.
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
#endif
} // namespace aec3
// Computes the echo return loss from the filter partition frequency responses
// in |H2|, dispatching to the fastest implementation available for the
// detected architecture.
void ComputeErl(const Aec3Optimization& optimization,
                const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
                rtc::ArrayView<float> erl) {
  RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
  // Update the frequency response and echo return loss for the filter.
  switch (optimization) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::ErlComputer_SSE2(H2, erl);
      break;
    case Aec3Optimization::kAvx2:
      aec3::ErlComputer_AVX2(H2, erl);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::ErlComputer_NEON(H2, erl);
      break;
#endif
    default:
      // Portable fallback used when no SIMD variant applies.
      aec3::ErlComputer(H2, erl);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
#include <stddef.h>
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace aec3 {
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses.
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
#if defined(WEBRTC_HAS_NEON)
void ErlComputer_NEON(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
void ErlComputer_SSE2(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
void ErlComputer_AVX2(
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
#endif
} // namespace aec3
// Computes the echo return loss based on a frequency response.
void ComputeErl(const Aec3Optimization& optimization,
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
rtc::ArrayView<float> erl);
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
#include <immintrin.h>
namespace webrtc {
namespace aec3 {
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses. (AVX2 variant.)
void ErlComputer_AVX2(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (auto& H2_j : H2) {
    // Accumulate eight bins per iteration with AVX2.
    for (size_t k = 0; k < kFftLengthBy2; k += 8) {
      const __m256 H2_j_k = _mm256_loadu_ps(&H2_j[k]);
      __m256 erl_k = _mm256_loadu_ps(&erl[k]);
      erl_k = _mm256_add_ps(erl_k, H2_j_k);
      _mm256_storeu_ps(&erl[k], erl_k);
    }
    // The last bin does not fit the 8-wide loop and is handled in scalar.
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
} // namespace aec3
} // namespace webrtc

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/aec3_common.h"
#include <stdint.h>

#include <cstring>

#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
// Returns the best optimization level available on the current platform:
// AVX2 over SSE2 on the x86 family (queried at runtime via GetCPUInfo), NEON
// where compiled in, otherwise no optimization.
Aec3Optimization DetectOptimization() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (GetCPUInfo(kAVX2) != 0) {
    return Aec3Optimization::kAvx2;
  } else if (GetCPUInfo(kSSE2) != 0) {
    return Aec3Optimization::kSse2;
  }
#endif
#if defined(WEBRTC_HAS_NEON)
  return Aec3Optimization::kNeon;
#endif
  // Reached on x86 without AVX2/SSE2 support and on plain builds.
  return Aec3Optimization::kNone;
}
// Computes a fast approximation of log2(in). Requires in > 0.
float FastApproxLog2f(const float in) {
  RTC_DCHECK_GT(in, .0f);
  // Read and interpret float as uint32_t and then cast to float.
  // This is done to extract the exponent (bits 30 - 23).
  // "Right shift" of the exponent is then performed by multiplying
  // with the constant (1/2^23). Finally, we subtract a constant to
  // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias).
  // The bit pattern is transferred via memcpy, which is the well-defined way
  // of type punning in C++ (reading a union member other than the one last
  // written is undefined behavior); compilers reduce it to a single move.
  uint32_t bit_pattern;
  static_assert(sizeof(bit_pattern) == sizeof(in),
                "float must be 32 bits wide");
  std::memcpy(&bit_pattern, &in, sizeof(in));
  float out = bit_pattern;
  out *= 1.1920929e-7f;  // 1/2^23
  out -= 126.942695f;    // Remove bias.
  return out;
}
// Converts a power quantity expressed in log2 units to decibels:
// dB = 10 * log10(x) = 10 * log10(2) * log2(x).
float Log2TodB(const float in_log2) {
  constexpr double kDbPerLog2 = 3.0102999566398121;  // 10 * log10(2).
  return kDbPerLog2 * in_log2;
}
} // namespace webrtc

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
#include <stddef.h>
namespace webrtc {
#ifdef _MSC_VER /* visual c++ */
#define ALIGN16_BEG __declspec(align(16))
#define ALIGN16_END
#else /* gcc or icc */
#define ALIGN16_BEG
#define ALIGN16_END __attribute__((aligned(16)))
#endif
enum class Aec3Optimization { kNone, kSse2, kAvx2, kNeon };
constexpr int kNumBlocksPerSecond = 250;
constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
constexpr int kMetricsComputationBlocks = 7;
constexpr int kMetricsCollectionBlocks =
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
constexpr size_t kFftLengthBy2 = 64;
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
constexpr size_t kFftLength = 2 * kFftLengthBy2;
constexpr size_t kFftLengthBy2Log2 = 6;
constexpr int kRenderTransferQueueSizeFrames = 100;
constexpr size_t kMaxNumBands = 3;
constexpr size_t kFrameSize = 160;
constexpr size_t kSubFrameLength = kFrameSize / 2;
constexpr size_t kBlockSize = kFftLengthBy2;
constexpr size_t kBlockSizeLog2 = kFftLengthBy2Log2;
constexpr size_t kExtendedBlockSize = 2 * kFftLengthBy2;
constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32;
constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks =
kMatchedFilterWindowSizeSubBlocks * 3 / 4;
// TODO(peah): Integrate this with how it is done inside audio_processing_impl.
// Returns the number of frequency bands (one per 16 kHz of sample rate) used
// for the given full-band sample rate.
constexpr size_t NumBandsForRate(int sample_rate_hz) {
  const int num_bands = sample_rate_hz / 16000;
  return static_cast<size_t>(num_bands);
}
// Returns true for the full-band sample rates supported by AEC3.
constexpr bool ValidFullBandRate(int sample_rate_hz) {
  switch (sample_rate_hz) {
    case 16000:
    case 32000:
    case 48000:
      return true;
    default:
      return false;
  }
}
// Returns the length in time-domain samples of a filter with
// |filter_length_blocks| partitions (kFftLengthBy2 samples per partition).
// NOTE(review): the parameter and return types are int while callers pass
// size_t partition counts — confirm the implicit conversions are intended.
constexpr int GetTimeDomainLength(int filter_length_blocks) {
  return filter_length_blocks * kFftLengthBy2;
}
// Returns the number of samples of the down-sampled render buffer needed to
// cover |num_matched_filters| matched filters at the given down-sampling
// factor.
constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor,
                                          size_t num_matched_filters) {
  return kBlockSize / down_sampling_factor *
         (kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters +
          kMatchedFilterWindowSizeSubBlocks + 1);
}
// Returns the size in blocks of the render delay buffer: the down-sampled
// buffer size expressed in blocks, extended by the filter length plus one.
constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor,
                                          size_t num_matched_filters,
                                          size_t filter_length_blocks) {
  return GetDownSampledBufferSize(down_sampling_factor, num_matched_filters) /
             (kBlockSize / down_sampling_factor) +
         filter_length_blocks + 1;
}
// Detects what kind of optimizations to use for the code.
Aec3Optimization DetectOptimization();
// Computes the log2 of the input in a fast an approximate manner.
float FastApproxLog2f(const float in);
// Returns dB from a power quantity expressed in log2.
float Log2TodB(const float in_log2);
static_assert(1 << kBlockSizeLog2 == kBlockSize,
"Proper number of shifts for blocksize");
static_assert(1 << kFftLengthBy2Log2 == kFftLengthBy2,
"Proper number of shifts for the fft length");
static_assert(1 == NumBandsForRate(16000), "Number of bands for 16 kHz");
static_assert(2 == NumBandsForRate(32000), "Number of bands for 32 kHz");
static_assert(3 == NumBandsForRate(48000), "Number of bands for 48 kHz");
static_assert(ValidFullBandRate(16000),
"Test that 16 kHz is a valid sample rate");
static_assert(ValidFullBandRate(32000),
"Test that 32 kHz is a valid sample rate");
static_assert(ValidFullBandRate(48000),
"Test that 48 kHz is a valid sample rate");
static_assert(!ValidFullBandRate(8001),
"Test that 8001 Hz is not a valid sample rate");
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_

View File

@ -0,0 +1,144 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/aec3_fft.h"
#include <algorithm>
#include <functional>
#include <iterator>
#include "rtc_base/checks.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
namespace webrtc {
namespace {
const float kHanning64[kFftLengthBy2] = {
0.f, 0.00248461f, 0.00991376f, 0.0222136f, 0.03926189f,
0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
0.70564355f, 0.75f, 0.79187184f, 0.83084292f, 0.86652594f,
0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
0.83084292f, 0.79187184f, 0.75f, 0.70564355f, 0.65924333f,
0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
0.0222136f, 0.00991376f, 0.00248461f, 0.f};
// Hanning window from Matlab command win = sqrt(hanning(128)).
const float kSqrtHanning128[kFftLength] = {
0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f,
0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f,
0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f,
0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f,
0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f,
0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f,
0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f,
0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f,
0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f,
0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f,
0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f,
0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f,
0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f,
0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f,
0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f,
0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f};
// Returns true when the CPU supports SSE2 (x86 family only); used to select
// the accelerated Ooura FFT implementation.
bool IsSse2Available() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  return GetCPUInfo(kSSE2) != 0;
#else
  return false;
#endif
}
} // namespace
// Selects the SSE2-optimized Ooura FFT implementation when available.
Aec3Fft::Aec3Fft() : ooura_fft_(IsSse2Available()) {}
// TODO(peah): Change x to be std::array once the rest of the code allows this.
// Computes the FFT of |x| prepended with kFftLengthBy2 zeros, optionally
// applying a Hanning window to |x| first.
void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
                            Window window,
                            FftData* X) const {
  RTC_DCHECK(X);
  RTC_DCHECK_EQ(kFftLengthBy2, x.size());
  std::array<float, kFftLength> padded;
  // The first half of the buffer is the zero padding.
  std::fill(padded.begin(), padded.begin() + kFftLengthBy2, 0.f);
  switch (window) {
    case Window::kRectangular:
      std::copy(x.begin(), x.end(), padded.begin() + kFftLengthBy2);
      break;
    case Window::kHanning:
      std::transform(x.begin(), x.end(), std::begin(kHanning64),
                     padded.begin() + kFftLengthBy2, std::multiplies<float>());
      break;
    case Window::kSqrtHanning:
      RTC_NOTREACHED();
      break;
    default:
      RTC_NOTREACHED();
  }

  Fft(&padded, X);
}
// Concatenates |x_old| and |x| (in that order), optionally applies the
// sqrt-Hanning window, and computes the FFT of the result.
// NOTE(review): the header comment states that x is copied to x_old
// afterwards, but this implementation does not do that — confirm.
void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x,
                        rtc::ArrayView<const float> x_old,
                        Window window,
                        FftData* X) const {
  RTC_DCHECK(X);
  RTC_DCHECK_EQ(kFftLengthBy2, x.size());
  RTC_DCHECK_EQ(kFftLengthBy2, x_old.size());
  std::array<float, kFftLength> fft;
  switch (window) {
    case Window::kRectangular:
      // Plain concatenation: [x_old | x].
      std::copy(x_old.begin(), x_old.end(), fft.begin());
      std::copy(x.begin(), x.end(), fft.begin() + x_old.size());
      break;
    case Window::kHanning:
      RTC_NOTREACHED();
      break;
    case Window::kSqrtHanning:
      // Element-wise multiply the two halves with the first and second half
      // of the 128-point sqrt-Hanning window, respectively.
      std::transform(x_old.begin(), x_old.end(), std::begin(kSqrtHanning128),
                     fft.begin(), std::multiplies<float>());
      std::transform(x.begin(), x.end(),
                     std::begin(kSqrtHanning128) + x_old.size(),
                     fft.begin() + x_old.size(), std::multiplies<float>());
      break;
    default:
      RTC_NOTREACHED();
  }
  Fft(&fft, X);
}
} // namespace webrtc

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
#include <array>
#include "api/array_view.h"
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/checks.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
// Wrapper class that provides 128 point real valued FFT functionality with the
// FftData type.
class Aec3Fft {
 public:
  // Window choices for the windowed FFT variants below.
  enum class Window { kRectangular, kHanning, kSqrtHanning };
  Aec3Fft();
  // Computes the FFT. Note that both the input and output are modified.
  void Fft(std::array<float, kFftLength>* x, FftData* X) const {
    RTC_DCHECK(x);
    RTC_DCHECK(X);
    ooura_fft_.Fft(x->data());
    X->CopyFromPackedArray(*x);
  }
  // Computes the inverse Fft. Note that the output is not normalized; callers
  // apply the 1/kFftLengthBy2 scaling themselves where needed.
  void Ifft(const FftData& X, std::array<float, kFftLength>* x) const {
    RTC_DCHECK(x);
    X.CopyToPackedArray(x);
    ooura_fft_.InverseFft(x->data());
  }
  // Windows the input using a Hanning window, and then adds padding of
  // kFftLengthBy2 initial zeros before computing the Fft.
  void ZeroPaddedFft(rtc::ArrayView<const float> x,
                     Window window,
                     FftData* X) const;
  // Concatenates the kFftLengthBy2 values long x and x_old before computing the
  // Fft. After that, x is copied to x_old.
  void PaddedFft(rtc::ArrayView<const float> x,
                 rtc::ArrayView<const float> x_old,
                 FftData* X) const {
    // Convenience overload: rectangular (no) windowing.
    PaddedFft(x, x_old, Window::kRectangular, X);
  }
  // Padded Fft using a time-domain window.
  void PaddedFft(rtc::ArrayView<const float> x,
                 rtc::ArrayView<const float> x_old,
                 Window window,
                 FftData* X) const;
 private:
  const OouraFft ooura_fft_;
  RTC_DISALLOW_COPY_AND_ASSIGN(Aec3Fft);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_

View File

@ -0,0 +1,477 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/aec_state.h"
#include <math.h>
#include <algorithm>
#include <numeric>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
// Kill-switch: when the field trial is enabled, the initial state is NOT
// reset at echo path changes.
bool DeactivateInitialStateResetAtEchoPathChange() {
  return field_trial::IsEnabled(
      "WebRTC-Aec3DeactivateInitialStateResetKillSwitch");
}
// Kill-switch: full state reset at echo path changes is on unless the field
// trial disables it.
bool FullResetAtEchoPathChange() {
  return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch");
}
// Kill-switch: subtractor-analyzer reset at echo path changes is on unless
// the field trial disables it.
bool SubtractorAnalyzerResetAtEchoPathChange() {
  return !field_trial::IsEnabled(
      "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch");
}
// Computes the render power spectrum (averaged over channels) at the
// direct-path delay plus the modeled reverb power, writing the sum into
// reverb_power_spectrum. The reverb model is advanced using the spectrum one
// block further in the past than the direct path.
void ComputeAvgRenderReverb(
    const SpectrumBuffer& spectrum_buffer,
    int delay_blocks,
    float reverb_decay,
    ReverbModel* reverb_model,
    rtc::ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) {
  RTC_DCHECK(reverb_model);
  const size_t num_render_channels = spectrum_buffer.buffer[0].size();
  // Index of the spectrum at the direct-path delay, and of the block one step
  // further into the past.
  int idx_at_delay =
      spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks);
  int idx_past = spectrum_buffer.IncIndex(idx_at_delay);
  std::array<float, kFftLengthBy2Plus1> X2_data;
  rtc::ArrayView<const float> X2;
  if (num_render_channels > 1) {
    // Averages the per-channel power spectra into render_power.
    auto average_channels =
        [](size_t num_render_channels,
           rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
               spectrum_band_0,
           rtc::ArrayView<float, kFftLengthBy2Plus1> render_power) {
          std::fill(render_power.begin(), render_power.end(), 0.f);
          for (size_t ch = 0; ch < num_render_channels; ++ch) {
            for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
              render_power[k] += spectrum_band_0[ch][k];
            }
          }
          const float normalizer = 1.f / num_render_channels;
          for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
            render_power[k] *= normalizer;
          }
        };
    average_channels(num_render_channels, spectrum_buffer.buffer[idx_past],
                     X2_data);
    reverb_model->UpdateReverbNoFreqShaping(
        X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);
    // X2_data is reused for the direct-path spectrum after the reverb update.
    average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
                     X2_data);
    X2 = X2_data;
  } else {
    reverb_model->UpdateReverbNoFreqShaping(
        spectrum_buffer.buffer[idx_past][/*channel=*/0],
        /*power_spectrum_scaling=*/1.0f, reverb_decay);
    X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
  }
  rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
      reverb_model->reverb();
  for (size_t k = 0; k < X2.size(); ++k) {
    reverb_power_spectrum[k] = X2[k] + reverb_power[k];
  }
}
}  // namespace
int AecState::instance_count_ = 0;
// Computes the per-band scaling to apply to the residual echo estimate by
// delegating to the echo audibility estimator.
void AecState::GetResidualEchoScaling(
    rtc::ArrayView<float> residual_scaling) const {
  // The required amount of unsaturated render activity before the filter is
  // deemed converged depends on the configured initial phase.
  const float convergence_threshold_blocks =
      config_.filter.conservative_initial_phase ? 1.5f * kNumBlocksPerSecond
                                                : 0.8f * kNumBlocksPerSecond;
  const bool filter_has_had_time_to_converge =
      strong_not_saturated_render_blocks_ >= convergence_threshold_blocks;
  echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
                                          residual_scaling);
}
// Returns 1.f as the multiplier to apply to the residual echo estimate when
// the echo is saturated, and nullopt (no offset) otherwise.
absl::optional<float> AecState::ErleUncertainty() const {
  if (!SaturatedEcho()) {
    return absl::nullopt;
  }
  return 1.f;
}
// Constructs the AEC state. The configuration is copied, all per-channel
// state is sized by num_capture_channels, and the kill-switch field trials
// are queried once here.
AecState::AecState(const EchoCanceller3Config& config,
                   size_t num_capture_channels)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_(config),
      num_capture_channels_(num_capture_channels),
      deactivate_initial_state_reset_at_echo_path_change_(
          DeactivateInitialStateResetAtEchoPathChange()),
      full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
      subtractor_analyzer_reset_at_echo_path_change_(
          SubtractorAnalyzerResetAtEchoPathChange()),
      initial_state_(config_),
      delay_state_(config_, num_capture_channels_),
      transparent_state_(TransparentMode::Create(config_)),
      filter_quality_state_(config_, num_capture_channels_),
      erl_estimator_(2 * kNumBlocksPerSecond),
      erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
      filter_analyzer_(config_, num_capture_channels_),
      echo_audibility_(
          config_.echo_audibility.use_stationarity_properties_at_init),
      reverb_model_estimator_(config_, num_capture_channels_),
      subtractor_output_analyzer_(num_capture_channels_) {}
AecState::~AecState() = default;
// Resets the relevant parts of the AEC state depending on what kind of echo
// path change (delay and/or gain) was reported. Field-trial kill switches
// queried at construction can deactivate the individual resets.
void AecState::HandleEchoPathChange(
    const EchoPathVariability& echo_path_variability) {
  const auto full_reset = [&]() {
    filter_analyzer_.Reset();
    capture_signal_saturation_ = false;
    strong_not_saturated_render_blocks_ = 0;
    blocks_with_active_render_ = 0;
    if (!deactivate_initial_state_reset_at_echo_path_change_) {
      initial_state_.Reset();
    }
    if (transparent_state_) {
      transparent_state_->Reset();
    }
    // erle_estimator_.Reset(true) requests a full ERLE reset; the flag's
    // exact semantics are defined in ErleEstimator.
    erle_estimator_.Reset(true);
    erl_estimator_.Reset();
    filter_quality_state_.Reset();
  };
  // TODO(peah): Refine the reset scheme according to the type of gain and
  // delay adjustment.
  if (full_reset_at_echo_path_change_ &&
      echo_path_variability.delay_change !=
          EchoPathVariability::DelayAdjustment::kNone) {
    full_reset();
  } else if (echo_path_variability.gain_change) {
    // A pure gain change only triggers a partial ERLE reset.
    erle_estimator_.Reset(false);
  }
  if (subtractor_analyzer_reset_at_echo_path_change_) {
    subtractor_output_analyzer_.HandleEchoPathChange();
  }
}
// Updates the full AEC state for one block: analyzes the subtractor output
// and the adaptive filters, updates the delay, render-activity counters,
// ERL/ERLE estimates, saturation, initial-state, transparent-mode and
// filter-quality decisions, and the reverb model, then dumps debug data.
void AecState::Update(
    const absl::optional<DelayEstimate>& external_delay,
    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
        adaptive_filter_frequency_responses,
    rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
    rtc::ArrayView<const SubtractorOutput> subtractor_output) {
  RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
  RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
  RTC_DCHECK_EQ(num_capture_channels_,
                adaptive_filter_frequency_responses.size());
  RTC_DCHECK_EQ(num_capture_channels_,
                adaptive_filter_impulse_responses.size());
  // Analyze the filter outputs and filters.
  bool any_filter_converged;
  bool all_filters_diverged;
  subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
                                     &all_filters_diverged);
  bool any_filter_consistent;
  float max_echo_path_gain;
  filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
                          &any_filter_consistent, &max_echo_path_gain);
  // Estimate the direct path delay of the filter.
  if (config_.filter.use_linear_filter) {
    delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
                        strong_not_saturated_render_blocks_);
  }
  // Render block aligned with the capture signal (band 0 only).
  const std::vector<std::vector<float>>& aligned_render_block =
      render_buffer.Block(-delay_state_.MinDirectPathFilterDelay())[0];
  // Update render counters. A block counts as active render if any channel's
  // energy exceeds the configured active-render limit.
  bool active_render = false;
  for (size_t ch = 0; ch < aligned_render_block.size(); ++ch) {
    const float render_energy = std::inner_product(
        aligned_render_block[ch].begin(), aligned_render_block[ch].end(),
        aligned_render_block[ch].begin(), 0.f);
    if (render_energy > (config_.render_levels.active_render_limit *
                         config_.render_levels.active_render_limit) *
                            kFftLengthBy2) {
      active_render = true;
      break;
    }
  }
  blocks_with_active_render_ += active_render ? 1 : 0;
  strong_not_saturated_render_blocks_ +=
      active_render && !SaturatedCapture() ? 1 : 0;
  std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
  ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
                         delay_state_.MinDirectPathFilterDelay(), ReverbDecay(),
                         &avg_render_reverb_, avg_render_spectrum_with_reverb);
  if (config_.echo_audibility.use_stationarity_properties) {
    // Update the echo audibility evaluator.
    echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
                            delay_state_.MinDirectPathFilterDelay(),
                            delay_state_.ExternalDelayReported());
  }
  // Update the ERL and ERLE measures.
  if (initial_state_.TransitionTriggered()) {
    erle_estimator_.Reset(false);
  }
  erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
                         avg_render_spectrum_with_reverb, Y2, E2_refined,
                         subtractor_output_analyzer_.ConvergedFilters());
  erl_estimator_.Update(
      subtractor_output_analyzer_.ConvergedFilters(),
      render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);
  // Detect and flag echo saturation.
  if (config_.ep_strength.echo_can_saturate) {
    saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
                                UsableLinearEstimate(), subtractor_output,
                                max_echo_path_gain);
  } else {
    RTC_DCHECK(!saturation_detector_.SaturatedEcho());
  }
  // Update the decision on whether to use the initial state parameter set.
  initial_state_.Update(active_render, SaturatedCapture());
  // Detect whether the transparent mode should be activated.
  if (transparent_state_) {
    transparent_state_->Update(delay_state_.MinDirectPathFilterDelay(),
                               any_filter_consistent, any_filter_converged,
                               all_filters_diverged, active_render,
                               SaturatedCapture());
  }
  // Analyze the quality of the filter.
  filter_quality_state_.Update(active_render, TransparentModeActive(),
                               SaturatedCapture(), external_delay,
                               any_filter_converged);
  // Update the reverb estimate.
  const bool stationary_block =
      config_.echo_audibility.use_stationarity_properties &&
      echo_audibility_.IsBlockStationary();
  reverb_model_estimator_.Update(
      filter_analyzer_.GetAdjustedFilters(),
      adaptive_filter_frequency_responses,
      erle_estimator_.GetInstLinearQualityEstimates(),
      delay_state_.DirectPathFilterDelays(),
      filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);
  erle_estimator_.Dump(data_dumper_);
  reverb_model_estimator_.Dump(data_dumper_.get());
  data_dumper_->DumpRaw("aec3_active_render", active_render);
  data_dumper_->DumpRaw("aec3_erl", Erl());
  data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
  data_dumper_->DumpRaw("aec3_erle", Erle()[0]);
  data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
  data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
  data_dumper_->DumpRaw("aec3_filter_delay",
                        filter_analyzer_.MinFilterDelayBlocks());
  data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
  data_dumper_->DumpRaw("aec3_initial_state",
                        initial_state_.InitialStateActive());
  data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
  data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
  data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
  data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
  data_dumper_->DumpRaw("aec3_external_delay_avaliable",
                        external_delay ? 1 : 0);
  data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
                        GetReverbFrequencyResponse());
}
// Initializes the initial-state tracker from the filter configuration and
// starts out in the initial state.
AecState::InitialState::InitialState(const EchoCanceller3Config& config)
    : conservative_initial_phase_(config.filter.conservative_initial_phase),
      initial_state_seconds_(config.filter.initial_state_seconds) {
  Reset();
}
// Re-enters the initial state and restarts the adaptation-time counter.
// (The redundant InitialState::InitialState:: qualification of the original
// definition — legal via the injected class name, but confusing — has been
// dropped.)
void AecState::InitialState::Reset() {
  initial_state_ = true;
  strong_not_saturated_render_blocks_ = 0;
}
// Counts blocks with strong, unsaturated render and decides whether the
// initial state is still active; flags the block on which the transition out
// of the initial state starts.
void AecState::InitialState::Update(bool active_render,
                                    bool saturated_capture) {
  strong_not_saturated_render_blocks_ +=
      active_render && !saturated_capture ? 1 : 0;
  // Flag whether the initial state is still active.
  bool prev_initial_state = initial_state_;
  if (conservative_initial_phase_) {
    initial_state_ =
        strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
  } else {
    initial_state_ = strong_not_saturated_render_blocks_ <
                     initial_state_seconds_ * kNumBlocksPerSecond;
  }
  // Flag whether the transition from the initial state has started.
  transition_triggered_ = !initial_state_ && prev_initial_state;
}
// Initializes all per-channel filter delays (and the minimum delay) to the
// configured delay headroom, expressed in blocks.
AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
                                   size_t num_capture_channels)
    : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
      filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
      min_filter_delay_(delay_headroom_blocks_) {}
// Updates the per-channel direct-path delay estimates, either trusting the
// analyzer estimates or, while the filter may not yet have converged,
// falling back to the configured delay headroom.
void AecState::FilterDelay::Update(
    rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
    const absl::optional<DelayEstimate>& external_delay,
    size_t blocks_with_proper_filter_adaptation) {
  // Latch a newly reported, or changed, external delay.
  if (external_delay &&
      (!external_delay_ || external_delay_->delay != external_delay->delay)) {
    external_delay_ = external_delay;
    external_delay_reported_ = true;
  }
  // If it is not certain that the filter has had time to converge, and an
  // external delay is known, override the analyzer estimates with the delay
  // headroom.
  const bool filter_convergence_possible =
      blocks_with_proper_filter_adaptation >= 2 * kNumBlocksPerSecond;
  if (!filter_convergence_possible && external_delay_) {
    std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
              delay_headroom_blocks_);
  } else {
    RTC_DCHECK_EQ(filter_delays_blocks_.size(),
                  analyzer_filter_delay_estimates_blocks.size());
    std::copy(analyzer_filter_delay_estimates_blocks.begin(),
              analyzer_filter_delay_estimates_blocks.end(),
              filter_delays_blocks_.begin());
  }
  min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
                                        filter_delays_blocks_.end());
}
// Stores the linear-filter configuration flag and marks all per-channel
// linear filter estimates as not yet usable.
AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
    const EchoCanceller3Config& config,
    size_t num_capture_channels)
    : use_linear_filter_(config.filter.use_linear_filter),
      usable_linear_filter_estimates_(num_capture_channels, false) {}
// Resets the analysis for an in-call reset: no usable estimates and no
// filter updates counted since the reset. Note that
// filter_update_blocks_since_start_ and convergence_seen_ are intentionally
// not cleared here.
void AecState::FilteringQualityAnalyzer::Reset() {
  std::fill(usable_linear_filter_estimates_.begin(),
            usable_linear_filter_estimates_.end(), false);
  overall_usable_linear_estimates_ = false;
  filter_update_blocks_since_reset_ = 0;
}
// Decides whether the linear filter output is usable, based on the amount of
// filter adaptation seen, external delay/convergence evidence, and the
// transparent-mode state.
void AecState::FilteringQualityAnalyzer::Update(
    bool active_render,
    bool transparent_mode,
    bool saturated_capture,
    const absl::optional<DelayEstimate>& external_delay,
    bool any_filter_converged) {
  // The filter only adapts during blocks with render activity and without
  // capture saturation.
  if (active_render && !saturated_capture) {
    ++filter_update_blocks_since_reset_;
    ++filter_update_blocks_since_start_;
  }
  // Remember any convergence observed so far.
  convergence_seen_ = convergence_seen_ || any_filter_converged;
  // The adaptation-time requirement at call startup is stricter than the one
  // applied after an in-call reset.
  const bool enough_updates_since_start =
      filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
  const bool enough_updates_since_reset =
      enough_updates_since_start &&
      filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;
  // The linear filter is usable only when it has had time to converge, when
  // an external delay has been reported or convergence has been seen, and
  // when transparent mode is not active (fixes the original "usign" typo in
  // the accompanying comments).
  overall_usable_linear_estimates_ =
      enough_updates_since_start && enough_updates_since_reset &&
      (external_delay || convergence_seen_) && !transparent_mode;
  if (use_linear_filter_) {
    std::fill(usable_linear_filter_estimates_.begin(),
              usable_linear_filter_estimates_.end(),
              overall_usable_linear_estimates_);
  }
}
// Flags the echo as saturated when the capture is saturated and either a
// linear echo estimate or an upper bound derived from the render peak and
// the echo path gain indicates saturation.
void AecState::SaturationDetector::Update(
    rtc::ArrayView<const std::vector<float>> x,
    bool saturated_capture,
    bool usable_linear_estimate,
    rtc::ArrayView<const SubtractorOutput> subtractor_output,
    float echo_path_gain) {
  saturated_echo_ = false;
  if (!saturated_capture) {
    return;
  }
  if (usable_linear_estimate) {
    // Use the linear echo estimates of the subtractor directly.
    constexpr float kSaturationThreshold = 20000.f;
    for (const auto& output : subtractor_output) {
      if (output.s_refined_max_abs > kSaturationThreshold ||
          output.s_coarse_max_abs > kSaturationThreshold) {
        saturated_echo_ = true;
        break;
      }
    }
  } else {
    // Bound the echo amplitude by the render peak scaled with the echo path
    // gain and a safety margin.
    float max_sample = 0.f;
    for (auto& channel : x) {
      for (float sample : channel) {
        max_sample = std::max(max_sample, fabsf(sample));
      }
    }
    const float kMargin = 10.f;
    saturated_echo_ = max_sample * echo_path_gain * kMargin > 32000;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,294 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
#include <stddef.h>
#include <array>
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/echo_audibility.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/erl_estimator.h"
#include "modules/audio_processing/aec3/erle_estimator.h"
#include "modules/audio_processing/aec3/filter_analyzer.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/reverb_model_estimator.h"
#include "modules/audio_processing/aec3/subtractor_output.h"
#include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
#include "modules/audio_processing/aec3/transparent_mode.h"
namespace webrtc {
class ApmDataDumper;
// Handles the state and the conditions for the echo removal functionality.
class AecState {
 public:
  AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
  ~AecState();
  // Returns whether the echo subtractor can be used to determine the residual
  // echo.
  bool UsableLinearEstimate() const {
    return filter_quality_state_.LinearFilterUsable() &&
           config_.filter.use_linear_filter;
  }
  // Returns whether the echo subtractor output should be used as output.
  bool UseLinearFilterOutput() const {
    return filter_quality_state_.LinearFilterUsable() &&
           config_.filter.use_linear_filter;
  }
  // Returns whether the render signal is currently active.
  bool ActiveRender() const { return blocks_with_active_render_ > 200; }
  // Returns the appropriate scaling of the residual echo to match the
  // audibility.
  void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;
  // Returns whether the stationary properties of the signals are used in the
  // aec.
  bool UseStationarityProperties() const {
    return config_.echo_audibility.use_stationarity_properties;
  }
  // Returns the ERLE.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
    return erle_estimator_.Erle();
  }
  // Returns an offset to apply to the estimation of the residual echo
  // computation. Returning nullopt means that no offset should be used, while
  // any other value will be applied as a multiplier to the estimated residual
  // echo.
  absl::optional<float> ErleUncertainty() const;
  // Returns the fullband ERLE estimate in log2 units.
  float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
  // Returns the ERL.
  const std::array<float, kFftLengthBy2Plus1>& Erl() const {
    return erl_estimator_.Erl();
  }
  // Returns the time-domain ERL.
  float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }
  // Returns the delay estimate based on the linear filter.
  int MinDirectPathFilterDelay() const {
    return delay_state_.MinDirectPathFilterDelay();
  }
  // Returns whether the capture signal is saturated.
  bool SaturatedCapture() const { return capture_signal_saturation_; }
  // Returns whether the echo signal is saturated.
  bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }
  // Updates the capture signal saturation.
  void UpdateCaptureSaturation(bool capture_signal_saturation) {
    capture_signal_saturation_ = capture_signal_saturation;
  }
  // Returns whether the transparent mode is active.
  bool TransparentModeActive() const {
    return transparent_state_ && transparent_state_->Active();
  }
  // Takes appropriate action at an echo path change.
  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
  // Returns the decay factor for the echo reverberation.
  float ReverbDecay() const { return reverb_model_estimator_.ReverbDecay(); }
  // Returns the frequency response of the reverberant echo.
  rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
    return reverb_model_estimator_.GetReverbFrequencyResponse();
  }
  // Returns whether the transition for going out of the initial state has
  // been triggered.
  bool TransitionTriggered() const {
    return initial_state_.TransitionTriggered();
  }
  // Updates the aec state.
  // TODO(bugs.webrtc.org/10913): Compute multi-channel ERL.
  void Update(
      const absl::optional<DelayEstimate>& external_delay,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          adaptive_filter_frequency_responses,
      rtc::ArrayView<const std::vector<float>>
          adaptive_filter_impulse_responses,
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
      rtc::ArrayView<const SubtractorOutput> subtractor_output);
  // Returns filter length in blocks.
  int FilterLengthBlocks() const {
    // All filters have the same length, so arbitrarily return channel 0 length.
    return filter_analyzer_.FilterLengthBlocks();
  }
 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  const size_t num_capture_channels_;
  const bool deactivate_initial_state_reset_at_echo_path_change_;
  const bool full_reset_at_echo_path_change_;
  const bool subtractor_analyzer_reset_at_echo_path_change_;
  // Class for controlling the transition from the initial state, which in turn
  // controls when the filter parameters for the initial state should be used.
  class InitialState {
   public:
    explicit InitialState(const EchoCanceller3Config& config);
    // Resets the state to again begin in the initial state.
    void Reset();
    // Updates the state based on new data.
    void Update(bool active_render, bool saturated_capture);
    // Returns whether the initial state is active or not.
    bool InitialStateActive() const { return initial_state_; }
    // Returns whether the transition from the initial state has been started.
    bool TransitionTriggered() const { return transition_triggered_; }
   private:
    const bool conservative_initial_phase_;
    const float initial_state_seconds_;
    bool transition_triggered_ = false;
    bool initial_state_ = true;
    size_t strong_not_saturated_render_blocks_ = 0;
  } initial_state_;
  // Class for choosing the direct-path delay relative to the beginning of the
  // filter, as well as any other data related to the delay used within
  // AecState.
  class FilterDelay {
   public:
    FilterDelay(const EchoCanceller3Config& config,
                size_t num_capture_channels);
    // Returns whether an external delay has been reported to the AecState (from
    // the delay estimator).
    bool ExternalDelayReported() const { return external_delay_reported_; }
    // Returns the delay in blocks relative to the beginning of the filter that
    // corresponds to the direct path of the echo.
    rtc::ArrayView<const int> DirectPathFilterDelays() const {
      return filter_delays_blocks_;
    }
    // Returns the minimum delay among the direct path delays relative to the
    // beginning of the filter.
    int MinDirectPathFilterDelay() const { return min_filter_delay_; }
    // Updates the delay estimates based on new data.
    void Update(
        rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
        const absl::optional<DelayEstimate>& external_delay,
        size_t blocks_with_proper_filter_adaptation);
   private:
    const int delay_headroom_blocks_;
    bool external_delay_reported_ = false;
    std::vector<int> filter_delays_blocks_;
    int min_filter_delay_;
    absl::optional<DelayEstimate> external_delay_;
  } delay_state_;
  // Classifier for toggling transparent mode when there is no echo.
  std::unique_ptr<TransparentMode> transparent_state_;
  // Class for analyzing how well the linear filter performs, and can be
  // expected to perform, on the current signals. The purpose of this is to
  // select the echo suppression functionality as well as the input to the echo
  // suppressor.
  class FilteringQualityAnalyzer {
   public:
    FilteringQualityAnalyzer(const EchoCanceller3Config& config,
                             size_t num_capture_channels);
    // Returns whether the linear filter can be used for the echo
    // canceller output.
    bool LinearFilterUsable() const { return overall_usable_linear_estimates_; }
    // Returns whether an individual filter output can be used for the echo
    // canceller output.
    const std::vector<bool>& UsableLinearFilterOutputs() const {
      return usable_linear_filter_estimates_;
    }
    // Resets the state of the analyzer.
    void Reset();
    // Updates the analysis based on new data.
    void Update(bool active_render,
                bool transparent_mode,
                bool saturated_capture,
                const absl::optional<DelayEstimate>& external_delay,
                bool any_filter_converged);
   private:
    const bool use_linear_filter_;
    bool overall_usable_linear_estimates_ = false;
    size_t filter_update_blocks_since_reset_ = 0;
    size_t filter_update_blocks_since_start_ = 0;
    bool convergence_seen_ = false;
    std::vector<bool> usable_linear_filter_estimates_;
  } filter_quality_state_;
  // Class for detecting whether the echo is to be considered to be
  // saturated.
  class SaturationDetector {
   public:
    // Returns whether the echo is to be considered saturated.
    bool SaturatedEcho() const { return saturated_echo_; }
    // Updates the detection decision based on new data.
    void Update(rtc::ArrayView<const std::vector<float>> x,
                bool saturated_capture,
                bool usable_linear_estimate,
                rtc::ArrayView<const SubtractorOutput> subtractor_output,
                float echo_path_gain);
   private:
    bool saturated_echo_ = false;
  } saturation_detector_;
  ErlEstimator erl_estimator_;
  ErleEstimator erle_estimator_;
  size_t strong_not_saturated_render_blocks_ = 0;
  size_t blocks_with_active_render_ = 0;
  bool capture_signal_saturation_ = false;
  FilterAnalyzer filter_analyzer_;
  EchoAudibility echo_audibility_;
  ReverbModelEstimator reverb_model_estimator_;
  ReverbModel avg_render_reverb_;
  SubtractorOutputAnalyzer subtractor_output_analyzer_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_

View File

@ -0,0 +1,160 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/alignment_mixer.h"
#include <algorithm>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Maps the configuration flags to the mixing strategy: mono input is always
// kFixed; otherwise downmixing takes precedence over adaptive selection,
// with kFixed as the fallback. Downmix and adaptive selection are mutually
// exclusive.
AlignmentMixer::MixingVariant ChooseMixingVariant(bool downmix,
                                                  bool adaptive_selection,
                                                  int num_channels) {
  RTC_DCHECK(!(adaptive_selection && downmix));
  RTC_DCHECK_LT(0, num_channels);
  if (num_channels > 1) {
    if (downmix) {
      return AlignmentMixer::MixingVariant::kDownmix;
    }
    if (adaptive_selection) {
      return AlignmentMixer::MixingVariant::kAdaptive;
    }
  }
  return AlignmentMixer::MixingVariant::kFixed;
}
}  // namespace
// Convenience constructor that unpacks the alignment-mixing configuration.
AlignmentMixer::AlignmentMixer(
    size_t num_channels,
    const EchoCanceller3Config::Delay::AlignmentMixing& config)
    : AlignmentMixer(num_channels,
                     config.downmix,
                     config.adaptive_selection,
                     config.activity_power_threshold,
                     config.prefer_first_two_channels) {}
// Main constructor. The per-channel adaptive-selection state is only
// allocated when the adaptive mixing variant is chosen.
AlignmentMixer::AlignmentMixer(size_t num_channels,
                               bool downmix,
                               bool adaptive_selection,
                               float activity_power_threshold,
                               bool prefer_first_two_channels)
    : num_channels_(num_channels),
      one_by_num_channels_(1.f / num_channels_),
      // The threshold is specified as a per-sample power; scale to block
      // energy.
      excitation_energy_threshold_(kBlockSize * activity_power_threshold),
      prefer_first_two_channels_(prefer_first_two_channels),
      selection_variant_(
          ChooseMixingVariant(downmix, adaptive_selection, num_channels_)) {
  if (selection_variant_ == MixingVariant::kAdaptive) {
    std::fill(strong_block_counters_.begin(), strong_block_counters_.end(), 0);
    cumulative_energies_.resize(num_channels_);
    std::fill(cumulative_energies_.begin(), cumulative_energies_.end(), 0.f);
  }
}
// Produces the mono output block y from the multi-channel input x, either by
// downmixing or by copying a single (fixed or adaptively selected) channel.
void AlignmentMixer::ProduceOutput(rtc::ArrayView<const std::vector<float>> x,
                                   rtc::ArrayView<float, kBlockSize> y) {
  RTC_DCHECK_EQ(x.size(), num_channels_);
  if (selection_variant_ == MixingVariant::kDownmix) {
    Downmix(x, y);
    return;
  }
  int ch = selection_variant_ == MixingVariant::kFixed ? 0 : SelectChannel(x);
  // ch indexes into x, so it must be strictly smaller than the channel count;
  // the original RTC_DCHECK_GE would have let the out-of-bounds case
  // ch == x.size() pass.
  RTC_DCHECK_GT(x.size(), ch);
  std::copy(x[ch].begin(), x[ch].end(), y.begin());
}
// Averages all input channels into the mono output block y.
void AlignmentMixer::Downmix(rtc::ArrayView<const std::vector<float>> x,
                             rtc::ArrayView<float, kBlockSize> y) const {
  RTC_DCHECK_EQ(x.size(), num_channels_);
  RTC_DCHECK_GE(num_channels_, 2);
  for (size_t i = 0; i < kBlockSize; ++i) {
    // Accumulate the channels in index order (same floating-point summation
    // order as summing channel by channel) and scale by 1/num_channels.
    float sum = x[0][i];
    for (size_t ch = 1; ch < num_channels_; ++ch) {
      sum += x[ch][i];
    }
    y[i] = sum * one_by_num_channels_;
  }
}
// Adaptively selects the channel to use as mono output, based on smoothed
// per-channel energies. Switches only occur with hysteresis (a candidate
// must carry at least twice the energy of the current selection), and the
// analysis can restrict itself to the first two channels once either of them
// has shown sustained strong activity.
int AlignmentMixer::SelectChannel(rtc::ArrayView<const std::vector<float>> x) {
  RTC_DCHECK_EQ(x.size(), num_channels_);
  RTC_DCHECK_GE(num_channels_, 2);
  RTC_DCHECK_EQ(cumulative_energies_.size(), num_channels_);
  constexpr size_t kBlocksToChooseLeftOrRight =
      static_cast<size_t>(0.5f * kNumBlocksPerSecond);
  const bool good_signal_in_left_or_right =
      prefer_first_two_channels_ &&
      (strong_block_counters_[0] > kBlocksToChooseLeftOrRight ||
       strong_block_counters_[1] > kBlocksToChooseLeftOrRight);
  const int num_ch_to_analyze =
      good_signal_in_left_or_right ? 2 : num_channels_;
  constexpr int kNumBlocksBeforeEnergySmoothing = 60 * kNumBlocksPerSecond;
  ++block_counter_;
  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
    RTC_DCHECK_EQ(x[ch].size(), kBlockSize);
    float x2_sum = 0.f;
    for (size_t i = 0; i < kBlockSize; ++i) {
      x2_sum += x[ch][i] * x[ch][i];
    }
    // Only the first two channels have strong-block counters.
    if (ch < 2 && x2_sum > excitation_energy_threshold_) {
      ++strong_block_counters_[ch];
    }
    // Accumulate plain sums initially; switch to exponential smoothing once
    // enough blocks have been observed.
    if (block_counter_ <= kNumBlocksBeforeEnergySmoothing) {
      cumulative_energies_[ch] += x2_sum;
    } else {
      constexpr float kSmoothing = 1.f / (10 * kNumBlocksPerSecond);
      cumulative_energies_[ch] +=
          kSmoothing * (x2_sum - cumulative_energies_[ch]);
    }
  }
  // Normalize the energies to allow the energy computations to, from now on,
  // be based on smoothing.
  if (block_counter_ == kNumBlocksBeforeEnergySmoothing) {
    constexpr float kOneByNumBlocksBeforeEnergySmoothing =
        1.f / kNumBlocksBeforeEnergySmoothing;
    for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
      cumulative_energies_[ch] *= kOneByNumBlocksBeforeEnergySmoothing;
    }
  }
  int strongest_ch = 0;
  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
    if (cumulative_energies_[ch] > cumulative_energies_[strongest_ch]) {
      strongest_ch = ch;
    }
  }
  // Switch channel when the current selection lies outside the analyzed
  // range, or when the strongest channel clearly dominates it.
  if ((good_signal_in_left_or_right && selected_channel_ > 1) ||
      cumulative_energies_[strongest_ch] >
          2.f * cumulative_energies_[selected_channel_]) {
    selected_channel_ = strongest_ch;
  }
  return selected_channel_;
}
} // namespace webrtc

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_

// Fix: the header uses std::array and size_t but did not include <array> or
// <stddef.h>; include what we use instead of relying on transitive includes.
#include <stddef.h>

#include <array>
#include <vector>

#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"

namespace webrtc {

// Performs channel conversion to mono for the purpose of providing a decent
// mono input for the delay estimation. This is achieved by analyzing all
// incoming channels and produce one single channel output.
class AlignmentMixer {
 public:
  AlignmentMixer(size_t num_channels,
                 const EchoCanceller3Config::Delay::AlignmentMixing& config);
  AlignmentMixer(size_t num_channels,
                 bool downmix,
                 bool adaptive_selection,
                 float excitation_limit,
                 bool prefer_first_two_channels);

  // Writes a mono representation of the multi-channel block `x` into `y`.
  void ProduceOutput(rtc::ArrayView<const std::vector<float>> x,
                     rtc::ArrayView<float, kBlockSize> y);

  enum class MixingVariant { kDownmix, kAdaptive, kFixed };

 private:
  const size_t num_channels_;
  const float one_by_num_channels_;
  const float excitation_energy_threshold_;
  const bool prefer_first_two_channels_;
  const MixingVariant selection_variant_;
  // Counts blocks where channels 0 and 1 exceeded the excitation threshold.
  std::array<size_t, 2> strong_block_counters_;
  std::vector<float> cumulative_energies_;
  int selected_channel_ = 0;
  size_t block_counter_ = 0;

  // Averages all channels of `x` into `y`.
  void Downmix(const rtc::ArrayView<const std::vector<float>> x,
               rtc::ArrayView<float, kBlockSize> y) const;
  // Chooses which channel to forward based on per-channel energies.
  int SelectChannel(rtc::ArrayView<const std::vector<float>> x);
};
} // namespace webrtc
#endif  // MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_

View File

@ -0,0 +1,121 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
#include <algorithm>
#include <limits>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
// Returns true exactly when `frames_since_last_report` equals the reporting
// interval, i.e. ten seconds worth of frames at 100 frames per second.
bool TimeToReportMetrics(int frames_since_last_report) {
  constexpr int kFramesPerSecond = 100;
  constexpr int kSecondsBetweenReports = 10;
  return frames_since_last_report == kSecondsBetweenReports * kFramesPerSecond;
}
}  // namespace
// Starts with an "empty" range: min at the largest int and max at 0, so that
// the first Update() call defines both endpoints.
ApiCallJitterMetrics::Jitter::Jitter()
    : max_(0), min_(std::numeric_limits<int>::max()) {}

// Widens the observed [min, max] range to include `num_api_calls_in_a_row`.
void ApiCallJitterMetrics::Jitter::Update(int num_api_calls_in_a_row) {
  if (num_api_calls_in_a_row < min_) {
    min_ = num_api_calls_in_a_row;
  }
  if (num_api_calls_in_a_row > max_) {
    max_ = num_api_calls_in_a_row;
  }
}

// Restores the empty range used at construction.
void ApiCallJitterMetrics::Jitter::Reset() {
  max_ = 0;
  min_ = std::numeric_limits<int>::max();
}
// Returns the metrics collection to its initial state.
void ApiCallJitterMetrics::Reset() {
  proper_call_observed_ = false;
  last_call_was_render_ = false;
  num_api_calls_in_a_row_ = 0;
  frames_since_last_report_ = 0;
  render_jitter_.Reset();
  capture_jitter_.Reset();
}
// Registers a render API call. On a capture->render transition, the preceding
// run of capture calls is folded into the capture jitter statistics.
void ApiCallJitterMetrics::ReportRenderCall() {
  const bool transition_from_capture = !last_call_was_render_;
  if (transition_from_capture) {
    // Only record the capture run once both call types have been observed.
    if (proper_call_observed_) {
      capture_jitter_.Update(num_api_calls_in_a_row_);
    }
    // Start counting render calls from scratch.
    num_api_calls_in_a_row_ = 0;
  }
  ++num_api_calls_in_a_row_;
  last_call_was_render_ = true;
}
// Registers a capture API call. On a render->capture transition the preceding
// run of render calls is folded into the render jitter statistics; metrics are
// reported and reset once per reporting interval.
void ApiCallJitterMetrics::ReportCaptureCall() {
  if (last_call_was_render_) {
    // If the previous call was a render and a proper call has been observed
    // (containing both render and capture data), storing the last number of
    // render calls into the metrics.
    if (proper_call_observed_) {
      render_jitter_.Update(num_api_calls_in_a_row_);
    }
    // Reset the call counter to start counting capture calls.
    num_api_calls_in_a_row_ = 0;
    // If this statement is reached, at least one render and one capture call
    // have been observed.
    proper_call_observed_ = true;
  }
  ++num_api_calls_in_a_row_;
  last_call_was_render_ = false;
  // Only report and update jitter metrics for when a proper call, containing
  // both render and capture data, has been observed.
  if (proper_call_observed_ &&
      TimeToReportMetrics(++frames_since_last_report_)) {
    // Report jitter, where the basic unit is frames.
    constexpr int kMaxJitterToReport = 50;
    // Report max and min jitter for render and capture, in units of 20 ms.
    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MaxRenderJitter",
        std::min(kMaxJitterToReport, render_jitter().max()), 1,
        kMaxJitterToReport, kMaxJitterToReport);
    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MinRenderJitter",
        std::min(kMaxJitterToReport, render_jitter().min()), 1,
        kMaxJitterToReport, kMaxJitterToReport);
    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MaxCaptureJitter",
        std::min(kMaxJitterToReport, capture_jitter().max()), 1,
        kMaxJitterToReport, kMaxJitterToReport);
    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MinCaptureJitter",
        std::min(kMaxJitterToReport, capture_jitter().min()), 1,
        kMaxJitterToReport, kMaxJitterToReport);
    frames_since_last_report_ = 0;
    Reset();
  }
}
// Returns true if the next capture call will trigger metrics reporting.
bool ApiCallJitterMetrics::WillReportMetricsAtNextCapture() const {
  const int frames_at_next_capture = frames_since_last_report_ + 1;
  return TimeToReportMetrics(frames_at_next_capture);
}
} // namespace webrtc

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
#define MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
namespace webrtc {
// Stores data for reporting metrics on the API call jitter.
class ApiCallJitterMetrics {
 public:
  // Tracks the minimum and maximum number of API calls of one kind seen in a
  // row.
  class Jitter {
   public:
    Jitter();
    // Widens the tracked [min, max] range to include `num_api_calls_in_a_row`.
    void Update(int num_api_calls_in_a_row);
    // Restores the initial (empty) range.
    void Reset();
    int min() const { return min_; }
    int max() const { return max_; }
   private:
    int max_;
    int min_;
  };
  ApiCallJitterMetrics() { Reset(); }
  // Update metrics for render API call.
  void ReportRenderCall();
  // Update and periodically report metrics for capture API call.
  void ReportCaptureCall();
  // Methods used only for testing.
  const Jitter& render_jitter() const { return render_jitter_; }
  const Jitter& capture_jitter() const { return capture_jitter_; }
  bool WillReportMetricsAtNextCapture() const;
 private:
  void Reset();
  Jitter render_jitter_;
  Jitter capture_jitter_;
  // Length of the current run of same-kind (render or capture) API calls.
  int num_api_calls_in_a_row_ = 0;
  int frames_since_last_report_ = 0;
  bool last_call_was_render_ = false;
  // True once at least one render and one capture call have been observed.
  bool proper_call_observed_ = false;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_

View File

@ -0,0 +1,39 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_buffer.h"
#include <algorithm>
namespace webrtc {
// Allocates a circular buffer of `size` blocks, each holding
// `num_bands` x `num_channels` x `frame_length` samples.
// Fix: the previous implementation re-filled every sample with 0.f in a
// triple-nested loop after construction; the std::vector fill constructor
// below already value-initializes every sample to 0.f, so the loop was
// redundant work and has been removed.
BlockBuffer::BlockBuffer(size_t size,
                         size_t num_bands,
                         size_t num_channels,
                         size_t frame_length)
    : size(static_cast<int>(size)),
      buffer(size,
             std::vector<std::vector<std::vector<float>>>(
                 num_bands,
                 std::vector<std::vector<float>>(
                     num_channels,
                     std::vector<float>(frame_length, 0.f)))) {}

BlockBuffer::~BlockBuffer() = default;
} // namespace webrtc

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "rtc_base/checks.h"
namespace webrtc {
// Struct for bundling a circular buffer of two dimensional vector objects
// together with the read and write indices.
struct BlockBuffer {
  BlockBuffer(size_t size,
              size_t num_bands,
              size_t num_channels,
              size_t frame_length);
  ~BlockBuffer();
  // Returns `index` advanced one step, wrapping around at the buffer size.
  int IncIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index < size - 1 ? index + 1 : 0;
  }
  // Returns `index` moved back one step, wrapping around at zero.
  int DecIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index > 0 ? index - 1 : size - 1;
  }
  // Returns `index` moved by `offset` steps (possibly negative), wrapping
  // around at the buffer boundaries. The modulo arithmetic is only valid for
  // offsets down to -size.
  int OffsetIndex(int index, int offset) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    RTC_DCHECK_GE(size, offset);
    return (size + index + offset) % size;
  }
  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
  void IncWriteIndex() { write = IncIndex(write); }
  void DecWriteIndex() { write = DecIndex(write); }
  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
  void IncReadIndex() { read = IncIndex(read); }
  void DecReadIndex() { read = DecIndex(read); }
  const int size;
  // Dimensions: [size][num_bands][num_channels][frame_length].
  std::vector<std::vector<std::vector<std::vector<float>>>> buffer;
  int write = 0;
  int read = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_

View File

@ -0,0 +1,62 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_delay_buffer.h"
#include "api/array_view.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Allocates a delay line of `delay_samples` zero-initialized samples per
// channel and band.
BlockDelayBuffer::BlockDelayBuffer(size_t num_channels,
                                   size_t num_bands,
                                   size_t frame_length,
                                   size_t delay_samples)
    : frame_length_(frame_length),
      delay_(delay_samples),
      buf_(num_channels,
           std::vector<std::vector<float>>(num_bands,
                                           std::vector<float>(delay_, 0.f))) {}
BlockDelayBuffer::~BlockDelayBuffer() = default;
// Applies the fixed delay to `frame` in place by exchanging incoming samples
// with stored ones in per-channel, per-band circular buffers.
void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) {
  RTC_DCHECK_EQ(buf_.size(), frame->num_channels());
  // A zero delay leaves the signal untouched.
  if (delay_ == 0) {
    return;
  }
  const size_t num_bands = buf_[0].size();
  const size_t num_channels = buf_.size();
  const size_t i_start = last_insert_;
  size_t i = 0;
  for (size_t ch = 0; ch < num_channels; ++ch) {
    RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands());
    RTC_DCHECK_EQ(buf_[ch].size(), num_bands);
    rtc::ArrayView<float* const> frame_ch(frame->split_bands(ch), num_bands);
    for (size_t band = 0; band < num_bands; ++band) {
      RTC_DCHECK_EQ(delay_, buf_[ch][band].size());
      // Every channel/band restarts from the same circular-buffer position.
      i = i_start;
      for (size_t k = 0; k < frame_length_; ++k) {
        // Swap the delayed sample out of the buffer and the new sample in.
        const float tmp = buf_[ch][band][i];
        buf_[ch][band][i] = frame_ch[band][k];
        frame_ch[band][k] = tmp;
        i = i < delay_ - 1 ? i + 1 : 0;
      }
    }
  }
  // Remember where the next frame should start inserting.
  last_insert_ = i;
}
} // namespace webrtc

View File

@ -0,0 +1,43 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "modules/audio_processing/audio_buffer.h"
namespace webrtc {
// Class for applying a fixed delay to the samples in a signal partitioned using
// the audiobuffer band-splitting scheme.
class BlockDelayBuffer {
 public:
  BlockDelayBuffer(size_t num_channels,
                   size_t num_bands,
                   size_t frame_length,
                   size_t delay_samples);
  ~BlockDelayBuffer();
  // Delays the samples by the specified delay.
  void DelaySignal(AudioBuffer* frame);
 private:
  const size_t frame_length_;
  const size_t delay_;
  // Per-channel, per-band circular delay lines of length `delay_`.
  std::vector<std::vector<std::vector<float>>> buf_;
  // Position in the delay lines where the next frame starts inserting.
  size_t last_insert_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_framer.h"
#include <algorithm>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Constructs a framer with a zero-initialized buffer per band and channel.
// Both dimensions must be non-zero.
BlockFramer::BlockFramer(size_t num_bands, size_t num_channels)
    : num_bands_(num_bands),
      num_channels_(num_channels),
      buffer_(num_bands_,
              std::vector<std::vector<float>>(
                  num_channels,
                  std::vector<float>(kBlockSize, 0.f))) {
  RTC_DCHECK_LT(0, num_bands);
  RTC_DCHECK_LT(0, num_channels);
}
BlockFramer::~BlockFramer() = default;
// All the constants are chosen so that the buffer is either empty or has enough
// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to
// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need
// to be called in the correct order.
// Stores a full 64 sample block so that it can be combined with subsequent
// data into an 80 sample subframe. The internal per-band/per-channel buffers
// must be empty when this is called.
void BlockFramer::InsertBlock(
    const std::vector<std::vector<std::vector<float>>>& block) {
  RTC_DCHECK_EQ(num_bands_, block.size());
  for (size_t b = 0; b < num_bands_; ++b) {
    RTC_DCHECK_EQ(num_channels_, block[b].size());
    for (size_t ch = 0; ch < num_channels_; ++ch) {
      const std::vector<float>& in = block[b][ch];
      std::vector<float>& out = buffer_[b][ch];
      RTC_DCHECK_EQ(kBlockSize, in.size());
      RTC_DCHECK_EQ(0, out.size());
      out.insert(out.begin(), in.begin(), in.end());
    }
  }
}
// Inserts a 64 sample block and fills `sub_frame` with an 80 sample subframe:
// buffered leftover samples from the previous block are emitted first, then
// the start of the new block, and the unused tail of the new block is kept for
// the next call.
void BlockFramer::InsertBlockAndExtractSubFrame(
    const std::vector<std::vector<std::vector<float>>>& block,
    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame) {
  RTC_DCHECK(sub_frame);
  RTC_DCHECK_EQ(num_bands_, block.size());
  RTC_DCHECK_EQ(num_bands_, sub_frame->size());
  for (size_t band = 0; band < num_bands_; ++band) {
    RTC_DCHECK_EQ(num_channels_, block[band].size());
    RTC_DCHECK_EQ(num_channels_, (*sub_frame)[0].size());
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      RTC_DCHECK_LE(kSubFrameLength,
                    buffer_[band][channel].size() + kBlockSize);
      RTC_DCHECK_EQ(kBlockSize, block[band][channel].size());
      RTC_DCHECK_GE(kBlockSize, buffer_[band][channel].size());
      RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[band][channel].size());
      // Number of samples from the new block needed to complete the subframe.
      const int samples_to_frame =
          kSubFrameLength - buffer_[band][channel].size();
      // Emit the buffered samples first, then the head of the new block.
      std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
                (*sub_frame)[band][channel].begin());
      std::copy(
          block[band][channel].begin(),
          block[band][channel].begin() + samples_to_frame,
          (*sub_frame)[band][channel].begin() + buffer_[band][channel].size());
      // Retain the unused tail of the block for the next subframe.
      buffer_[band][channel].clear();
      buffer_[band][channel].insert(
          buffer_[band][channel].begin(),
          block[band][channel].begin() + samples_to_frame,
          block[band][channel].end());
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,48 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
// Class for producing frames consisting of 2 subframes of 80 samples each
// from 64 sample blocks. The class is designed to work together with the
// FrameBlocker class which performs the reverse conversion. Used together with
// that, this class produces output frames are the same rate as frames are
// received by the FrameBlocker class. Note that the internal buffers will
// overrun if any other rate of packets insertion is used.
class BlockFramer {
 public:
  BlockFramer(size_t num_bands, size_t num_channels);
  ~BlockFramer();
  BlockFramer(const BlockFramer&) = delete;
  BlockFramer& operator=(const BlockFramer&) = delete;
  // Adds a 64 sample block into the data that will form the next output frame.
  void InsertBlock(const std::vector<std::vector<std::vector<float>>>& block);
  // Adds a 64 sample block and extracts an 80 sample subframe.
  void InsertBlockAndExtractSubFrame(
      const std::vector<std::vector<std::vector<float>>>& block,
      std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame);
 private:
  const size_t num_bands_;
  const size_t num_channels_;
  // Leftover samples per band and channel, carried between calls.
  std::vector<std::vector<std::vector<float>>> buffer_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_

View File

@ -0,0 +1,292 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_processor.h"
#include <stddef.h>
#include <memory>
#include <utility>
#include <vector>
#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/block_processor_metrics.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/echo_remover.h"
#include "modules/audio_processing/aec3/render_delay_buffer.h"
#include "modules/audio_processing/aec3/render_delay_controller.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
namespace {
enum class BlockProcessorApiCall { kCapture, kRender };
// Implementation of the BlockProcessor interface: coordinates render
// buffering, delay control and echo removal for 64 sample blocks.
class BlockProcessorImpl final : public BlockProcessor {
 public:
  BlockProcessorImpl(const EchoCanceller3Config& config,
                     int sample_rate_hz,
                     size_t num_render_channels,
                     size_t num_capture_channels,
                     std::unique_ptr<RenderDelayBuffer> render_buffer,
                     std::unique_ptr<RenderDelayController> delay_controller,
                     std::unique_ptr<EchoRemover> echo_remover);
  BlockProcessorImpl() = delete;
  ~BlockProcessorImpl() override;
  void ProcessCapture(
      bool echo_path_gain_change,
      bool capture_signal_saturation,
      std::vector<std::vector<std::vector<float>>>* linear_output,
      std::vector<std::vector<std::vector<float>>>* capture_block) override;
  void BufferRender(
      const std::vector<std::vector<std::vector<float>>>& block) override;
  void UpdateEchoLeakageStatus(bool leakage_detected) override;
  void GetMetrics(EchoControl::Metrics* metrics) const override;
  void SetAudioBufferDelay(int delay_ms) override;

 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  bool capture_properly_started_ = false;
  bool render_properly_started_ = false;
  const size_t sample_rate_hz_;
  std::unique_ptr<RenderDelayBuffer> render_buffer_;
  // Null when an external delay estimator is used (see config_.delay).
  std::unique_ptr<RenderDelayController> delay_controller_;
  std::unique_ptr<EchoRemover> echo_remover_;
  BlockProcessorMetrics metrics_;
  // Buffering outcome of the most recent render insertion; consumed by the
  // next capture call.
  RenderDelayBuffer::BufferingEvent render_event_;
  size_t capture_call_counter_ = 0;
  absl::optional<DelayEstimate> estimated_delay_;
};
int BlockProcessorImpl::instance_count_ = 0;
// Takes ownership of the injected components. `delay_controller` may be null;
// the member is only used behind null checks.
BlockProcessorImpl::BlockProcessorImpl(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer,
    std::unique_ptr<RenderDelayController> delay_controller,
    std::unique_ptr<EchoRemover> echo_remover)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_(config),
      sample_rate_hz_(sample_rate_hz),
      render_buffer_(std::move(render_buffer)),
      delay_controller_(std::move(delay_controller)),
      echo_remover_(std::move(echo_remover)),
      render_event_(RenderDelayBuffer::BufferingEvent::kNone) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
}
BlockProcessorImpl::~BlockProcessorImpl() = default;
// Processes one 64 sample capture block: aligns the buffered render data with
// the capture signal (via the delay controller or an external delay) and runs
// the echo remover on the block in place.
void BlockProcessorImpl::ProcessCapture(
    bool echo_path_gain_change,
    bool capture_signal_saturation,
    std::vector<std::vector<std::vector<float>>>* linear_output,
    std::vector<std::vector<std::vector<float>>>* capture_block) {
  RTC_DCHECK(capture_block);
  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->size());
  RTC_DCHECK_EQ(kBlockSize, (*capture_block)[0][0].size());
  capture_call_counter_++;
  data_dumper_->DumpRaw("aec3_processblock_call_order",
                        static_cast<int>(BlockProcessorApiCall::kCapture));
  data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize,
                        &(*capture_block)[0][0][0], 16000, 1);
  if (render_properly_started_) {
    // Reset the buffering on the first capture call after render data has
    // started arriving.
    if (!capture_properly_started_) {
      capture_properly_started_ = true;
      render_buffer_->Reset();
      if (delay_controller_)
        delay_controller_->Reset(true);
    }
  } else {
    // If no render data has yet arrived, do not process the capture signal.
    render_buffer_->HandleSkippedCaptureProcessing();
    return;
  }
  EchoPathVariability echo_path_variability(
      echo_path_gain_change, EchoPathVariability::DelayAdjustment::kNone,
      false);
  // A render overrun reported by the previous BufferRender call invalidates
  // the alignment; flag a buffer flush and reset the delay controller.
  if (render_event_ == RenderDelayBuffer::BufferingEvent::kRenderOverrun &&
      render_properly_started_) {
    echo_path_variability.delay_change =
        EchoPathVariability::DelayAdjustment::kBufferFlush;
    if (delay_controller_)
      delay_controller_->Reset(true);
    RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block "
                        << capture_call_counter_;
  }
  render_event_ = RenderDelayBuffer::BufferingEvent::kNone;
  // Update the render buffers with any newly arrived render blocks and prepare
  // the render buffers for reading the render data corresponding to the current
  // capture block.
  RenderDelayBuffer::BufferingEvent buffer_event =
      render_buffer_->PrepareCaptureProcessing();
  // Reset the delay controller at render buffer underrun.
  if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) {
    if (delay_controller_)
      delay_controller_->Reset(false);
  }
  data_dumper_->DumpWav("aec3_processblock_capture_input2", kBlockSize,
                        &(*capture_block)[0][0][0], 16000, 1);
  bool has_delay_estimator = !config_.delay.use_external_delay_estimator;
  if (has_delay_estimator) {
    RTC_DCHECK(delay_controller_);
    // Compute and apply the render delay required to achieve proper signal
    // alignment.
    estimated_delay_ = delay_controller_->GetDelay(
        render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
        (*capture_block)[0]);
    if (estimated_delay_) {
      bool delay_change =
          render_buffer_->AlignFromDelay(estimated_delay_->delay);
      if (delay_change) {
        rtc::LoggingSeverity log_level =
            config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
                                                       : rtc::LS_INFO;
        RTC_LOG_V(log_level) << "Delay changed to " << estimated_delay_->delay
                             << " at block " << capture_call_counter_;
        echo_path_variability.delay_change =
            EchoPathVariability::DelayAdjustment::kNewDetectedDelay;
      }
    }
    echo_path_variability.clock_drift = delay_controller_->HasClockdrift();
  } else {
    render_buffer_->AlignFromExternalDelay();
  }
  // Remove the echo from the capture signal.
  if (has_delay_estimator || render_buffer_->HasReceivedBufferDelay()) {
    echo_remover_->ProcessCapture(
        echo_path_variability, capture_signal_saturation, estimated_delay_,
        render_buffer_->GetRenderBuffer(), linear_output, capture_block);
  }
  // Update the metrics.
  metrics_.UpdateCapture(false);
}
// Buffers a 64 sample render block and records the buffering outcome for the
// next capture call.
void BlockProcessorImpl::BufferRender(
    const std::vector<std::vector<std::vector<float>>>& block) {
  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.size());
  RTC_DCHECK_EQ(kBlockSize, block[0][0].size());
  data_dumper_->DumpRaw("aec3_processblock_call_order",
                        static_cast<int>(BlockProcessorApiCall::kRender));
  data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize,
                        &block[0][0][0], 16000, 1);
  data_dumper_->DumpWav("aec3_processblock_render_input2", kBlockSize,
                        &block[0][0][0], 16000, 1);
  // The buffering event is consumed by the next ProcessCapture call.
  render_event_ = render_buffer_->Insert(block);
  metrics_.UpdateRender(render_event_ !=
                        RenderDelayBuffer::BufferingEvent::kNone);
  render_properly_started_ = true;
  if (delay_controller_)
    delay_controller_->LogRenderCall();
}
// Forwards the externally detected echo leakage status to the echo remover.
void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) {
  echo_remover_->UpdateEchoLeakageStatus(leakage_detected);
}
// Fills `metrics` with data from the echo remover and translates the render
// buffer delay from blocks to milliseconds (4 ms per 64 sample block).
void BlockProcessorImpl::GetMetrics(EchoControl::Metrics* metrics) const {
  echo_remover_->GetMetrics(metrics);
  constexpr int block_size_ms = 4;
  absl::optional<size_t> delay = render_buffer_->Delay();
  if (delay) {
    metrics->delay_ms = static_cast<int>(*delay) * block_size_ms;
  } else {
    metrics->delay_ms = 0;
  }
}
// Passes an external audio buffer delay estimate on to the render buffer.
void BlockProcessorImpl::SetAudioBufferDelay(int delay_ms) {
  render_buffer_->SetAudioBufferDelay(delay_ms);
}
} // namespace
// Creates a BlockProcessor with default-constructed internal components.
// Fix: this overload duplicated the delay-controller and echo-remover
// construction of the render-buffer-injecting overload verbatim; it now
// delegates to that overload so the construction logic lives in one place.
BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config,
                                       int sample_rate_hz,
                                       size_t num_render_channels,
                                       size_t num_capture_channels) {
  std::unique_ptr<RenderDelayBuffer> render_buffer(
      RenderDelayBuffer::Create(config, sample_rate_hz, num_render_channels));
  return Create(config, sample_rate_hz, num_render_channels,
                num_capture_channels, std::move(render_buffer));
}
// Only used for testing purposes: creates a BlockProcessor around an
// externally supplied render delay buffer, constructing the remaining
// components with their defaults.
BlockProcessor* BlockProcessor::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer) {
  // No delay controller is created when an external delay estimator is used.
  std::unique_ptr<RenderDelayController> controller;
  if (!config.delay.use_external_delay_estimator) {
    controller.reset(RenderDelayController::Create(config, sample_rate_hz,
                                                   num_capture_channels));
  }
  std::unique_ptr<EchoRemover> remover(EchoRemover::Create(
      config, sample_rate_hz, num_render_channels, num_capture_channels));
  return Create(config, sample_rate_hz, num_render_channels,
                num_capture_channels, std::move(render_buffer),
                std::move(controller), std::move(remover));
}
// Only used for testing purposes: assembles a BlockProcessor from fully
// injected components, taking ownership of all of them.
BlockProcessor* BlockProcessor::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer,
    std::unique_ptr<RenderDelayController> delay_controller,
    std::unique_ptr<EchoRemover> echo_remover) {
  return new BlockProcessorImpl(config, sample_rate_hz, num_render_channels,
                                num_capture_channels, std::move(render_buffer),
                                std::move(delay_controller),
                                std::move(echo_remover));
}
} // namespace webrtc

View File

@ -0,0 +1,76 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
#include <stddef.h>
#include <memory>
#include <vector>
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/echo_remover.h"
#include "modules/audio_processing/aec3/render_delay_buffer.h"
#include "modules/audio_processing/aec3/render_delay_controller.h"
namespace webrtc {
// Class for performing echo cancellation on 64 sample blocks of audio data.
class BlockProcessor {
 public:
  // Creates a BlockProcessor with default-constructed internal components.
  static BlockProcessor* Create(const EchoCanceller3Config& config,
                                int sample_rate_hz,
                                size_t num_render_channels,
                                size_t num_capture_channels);
  // Only used for testing purposes.
  static BlockProcessor* Create(
      const EchoCanceller3Config& config,
      int sample_rate_hz,
      size_t num_render_channels,
      size_t num_capture_channels,
      std::unique_ptr<RenderDelayBuffer> render_buffer);
  // Only used for testing purposes: all components are injected.
  static BlockProcessor* Create(
      const EchoCanceller3Config& config,
      int sample_rate_hz,
      size_t num_render_channels,
      size_t num_capture_channels,
      std::unique_ptr<RenderDelayBuffer> render_buffer,
      std::unique_ptr<RenderDelayController> delay_controller,
      std::unique_ptr<EchoRemover> echo_remover);
  virtual ~BlockProcessor() = default;
  // Get current metrics.
  virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;
  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;
  // Processes a block of capture data.
  virtual void ProcessCapture(
      bool echo_path_gain_change,
      bool capture_signal_saturation,
      std::vector<std::vector<std::vector<float>>>* linear_output,
      std::vector<std::vector<std::vector<float>>>* capture_block) = 0;
  // Buffers a block of render data supplied by a FrameBlocker object.
  virtual void BufferRender(
      const std::vector<std::vector<std::vector<float>>>& render_block) = 0;
  // Reports whether echo leakage has been detected in the echo canceller
  // output.
  virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/block_processor_metrics.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
// Coarse categorization of how often render buffer underruns occurred during
// a metrics reporting interval.
enum class RenderUnderrunCategory {
  kNone,
  kFew,
  kSeveral,
  kMany,
  kConstant,
  kNumCategories
};
// Coarse categorization of how often render buffer overruns occurred during a
// metrics reporting interval.
enum class RenderOverrunCategory {
  kNone,
  kFew,
  kSeveral,
  kMany,
  kConstant,
  kNumCategories
};
}  // namespace
// Registers one processed capture block and whether the render buffer underran
// while producing it. Once kMetricsReportingIntervalBlocks blocks have
// accumulated, the under-/overrun tallies are bucketed, reported as histograms,
// and the interval restarts.
void BlockProcessorMetrics::UpdateCapture(bool underrun) {
  ++capture_block_counter_;
  if (underrun) {
    ++render_buffer_underruns_;
  }

  if (capture_block_counter_ == kMetricsReportingIntervalBlocks) {
    metrics_reported_ = true;

    // Bucket the underrun count; more than half of the interval's blocks
    // counts as constant underrunning.
    RenderUnderrunCategory underrun_category;
    if (render_buffer_underruns_ == 0) {
      underrun_category = RenderUnderrunCategory::kNone;
    } else if (render_buffer_underruns_ > (capture_block_counter_ >> 1)) {
      underrun_category = RenderUnderrunCategory::kConstant;
    } else if (render_buffer_underruns_ > 100) {
      underrun_category = RenderUnderrunCategory::kMany;
    } else if (render_buffer_underruns_ > 10) {
      underrun_category = RenderUnderrunCategory::kSeveral;
    } else {
      underrun_category = RenderUnderrunCategory::kFew;
    }
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.EchoCanceller.RenderUnderruns",
        static_cast<int>(underrun_category),
        static_cast<int>(RenderUnderrunCategory::kNumCategories));

    // Bucket the overrun count relative to the number of BufferRender() calls
    // seen in the same interval.
    RenderOverrunCategory overrun_category;
    if (render_buffer_overruns_ == 0) {
      overrun_category = RenderOverrunCategory::kNone;
    } else if (render_buffer_overruns_ > (buffer_render_calls_ >> 1)) {
      overrun_category = RenderOverrunCategory::kConstant;
    } else if (render_buffer_overruns_ > 100) {
      overrun_category = RenderOverrunCategory::kMany;
    } else if (render_buffer_overruns_ > 10) {
      overrun_category = RenderOverrunCategory::kSeveral;
    } else {
      overrun_category = RenderOverrunCategory::kFew;
    }
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.EchoCanceller.RenderOverruns",
        static_cast<int>(overrun_category),
        static_cast<int>(RenderOverrunCategory::kNumCategories));

    ResetMetrics();
    capture_block_counter_ = 0;
  } else {
    metrics_reported_ = false;
  }
}
// Registers one BufferRender() call and whether it overran the render buffer.
void BlockProcessorMetrics::UpdateRender(bool overrun) {
  buffer_render_calls_ += 1;
  if (overrun) {
    render_buffer_overruns_ += 1;
  }
}
// Clears the per-interval tallies ahead of a new reporting interval.
void BlockProcessorMetrics::ResetMetrics() {
  render_buffer_underruns_ = render_buffer_overruns_ = buffer_render_calls_ = 0;
}
} // namespace webrtc

View File

@ -0,0 +1,47 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
#include "rtc_base/constructor_magic.h"
namespace webrtc {
// Handles the reporting of metrics for the block_processor.
class BlockProcessorMetrics {
 public:
  BlockProcessorMetrics() = default;

  // Updates the metric with new capture data.
  void UpdateCapture(bool underrun);

  // Updates the metric with new render data.
  void UpdateRender(bool overrun);

  // Returns true if the metrics have just been reported, otherwise false.
  // Declared const: it is a pure observer of metrics_reported_.
  bool MetricsReported() const { return metrics_reported_; }

 private:
  // Resets the metrics.
  void ResetMetrics();

  // Number of capture blocks seen in the current reporting interval.
  int capture_block_counter_ = 0;
  // True only for the block at which the histograms were reported.
  bool metrics_reported_ = false;
  // Per-interval underrun/overrun tallies and render-call count.
  int render_buffer_underruns_ = 0;
  int render_buffer_overruns_ = 0;
  int buffer_render_calls_ = 0;

  RTC_DISALLOW_COPY_AND_ASSIGN(BlockProcessorMetrics);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/clockdrift_detector.h"
namespace webrtc {
// Starts with no detected clockdrift and an all-zero delay history.
ClockdriftDetector::ClockdriftDetector()
    : level_(Level::kNone), stability_counter_(0) {
  delay_history_.fill(0);
}

ClockdriftDetector::~ClockdriftDetector() = default;
// Compares the latest delay estimate against the three previous distinct
// estimates and classifies the clockdrift level from the observed pattern.
void ClockdriftDetector::Update(int delay_estimate) {
  if (delay_estimate == delay_history_[0]) {
    // An unchanged estimate resets the drift level after 7500 stable blocks
    // (30 seconds).
    stability_counter_++;
    if (stability_counter_ > 7500) {
      level_ = Level::kNone;
    }
    return;
  }

  stability_counter_ = 0;

  // Differences between the stored history entries and the new estimate.
  const int step1 = delay_history_[0] - delay_estimate;
  const int step2 = delay_history_[1] - delay_estimate;
  const int step3 = delay_history_[2] - delay_estimate;

  // Positive clockdrift shows up as the delay patterns
  //   [x-3], x-2, x-1, x   or   [x-3], x-1, x-2, x.
  const bool probable_drift_up =
      (step1 == -1 && step2 == -2) || (step1 == -2 && step2 == -1);

  // Negative clockdrift shows up as the delay patterns
  //   [x+3], x+2, x+1, x   or   [x+3], x+1, x+2, x.
  const bool probable_drift_down =
      (step1 == 1 && step2 == 2) || (step1 == 2 && step2 == 1);

  // The drift is verified when the oldest history entry also fits the pattern.
  const bool verified_drift = (probable_drift_up && step3 == -3) ||
                              (probable_drift_down && step3 == 3);

  if (verified_drift) {
    level_ = Level::kVerified;
  } else if (level_ == Level::kNone &&
             (probable_drift_up || probable_drift_down)) {
    level_ = Level::kProbable;
  }

  // Age the history by one step.
  delay_history_[2] = delay_history_[1];
  delay_history_[1] = delay_history_[0];
  delay_history_[0] = delay_estimate;
}
} // namespace webrtc

View File

@ -0,0 +1,40 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
#include <stddef.h>
#include <array>
namespace webrtc {
class ApmDataDumper;
struct DownsampledRenderBuffer;
struct EchoCanceller3Config;
// Detects clockdrift by analyzing the estimated delay.
class ClockdriftDetector {
 public:
  // kProbable: a drift-like pattern was seen in the two most recent changes.
  // kVerified: the pattern also matched the third most recent change.
  enum class Level { kNone, kProbable, kVerified, kNumCategories };
  ClockdriftDetector();
  ~ClockdriftDetector();
  // Feeds in the latest delay estimate.
  void Update(int delay_estimate);
  // Returns the currently detected clockdrift level.
  Level ClockdriftLevel() const { return level_; }

 private:
  // The most recent distinct delay estimates, newest at index 0.
  std::array<int, 3> delay_history_;
  Level level_;
  // Number of consecutive updates with an unchanged delay estimate.
  size_t stability_counter_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_

View File

@ -0,0 +1,103 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
#include <algorithm>
#include <functional>
#include "rtc_base/checks.h"
namespace webrtc {
// Stores the configuration-transition duration and applies the initial
// configuration immediately.
CoarseFilterUpdateGain::CoarseFilterUpdateGain(
    const EchoCanceller3Config::Filter::CoarseConfiguration& config,
    size_t config_change_duration_blocks)
    : config_change_duration_blocks_(
          static_cast<int>(config_change_duration_blocks)) {
  SetConfig(config, true);
  RTC_DCHECK_LT(0, config_change_duration_blocks_);
  one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_;
}
// Restarts the excitation and call counters after a known echo path change so
// that filter adaptation is held back until enough new data has been seen.
void CoarseFilterUpdateGain::HandleEchoPathChange() {
  call_counter_ = 0;
  poor_signal_excitation_counter_ = 0;
}
// Computes the frequency-domain update gain G for the coarse adaptive filter
// from the render power spectrum and the coarse-filter error spectrum
// E_coarse. A zero gain is produced while the render excitation is
// insufficient or the capture signal is saturated.
void CoarseFilterUpdateGain::Compute(
    const std::array<float, kFftLengthBy2Plus1>& render_power,
    const RenderSignalAnalyzer& render_signal_analyzer,
    const FftData& E_coarse,
    size_t size_partitions,
    bool saturated_capture_signal,
    FftData* G) {
  RTC_DCHECK(G);
  ++call_counter_;

  UpdateCurrentConfig();

  if (render_signal_analyzer.PoorSignalExcitation()) {
    poor_signal_excitation_counter_ = 0;
  }

  // Do not update the filter if the render is not sufficiently excited.
  if (++poor_signal_excitation_counter_ < size_partitions ||
      saturated_capture_signal || call_counter_ <= size_partitions) {
    G->re.fill(0.f);
    G->im.fill(0.f);
    return;
  }

  // Compute the per-bin step size mu: rate / X2 above the noise gate,
  // otherwise zero.
  std::array<float, kFftLengthBy2Plus1> mu;
  const auto& X2 = render_power;
  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
    if (X2[k] > current_config_.noise_gate) {
      mu[k] = current_config_.rate / X2[k];
    } else {
      mu[k] = 0.f;
    }
  }

  // Avoid updating the filter close to narrow bands in the render signals.
  render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);

  // G = mu * E_coarse (the 1/X2 normalization is already folded into mu).
  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
    G->re[k] = mu[k] * E_coarse.re[k];
    G->im[k] = mu[k] * E_coarse.im[k];
  }
}
// Linearly cross-fades the active configuration from old_target_config_
// towards target_config_ over config_change_duration_blocks_ blocks after a
// non-immediate SetConfig() call.
void CoarseFilterUpdateGain::UpdateCurrentConfig() {
  RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_);
  if (config_change_counter_ > 0) {
    if (--config_change_counter_ > 0) {
      auto average = [](float from, float to, float from_weight) {
        return from * from_weight + to * (1.f - from_weight);
      };

      // Weight of the old target; decays towards 0 as the counter runs down.
      float change_factor =
          config_change_counter_ * one_by_config_change_duration_blocks_;

      current_config_.rate =
          average(old_target_config_.rate, target_config_.rate, change_factor);
      current_config_.noise_gate =
          average(old_target_config_.noise_gate, target_config_.noise_gate,
                  change_factor);
    } else {
      // Transition finished: lock in the target configuration.
      current_config_ = old_target_config_ = target_config_;
    }
  }
  RTC_DCHECK_LE(0, config_change_counter_);
}
} // namespace webrtc

View File

@ -0,0 +1,74 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
#define MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
#include <stddef.h>
#include <array>
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
namespace webrtc {
// Provides functionality for computing the fixed gain for the coarse filter.
class CoarseFilterUpdateGain {
 public:
  explicit CoarseFilterUpdateGain(
      const EchoCanceller3Config::Filter::CoarseConfiguration& config,
      size_t config_change_duration_blocks);

  // Takes action in the case of a known echo path change.
  void HandleEchoPathChange();

  // Computes the gain.
  void Compute(const std::array<float, kFftLengthBy2Plus1>& render_power,
               const RenderSignalAnalyzer& render_signal_analyzer,
               const FftData& E_coarse,
               size_t size_partitions,
               bool saturated_capture_signal,
               FftData* G);

  // Sets a new config. When immediate_effect is false, the active config is
  // cross-faded towards the new one over config_change_duration_blocks_
  // blocks.
  void SetConfig(
      const EchoCanceller3Config::Filter::CoarseConfiguration& config,
      bool immediate_effect) {
    if (immediate_effect) {
      old_target_config_ = current_config_ = target_config_ = config;
      config_change_counter_ = 0;
    } else {
      old_target_config_ = current_config_;
      target_config_ = config;
      config_change_counter_ = config_change_duration_blocks_;
    }
  }

 private:
  // Config actually applied, plus the endpoints of an ongoing cross-fade.
  EchoCanceller3Config::Filter::CoarseConfiguration current_config_;
  EchoCanceller3Config::Filter::CoarseConfiguration target_config_;
  EchoCanceller3Config::Filter::CoarseConfiguration old_target_config_;
  const int config_change_duration_blocks_;
  float one_by_config_change_duration_blocks_;
  // TODO(peah): Check whether this counter should instead be initialized to a
  // large value.
  size_t poor_signal_excitation_counter_ = 0;
  size_t call_counter_ = 0;
  int config_change_counter_ = 0;

  void UpdateCurrentConfig();
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_

View File

@ -0,0 +1,186 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <array>
#include <cmath>
#include <cstdint>
#include <functional>
#include <numeric>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "modules/audio_processing/aec3/vector_math.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
float GetNoiseFloorFactor(float noise_floor_dbfs) {
  // kdBfsNormalization = 20.f*log10(32768.f).
  constexpr float kdBfsNormalization = 90.30899869919436f;
  const float exponent_db = kdBfsNormalization + noise_floor_dbfs;
  return 64.f * std::pow(10.f, exponent_db * 0.1f);
}
// Table of sqrt(2) * sin(2*pi*i/32).
constexpr float kSqrt2Sin[32] = {
    +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
    +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
    +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
    +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
    -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
    -0.5411961f, -0.2758994f};

// Renders comfort-noise FFT data for the lower and upper bands from the
// estimated noise spectrum N2, using an LCG-driven random phase per bin.
void GenerateComfortNoise(Aec3Optimization optimization,
                          const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise) {
  FftData* N_low = lower_band_noise;
  FftData* N_high = upper_band_noise;

  // Compute square root spectrum.
  std::array<float, kFftLengthBy2Plus1> N;
  std::copy(N2.begin(), N2.end(), N.begin());
  aec3::VectorMath(optimization).Sqrt(N);

  // Compute the noise level for the upper bands.
  constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
  constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
  const float high_band_noise_level =
      std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
      kOneByNumBands;

  // The analysis and synthesis windowing cause loss of power when
  // cross-fading the noise where frames are completely uncorrelated
  // (generated with random phase), hence the factor sqrt(2).
  // This is not the case for the speech signal where the input is overlapping
  // (strong correlation).
  N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
      N_high->re[kFftLengthBy2] = 0.f;
  for (size_t k = 1; k < kFftLengthBy2; k++) {
    constexpr int kIndexMask = 32 - 1;
    // Generate a random 31-bit integer.
    seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
    // Convert to a 5-bit index.
    int i = seed[0] >> 26;

    // x = sqrt(2) * sin(a)
    const float x = kSqrt2Sin[i];
    // y = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2)
    const float y = kSqrt2Sin[(i + 8) & kIndexMask];

    // Form low-frequency noise via spectral shaping.
    N_low->re[k] = N[k] * x;
    N_low->im[k] = N[k] * y;

    // Form the high-frequency noise via simple levelling.
    N_high->re[k] = high_band_noise_level * x;
    N_high->im[k] = high_band_noise_level * y;
  }
}
} // namespace
// Sets up per-channel spectra and a fixed RNG seed. The initial estimate
// N2_initial_ is used for the first blocks while the long-term estimate N2_
// converges (it is dropped after 1000 blocks; see Compute()).
ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
                                             Aec3Optimization optimization,
                                             size_t num_capture_channels)
    : optimization_(optimization),
      seed_(42),
      num_capture_channels_(num_capture_channels),
      noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
      N2_initial_(
          std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
              num_capture_channels_)),
      Y2_smoothed_(num_capture_channels_),
      N2_(num_capture_channels_) {
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    (*N2_initial_)[ch].fill(0.f);
    Y2_smoothed_[ch].fill(0.f);
    N2_[ch].fill(1.0e6f);
  }
}
ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;

// Updates the noise spectrum estimates from the capture spectrum (skipped when
// the capture is saturated) and renders comfort noise for the lower and upper
// bands of every capture channel.
void ComfortNoiseGenerator::Compute(
    bool saturated_capture,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        capture_spectrum,
    rtc::ArrayView<FftData> lower_band_noise,
    rtc::ArrayView<FftData> upper_band_noise) {
  const auto& Y2 = capture_spectrum;

  if (!saturated_capture) {
    // Smooth Y2.
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
                     Y2[ch].begin(), Y2_smoothed_[ch].begin(),
                     [](float a, float b) { return a + 0.1f * (b - a); });
    }

    if (N2_counter_ > 50) {
      // Update N2 from Y2_smoothed: downward updates are fast, upward updates
      // are limited to a slow +0.02% drift per block (factor 1.0002).
      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
        std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
                       N2_[ch].begin(), [](float a, float b) {
                         return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
                                      : a * 1.0002f;
                       });
      }
    }

    if (N2_initial_) {
      if (++N2_counter_ == 1000) {
        // After 1000 blocks the initial estimate is no longer needed.
        N2_initial_.reset();
      } else {
        // Compute the N2_initial from N2.
        for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
          std::transform(N2_[ch].begin(), N2_[ch].end(),
                         (*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
                         [](float a, float b) {
                           return a > b ? b + 0.001f * (a - b) : a;
                         });
        }
      }
    }

    // Limit both estimates from below by the configured noise floor.
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      for (auto& n : N2_[ch]) {
        n = std::max(n, noise_floor_);
      }
      if (N2_initial_) {
        for (auto& n : (*N2_initial_)[ch]) {
          n = std::max(n, noise_floor_);
        }
      }
    }
  }

  // Choose N2 estimate to use.
  const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
                         &upper_band_noise[ch]);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,78 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
#include <stdint.h>
#include <array>
#include <memory>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec_state.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/constructor_magic.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
namespace aec3 {
#if defined(WEBRTC_ARCH_X86_FAMILY)
void EstimateComfortNoise_SSE2(const std::array<float, kFftLengthBy2Plus1>& N2,
uint32_t* seed,
FftData* lower_band_noise,
FftData* upper_band_noise);
#endif
void EstimateComfortNoise(const std::array<float, kFftLengthBy2Plus1>& N2,
uint32_t* seed,
FftData* lower_band_noise,
FftData* upper_band_noise);
} // namespace aec3
// Generates the comfort noise.
class ComfortNoiseGenerator {
 public:
  ComfortNoiseGenerator(const EchoCanceller3Config& config,
                        Aec3Optimization optimization,
                        size_t num_capture_channels);
  ComfortNoiseGenerator() = delete;
  ~ComfortNoiseGenerator();
  ComfortNoiseGenerator(const ComfortNoiseGenerator&) = delete;
  // Deleted for consistency with the deleted copy constructor; the class is
  // intended to be non-copyable.
  ComfortNoiseGenerator& operator=(const ComfortNoiseGenerator&) = delete;

  // Computes the comfort noise.
  void Compute(bool saturated_capture,
               rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                   capture_spectrum,
               rtc::ArrayView<FftData> lower_band_noise,
               rtc::ArrayView<FftData> upper_band_noise);

  // Returns the estimate of the background noise spectrum.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> NoiseSpectrum()
      const {
    return N2_;
  }

 private:
  const Aec3Optimization optimization_;
  // LCG state used for the random-phase generation.
  uint32_t seed_;
  const size_t num_capture_channels_;
  // Lower bound applied to the noise estimates.
  const float noise_floor_;
  // Fast initial estimate, dropped once the long-term estimate has converged.
  std::unique_ptr<std::vector<std::array<float, kFftLengthBy2Plus1>>>
      N2_initial_;
  // Smoothed capture spectra, one per channel.
  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_smoothed_;
  // Long-term noise spectrum estimates, one per channel.
  std::vector<std::array<float, kFftLengthBy2Plus1>> N2_;
  int N2_counter_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/decimator.h"
#include <array>
#include <vector>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// signal.butter(2, 3400/8000.0, 'lowpass', analog=False)
// NOTE(review): the same second-order section is cascaded three times below,
// so the applied filter is that design repeated — confirm this is intended.
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS2() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}};
}

// signal.ellip(6, 1, 40, 1800/8000, btype='lowpass', analog=False)
// Anti-aliasing filter used for down-sampling factor 4 (see Decimator ctor).
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS4() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{-0.08873842f, 0.99605496f}, {0.75916227f, 0.23841065f}, 0.26250696827f},
      {{0.62273832f, 0.78243018f}, {0.74892112f, 0.5410152f}, 0.26250696827f},
      {{0.71107693f, 0.70311421f}, {0.74895534f, 0.63924616f}, 0.26250696827f}};
}

// signal.cheby1(1, 6, [1000/8000, 2000/8000], btype='bandpass', analog=False)
// Band-pass anti-aliasing filter used for down-sampling factor 8.
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetBandPassFilterDS8() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}};
}

// signal.butter(2, 1000/8000.0, 'highpass', analog=False)
// High-pass used as the noise-reduction filter for factors 2 and 4.
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetHighPassFilter() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{1.f, 0.f}, {0.72712179f, 0.21296904f}, 0.7570763753338849f}};
}

// Empty cascade, i.e. a pass-through (no filtering).
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetPassThroughFilter() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{};
}
} // namespace
// Selects the anti-aliasing and noise-reduction filter cascades matching the
// down-sampling factor, which must be 2, 4 or 8.
Decimator::Decimator(size_t down_sampling_factor)
    : down_sampling_factor_(down_sampling_factor),
      anti_aliasing_filter_(down_sampling_factor_ == 4
                                ? GetLowPassFilterDS4()
                                : (down_sampling_factor_ == 8
                                       ? GetBandPassFilterDS8()
                                       : GetLowPassFilterDS2())),
      noise_reduction_filter_(down_sampling_factor_ == 8
                                  ? GetPassThroughFilter()
                                  : GetHighPassFilter()) {
  RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 ||
             down_sampling_factor_ == 8);
}
// Filters one kBlockSize-sample block and writes every
// down_sampling_factor_:th filtered sample to `out`.
void Decimator::Decimate(rtc::ArrayView<const float> in,
                         rtc::ArrayView<float> out) {
  RTC_DCHECK_EQ(kBlockSize, in.size());
  RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size());
  std::array<float, kBlockSize> x;

  // Limit the frequency content of the signal to avoid aliasing.
  anti_aliasing_filter_.Process(in, x);

  // Reduce the impact of near-end noise.
  noise_reduction_filter_.Process(x);

  // Downsample the signal.
  for (size_t j = 0, k = 0; j < out.size(); ++j, k += down_sampling_factor_) {
    RTC_DCHECK_GT(kBlockSize, k);
    out[j] = x[k];
  }
}
} // namespace webrtc

View File

@ -0,0 +1,41 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
// Provides functionality for decimating a signal.
class Decimator {
 public:
  // `down_sampling_factor` must be 2, 4 or 8.
  explicit Decimator(size_t down_sampling_factor);

  // Downsamples the signal.
  void Decimate(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);

 private:
  const size_t down_sampling_factor_;
  // Applied before decimation to limit the frequency content.
  CascadedBiQuadFilter anti_aliasing_filter_;
  // Reduces the impact of near-end noise in the decimated signal.
  CascadedBiQuadFilter noise_reduction_filter_;

  RTC_DISALLOW_COPY_AND_ASSIGN(Decimator);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_

View File

@ -0,0 +1,31 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_

#include <stddef.h>
namespace webrtc {
// Stores delay_estimates.
struct DelayEstimate {
  // Quality of the estimate; kRefined estimates supersede kCoarse ones.
  enum class Quality { kCoarse, kRefined };

  DelayEstimate(Quality quality, size_t delay)
      : quality(quality), delay(delay) {}

  Quality quality;
  // Estimated delay. NOTE(review): unit appears to be blocks — confirm at the
  // call sites producing/consuming this struct.
  size_t delay;
  // Ages of the estimate, maintained by the owner of the struct.
  size_t blocks_since_last_change = 0;
  size_t blocks_since_last_update = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_

View File

@ -0,0 +1,75 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
#include <numeric>
namespace webrtc {
// Copies the detection thresholds from the config and allocates the
// per-channel trigger and hold counters.
DominantNearendDetector::DominantNearendDetector(
    const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
    size_t num_capture_channels)
    : enr_threshold_(config.enr_threshold),
      enr_exit_threshold_(config.enr_exit_threshold),
      snr_threshold_(config.snr_threshold),
      hold_duration_(config.hold_duration),
      trigger_threshold_(config.trigger_threshold),
      use_during_initial_phase_(config.use_during_initial_phase),
      num_capture_channels_(num_capture_channels),
      trigger_counters_(num_capture_channels_),
      hold_counters_(num_capture_channels_) {}
// Classifies the capture signal as dominant-nearend or not by comparing
// low-frequency energies of the nearend, residual echo and comfort noise
// spectra for each channel. Any channel in nearend mode sets the state.
void DominantNearendDetector::Update(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        nearend_spectrum,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        residual_echo_spectrum,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        comfort_noise_spectrum,
    bool initial_state) {
  nearend_state_ = false;

  // Sums the energy of bins 1..15 (lowest frequencies, DC excluded).
  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
    RTC_DCHECK_LE(16, spectrum.size());
    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
  };
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    const float ne_sum = low_frequency_energy(nearend_spectrum[ch]);
    const float echo_sum = low_frequency_energy(residual_echo_spectrum[ch]);
    const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]);

    // Detect strong active nearend if the nearend is sufficiently stronger
    // than the echo and the nearend noise.
    if ((!initial_state || use_during_initial_phase_) &&
        echo_sum < enr_threshold_ * ne_sum &&
        ne_sum > snr_threshold_ * noise_sum) {
      if (++trigger_counters_[ch] >= trigger_threshold_) {
        // After a period of strong active nearend activity, flag nearend mode.
        hold_counters_[ch] = hold_duration_;
        trigger_counters_[ch] = trigger_threshold_;
      }
    } else {
      // Forget previously detected strong active nearend activity.
      trigger_counters_[ch] = std::max(0, trigger_counters_[ch] - 1);
    }

    // Exit nearend-state early at strong echo.
    if (echo_sum > enr_exit_threshold_ * ne_sum &&
        echo_sum > snr_threshold_ * noise_sum) {
      hold_counters_[ch] = 0;
    }

    // Remain in any nearend mode for a certain duration.
    hold_counters_[ch] = std::max(0, hold_counters_[ch] - 1);
    nearend_state_ = nearend_state_ || hold_counters_[ch] > 0;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,56 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
#include <vector>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/nearend_detector.h"
namespace webrtc {
// Class for selecting whether the suppressor is in the nearend or echo state.
class DominantNearendDetector : public NearendDetector {
 public:
  DominantNearendDetector(
      const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
      size_t num_capture_channels);

  // Returns whether the current state is the nearend state.
  bool IsNearendState() const override { return nearend_state_; }

  // Updates the state selection based on latest spectral estimates.
  void Update(rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  nearend_spectrum,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  residual_echo_spectrum,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  comfort_noise_spectrum,
              bool initial_state) override;

 private:
  // Echo-to-nearend ratio thresholds for entering and leaving nearend state.
  const float enr_threshold_;
  const float enr_exit_threshold_;
  // Nearend-to-comfort-noise ratio threshold.
  const float snr_threshold_;
  // Number of blocks the nearend state is held once triggered.
  const int hold_duration_;
  // Consecutive triggering blocks required before entering nearend state.
  const int trigger_threshold_;
  const bool use_during_initial_phase_;
  const size_t num_capture_channels_;

  bool nearend_state_ = false;
  // Per-channel trigger and hold counters.
  std::vector<int> trigger_counters_;
  std::vector<int> hold_counters_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_

View File

@ -0,0 +1,25 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include <algorithm>
namespace webrtc {
// Constructs the circular buffer with `downsampled_buffer_size` zero samples.
// The std::vector fill-constructor already value-initializes every element to
// 0.f, so the previous explicit std::fill over the buffer was redundant and
// has been removed.
DownsampledRenderBuffer::DownsampledRenderBuffer(size_t downsampled_buffer_size)
    : size(static_cast<int>(downsampled_buffer_size)),
      buffer(downsampled_buffer_size, 0.f) {}

DownsampledRenderBuffer::~DownsampledRenderBuffer() = default;
} // namespace webrtc

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "rtc_base/checks.h"
namespace webrtc {
// Holds the circular buffer of the downsampled render data.
struct DownsampledRenderBuffer {
  explicit DownsampledRenderBuffer(size_t downsampled_buffer_size);
  ~DownsampledRenderBuffer();

  // Returns the index succeeding `index`, wrapping at the buffer size.
  int IncIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index < size - 1 ? index + 1 : 0;
  }

  // Returns the index preceding `index`, wrapping at the buffer size.
  int DecIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index > 0 ? index - 1 : size - 1;
  }

  // Returns `index` moved by `offset` (possibly negative), wrapped into
  // [0, size).
  // NOTE(review): the first DCHECK compares the unsigned buffer size against
  // the signed offset — confirm RTC_DCHECK_GE performs a sign-safe compare.
  int OffsetIndex(int index, int offset) const {
    RTC_DCHECK_GE(buffer.size(), offset);
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return (size + index + offset) % size;
  }

  // Move the write/read positions by the given offset or by one step.
  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
  void IncWriteIndex() { write = IncIndex(write); }
  void DecWriteIndex() { write = DecIndex(write); }
  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
  void IncReadIndex() { read = IncIndex(read); }
  void DecReadIndex() { read = DecIndex(read); }

  const int size;
  std::vector<float> buffer;
  int write = 0;
  int read = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_audibility.h"
#include <algorithm>
#include <cmath>
#include <utility>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/block_buffer.h"
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "modules/audio_processing/aec3/stationarity_estimator.h"
namespace webrtc {
// |use_render_stationarity_at_init| makes the stationarity-based echo
// removal active already before an external delay has been seen (see
// Update() and GetResidualEchoScaling()).
EchoAudibility::EchoAudibility(bool use_render_stationarity_at_init)
    : use_render_stationarity_at_init_(use_render_stationarity_at_init) {
  Reset();
}
EchoAudibility::~EchoAudibility() = default;
void EchoAudibility::Update(const RenderBuffer& render_buffer,
rtc::ArrayView<const float> average_reverb,
int delay_blocks,
bool external_delay_seen) {
UpdateRenderNoiseEstimator(render_buffer.GetSpectrumBuffer(),
render_buffer.GetBlockBuffer(),
external_delay_seen);
if (external_delay_seen || use_render_stationarity_at_init_) {
UpdateRenderStationarityFlags(render_buffer, average_reverb, delay_blocks);
}
}
// Restores the estimator to its initial state: no render activity seen yet
// and no previous spectrum write position recorded.
void EchoAudibility::Reset() {
  non_zero_render_seen_ = false;
  render_spectrum_write_prev_ = absl::nullopt;
  render_stationarity_.Reset();
}
// Refreshes the per-band stationarity flags using the render spectrum that is
// aligned with the capture signal, together with the lookahead spectra that
// are buffered beyond that point.
void EchoAudibility::UpdateRenderStationarityFlags(
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const float> average_reverb,
    int min_channel_delay_blocks) {
  const SpectrumBuffer& spectrum_buffer = render_buffer.GetSpectrumBuffer();
  // Position of the spectrum delayed by the estimated minimum channel delay,
  // i.e., the render spectrum matching the current capture block.
  int idx_at_delay = spectrum_buffer.OffsetIndex(spectrum_buffer.read,
                                                 min_channel_delay_blocks);

  // Number of buffered spectra available ahead of the delay-aligned position,
  // clamped to be non-negative.
  int num_lookahead = render_buffer.Headroom() - min_channel_delay_blocks + 1;
  num_lookahead = std::max(0, num_lookahead);

  render_stationarity_.UpdateStationarityFlags(spectrum_buffer, average_reverb,
                                               idx_at_delay, num_lookahead);
}
// Feeds all render spectra written since the previous call into the noise
// estimator. Spectra are ignored until the render signal has been seen to be
// clearly non-zero, unless an external delay has already been reported.
void EchoAudibility::UpdateRenderNoiseEstimator(
    const SpectrumBuffer& spectrum_buffer,
    const BlockBuffer& block_buffer,
    bool external_delay_seen) {
  // First call after Reset(): only record the current write positions so the
  // next call has a starting point.
  if (!render_spectrum_write_prev_) {
    render_spectrum_write_prev_ = spectrum_buffer.write;
    render_block_write_prev_ = block_buffer.write;
    return;
  }
  int render_spectrum_write_current = spectrum_buffer.write;
  if (!non_zero_render_seen_ && !external_delay_seen) {
    non_zero_render_seen_ = !IsRenderTooLow(block_buffer);
  }
  if (non_zero_render_seen_) {
    // Walk from the previously recorded write position to the current one.
    // NOTE(review): the walk uses DecIndex, which assumes the spectrum
    // buffer's write index advances by decrementing — confirm against
    // SpectrumBuffer.
    for (int idx = render_spectrum_write_prev_.value();
         idx != render_spectrum_write_current;
         idx = spectrum_buffer.DecIndex(idx)) {
      render_stationarity_.UpdateNoiseEstimator(spectrum_buffer.buffer[idx]);
    }
  }
  render_spectrum_write_prev_ = render_spectrum_write_current;
}
// Returns true if the render blocks written since the previous call are all
// close to zero across channels, or if no new block has been written at all.
// Only the lowest band (index 0) is inspected. Also updates the recorded
// block write position.
bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) {
  const int num_render_channels =
      static_cast<int>(block_buffer.buffer[0][0].size());
  bool too_low = false;
  const int render_block_write_current = block_buffer.write;
  if (render_block_write_current == render_block_write_prev_) {
    // No new render data since the last check.
    too_low = true;
  } else {
    for (int idx = render_block_write_prev_; idx != render_block_write_current;
         idx = block_buffer.IncIndex(idx)) {
      float max_abs_over_channels = 0.f;
      for (int ch = 0; ch < num_render_channels; ++ch) {
        // NOTE(review): |block| is taken by value; if the element type is a
        // std::vector this copies the band data — consider a const reference.
        auto block = block_buffer.buffer[idx][0][ch];
        auto r = std::minmax_element(block.cbegin(), block.cend());
        // Peak magnitude of this channel's block.
        float max_abs_channel =
            std::max(std::fabs(*r.first), std::fabs(*r.second));
        max_abs_over_channels =
            std::max(max_abs_over_channels, max_abs_channel);
      }
      if (max_abs_over_channels < 10.f) {
        too_low = true;  // Discards all blocks if one of them is too low.
        break;
      }
    }
  }
  render_block_write_prev_ = render_block_write_current;
  return too_low;
}
} // namespace webrtc

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
#include <stddef.h>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/block_buffer.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "modules/audio_processing/aec3/stationarity_estimator.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
// Tracks the stationarity of the render signal per band; bands in which the
// render signal is stationary are assumed to produce no audible echo and are
// reported with a residual-echo scaling of zero.
class EchoAudibility {
 public:
  explicit EchoAudibility(bool use_render_stationarity_at_init);
  ~EchoAudibility();

  EchoAudibility(const EchoAudibility&) = delete;
  EchoAudibility& operator=(const EchoAudibility&) = delete;

  // Feed new render data to the echo audibility estimator.
  void Update(const RenderBuffer& render_buffer,
              rtc::ArrayView<const float> average_reverb,
              int min_channel_delay_blocks,
              bool external_delay_seen);

  // Get the residual echo scaling: 0 for bands judged stationary (once the
  // filter has converged, or immediately when the initial-phase behavior is
  // enabled), 1 otherwise.
  void GetResidualEchoScaling(bool filter_has_had_time_to_converge,
                              rtc::ArrayView<float> residual_scaling) const {
    for (size_t band = 0; band < residual_scaling.size(); ++band) {
      if (render_stationarity_.IsBandStationary(band) &&
          (filter_has_had_time_to_converge ||
           use_render_stationarity_at_init_)) {
        residual_scaling[band] = 0.f;
      } else {
        residual_scaling[band] = 1.0f;
      }
    }
  }

  // Returns true if the current render block is estimated as stationary.
  bool IsBlockStationary() const {
    return render_stationarity_.IsBlockStationary();
  }

 private:
  // Reset the EchoAudibility class.
  void Reset();

  // Updates the render stationarity flags for the current frame.
  void UpdateRenderStationarityFlags(const RenderBuffer& render_buffer,
                                     rtc::ArrayView<const float> average_reverb,
                                     int delay_blocks);

  // Updates the noise estimator with the new render data since the previous
  // call to this method.
  void UpdateRenderNoiseEstimator(const SpectrumBuffer& spectrum_buffer,
                                  const BlockBuffer& block_buffer,
                                  bool external_delay_seen);

  // Returns a bool being true if the render signal contains just close to zero
  // values.
  bool IsRenderTooLow(const BlockBuffer& block_buffer);

  // Spectrum-buffer write index at the previous noise-estimator update; unset
  // until the first Update() after construction/Reset().
  absl::optional<int> render_spectrum_write_prev_;
  // Block-buffer write index at the previous render-level check.
  int render_block_write_prev_;
  // Whether a clearly non-zero render block has been observed yet.
  bool non_zero_render_seen_;
  const bool use_render_stationarity_at_init_;
  StationarityEstimator render_stationarity_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_

View File

@ -0,0 +1,868 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_canceller3.h"
#include <algorithm>
#include <utility>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/high_pass_filter.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/experiments/field_trial_parser.h"
#include "rtc_base/logging.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
enum class EchoCanceller3ApiCall { kCapture, kRender };
// Returns true if any sample in |y| is at or beyond the near-int16-limit
// threshold in either direction, i.e., the capture signal is saturated.
bool DetectSaturation(rtc::ArrayView<const float> y) {
  constexpr float kSaturationLimit = 32700.0f;
  for (const float sample : y) {
    if (sample >= kSaturationLimit || sample <= -kSaturationLimit) {
      return true;
    }
  }
  return false;
}
// Retrieves a float value from a field trial if it is available. If no value
// is present, the default value is kept. If the retrieved value is beyond the
// specified limits, the default value is kept instead.
void RetrieveFieldTrialValue(const char* trial_name,
                             float min,
                             float max,
                             float* value_to_update) {
  const std::string field_trial_str = field_trial::FindFullName(trial_name);

  // An empty key is used; presumably ParseFieldTrial then matches a bare
  // value in the trial string — verify against field_trial_parser.
  FieldTrialParameter<double> field_trial_param(/*key=*/"", *value_to_update);

  ParseFieldTrial({&field_trial_param}, field_trial_str);
  float field_trial_value = static_cast<float>(field_trial_param.Get());

  // Only accept values inside [min, max]; otherwise keep the existing value.
  if (field_trial_value >= min && field_trial_value <= max) {
    *value_to_update = field_trial_value;
  }
}
// Retrieves an int value from a field trial if it is available. If no value
// is present, the default value is kept. If the retrieved value is beyond the
// specified limits, the default value is kept instead.
void RetrieveFieldTrialValue(const char* trial_name,
                             int min,
                             int max,
                             int* value_to_update) {
  const std::string field_trial_str = field_trial::FindFullName(trial_name);

  FieldTrialParameter<int> field_trial_param(/*key=*/"", *value_to_update);

  ParseFieldTrial({&field_trial_param}, field_trial_str);
  // Keep the value as int: the previous float intermediate lost precision for
  // large magnitudes and implicitly narrowed on assignment.
  const int field_trial_value = field_trial_param.Get();

  if (field_trial_value >= min && field_trial_value <= max) {
    *value_to_update = field_trial_value;
  }
}
// Fills |sub_frame_view| with per-band, per-channel views into sub-frame
// |sub_frame_index| of the split-band data in |frame|. Each view covers
// kSubFrameLength samples.
void FillSubFrameView(
    AudioBuffer* frame,
    size_t sub_frame_index,
    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
  // Together these bound sub_frame_index to {0, 1}. NOTE(review): the second
  // check is trivially true for an unsigned type.
  RTC_DCHECK_GE(1, sub_frame_index);
  RTC_DCHECK_LE(0, sub_frame_index);
  RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size());
  RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size());
  for (size_t band = 0; band < sub_frame_view->size(); ++band) {
    for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) {
      (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
          &frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength],
          kSubFrameLength);
    }
  }
}
// Overload of FillSubFrameView for a plain nested-vector frame: fills
// |sub_frame_view| with per-band, per-channel views into sub-frame
// |sub_frame_index| of |frame|.
void FillSubFrameView(
    std::vector<std::vector<std::vector<float>>>* frame,
    size_t sub_frame_index,
    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
  RTC_DCHECK_GE(1, sub_frame_index);
  RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
  RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
  for (size_t band = 0; band < frame->size(); ++band) {
    for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
      (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
          &(*frame)[band][channel][sub_frame_index * kSubFrameLength],
          kSubFrameLength);
    }
  }
}
// Processes one sub-frame of capture data: builds views into the capture (and
// optional linear-output) buffers, runs a block through the block processor,
// and writes the processed block back out via the framers.
void ProcessCaptureFrameContent(
    AudioBuffer* linear_output,
    AudioBuffer* capture,
    bool level_change,
    bool saturated_microphone_signal,
    size_t sub_frame_index,
    FrameBlocker* capture_blocker,
    BlockFramer* linear_output_framer,
    BlockFramer* output_framer,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* linear_output_block,
    std::vector<std::vector<rtc::ArrayView<float>>>*
        linear_output_sub_frame_view,
    std::vector<std::vector<std::vector<float>>>* capture_block,
    std::vector<std::vector<rtc::ArrayView<float>>>* capture_sub_frame_view) {
  FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view);

  // The linear output path is optional; when present, the related framer,
  // block and view must all be supplied.
  if (linear_output) {
    RTC_DCHECK(linear_output_framer);
    RTC_DCHECK(linear_output_block);
    RTC_DCHECK(linear_output_sub_frame_view);
    FillSubFrameView(linear_output, sub_frame_index,
                     linear_output_sub_frame_view);
  }

  capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view,
                                                 capture_block);
  block_processor->ProcessCapture(level_change, saturated_microphone_signal,
                                  linear_output_block, capture_block);
  output_framer->InsertBlockAndExtractSubFrame(*capture_block,
                                               capture_sub_frame_view);

  if (linear_output) {
    RTC_DCHECK(linear_output_framer);
    linear_output_framer->InsertBlockAndExtractSubFrame(
        *linear_output_block, linear_output_sub_frame_view);
  }
}
// Processes the block, if any, that remains buffered in |capture_blocker|
// after the sub-frames of the current frame have been handled. The processed
// block is inserted into the framers without extracting a sub-frame.
void ProcessRemainingCaptureFrameContent(
    bool level_change,
    bool saturated_microphone_signal,
    FrameBlocker* capture_blocker,
    BlockFramer* linear_output_framer,
    BlockFramer* output_framer,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* linear_output_block,
    std::vector<std::vector<std::vector<float>>>* block) {
  if (!capture_blocker->IsBlockAvailable()) {
    return;
  }

  capture_blocker->ExtractBlock(block);
  block_processor->ProcessCapture(level_change, saturated_microphone_signal,
                                  linear_output_block, block);
  output_framer->InsertBlock(*block);

  if (linear_output_framer) {
    RTC_DCHECK(linear_output_block);
    linear_output_framer->InsertBlock(*linear_output_block);
  }
}
// Buffers one sub-frame of render data: builds views into |render_frame|,
// extracts a block via |render_blocker| and hands it to the block processor.
void BufferRenderFrameContent(
    std::vector<std::vector<std::vector<float>>>* render_frame,
    size_t sub_frame_index,
    FrameBlocker* render_blocker,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* block,
    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
  FillSubFrameView(render_frame, sub_frame_index, sub_frame_view);
  render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
  block_processor->BufferRender(*block);
}
// Buffers the block, if any, that remains in |render_blocker| after the
// sub-frames of the current render frame have been handled.
void BufferRemainingRenderFrameContent(
    FrameBlocker* render_blocker,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* block) {
  // Nothing to do until the blocker has accumulated a full block.
  if (render_blocker->IsBlockAvailable()) {
    render_blocker->ExtractBlock(block);
    block_processor->BufferRender(*block);
  }
}
// Copies the split-band data of |buffer| into the pre-sized nested-vector
// |frame| (bands x channels x kSplitBandSize samples).
void CopyBufferIntoFrame(const AudioBuffer& buffer,
                         size_t num_bands,
                         size_t num_channels,
                         std::vector<std::vector<std::vector<float>>>* frame) {
  RTC_DCHECK_EQ(num_bands, frame->size());
  RTC_DCHECK_EQ(num_channels, (*frame)[0].size());
  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size());
  for (size_t band = 0; band < num_bands; ++band) {
    for (size_t channel = 0; channel < num_channels; ++channel) {
      rtc::ArrayView<const float> buffer_view(
          &buffer.split_bands_const(channel)[band][0],
          AudioBuffer::kSplitBandSize);
      std::copy(buffer_view.begin(), buffer_view.end(),
                (*frame)[band][channel].begin());
    }
  }
}
} // namespace
// TODO(webrtc:5298): Move this to a separate file.
//
// Returns a copy of |config| with overrides from the active field trials
// applied. Three kinds of overrides are handled, in order: (1) boolean
// kill-switch / enforcement trials, (2) a combined suppressor-tuning override
// trial, and (3) per-parameter value-override trials.
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
  EchoCanceller3Config adjusted_cfg = config;

  // Kill switch restoring the stronger legacy anti-howling behavior.
  if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
    adjusted_cfg.suppressor.high_bands_suppression
        .anti_howling_activation_threshold = 25.f;
    adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 0.01f;
  }

  if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) {
    adjusted_cfg.filter.config_change_duration_blocks = 10;
  }

  // Mutually exclusive overrides of the filter initial-state duration.
  if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = 0.f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3UseDot1SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = .1f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3UseDot2SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = .2f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3UseDot3SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = .3f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3UseDot6SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = .6f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3UseDot9SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = .9f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = 1.2f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = 1.6f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) {
    adjusted_cfg.filter.initial_state_seconds = 2.0f;
  }

  if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
    adjusted_cfg.ep_strength.echo_can_saturate = false;
  }

  // Mutually exclusive overrides of the default reverb length.
  if (field_trial::IsEnabled("WebRTC-Aec3UseDot2ReverbDefaultLen")) {
    adjusted_cfg.ep_strength.default_len = 0.2f;
  } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot3ReverbDefaultLen")) {
    adjusted_cfg.ep_strength.default_len = 0.3f;
  } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot4ReverbDefaultLen")) {
    adjusted_cfg.ep_strength.default_len = 0.4f;
  } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot5ReverbDefaultLen")) {
    adjusted_cfg.ep_strength.default_len = 0.5f;
  } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot6ReverbDefaultLen")) {
    adjusted_cfg.ep_strength.default_len = 0.6f;
  } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot7ReverbDefaultLen")) {
    adjusted_cfg.ep_strength.default_len = 0.7f;
  } else if (field_trial::IsEnabled("WebRTC-Aec3UseDot8ReverbDefaultLen")) {
    adjusted_cfg.ep_strength.default_len = 0.8f;
  }

  if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
    // Two blocks headroom.
    adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
  }

  // ERLE quality-estimate clamping and onset detection kill switches.
  if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) {
    adjusted_cfg.erle.clamp_quality_estimate_to_zero = false;
  }

  if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) {
    adjusted_cfg.erle.clamp_quality_estimate_to_one = false;
  }

  if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) {
    adjusted_cfg.erle.onset_detection = false;
  }

  // Delay-estimation alignment-mixing overrides.
  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) {
    adjusted_cfg.delay.render_alignment_mixing.downmix = true;
    adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) {
    adjusted_cfg.delay.capture_alignment_mixing.downmix = true;
    adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) {
    adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
        true;
  }

  // NOTE(review): despite the "RenderDelayEstimation" trial name, the capture
  // alignment mixing is modified here — confirm intent upstream.
  if (field_trial::IsEnabled(
          "WebRTC-"
          "Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) {
    adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
        false;
  }

  // Dominant-nearend activation sensitivity overrides.
  if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
    adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3VerySensitiveDominantNearendActivation")) {
    adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f;
  }

  if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) {
    adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f;
  }

  // Suppressor transparency tuning overrides.
  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) {
    adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f;
    adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) {
    adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f;
    adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
    adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
    adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
    adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
    adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
  }

  // Suppressor gain-adjustment speed overrides.
  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
    adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
    adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
    adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
    adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
  }

  // Stationarity-based echo audibility overrides.
  if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
    adjusted_cfg.echo_audibility.use_stationarity_properties = true;
  }

  if (field_trial::IsEnabled(
          "WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
    adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
  }

  // Active-render-limit overrides.
  if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
    adjusted_cfg.render_levels.active_render_limit = 50.f;
  } else if (field_trial::IsEnabled(
                 "WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
    adjusted_cfg.render_levels.active_render_limit = 30.f;
  }

  if (field_trial::IsEnabled("WebRTC-Aec3NonlinearModeReverbKillSwitch")) {
    adjusted_cfg.echo_model.model_reverb_in_nonlinear_mode = false;
  }

  // Field-trial based override for the whole suppressor tuning. Each
  // parameter defaults to its current (possibly already adjusted) value, so
  // parameters absent from the trial string are left unchanged.
  const std::string suppressor_tuning_override_trial_name =
      field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");

  FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
      "nearend_tuning_mask_lf_enr_transparent",
      adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
  FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
      "nearend_tuning_mask_lf_enr_suppress",
      adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
  FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
      "nearend_tuning_mask_hf_enr_transparent",
      adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
  FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
      "nearend_tuning_mask_hf_enr_suppress",
      adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
  FieldTrialParameter<double> nearend_tuning_max_inc_factor(
      "nearend_tuning_max_inc_factor",
      adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
  FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
      "nearend_tuning_max_dec_factor_lf",
      adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
  FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
      "normal_tuning_mask_lf_enr_transparent",
      adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
  FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
      "normal_tuning_mask_lf_enr_suppress",
      adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
  FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
      "normal_tuning_mask_hf_enr_transparent",
      adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
  FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
      "normal_tuning_mask_hf_enr_suppress",
      adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
  FieldTrialParameter<double> normal_tuning_max_inc_factor(
      "normal_tuning_max_inc_factor",
      adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
  FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
      "normal_tuning_max_dec_factor_lf",
      adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
  FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
      "dominant_nearend_detection_enr_threshold",
      adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
  FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
      "dominant_nearend_detection_enr_exit_threshold",
      adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
  FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
      "dominant_nearend_detection_snr_threshold",
      adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
  FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
      "dominant_nearend_detection_hold_duration",
      adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
  FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
      "dominant_nearend_detection_trigger_threshold",
      adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
  FieldTrialParameter<double> ep_strength_default_len(
      "ep_strength_default_len", adjusted_cfg.ep_strength.default_len);

  ParseFieldTrial(
      {&nearend_tuning_mask_lf_enr_transparent,
       &nearend_tuning_mask_lf_enr_suppress,
       &nearend_tuning_mask_hf_enr_transparent,
       &nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
       &nearend_tuning_max_dec_factor_lf,
       &normal_tuning_mask_lf_enr_transparent,
       &normal_tuning_mask_lf_enr_suppress,
       &normal_tuning_mask_hf_enr_transparent,
       &normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
       &normal_tuning_max_dec_factor_lf,
       &dominant_nearend_detection_enr_threshold,
       &dominant_nearend_detection_enr_exit_threshold,
       &dominant_nearend_detection_snr_threshold,
       &dominant_nearend_detection_hold_duration,
       &dominant_nearend_detection_trigger_threshold, &ep_strength_default_len},
      suppressor_tuning_override_trial_name);

  // Write the (possibly overridden) parameters back into the config.
  adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
      static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
  adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
      static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
  adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
      static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
  adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
      static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
  adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
      static_cast<float>(nearend_tuning_max_inc_factor.Get());
  adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
      static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
  adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
      static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
  adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
      static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
  adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
      static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
  adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
      static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
  adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
      static_cast<float>(normal_tuning_max_inc_factor.Get());
  adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
      static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
  adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
      static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
  adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
      static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
  adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
      static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
  adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
      dominant_nearend_detection_hold_duration.Get();
  adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
      dominant_nearend_detection_trigger_threshold.Get();
  adjusted_cfg.ep_strength.default_len =
      static_cast<float>(ep_strength_default_len.Get());

  // Field trial-based overrides of individual suppressor parameters. These
  // run last and therefore take precedence over everything above.
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
      &adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
      100.f,
      &adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
      &adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
      &adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
      &adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
  RetrieveFieldTrialValue(
      "WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
      &adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
  RetrieveFieldTrialValue("WebRTC-Aec3SuppressorEpStrengthDefaultLenOverride",
                          -1.f, 1.f, &adjusted_cfg.ep_strength.default_len);

  return adjusted_cfg;
}
// Helper that copies render audio into the render transfer queue, applying a
// high-pass filter to the lowest band before queueing.
class EchoCanceller3::RenderWriter {
 public:
  RenderWriter(ApmDataDumper* data_dumper,
               SwapQueue<std::vector<std::vector<std::vector<float>>>,
                         Aec3RenderQueueItemVerifier>* render_transfer_queue,
               size_t num_bands,
               size_t num_channels);

  RenderWriter() = delete;
  RenderWriter(const RenderWriter&) = delete;
  RenderWriter& operator=(const RenderWriter&) = delete;

  ~RenderWriter();

  // Copies |input| into an internal frame, high-pass filters the lowest band
  // and inserts the frame into the transfer queue.
  void Insert(const AudioBuffer& input);

 private:
  ApmDataDumper* data_dumper_;
  const size_t num_bands_;
  const size_t num_channels_;
  HighPassFilter high_pass_filter_;
  // Pre-allocated frame reused for every insertion, avoiding per-call
  // allocations.
  std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
  SwapQueue<std::vector<std::vector<std::vector<float>>>,
            Aec3RenderQueueItemVerifier>* render_transfer_queue_;
};
// Pre-sizes the zero-initialized input frame to bands x channels x
// kSplitBandSize samples.
EchoCanceller3::RenderWriter::RenderWriter(
    ApmDataDumper* data_dumper,
    SwapQueue<std::vector<std::vector<std::vector<float>>>,
              Aec3RenderQueueItemVerifier>* render_transfer_queue,
    size_t num_bands,
    size_t num_channels)
    : data_dumper_(data_dumper),
      num_bands_(num_bands),
      num_channels_(num_channels),
      // NOTE(review): the high-pass filter is constructed with a hard-coded
      // 16000 — presumably the rate of the lowest split band; confirm against
      // HighPassFilter's expectations.
      high_pass_filter_(16000, num_channels),
      render_queue_input_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      render_transfer_queue_(render_transfer_queue) {
  RTC_DCHECK(data_dumper);
}
EchoCanceller3::RenderWriter::~RenderWriter() = default;
// Copies |input| into the pre-allocated frame, high-pass filters the lowest
// band and inserts the frame into the render transfer queue.
void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
  RTC_DCHECK_EQ(num_bands_, input.num_bands());
  RTC_DCHECK_EQ(num_channels_, input.num_channels());

  // TODO(bugs.webrtc.org/8759) Temporary work-around.
  if (num_bands_ != input.num_bands())
    return;

  // Only the first channel of the lowest band is dumped.
  data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize,
                        &input.split_bands_const(0)[0][0], 16000, 1);

  CopyBufferIntoFrame(input, num_bands_, num_channels_,
                      &render_queue_input_frame_);
  // High-pass filter only the lowest band.
  high_pass_filter_.Process(&render_queue_input_frame_[0]);

  // The insert result is deliberately ignored; presumably a false return
  // means the queue was full and the frame is dropped — confirm against
  // SwapQueue.
  static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
}
// Counts created instances; used to tag the per-instance data dumper output.
int EchoCanceller3::instance_count_ = 0;
// Normal constructor: delegates to the testing constructor with a
// default-created BlockProcessor.
// NOTE(review): AdjustConfig(config) is evaluated twice (once for the
// delegated constructor, once for BlockProcessor::Create); the results are
// presumably identical within a run, so this is only a minor redundancy.
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                               int sample_rate_hz,
                               size_t num_render_channels,
                               size_t num_capture_channels)
    : EchoCanceller3(AdjustConfig(config),
                     sample_rate_hz,
                     num_render_channels,
                     num_capture_channels,
                     std::unique_ptr<BlockProcessor>(
                         BlockProcessor::Create(AdjustConfig(config),
                                                sample_rate_hz,
                                                num_render_channels,
                                                num_capture_channels))) {}
// Testing constructor: sets up all state; the public constructor delegates
// here with a default-created BlockProcessor.
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                               int sample_rate_hz,
                               size_t num_render_channels,
                               size_t num_capture_channels,
                               std::unique_ptr<BlockProcessor> block_processor)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_(config),
      sample_rate_hz_(sample_rate_hz),
      num_bands_(NumBandsForRate(sample_rate_hz_)),
      num_render_channels_(num_render_channels),
      num_capture_channels_(num_capture_channels),
      output_framer_(num_bands_, num_capture_channels_),
      capture_blocker_(num_bands_, num_capture_channels_),
      render_blocker_(num_bands_, num_render_channels_),
      render_transfer_queue_(
          kRenderTransferQueueSizeFrames,
          std::vector<std::vector<std::vector<float>>>(
              num_bands_,
              std::vector<std::vector<float>>(
                  num_render_channels_,
                  std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
          Aec3RenderQueueItemVerifier(num_bands_,
                                      num_render_channels_,
                                      AudioBuffer::kSplitBandSize)),
      block_processor_(std::move(block_processor)),
      render_queue_output_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_render_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      render_block_(
          num_bands_,
          std::vector<std::vector<float>>(num_render_channels_,
                                          std::vector<float>(kBlockSize, 0.f))),
      capture_block_(
          num_bands_,
          std::vector<std::vector<float>>(num_capture_channels_,
                                          std::vector<float>(kBlockSize, 0.f))),
      render_sub_frame_view_(
          num_bands_,
          std::vector<rtc::ArrayView<float>>(num_render_channels_)),
      capture_sub_frame_view_(
          num_bands_,
          std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
  // Optional fixed delaying of the capture signal, applied in
  // ProcessCapture().
  if (config_.delay.fixed_capture_delay_samples > 0) {
    block_delay_buffer_.reset(new BlockDelayBuffer(
        num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
        config_.delay.fixed_capture_delay_samples));
  }
  render_writer_.reset(new RenderWriter(data_dumper_.get(),
                                        &render_transfer_queue_, num_bands_,
                                        num_render_channels_));
  // One band per 16 kHz of sample rate, with a minimum of one band.
  RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
  RTC_DCHECK_GE(kMaxNumBands, num_bands_);
  // Optionally allocate the state needed for exporting the linear AEC output.
  if (config_.filter.export_linear_aec_output) {
    linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_));
    linear_output_block_ =
        std::make_unique<std::vector<std::vector<std::vector<float>>>>(
            1, std::vector<std::vector<float>>(
                   num_capture_channels_, std::vector<float>(kBlockSize, 0.f)));
    linear_output_sub_frame_view_ =
        std::vector<std::vector<rtc::ArrayView<float>>>(
            1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
  }
}
// Defaulted out-of-line; presumably so that the unique_ptr members' complete
// types are only required in this translation unit.
EchoCanceller3::~EchoCanceller3() = default;
// Records the API call order for debugging and hands the render frame to the
// writer, which copies it into the render transfer queue.
void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
  RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
  RTC_DCHECK_EQ(render.num_channels(), num_render_channels_);
  constexpr int kRenderCall = static_cast<int>(EchoCanceller3ApiCall::kRender);
  data_dumper_->DumpRaw("aec3_call_order", kRenderCall);
  render_writer_->Insert(render);
}
// Scans the full-band capture channels for saturation; the per-frame result
// is latched in saturated_microphone_signal_ for use in ProcessCapture().
void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
                        capture.channels_const()[0], sample_rate_hz_, 1);
  // Stop scanning as soon as one saturated channel has been found.
  bool saturation_detected = false;
  const size_t num_channels = capture.num_channels();
  for (size_t ch = 0; ch < num_channels && !saturation_detected; ++ch) {
    saturation_detected = DetectSaturation(rtc::ArrayView<const float>(
        capture.channels_const()[ch], capture.num_frames()));
  }
  saturated_microphone_signal_ = saturation_detected;
}
// Convenience overload that does not retrieve the linear AEC output.
void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
  ProcessCapture(capture, nullptr, level_change);
}
// Processes one split-band capture frame, removing the echo in-place in
// |capture|. If |linear_output| is non-null the linear filter output is also
// produced; this requires config_.filter.export_linear_aec_output to have
// been set at construction time.
void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
                                    AudioBuffer* linear_output,
                                    bool level_change) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  RTC_DCHECK(capture);
  RTC_DCHECK_EQ(num_bands_, capture->num_bands());
  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
  RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
  data_dumper_->DumpRaw("aec3_call_order",
                        static_cast<int>(EchoCanceller3ApiCall::kCapture));
  if (linear_output && !linear_output_framer_) {
    RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
                         "properly configuring AEC3.";
    RTC_NOTREACHED();
  }
  // Report capture call in the metrics and periodically update API call
  // metrics.
  api_call_metrics_.ReportCaptureCall();
  // Optionally delay the capture signal.
  if (config_.delay.fixed_capture_delay_samples > 0) {
    RTC_DCHECK(block_delay_buffer_);
    block_delay_buffer_->DelaySignal(capture);
  }
  rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
      &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);
  data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);
  // Move any pending render frames into the block processor before handling
  // the capture frame.
  EmptyRenderQueue();
  // The frame is processed as two sub-frames (indices 0 and 1) plus any
  // remaining content.
  ProcessCaptureFrameContent(linear_output, capture, level_change,
                             saturated_microphone_signal_, 0, &capture_blocker_,
                             linear_output_framer_.get(), &output_framer_,
                             block_processor_.get(), linear_output_block_.get(),
                             &linear_output_sub_frame_view_, &capture_block_,
                             &capture_sub_frame_view_);
  ProcessCaptureFrameContent(linear_output, capture, level_change,
                             saturated_microphone_signal_, 1, &capture_blocker_,
                             linear_output_framer_.get(), &output_framer_,
                             block_processor_.get(), linear_output_block_.get(),
                             &linear_output_sub_frame_view_, &capture_block_,
                             &capture_sub_frame_view_);
  ProcessRemainingCaptureFrameContent(
      level_change, saturated_microphone_signal_, &capture_blocker_,
      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
      linear_output_block_.get(), &capture_block_);
  data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
                        &capture->split_bands(0)[0][0], 16000, 1);
}
// Collects the current echo-control metrics from the block processor.
EchoControl::Metrics EchoCanceller3::GetMetrics() const {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  Metrics collected_metrics;
  block_processor_->GetMetrics(&collected_metrics);
  return collected_metrics;
}
// Forwards an external estimate of the audio buffer delay to the block
// processor.
void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  block_processor_->SetAudioBufferDelay(delay_ms);
}
// AEC3 has no pass-through mode; it is always actively processing.
bool EchoCanceller3::ActiveProcessing() const {
  return true;
}
// Produces a default configuration adapted to the channel counts. Currently
// only the render channel count affects the result; num_capture_channels is
// accepted for interface symmetry but unused.
EchoCanceller3Config EchoCanceller3::CreateDefaultConfig(
    size_t num_render_channels,
    size_t num_capture_channels) {
  EchoCanceller3Config cfg;
  if (num_render_channels > 1) {
    // Use shorter and more rapidly adapting coarse filter to compensate for
    // the increased number of total filter parameters to adapt.
    cfg.filter.coarse.length_blocks = 11;
    cfg.filter.coarse.rate = 0.95f;
    cfg.filter.coarse_initial.length_blocks = 11;
    cfg.filter.coarse_initial.rate = 0.95f;
    // Use more conservative suppressor behavior for non-nearend speech.
    cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
    cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
  }
  return cfg;
}
// Drains every frame the render side has queued and feeds it to the block
// processor: two sub-frames followed by any remaining frame content.
void EchoCanceller3::EmptyRenderQueue() {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  while (render_transfer_queue_.Remove(&render_queue_output_frame_)) {
    // Report render call in the metrics.
    api_call_metrics_.ReportRenderCall();
    for (int sub_frame = 0; sub_frame < 2; ++sub_frame) {
      BufferRenderFrameContent(&render_queue_output_frame_, sub_frame,
                               &render_blocker_, block_processor_.get(),
                               &render_block_, &render_sub_frame_view_);
    }
    BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(),
                                      &render_block_);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,196 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
#include <stddef.h>
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
#include "modules/audio_processing/aec3/block_delay_buffer.h"
#include "modules/audio_processing/aec3/block_framer.h"
#include "modules/audio_processing/aec3/block_processor.h"
#include "modules/audio_processing/aec3/frame_blocker.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/race_checker.h"
#include "rtc_base/swap_queue.h"
#include "rtc_base/thread_annotations.h"
namespace webrtc {
// Method for adjusting config parameter dependencies.
// Only to be used externally to AEC3 for testing purposes.
// TODO(webrtc:5298): Move this to a separate file.
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config);
// Functor used by the render SwapQueue to verify that a frame about to be
// inserted has the expected shape: num_bands x num_channels x frame_length.
class Aec3RenderQueueItemVerifier {
 public:
  Aec3RenderQueueItemVerifier(size_t num_bands,
                              size_t num_channels,
                              size_t frame_length)
      : num_bands_(num_bands),
        num_channels_(num_channels),
        frame_length_(frame_length) {}
  // Returns true iff |v| has exactly the configured dimensions.
  bool operator()(const std::vector<std::vector<std::vector<float>>>& v) const {
    if (v.size() != num_bands_) {
      return false;
    }
    for (size_t band = 0; band < v.size(); ++band) {
      if (v[band].size() != num_channels_) {
        return false;
      }
      for (size_t channel = 0; channel < v[band].size(); ++channel) {
        if (v[band][channel].size() != frame_length_) {
          return false;
        }
      }
    }
    return true;
  }
 private:
  const size_t num_bands_;
  const size_t num_channels_;
  const size_t frame_length_;
};
// Main class for the echo canceller3.
// It does the following:
// -Receives 10 ms frames of band-split audio.
// -Provides the lower level echo canceller functionality with
// blocks of 64 samples of audio data.
// -Partially handles the jitter in the render and capture API
// call sequence.
//
// The class is supposed to be used in a non-concurrent manner apart from the
// AnalyzeRender call which can be called concurrently with the other methods.
class EchoCanceller3 : public EchoControl {
 public:
  // Normal c-tor to use.
  EchoCanceller3(const EchoCanceller3Config& config,
                 int sample_rate_hz,
                 size_t num_render_channels,
                 size_t num_capture_channels);
  // Testing c-tor that is used only for testing purposes.
  EchoCanceller3(const EchoCanceller3Config& config,
                 int sample_rate_hz,
                 size_t num_render_channels,
                 size_t num_capture_channels,
                 std::unique_ptr<BlockProcessor> block_processor);
  ~EchoCanceller3() override;
  EchoCanceller3(const EchoCanceller3&) = delete;
  EchoCanceller3& operator=(const EchoCanceller3&) = delete;
  // Analyzes and stores an internal copy of the split-band domain render
  // signal.
  void AnalyzeRender(AudioBuffer* render) override { AnalyzeRender(*render); }
  // Analyzes the full-band domain capture signal to detect signal saturation.
  void AnalyzeCapture(AudioBuffer* capture) override {
    AnalyzeCapture(*capture);
  }
  // Processes the split-band domain capture signal in order to remove any echo
  // present in the signal.
  void ProcessCapture(AudioBuffer* capture, bool level_change) override;
  // As above, but also returns the linear filter output.
  void ProcessCapture(AudioBuffer* capture,
                      AudioBuffer* linear_output,
                      bool level_change) override;
  // Collect current metrics from the echo canceller.
  Metrics GetMetrics() const override;
  // Provides an optional external estimate of the audio buffer delay.
  void SetAudioBufferDelay(int delay_ms) override;
  // Always returns true; AEC3 has no pass-through mode.
  bool ActiveProcessing() const override;
  // Signals whether an external detector has detected echo leakage from the
  // echo canceller.
  // Note that in the case echo leakage has been flagged, it should be unflagged
  // once it is no longer occurring.
  void UpdateEchoLeakageStatus(bool leakage_detected) {
    RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
    block_processor_->UpdateEchoLeakageStatus(leakage_detected);
  }
  // Produces a default configuration that is suitable for a certain combination
  // of render and capture channels.
  static EchoCanceller3Config CreateDefaultConfig(size_t num_render_channels,
                                                  size_t num_capture_channels);
 private:
  class RenderWriter;
  // Empties the render SwapQueue.
  void EmptyRenderQueue();
  // Analyzes and stores an internal copy of the split-band domain render
  // signal.
  void AnalyzeRender(const AudioBuffer& render);
  // Analyzes the full-band domain capture signal to detect signal saturation.
  void AnalyzeCapture(const AudioBuffer& capture);
  rtc::RaceChecker capture_race_checker_;
  rtc::RaceChecker render_race_checker_;
  // State that is accessed by the AnalyzeRender call.
  std::unique_ptr<RenderWriter> render_writer_
      RTC_GUARDED_BY(render_race_checker_);
  // State that may be accessed by the capture thread.
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  const int sample_rate_hz_;
  const int num_bands_;
  const size_t num_render_channels_;
  const size_t num_capture_channels_;
  std::unique_ptr<BlockFramer> linear_output_framer_
      RTC_GUARDED_BY(capture_race_checker_);
  BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
  FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
  FrameBlocker render_blocker_ RTC_GUARDED_BY(capture_race_checker_);
  SwapQueue<std::vector<std::vector<std::vector<float>>>,
            Aec3RenderQueueItemVerifier>
      render_transfer_queue_;
  std::unique_ptr<BlockProcessor> block_processor_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<std::vector<float>>> render_queue_output_frame_
      RTC_GUARDED_BY(capture_race_checker_);
  bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) =
      false;
  std::vector<std::vector<std::vector<float>>> render_block_
      RTC_GUARDED_BY(capture_race_checker_);
  std::unique_ptr<std::vector<std::vector<std::vector<float>>>>
      linear_output_block_ RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<std::vector<float>>> capture_block_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> render_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> linear_output_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> capture_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::unique_ptr<BlockDelayBuffer> block_delay_buffer_
      RTC_GUARDED_BY(capture_race_checker_);
  ApiCallJitterMetrics api_call_metrics_ RTC_GUARDED_BY(capture_race_checker_);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
#include <array>
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Constructs the delay estimator from the AEC3 config: the capture signal is
// downmixed and decimated before matched filtering against the downsampled
// render signal.
EchoPathDelayEstimator::EchoPathDelayEstimator(
    ApmDataDumper* data_dumper,
    const EchoCanceller3Config& config,
    size_t num_capture_channels)
    : data_dumper_(data_dumper),
      down_sampling_factor_(config.delay.down_sampling_factor),
      // Guard against division by zero; the factor is DCHECKed to be
      // positive in the constructor body below.
      sub_block_size_(down_sampling_factor_ != 0
                          ? kBlockSize / down_sampling_factor_
                          : kBlockSize),
      capture_mixer_(num_capture_channels,
                     config.delay.capture_alignment_mixing),
      capture_decimator_(down_sampling_factor_),
      matched_filter_(
          data_dumper_,
          DetectOptimization(),
          sub_block_size_,
          kMatchedFilterWindowSizeSubBlocks,
          config.delay.num_filters,
          kMatchedFilterAlignmentShiftSizeSubBlocks,
          // A dedicated excitation limit applies for down-sampling factor 8.
          config.delay.down_sampling_factor == 8
              ? config.render_levels.poor_excitation_render_limit_ds8
              : config.render_levels.poor_excitation_render_limit,
          config.delay.delay_estimate_smoothing,
          config.delay.delay_candidate_detection_threshold),
      matched_filter_lag_aggregator_(data_dumper_,
                                     matched_filter_.GetMaxFilterLag(),
                                     config.delay.delay_selection_thresholds) {
  RTC_DCHECK(data_dumper);
  RTC_DCHECK(down_sampling_factor_ > 0);
}
EchoPathDelayEstimator::~EchoPathDelayEstimator() = default;
// Public reset: always resets the lag aggregator; when
// |reset_delay_confidence| is set the behavior is as if the call restarted.
void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
  Reset(true, reset_delay_confidence);
}
// Estimates the echo path delay for the current capture block. Returns the
// delay in samples (scaled back up by the down-sampling factor), or
// absl::nullopt when no aggregated estimate is available.
absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
    const DownsampledRenderBuffer& render_buffer,
    const std::vector<std::vector<float>>& capture) {
  RTC_DCHECK_EQ(kBlockSize, capture[0].size());
  std::array<float, kBlockSize> downsampled_capture_data;
  rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
                                            sub_block_size_);
  // Downmix the capture channels and decimate to the sub-block rate.
  std::array<float, kBlockSize> downmixed_capture;
  capture_mixer_.ProduceOutput(capture, downmixed_capture);
  capture_decimator_.Decimate(downmixed_capture, downsampled_capture);
  data_dumper_->DumpWav("aec3_capture_decimator_output",
                        downsampled_capture.size(), downsampled_capture.data(),
                        16000 / down_sampling_factor_, 1);
  matched_filter_.Update(render_buffer, downsampled_capture);
  absl::optional<DelayEstimate> aggregated_matched_filter_lag =
      matched_filter_lag_aggregator_.Aggregate(
          matched_filter_.GetLagEstimates());
  // Run clockdrift detection, but only on refined-quality estimates.
  if (aggregated_matched_filter_lag &&
      (*aggregated_matched_filter_lag).quality ==
          DelayEstimate::Quality::kRefined)
    clockdrift_detector_.Update((*aggregated_matched_filter_lag).delay);
  // TODO(peah): Move this logging outside of this class once EchoCanceller3
  // development is done.
  data_dumper_->DumpRaw(
      "aec3_echo_path_delay_estimator_delay",
      aggregated_matched_filter_lag
          ? static_cast<int>(aggregated_matched_filter_lag->delay *
                             down_sampling_factor_)
          : -1);
  // Return the detected delay in samples as the aggregated matched filter lag
  // compensated by the down sampling factor for the signal being correlated.
  if (aggregated_matched_filter_lag) {
    aggregated_matched_filter_lag->delay *= down_sampling_factor_;
  }
  // Track how long the estimate has been unchanged.
  if (old_aggregated_lag_ && aggregated_matched_filter_lag &&
      old_aggregated_lag_->delay == aggregated_matched_filter_lag->delay) {
    ++consistent_estimate_counter_;
  } else {
    consistent_estimate_counter_ = 0;
  }
  old_aggregated_lag_ = aggregated_matched_filter_lag;
  // After half a second of identical estimates, reset the matched filter
  // state (but not the lag aggregator or the delay confidence).
  constexpr size_t kNumBlocksPerSecondBy2 = kNumBlocksPerSecond / 2;
  if (consistent_estimate_counter_ > kNumBlocksPerSecondBy2) {
    Reset(false, false);
  }
  return aggregated_matched_filter_lag;
}
// Internal reset with more granularity: the lag aggregator is only reset
// when requested, while the matched filter and the consistency-tracking
// state are always reset.
void EchoPathDelayEstimator::Reset(bool reset_lag_aggregator,
                                   bool reset_delay_confidence) {
  if (reset_lag_aggregator) {
    matched_filter_lag_aggregator_.Reset(reset_delay_confidence);
  }
  matched_filter_.Reset();
  old_aggregated_lag_ = absl::nullopt;
  consistent_estimate_counter_ = 0;
}
} // namespace webrtc

View File

@ -0,0 +1,79 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
#include <stddef.h>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/alignment_mixer.h"
#include "modules/audio_processing/aec3/clockdrift_detector.h"
#include "modules/audio_processing/aec3/decimator.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/matched_filter.h"
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
class ApmDataDumper;
struct DownsampledRenderBuffer;
struct EchoCanceller3Config;
// Estimates the delay of the echo path.
class EchoPathDelayEstimator {
 public:
  EchoPathDelayEstimator(ApmDataDumper* data_dumper,
                         const EchoCanceller3Config& config,
                         size_t num_capture_channels);
  ~EchoPathDelayEstimator();
  // Resets the estimation. If the delay confidence is reset, the reset behavior
  // is as if the call is restarted.
  void Reset(bool reset_delay_confidence);
  // Produce a delay estimate if such is available.
  absl::optional<DelayEstimate> EstimateDelay(
      const DownsampledRenderBuffer& render_buffer,
      const std::vector<std::vector<float>>& capture);
  // Log delay estimator properties.
  void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
    matched_filter_.LogFilterProperties(sample_rate_hz, shift,
                                        down_sampling_factor_);
  }
  // Returns the level of detected clockdrift.
  ClockdriftDetector::Level Clockdrift() const {
    return clockdrift_detector_.ClockdriftLevel();
  }
 private:
  ApmDataDumper* const data_dumper_;
  const size_t down_sampling_factor_;
  const size_t sub_block_size_;
  AlignmentMixer capture_mixer_;
  Decimator capture_decimator_;
  MatchedFilter matched_filter_;
  MatchedFilterLagAggregator matched_filter_lag_aggregator_;
  // Tracks how long the aggregated lag has been unchanged; see the reset
  // logic in EstimateDelay().
  absl::optional<DelayEstimate> old_aggregated_lag_;
  size_t consistent_estimate_counter_ = 0;
  ClockdriftDetector clockdrift_detector_;
  // Internal reset method with more granularity.
  void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);
  RTC_DISALLOW_COPY_AND_ASSIGN(EchoPathDelayEstimator);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_

View File

@ -0,0 +1,22 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_path_variability.h"
namespace webrtc {
// Trivial member-wise constructor; see the struct declaration for the
// semantics of the individual flags.
EchoPathVariability::EchoPathVariability(bool gain_change,
                                         DelayAdjustment delay_change,
                                         bool clock_drift)
    : gain_change(gain_change),
      delay_change(delay_change),
      clock_drift(clock_drift) {}
} // namespace webrtc

View File

@ -0,0 +1,37 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
namespace webrtc {
// Describes detected changes in the echo path so downstream components can
// adapt or reset their state accordingly.
struct EchoPathVariability {
  enum class DelayAdjustment {
    kNone,
    kBufferFlush,
    kNewDetectedDelay
  };
  EchoPathVariability(bool gain_change,
                      DelayAdjustment delay_change,
                      bool clock_drift);
  // True when the gain changed or a delay adjustment occurred. Note that
  // clock drift alone does not count as an audio path change.
  bool AudioPathChanged() const {
    return gain_change || delay_change != DelayAdjustment::kNone;
  }
  bool gain_change;
  DelayAdjustment delay_change;
  bool clock_drift;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_

View File

@ -0,0 +1,500 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_remover.h"
#include <math.h>
#include <stddef.h>
#include <algorithm>
#include <array>
#include <cmath>
#include <memory>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec3_fft.h"
#include "modules/audio_processing/aec3/aec_state.h"
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
#include "modules/audio_processing/aec3/residual_echo_estimator.h"
#include "modules/audio_processing/aec3/subtractor.h"
#include "modules/audio_processing/aec3/subtractor_output.h"
#include "modules/audio_processing/aec3/suppression_filter.h"
#include "modules/audio_processing/aec3/suppression_gain.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
namespace {
// Maximum number of capture channels for which the per-channel scratch data
// is kept on the stack. Larger channel counts instead use pre-allocated heap
// storage, so the common low-channel configurations avoid heap usage without
// capping the number of supported channels.
constexpr size_t kMaxNumChannelsOnStack = 2;
// Returns how many channels to pre-allocate on the heap: zero when the stack
// storage suffices, otherwise the full capture channel count.
size_t NumChannelsOnHeap(size_t num_capture_channels) {
  if (num_capture_channels <= kMaxNumChannelsOnStack) {
    return 0;
  }
  return num_capture_channels;
}
// Computes per-bin power of the linear echo estimate S = Y - E, where Y is
// the capture spectrum and E is the linear-filter error spectrum.
void LinearEchoPower(const FftData& E,
                     const FftData& Y,
                     std::array<float, kFftLengthBy2Plus1>* S2) {
  for (size_t k = 0; k < E.re.size(); ++k) {
    const float s_re = Y.re[k] - E.re[k];
    const float s_im = Y.im[k] - E.im[k];
    (*S2)[k] = s_re * s_re + s_im * s_im;
  }
}
// Fades between two input signals using a fix-sized transition.
void SignalTransition(rtc::ArrayView<const float> from,
                      rtc::ArrayView<const float> to,
                      rtc::ArrayView<float> out) {
  // NOTE(review): this relies on rtc::ArrayView operator==; presumably it
  // compares identity (data pointer and size), making this a fast path for
  // when |from| and |to| alias the same buffer -- confirm against ArrayView.
  if (from == to) {
    RTC_DCHECK_EQ(to.size(), out.size());
    std::copy(to.begin(), to.end(), out.begin());
  } else {
    // Cross-fade linearly over the first kTransitionSize samples, then copy
    // the remainder of |to| unchanged.
    constexpr size_t kTransitionSize = 30;
    constexpr float kOneByTransitionSizePlusOne = 1.f / (kTransitionSize + 1);
    RTC_DCHECK_EQ(from.size(), to.size());
    RTC_DCHECK_EQ(from.size(), out.size());
    RTC_DCHECK_LE(kTransitionSize, out.size());
    for (size_t k = 0; k < kTransitionSize; ++k) {
      float a = (k + 1) * kOneByTransitionSizePlusOne;
      out[k] = a * to[k] + (1.f - a) * from[k];
    }
    std::copy(to.begin() + kTransitionSize, to.end(),
              out.begin() + kTransitionSize);
  }
}
// Computes a windowed (square root Hanning) padded FFT and updates the related
// memory.
void WindowedPaddedFft(const Aec3Fft& fft,
                       rtc::ArrayView<const float> v,
                       rtc::ArrayView<float> v_old,
                       FftData* V) {
  fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V);
  // Keep a copy of the current input; it is used as the padding of the next
  // call.
  std::copy(v.begin(), v.end(), v_old.begin());
}
// Class for removing the echo from the capture signal.
class EchoRemoverImpl final : public EchoRemover {
 public:
  EchoRemoverImpl(const EchoCanceller3Config& config,
                  int sample_rate_hz,
                  size_t num_render_channels,
                  size_t num_capture_channels);
  ~EchoRemoverImpl() override;
  EchoRemoverImpl(const EchoRemoverImpl&) = delete;
  EchoRemoverImpl& operator=(const EchoRemoverImpl&) = delete;
  // Reports the ERL/ERLE metrics derived from the AEC state.
  void GetMetrics(EchoControl::Metrics* metrics) const override;
  // Removes the echo from a block of samples from the capture signal. The
  // supplied render signal is assumed to be pre-aligned with the capture
  // signal.
  void ProcessCapture(
      EchoPathVariability echo_path_variability,
      bool capture_signal_saturation,
      const absl::optional<DelayEstimate>& external_delay,
      RenderBuffer* render_buffer,
      std::vector<std::vector<std::vector<float>>>* linear_output,
      std::vector<std::vector<std::vector<float>>>* capture) override;
  // Updates the status on whether echo leakage is detected in the output of the
  // echo remover.
  void UpdateEchoLeakageStatus(bool leakage_detected) override {
    echo_leakage_detected_ = leakage_detected;
  }
 private:
  // Selects which of the coarse and refined linear filter outputs that is most
  // appropriate to pass to the suppressor and forms the linear filter output by
  // smoothly transition between those.
  void FormLinearFilterOutput(const SubtractorOutput& subtractor_output,
                              rtc::ArrayView<float> output);
  static int instance_count_;
  const EchoCanceller3Config config_;
  const Aec3Fft fft_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const Aec3Optimization optimization_;
  const int sample_rate_hz_;
  const size_t num_render_channels_;
  const size_t num_capture_channels_;
  const bool use_coarse_filter_output_;
  Subtractor subtractor_;
  SuppressionGain suppression_gain_;
  ComfortNoiseGenerator cng_;
  SuppressionFilter suppression_filter_;
  RenderSignalAnalyzer render_signal_analyzer_;
  ResidualEchoEstimator residual_echo_estimator_;
  bool echo_leakage_detected_ = false;
  AecState aec_state_;
  EchoRemoverMetrics metrics_;
  std::vector<std::array<float, kFftLengthBy2>> e_old_;
  std::vector<std::array<float, kFftLengthBy2>> y_old_;
  size_t block_counter_ = 0;
  int gain_change_hangover_ = 0;
  bool refined_filter_output_last_selected_ = true;
  // Pre-allocated heap scratch storage, sized via NumChannelsOnHeap(); used
  // when the capture channel count exceeds kMaxNumChannelsOnStack.
  std::vector<std::array<float, kFftLengthBy2>> e_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
  std::vector<FftData> Y_heap_;
  std::vector<FftData> E_heap_;
  std::vector<FftData> comfort_noise_heap_;
  std::vector<FftData> high_band_comfort_noise_heap_;
  std::vector<SubtractorOutput> subtractor_output_heap_;
};
// Counts created instances; used to tag the per-instance data dumper output.
int EchoRemoverImpl::instance_count_ = 0;
// Constructs all sub-components and pre-allocates the heap scratch buffers
// (sized via NumChannelsOnHeap) used when the capture channel count exceeds
// kMaxNumChannelsOnStack.
EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
                                 int sample_rate_hz,
                                 size_t num_render_channels,
                                 size_t num_capture_channels)
    : config_(config),
      fft_(),
      data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      optimization_(DetectOptimization()),
      sample_rate_hz_(sample_rate_hz),
      num_render_channels_(num_render_channels),
      num_capture_channels_(num_capture_channels),
      use_coarse_filter_output_(
          config_.filter.enable_coarse_filter_output_usage),
      subtractor_(config,
                  num_render_channels_,
                  num_capture_channels_,
                  data_dumper_.get(),
                  optimization_),
      suppression_gain_(config_,
                        optimization_,
                        sample_rate_hz,
                        num_capture_channels),
      cng_(config_, optimization_, num_capture_channels_),
      suppression_filter_(optimization_,
                          sample_rate_hz_,
                          num_capture_channels_),
      render_signal_analyzer_(config_),
      residual_echo_estimator_(config_, num_render_channels),
      aec_state_(config_, num_capture_channels_),
      e_old_(num_capture_channels_, {0.f}),
      y_old_(num_capture_channels_, {0.f}),
      e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
      Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
      S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
      Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
      E_heap_(NumChannelsOnHeap(num_capture_channels_)),
      comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
      high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
      subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
}
EchoRemoverImpl::~EchoRemoverImpl() = default;
// Reports ERL and ERLE in dB, derived from the AEC state.
void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const {
  // Echo return loss (ERL) is inverted to go from gain to attenuation.
  metrics->echo_return_loss = -10.0 * std::log10(aec_state_.ErlTimeDomain());
  metrics->echo_return_loss_enhancement =
      Log2TodB(aec_state_.FullBandErleLog2());
}
// Removes the echo from one capture block. The render signal in
// |render_buffer| is assumed to be pre-aligned with the capture signal. The
// de-echoed result is written in-place into |capture|; if |linear_output| is
// non-null, the linear filter error signal is also copied into it.
void EchoRemoverImpl::ProcessCapture(
    EchoPathVariability echo_path_variability,
    bool capture_signal_saturation,
    const absl::optional<DelayEstimate>& external_delay,
    RenderBuffer* render_buffer,
    std::vector<std::vector<std::vector<float>>>* linear_output,
    std::vector<std::vector<std::vector<float>>>* capture) {
  ++block_counter_;
  const std::vector<std::vector<std::vector<float>>>& x =
      render_buffer->Block(0);
  std::vector<std::vector<std::vector<float>>>* y = capture;
  RTC_DCHECK(render_buffer);
  RTC_DCHECK(y);
  // Signals are indexed as [band][channel][sample].
  RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_));
  RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_));
  RTC_DCHECK_EQ(x[0].size(), num_render_channels_);
  RTC_DCHECK_EQ((*y)[0].size(), num_capture_channels_);
  RTC_DCHECK_EQ(x[0][0].size(), kBlockSize);
  RTC_DCHECK_EQ((*y)[0][0].size(), kBlockSize);
  // Stack allocated data to use when the number of channels is low.
  std::array<std::array<float, kFftLengthBy2>, kMaxNumChannelsOnStack> e_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      Y2_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      E2_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      R2_stack;
  std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
      S2_linear_stack;
  std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
  std::array<FftData, kMaxNumChannelsOnStack> E_stack;
  std::array<FftData, kMaxNumChannelsOnStack> comfort_noise_stack;
  std::array<FftData, kMaxNumChannelsOnStack> high_band_comfort_noise_stack;
  std::array<SubtractorOutput, kMaxNumChannelsOnStack> subtractor_output_stack;
  // Views initially point at the stack storage; re-pointed to the heap below
  // when the channel count exceeds kMaxNumChannelsOnStack.
  rtc::ArrayView<std::array<float, kFftLengthBy2>> e(e_stack.data(),
                                                     num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2(
      Y2_stack.data(), num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2(
      E2_stack.data(), num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
      R2_stack.data(), num_capture_channels_);
  rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
      S2_linear_stack.data(), num_capture_channels_);
  rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
  rtc::ArrayView<FftData> E(E_stack.data(), num_capture_channels_);
  rtc::ArrayView<FftData> comfort_noise(comfort_noise_stack.data(),
                                        num_capture_channels_);
  rtc::ArrayView<FftData> high_band_comfort_noise(
      high_band_comfort_noise_stack.data(), num_capture_channels_);
  rtc::ArrayView<SubtractorOutput> subtractor_output(
      subtractor_output_stack.data(), num_capture_channels_);
  if (NumChannelsOnHeap(num_capture_channels_) > 0) {
    // If the stack-allocated space is too small, use the heap for storing the
    // microphone data.
    e = rtc::ArrayView<std::array<float, kFftLengthBy2>>(e_heap_.data(),
                                                         num_capture_channels_);
    Y2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        Y2_heap_.data(), num_capture_channels_);
    E2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        E2_heap_.data(), num_capture_channels_);
    R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        R2_heap_.data(), num_capture_channels_);
    S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
        S2_linear_heap_.data(), num_capture_channels_);
    Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_);
    E = rtc::ArrayView<FftData>(E_heap_.data(), num_capture_channels_);
    comfort_noise = rtc::ArrayView<FftData>(comfort_noise_heap_.data(),
                                            num_capture_channels_);
    high_band_comfort_noise = rtc::ArrayView<FftData>(
        high_band_comfort_noise_heap_.data(), num_capture_channels_);
    subtractor_output = rtc::ArrayView<SubtractorOutput>(
        subtractor_output_heap_.data(), num_capture_channels_);
  }
  data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize,
                        &(*y)[0][0][0], 16000, 1);
  data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize,
                        &x[0][0][0], 16000, 1);
  data_dumper_->DumpRaw("aec3_echo_remover_capture_input", (*y)[0][0]);
  data_dumper_->DumpRaw("aec3_echo_remover_render_input", x[0][0]);
  aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
  if (echo_path_variability.AudioPathChanged()) {
    // Ensure that the gain change is only acted on once per frame.
    if (echo_path_variability.gain_change) {
      if (gain_change_hangover_ == 0) {
        constexpr int kMaxBlocksPerFrame = 3;
        gain_change_hangover_ = kMaxBlocksPerFrame;
        rtc::LoggingSeverity log_level =
            config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
                                                       : rtc::LS_VERBOSE;
        RTC_LOG_V(log_level)
            << "Gain change detected at block " << block_counter_;
      } else {
        echo_path_variability.gain_change = false;
      }
    }
    subtractor_.HandleEchoPathChange(echo_path_variability);
    aec_state_.HandleEchoPathChange(echo_path_variability);
    if (echo_path_variability.delay_change !=
        EchoPathVariability::DelayAdjustment::kNone) {
      suppression_gain_.SetInitialState(true);
    }
  }
  if (gain_change_hangover_ > 0) {
    --gain_change_hangover_;
  }
  // Analyze the render signal.
  render_signal_analyzer_.Update(*render_buffer,
                                 aec_state_.MinDirectPathFilterDelay());
  // State transition.
  if (aec_state_.TransitionTriggered()) {
    subtractor_.ExitInitialState();
    suppression_gain_.SetInitialState(false);
  }
  // Perform linear echo cancellation.
  subtractor_.Process(*render_buffer, (*y)[0], render_signal_analyzer_,
                      aec_state_, subtractor_output);
  // Compute spectra.
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    FormLinearFilterOutput(subtractor_output[ch], e[ch]);
    WindowedPaddedFft(fft_, (*y)[0][ch], y_old_[ch], &Y[ch]);
    WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]);
    LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]);
    Y[ch].Spectrum(optimization_, Y2[ch]);
    E[ch].Spectrum(optimization_, E2[ch]);
  }
  // Optionally return the linear filter output.
  if (linear_output) {
    RTC_DCHECK_GE(1, linear_output->size());
    // NOTE(review): |linear_output[0]| dereferences the pointer (it is the
    // same as |*linear_output|), so this compares the number of bands rather
    // than the number of channels; (*linear_output)[0].size() was likely
    // intended. Confirm against upstream WebRTC before changing (DCHECK-only).
    RTC_DCHECK_EQ(num_capture_channels_, linear_output[0].size());
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      RTC_DCHECK_EQ(kBlockSize, (*linear_output)[0][ch].size());
      std::copy(e[ch].begin(), e[ch].end(), (*linear_output)[0][ch].begin());
    }
  }
  // Update the AEC state information.
  aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponses(),
                    subtractor_.FilterImpulseResponses(), *render_buffer, E2,
                    Y2, subtractor_output);
  // Choose the linear output.
  const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;
  data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &(*y)[0][0][0], 16000,
                        1);
  data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);
  // Estimate the residual echo power.
  residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
                                    R2);
  // Estimate the comfort noise.
  cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
               high_band_comfort_noise);
  // Suppressor nearend estimate.
  if (aec_state_.UsableLinearEstimate()) {
    // E2 is bound by Y2.
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
                     E2[ch].begin(),
                     [](float a, float b) { return std::min(a, b); });
    }
  }
  const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
  // Suppressor echo estimate.
  const auto& echo_spectrum =
      aec_state_.UsableLinearEstimate() ? S2_linear : R2;
  // Compute preferred gains.
  float high_bands_gain;
  std::array<float, kFftLengthBy2Plus1> G;
  suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2,
                            cng_.NoiseSpectrum(), render_signal_analyzer_,
                            aec_state_, x, &high_bands_gain, &G);
  suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
                                high_bands_gain, Y_fft, y);
  // Update the metrics.
  metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G);
  // Debug outputs for the purpose of development and analysis.
  data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
                        &subtractor_output[0].s_refined[0], 16000, 1);
  data_dumper_->DumpRaw("aec3_output", (*y)[0][0]);
  data_dumper_->DumpRaw("aec3_narrow_render",
                        render_signal_analyzer_.NarrowPeakBand() ? 1 : 0);
  data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]);
  data_dumper_->DumpRaw("aec3_suppressor_gain", G);
  data_dumper_->DumpWav("aec3_output",
                        rtc::ArrayView<const float>(&(*y)[0][0][0], kBlockSize),
                        16000, 1);
  data_dumper_->DumpRaw("aec3_using_subtractor_output[0]",
                        aec_state_.UseLinearFilterOutput() ? 1 : 0);
  data_dumper_->DumpRaw("aec3_E2", E2[0]);
  data_dumper_->DumpRaw("aec3_S2_linear", S2_linear[0]);
  data_dumper_->DumpRaw("aec3_Y2", Y2[0]);
  data_dumper_->DumpRaw(
      "aec3_X2", render_buffer->Spectrum(
                     aec_state_.MinDirectPathFilterDelay())[/*channel=*/0]);
  data_dumper_->DumpRaw("aec3_R2", R2[0]);
  data_dumper_->DumpRaw("aec3_filter_delay",
                        aec_state_.MinDirectPathFilterDelay());
  data_dumper_->DumpRaw("aec3_capture_saturation",
                        aec_state_.SaturatedCapture() ? 1 : 0);
}
void EchoRemoverImpl::FormLinearFilterOutput(
const SubtractorOutput& subtractor_output,
rtc::ArrayView<float> output) {
RTC_DCHECK_EQ(subtractor_output.e_refined.size(), output.size());
RTC_DCHECK_EQ(subtractor_output.e_coarse.size(), output.size());
bool use_refined_output = true;
if (use_coarse_filter_output_) {
// As the output of the refined adaptive filter generally should be better
// than the coarse filter output, add a margin and threshold for when
// choosing the coarse filter output.
if (subtractor_output.e2_coarse < 0.9f * subtractor_output.e2_refined &&
subtractor_output.y2 > 30.f * 30.f * kBlockSize &&
(subtractor_output.s2_refined > 60.f * 60.f * kBlockSize ||
subtractor_output.s2_coarse > 60.f * 60.f * kBlockSize)) {
use_refined_output = false;
} else {
// If the refined filter is diverged, choose the filter output that has
// the lowest power.
if (subtractor_output.e2_coarse < subtractor_output.e2_refined &&
subtractor_output.y2 < subtractor_output.e2_refined) {
use_refined_output = false;
}
}
}
SignalTransition(refined_filter_output_last_selected_
? subtractor_output.e_refined
: subtractor_output.e_coarse,
use_refined_output ? subtractor_output.e_refined
: subtractor_output.e_coarse,
output);
refined_filter_output_last_selected_ = use_refined_output;
}
} // namespace
// Factory for the echo remover; ownership of the returned object passes to
// the caller.
EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config,
                                 int sample_rate_hz,
                                 size_t num_render_channels,
                                 size_t num_capture_channels) {
  auto* remover = new EchoRemoverImpl(config, sample_rate_hz,
                                      num_render_channels,
                                      num_capture_channels);
  return remover;
}
} // namespace webrtc

View File

@ -0,0 +1,55 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
#include <vector>
#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
#include "api/audio/echo_control.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/render_buffer.h"
namespace webrtc {
// Class for removing the echo from the capture signal.
class EchoRemover {
 public:
  // Creates an echo remover for the given sample rate and channel counts.
  // The caller takes ownership of the returned object.
  static EchoRemover* Create(const EchoCanceller3Config& config,
                             int sample_rate_hz,
                             size_t num_render_channels,
                             size_t num_capture_channels);
  virtual ~EchoRemover() = default;
  // Get current metrics.
  virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;
  // Removes the echo from a block of samples from the capture signal. The
  // supplied render signal is assumed to be pre-aligned with the capture
  // signal. The result is written in-place into |capture|; when
  // |linear_output| is non-null the linear filter output is returned in it.
  virtual void ProcessCapture(
      EchoPathVariability echo_path_variability,
      bool capture_signal_saturation,
      const absl::optional<DelayEstimate>& external_delay,
      RenderBuffer* render_buffer,
      std::vector<std::vector<std::vector<float>>>* linear_output,
      std::vector<std::vector<std::vector<float>>>* capture) = 0;
  // Updates the status on whether echo leakage is detected in the output of the
  // echo remover.
  virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_

View File

@ -0,0 +1,246 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
#include <math.h>
#include <stddef.h>
#include <algorithm>
#include <cmath>
#include <numeric>
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
#include "system_wrappers/include/metrics.h"
namespace webrtc {
namespace {
constexpr float kOneByMetricsCollectionBlocks = 1.f / kMetricsCollectionBlocks;
} // namespace
// Default-constructs a metric with zeroed sum, floor and ceiling.
EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {}
// Constructs a metric with explicit initial aggregate values.
EchoRemoverMetrics::DbMetric::DbMetric(float sum_value,
                                       float floor_value,
                                       float ceil_value)
    : sum_value(sum_value), floor_value(floor_value), ceil_value(ceil_value) {}
// Accumulates |value| into the running sum and widens the observed
// floor/ceiling envelope accordingly.
void EchoRemoverMetrics::DbMetric::Update(float value) {
  sum_value += value;
  if (value < floor_value) {
    floor_value = value;
  }
  if (value > ceil_value) {
    ceil_value = value;
  }
}
// Tracks the instantaneous value: the stored sum is replaced (not
// accumulated), while the floor/ceiling envelope is still widened.
void EchoRemoverMetrics::DbMetric::UpdateInstant(float value) {
  sum_value = value;
  if (value < floor_value) {
    floor_value = value;
  }
  if (value > ceil_value) {
    ceil_value = value;
  }
}
// All metrics start from their reset state.
EchoRemoverMetrics::EchoRemoverMetrics() {
  ResetMetrics();
}
void EchoRemoverMetrics::ResetMetrics() {
erl_.fill(DbMetric(0.f, 10000.f, 0.000f));
erl_time_domain_ = DbMetric(0.f, 10000.f, 0.000f);
erle_.fill(DbMetric(0.f, 0.f, 1000.f));
erle_time_domain_ = DbMetric(0.f, 0.f, 1000.f);
active_render_count_ = 0;
saturated_capture_ = false;
}
// Collects ERL/ERLE data for kMetricsCollectionBlocks blocks, then reports
// the aggregates as UMA histograms, staged over the following blocks to
// spread the cost of the logarithms involved.
// NOTE(review): |comfort_noise_spectrum| and |suppressor_gain| are not used
// by the current implementation.
void EchoRemoverMetrics::Update(
    const AecState& aec_state,
    const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
    const std::array<float, kFftLengthBy2Plus1>& suppressor_gain) {
  metrics_reported_ = false;
  if (++block_counter_ <= kMetricsCollectionBlocks) {
    // Collection phase: accumulate banded and time-domain ERL/ERLE data.
    aec3::UpdateDbMetric(aec_state.Erl(), &erl_);
    erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain());
    aec3::UpdateDbMetric(aec_state.Erle()[0], &erle_);
    erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2());
    active_render_count_ += (aec_state.ActiveRender() ? 1 : 0);
    saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture();
  } else {
    // Report the metrics over several frames in order to lower the impact of
    // the logarithms involved on the computational complexity.
    constexpr int kMetricsCollectionBlocksBy2 = kMetricsCollectionBlocks / 2;
    switch (block_counter_) {
      case kMetricsCollectionBlocks + 1:
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand0.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f,
                                                kOneByMetricsCollectionBlocks,
                                                erle_[0].sum_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand0.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[0].ceil_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand0.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[0].floor_value),
            0, 19, 20);
        break;
      case kMetricsCollectionBlocks + 2:
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand1.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f,
                                                kOneByMetricsCollectionBlocks,
                                                erle_[1].sum_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand1.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[1].ceil_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand1.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[1].floor_value),
            0, 19, 20);
        break;
      case kMetricsCollectionBlocks + 3:
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand0.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f,
                                                kOneByMetricsCollectionBlocks,
                                                erl_[0].sum_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand0.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[0].ceil_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand0.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[0].floor_value),
            0, 59, 30);
        break;
      case kMetricsCollectionBlocks + 4:
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand1.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f,
                                                kOneByMetricsCollectionBlocks,
                                                erl_[1].sum_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand1.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[1].ceil_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand1.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[1].floor_value),
            0, 59, 30);
        break;
      case kMetricsCollectionBlocks + 5:
        RTC_HISTOGRAM_BOOLEAN(
            "WebRTC.Audio.EchoCanceller.UsableLinearEstimate",
            static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0));
        RTC_HISTOGRAM_BOOLEAN(
            "WebRTC.Audio.EchoCanceller.ActiveRender",
            static_cast<int>(
                active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0));
        RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay",
                                    aec_state.MinDirectPathFilterDelay(), 0, 30,
                                    31);
        RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation",
                              static_cast<int>(saturated_capture_ ? 1 : 0));
        break;
      case kMetricsCollectionBlocks + 6:
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erl.Value",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_time_domain_.sum_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erl.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_time_domain_.ceil_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erl.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_time_domain_.floor_value),
            0, 59, 30);
        break;
      case kMetricsCollectionBlocks + 7:
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erle.Value",
            aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
                                                erle_time_domain_.sum_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erle.Max",
            aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
                                                erle_time_domain_.ceil_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erle.Min",
            aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
                                                erle_time_domain_.floor_value),
            0, 19, 20);
        // Last reporting stage: restart the collection cycle.
        metrics_reported_ = true;
        RTC_DCHECK_EQ(kMetricsReportingIntervalBlocks, block_counter_);
        block_counter_ = 0;
        ResetMetrics();
        break;
      default:
        RTC_NOTREACHED();
        break;
    }
  }
}
namespace aec3 {
// Updates a two-band DbMetric statistic from a full spectrum by averaging
// the lower and the upper half of the band values. Uses the named constant
// kFftLengthBy2Plus1 instead of the duplicated magic number 65 so the code
// stays consistent with the array type if the FFT size ever changes.
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
                    std::array<EchoRemoverMetrics::DbMetric, 2>* statistic) {
  RTC_DCHECK(statistic);
  // Truncation is intended in the band width computation.
  constexpr int kNumBands = 2;
  constexpr int kBandWidth = kFftLengthBy2Plus1 / kNumBands;
  constexpr float kOneByBandWidth = 1.f / kBandWidth;
  RTC_DCHECK_EQ(kNumBands, statistic->size());
  RTC_DCHECK_EQ(kFftLengthBy2Plus1, value.size());
  for (size_t k = 0; k < statistic->size(); ++k) {
    // Mean of the band's bins; the trailing bin (index 64) is dropped by the
    // truncated band width, matching the original behavior.
    float average_band =
        std::accumulate(value.begin() + kBandWidth * k,
                        value.begin() + kBandWidth * (k + 1), 0.f) *
        kOneByBandWidth;
    (*statistic)[k].Update(average_band);
  }
}
// Converts a linear-domain metric value into a clamped integer dB value for
// histogram reporting. |scaling| is applied before the conversion (e.g. to
// form an average), |offset| is added afterwards, and |negate| flips the
// sign of the dB value.
int TransformDbMetricForReporting(bool negate,
                                  float min_value,
                                  float max_value,
                                  float offset,
                                  float scaling,
                                  float value) {
  // The epsilon avoids log10(0).
  constexpr float kEpsilon = 1e-10f;
  float db = 10.f * std::log10(value * scaling + kEpsilon) + offset;
  if (negate) {
    db = -db;
  }
  return static_cast<int>(rtc::SafeClamp(db, min_value, max_value));
}
} // namespace aec3
} // namespace webrtc

View File

@ -0,0 +1,81 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
#include <array>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec_state.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
// Handles the reporting of metrics for the echo remover.
class EchoRemoverMetrics {
 public:
  // Aggregate for a dB-domain metric: a running sum together with the
  // observed floor and ceiling values.
  struct DbMetric {
    DbMetric();
    DbMetric(float sum_value, float floor_value, float ceil_value);
    // Accumulates |value| into the sum and updates the extremes.
    void Update(float value);
    // Stores |value| as the instantaneous sum and updates the extremes.
    void UpdateInstant(float value);
    float sum_value;
    float floor_value;
    float ceil_value;
  };
  EchoRemoverMetrics();
  // Updates the metric with new data.
  void Update(
      const AecState& aec_state,
      const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
      const std::array<float, kFftLengthBy2Plus1>& suppressor_gain);
  // Returns true if the metrics have just been reported, otherwise false.
  bool MetricsReported() { return metrics_reported_; }
 private:
  // Resets the metrics.
  void ResetMetrics();
  // Number of blocks processed in the current collection/reporting cycle.
  int block_counter_ = 0;
  // Banded (2-band) and time-domain ERL/ERLE aggregates.
  std::array<DbMetric, 2> erl_;
  DbMetric erl_time_domain_;
  std::array<DbMetric, 2> erle_;
  DbMetric erle_time_domain_;
  int active_render_count_ = 0;
  bool saturated_capture_ = false;
  bool metrics_reported_ = false;
  RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverMetrics);
};
namespace aec3 {
// Updates a banded metric of type DbMetric with the values in the supplied
// array.
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
std::array<EchoRemoverMetrics::DbMetric, 2>* statistic);
// Transforms a DbMetric from the linear domain into the logarithmic domain.
int TransformDbMetricForReporting(bool negate,
float min_value,
float max_value,
float offset,
float scaling,
float value);
} // namespace aec3
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_

View File

@ -0,0 +1,146 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/erl_estimator.h"
#include <algorithm>
#include <numeric>
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
constexpr float kMinErl = 0.01f;
constexpr float kMaxErl = 1000.f;
} // namespace
// Starts from the most conservative state: maximum ERL in every band and in
// the fullband estimate, with no hold time accumulated.
ErlEstimator::ErlEstimator(size_t startup_phase_length_blocks_)
    : startup_phase_length_blocks__(startup_phase_length_blocks_),
      erl_time_domain_(kMaxErl),
      hold_counter_time_domain_(0) {
  erl_.fill(kMaxErl);
  hold_counters_.fill(0);
}
ErlEstimator::~ErlEstimator() = default;
// Restarts the startup phase; the ERL estimates themselves are kept.
void ErlEstimator::Reset() {
  blocks_since_reset_ = 0;
}
// Updates the per-band and fullband ERL estimates using the maximum spectrum
// across the capture channels with converged filters and the maximum across
// all render channels. No update is made during the startup phase or while
// no filter has converged.
void ErlEstimator::Update(
    const std::vector<bool>& converged_filters,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> render_spectra,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        capture_spectra) {
  const size_t num_capture_channels = converged_filters.size();
  RTC_DCHECK_EQ(capture_spectra.size(), num_capture_channels);
  // Corresponds to WGN of power -46 dBFS.
  constexpr float kX2Min = 44015068.0f;
  const auto first_converged_iter =
      std::find(converged_filters.begin(), converged_filters.end(), true);
  const bool any_filter_converged =
      first_converged_iter != converged_filters.end();
  if (++blocks_since_reset_ < startup_phase_length_blocks__ ||
      !any_filter_converged) {
    return;
  }
  // Use the maximum spectrum across capture and the maximum across render.
  std::array<float, kFftLengthBy2Plus1> max_capture_spectrum_data;
  std::array<float, kFftLengthBy2Plus1> max_capture_spectrum =
      capture_spectra[/*channel=*/0];
  if (num_capture_channels > 1) {
    // Initialize using the first channel with a converged filter.
    const size_t first_converged =
        std::distance(converged_filters.begin(), first_converged_iter);
    RTC_DCHECK_GE(first_converged, 0);
    RTC_DCHECK_LT(first_converged, num_capture_channels);
    max_capture_spectrum_data = capture_spectra[first_converged];
    for (size_t ch = first_converged + 1; ch < num_capture_channels; ++ch) {
      if (!converged_filters[ch]) {
        continue;
      }
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        max_capture_spectrum_data[k] =
            std::max(max_capture_spectrum_data[k], capture_spectra[ch][k]);
      }
    }
    max_capture_spectrum = max_capture_spectrum_data;
  }
  const size_t num_render_channels = render_spectra.size();
  std::array<float, kFftLengthBy2Plus1> max_render_spectrum_data;
  rtc::ArrayView<const float, kFftLengthBy2Plus1> max_render_spectrum =
      render_spectra[/*channel=*/0];
  if (num_render_channels > 1) {
    std::copy(render_spectra[0].begin(), render_spectra[0].end(),
              max_render_spectrum_data.begin());
    for (size_t ch = 1; ch < num_render_channels; ++ch) {
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        max_render_spectrum_data[k] =
            std::max(max_render_spectrum_data[k], render_spectra[ch][k]);
      }
    }
    max_render_spectrum = max_render_spectrum_data;
  }
  const auto& X2 = max_render_spectrum;
  const auto& Y2 = max_capture_spectrum;
  // Update the estimates in a maximum statistics manner.
  for (size_t k = 1; k < kFftLengthBy2; ++k) {
    if (X2[k] > kX2Min) {
      const float new_erl = Y2[k] / X2[k];
      if (new_erl < erl_[k]) {
        // A lower ERL was observed: track it and arm the hold counter that
        // delays the upward relaxation below.
        hold_counters_[k - 1] = 1000;
        erl_[k] += 0.1f * (new_erl - erl_[k]);
        erl_[k] = std::max(erl_[k], kMinErl);
      }
    }
  }
  // When a band's hold counter expires, relax its estimate upwards by
  // doubling, capped at kMaxErl.
  std::for_each(hold_counters_.begin(), hold_counters_.end(),
                [](int& a) { --a; });
  std::transform(hold_counters_.begin(), hold_counters_.end(), erl_.begin() + 1,
                 erl_.begin() + 1, [](int a, float b) {
                   return a > 0 ? b : std::min(kMaxErl, 2.f * b);
                 });
  // Extend the estimates to the edge bins.
  erl_[0] = erl_[1];
  erl_[kFftLengthBy2] = erl_[kFftLengthBy2 - 1];
  // Compute ERL over all frequency bins.
  const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);
  if (X2_sum > kX2Min * X2.size()) {
    const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f);
    const float new_erl = Y2_sum / X2_sum;
    if (new_erl < erl_time_domain_) {
      hold_counter_time_domain_ = 1000;
      erl_time_domain_ += 0.1f * (new_erl - erl_time_domain_);
      erl_time_domain_ = std::max(erl_time_domain_, kMinErl);
    }
  }
  --hold_counter_time_domain_;
  erl_time_domain_ = (hold_counter_time_domain_ > 0)
                         ? erl_time_domain_
                         : std::min(kMaxErl, 2.f * erl_time_domain_);
}
} // namespace webrtc

View File

@ -0,0 +1,57 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
#include <stddef.h>
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
// Estimates the echo return loss based on the signal spectra.
class ErlEstimator {
 public:
  // |startup_phase_length_blocks_| is the number of blocks after a reset
  // during which Update() performs no estimation.
  explicit ErlEstimator(size_t startup_phase_length_blocks_);
  ~ErlEstimator();
  // Resets the ERL estimation.
  void Reset();
  // Updates the ERL estimate.
  void Update(const std::vector<bool>& converged_filters,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  render_spectra,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  capture_spectra);
  // Returns the most recent ERL estimate.
  const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; }
  float ErlTimeDomain() const { return erl_time_domain_; }
 private:
  // NOTE(review): identifiers containing a double underscore (as in the
  // member below) are reserved in C++; renaming it would need a coordinated
  // change here and in the .cc file.
  const size_t startup_phase_length_blocks__;
  std::array<float, kFftLengthBy2Plus1> erl_;
  // Per-bin counters delaying the upward relaxation of erl_.
  std::array<int, kFftLengthBy2Minus1> hold_counters_;
  float erl_time_domain_;
  int hold_counter_time_domain_;
  size_t blocks_since_reset_ = 0;
  RTC_DISALLOW_COPY_AND_ASSIGN(ErlEstimator);
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/erle_estimator.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Composes the fullband, subband and (optionally) signal-dependent ERLE
// estimators. The signal-dependent estimator is only instantiated when the
// configuration specifies more than one filter section.
ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks,
                             const EchoCanceller3Config& config,
                             size_t num_capture_channels)
    : startup_phase_length_blocks_(startup_phase_length_blocks),
      fullband_erle_estimator_(config.erle, num_capture_channels),
      subband_erle_estimator_(config, num_capture_channels) {
  if (config.erle.num_sections > 1) {
    signal_dependent_erle_estimator_ =
        std::make_unique<SignalDependentErleEstimator>(config,
                                                       num_capture_channels);
  }
  // Start in the fully-reset state, including the startup-phase counter.
  Reset(true);
}
ErleEstimator::~ErleEstimator() = default;
// Resets all sub-estimators; when |delay_change| is set, the startup-phase
// block counter is restarted as well.
void ErleEstimator::Reset(bool delay_change) {
  if (delay_change) {
    blocks_since_reset_ = 0;
  }
  fullband_erle_estimator_.Reset();
  subband_erle_estimator_.Reset();
  if (signal_dependent_erle_estimator_) {
    signal_dependent_erle_estimator_->Reset();
  }
}
// Feeds new spectra to all sub-estimators. No update is performed until the
// startup phase (counted in blocks since the last delay-change reset) has
// passed.
void ErleEstimator::Update(
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
        filter_frequency_responses,
    rtc::ArrayView<const float, kFftLengthBy2Plus1>
        avg_render_spectrum_with_reverb,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> capture_spectra,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        subtractor_spectra,
    const std::vector<bool>& converged_filters) {
  RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(), capture_spectra.size());
  RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(),
                subtractor_spectra.size());
  const auto& X2_reverb = avg_render_spectrum_with_reverb;
  const auto& Y2 = capture_spectra;
  const auto& E2 = subtractor_spectra;
  if (++blocks_since_reset_ < startup_phase_length_blocks_) {
    return;
  }
  subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
  if (signal_dependent_erle_estimator_) {
    // The signal-dependent estimator refines the subband estimates computed
    // above.
    signal_dependent_erle_estimator_->Update(
        render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
        subband_erle_estimator_.Erle(), converged_filters);
  }
  fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
}
// Dumps the internal state of all sub-estimators for debugging/analysis.
void ErleEstimator::Dump(
    const std::unique_ptr<ApmDataDumper>& data_dumper) const {
  fullband_erle_estimator_.Dump(data_dumper);
  subband_erle_estimator_.Dump(data_dumper);
  if (signal_dependent_erle_estimator_) {
    signal_dependent_erle_estimator_->Dump(data_dumper);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,99 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
#include <stddef.h>
#include <array>
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/fullband_erle_estimator.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h"
#include "modules/audio_processing/aec3/subband_erle_estimator.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
namespace webrtc {
// Estimates the echo return loss enhancement. One estimate is done per subband
// and another one is done using the aggregation of energy over all the subbands.
class ErleEstimator {
 public:
  // |startup_phase_length_blocks| is the number of blocks after a reset
  // during which Update() leaves the estimates untouched.
  ErleEstimator(size_t startup_phase_length_blocks,
                const EchoCanceller3Config& config,
                size_t num_capture_channels);
  ~ErleEstimator();

  // Resets the fullband ERLE estimator and the subbands ERLE estimators.
  // When |delay_change| is true, the startup-phase block counter is also
  // restarted.
  void Reset(bool delay_change);

  // Updates the ERLE estimates.
  void Update(
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          filter_frequency_responses,
      rtc::ArrayView<const float, kFftLengthBy2Plus1>
          avg_render_spectrum_with_reverb,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          capture_spectra,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          subtractor_spectra,
      const std::vector<bool>& converged_filters);

  // Returns the most recent subband ERLE estimates. The signal-dependent
  // estimates are preferred when that estimator has been instantiated.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
    return signal_dependent_erle_estimator_
               ? signal_dependent_erle_estimator_->Erle()
               : subband_erle_estimator_.Erle();
  }

  // Returns the subband ERLE that are estimated during onsets (only used for
  // testing).
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleOnsets()
      const {
    return subband_erle_estimator_.ErleOnsets();
  }

  // Returns the fullband ERLE estimate.
  float FullbandErleLog2() const {
    return fullband_erle_estimator_.FullbandErleLog2();
  }

  // Returns an estimation of the current linear filter quality based on the
  // current and past fullband ERLE estimates. The returned value is a float
  // vector with content between 0 and 1 where 1 indicates that, at this current
  // time instant, the linear filter is reaching its maximum subtraction
  // performance.
  rtc::ArrayView<const absl::optional<float>> GetInstLinearQualityEstimates()
      const {
    return fullband_erle_estimator_.GetInstLinearQualityEstimates();
  }

  void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;

 private:
  const size_t startup_phase_length_blocks_;
  FullBandErleEstimator fullband_erle_estimator_;
  SubbandErleEstimator subband_erle_estimator_;
  // Only instantiated when the signal-dependent refinement is enabled.
  std::unique_ptr<SignalDependentErleEstimator>
      signal_dependent_erle_estimator_;
  // Blocks elapsed since the last delay-change reset; gates startup phase.
  size_t blocks_since_reset_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_

View File

@ -0,0 +1,27 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/fft_buffer.h"
namespace webrtc {
// Allocates a circular buffer of |size| slots, each holding one FftData per
// channel, and zero-initializes every FFT in the buffer.
FftBuffer::FftBuffer(size_t size, size_t num_channels)
    : size(static_cast<int>(size)),
      buffer(size, std::vector<FftData>(num_channels)) {
  for (size_t slot = 0; slot < buffer.size(); ++slot) {
    for (size_t ch = 0; ch < buffer[slot].size(); ++ch) {
      buffer[slot][ch].Clear();
    }
  }
}

FftBuffer::~FftBuffer() = default;
} // namespace webrtc

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "modules/audio_processing/aec3/fft_data.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Struct for bundling a circular buffer of FftData objects together with the
// read and write indices.
struct FftBuffer {
  // Allocates |size| slots with one FftData per channel; all FFT data is
  // cleared on construction (see fft_buffer.cc).
  FftBuffer(size_t size, size_t num_channels);
  ~FftBuffer();

  // Returns the index following |index|, wrapping at the buffer size.
  int IncIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index < size - 1 ? index + 1 : 0;
  }

  // Returns the index preceding |index|, wrapping at the buffer size.
  int DecIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index > 0 ? index - 1 : size - 1;
  }

  // Returns |index| shifted by |offset| (may be negative), wrapped into
  // [0, size). |size| is added before the modulo to keep the operand
  // non-negative for offsets down to -size.
  int OffsetIndex(int index, int offset) const {
    RTC_DCHECK_GE(buffer.size(), offset);
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return (size + index + offset) % size;
  }

  // Convenience wrappers that move the write/read indices by an offset or by
  // a single step in either direction.
  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
  void IncWriteIndex() { write = IncIndex(write); }
  void DecWriteIndex() { write = DecIndex(write); }
  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
  void IncReadIndex() { read = IncIndex(read); }
  void DecReadIndex() { read = DecIndex(read); }

  const int size;
  // buffer[slot][channel]: one FFT per channel for each circular-buffer slot.
  std::vector<std::vector<FftData>> buffer;
  int write = 0;  // Slot for the next write.
  int read = 0;   // Slot for the next read.
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_

View File

@ -0,0 +1,104 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <array>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
// Struct that holds imaginary data produced from 128 point real-valued FFTs.
struct FftData {
// Copies the data in src.
void Assign(const FftData& src) {
std::copy(src.re.begin(), src.re.end(), re.begin());
std::copy(src.im.begin(), src.im.end(), im.begin());
im[0] = im[kFftLengthBy2] = 0;
}
// Clears all the imaginary.
void Clear() {
re.fill(0.f);
im.fill(0.f);
}
// Computes the power spectrum of the data.
void SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const;
// Computes the power spectrum of the data.
void Spectrum(Aec3Optimization optimization,
rtc::ArrayView<float> power_spectrum) const {
RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
switch (optimization) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
case Aec3Optimization::kSse2: {
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
constexpr int kLimit = kNumFourBinBands * 4;
for (size_t k = 0; k < kLimit; k += 4) {
const __m128 r = _mm_loadu_ps(&re[k]);
const __m128 i = _mm_loadu_ps(&im[k]);
const __m128 ii = _mm_mul_ps(i, i);
const __m128 rr = _mm_mul_ps(r, r);
const __m128 rrii = _mm_add_ps(rr, ii);
_mm_storeu_ps(&power_spectrum[k], rrii);
}
power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
im[kFftLengthBy2] * im[kFftLengthBy2];
} break;
case Aec3Optimization::kAvx2:
SpectrumAVX2(power_spectrum);
break;
#endif
default:
std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(),
[](float a, float b) { return a * a + b * b; });
}
}
// Copy the data from an interleaved array.
void CopyFromPackedArray(const std::array<float, kFftLength>& v) {
re[0] = v[0];
re[kFftLengthBy2] = v[1];
im[0] = im[kFftLengthBy2] = 0;
for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
re[k] = v[j++];
im[k] = v[j++];
}
}
// Copies the data into an interleaved array.
void CopyToPackedArray(std::array<float, kFftLength>* v) const {
RTC_DCHECK(v);
(*v)[0] = re[0];
(*v)[1] = re[kFftLengthBy2];
for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
(*v)[j++] = re[k];
(*v)[j++] = im[k];
}
}
std::array<float, kFftLengthBy2Plus1> re;
std::array<float, kFftLengthBy2Plus1> im;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_

View File

@ -0,0 +1,33 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/fft_data.h"
#include <immintrin.h>
#include "api/array_view.h"
namespace webrtc {
// Computes the power spectrum of the data.
// Computes the power spectrum of the data.
void FftData::SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const {
  RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
  // Process eight bins per iteration: compute i*i, then fuse in r*r with a
  // single fused multiply-add.
  for (size_t k = 0; k < kFftLengthBy2; k += 8) {
    __m256 r = _mm256_loadu_ps(&re[k]);
    __m256 i = _mm256_loadu_ps(&im[k]);
    __m256 ii = _mm256_mul_ps(i, i);
    ii = _mm256_fmadd_ps(r, r, ii);
    _mm256_storeu_ps(&power_spectrum[k], ii);
  }
  // The Nyquist bin lies outside the vectorized range and is computed
  // separately.
  power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
                                  im[kFftLengthBy2] * im[kFftLengthBy2];
}
} // namespace webrtc

View File

@ -0,0 +1,280 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/filter_analyzer.h"
#include <math.h>
#include <algorithm>
#include <array>
#include <numeric>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Returns the index of the largest-magnitude-squared sample within
// [start_sample, end_sample], seeded with the previous peak so it is only
// replaced when a strictly larger value is found.
size_t FindPeakIndex(rtc::ArrayView<const float> filter_time_domain,
                     size_t peak_index_in,
                     size_t start_sample,
                     size_t end_sample) {
  size_t best_index = peak_index_in;
  float best_h2 =
      filter_time_domain[best_index] * filter_time_domain[best_index];
  for (size_t i = start_sample; i <= end_sample; ++i) {
    const float h2 = filter_time_domain[i] * filter_time_domain[i];
    if (h2 > best_h2) {
      best_h2 = h2;
      best_index = i;
    }
  }
  return best_index;
}
} // namespace
// Counter shared by all instances; used to give each data dumper a unique id.
int FilterAnalyzer::instance_count_ = 0;

FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config,
                               size_t num_capture_channels)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      bounded_erl_(config.ep_strength.bounded_erl),
      default_gain_(config.ep_strength.default_gain),
      // One preprocessed (high-passed) filter per capture channel, sized for
      // the refined filter length.
      h_highpass_(num_capture_channels,
                  std::vector<float>(
                      GetTimeDomainLength(config.filter.refined.length_blocks),
                      0.f)),
      filter_analysis_states_(num_capture_channels,
                              FilterAnalysisState(config)),
      filter_delays_blocks_(num_capture_channels, 0) {
  Reset();
}

FilterAnalyzer::~FilterAnalyzer() = default;
// Restarts the analysis: clears the convergence timer, rewinds the analysis
// region, and resets the per-channel state and delay estimates.
void FilterAnalyzer::Reset() {
  blocks_since_reset_ = 0;
  ResetRegion();
  for (size_t ch = 0; ch < filter_analysis_states_.size(); ++ch) {
    filter_analysis_states_[ch].Reset(default_gain_);
  }
  for (auto& delay : filter_delays_blocks_) {
    delay = 0;
  }
}
// Analyzes the current filters and aggregates per-channel results into
// |any_filter_consistent| (OR over channels), |max_echo_path_gain| (max over
// channels) and min_filter_delay_blocks_ (min over channels).
void FilterAnalyzer::Update(
    rtc::ArrayView<const std::vector<float>> filters_time_domain,
    const RenderBuffer& render_buffer,
    bool* any_filter_consistent,
    float* max_echo_path_gain) {
  RTC_DCHECK(any_filter_consistent);
  RTC_DCHECK(max_echo_path_gain);
  RTC_DCHECK_EQ(filters_time_domain.size(), filter_analysis_states_.size());
  RTC_DCHECK_EQ(filters_time_domain.size(), h_highpass_.size());

  ++blocks_since_reset_;
  // Only a sub-range of the filter is analyzed per call; the region advances
  // and wraps around over successive calls (see SetRegionToAnalyze()).
  SetRegionToAnalyze(filters_time_domain[0].size());
  AnalyzeRegion(filters_time_domain, render_buffer);

  // Aggregate the results for all capture channels.
  auto& st_ch0 = filter_analysis_states_[0];
  *any_filter_consistent = st_ch0.consistent_estimate;
  *max_echo_path_gain = st_ch0.gain;
  min_filter_delay_blocks_ = filter_delays_blocks_[0];
  for (size_t ch = 1; ch < filters_time_domain.size(); ++ch) {
    auto& st_ch = filter_analysis_states_[ch];
    *any_filter_consistent =
        *any_filter_consistent || st_ch.consistent_estimate;
    *max_echo_path_gain = std::max(*max_echo_path_gain, st_ch.gain);
    min_filter_delay_blocks_ =
        std::min(min_filter_delay_blocks_, filter_delays_blocks_[ch]);
  }
}
// Analyzes the current region of each channel's filter: updates the peak
// location, the per-channel delay, the gain, and the consistency verdict.
void FilterAnalyzer::AnalyzeRegion(
    rtc::ArrayView<const std::vector<float>> filters_time_domain,
    const RenderBuffer& render_buffer) {
  // Preprocess the filter to avoid issues with low-frequency components in the
  // filter.
  PreProcessFilters(filters_time_domain);
  data_dumper_->DumpRaw("aec3_linear_filter_processed_td", h_highpass_[0]);

  constexpr float kOneByBlockSize = 1.f / kBlockSize;
  for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
    RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
    RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());

    auto& st_ch = filter_analysis_states_[ch];
    RTC_DCHECK_EQ(h_highpass_[ch].size(), filters_time_domain[ch].size());
    RTC_DCHECK_GT(h_highpass_[ch].size(), 0);
    // Clamp the remembered peak in case the filter got shorter.
    st_ch.peak_index = std::min(st_ch.peak_index, h_highpass_[ch].size() - 1);

    // Search the current region for a larger peak; the filter delay in blocks
    // is derived directly from the peak position.
    st_ch.peak_index =
        FindPeakIndex(h_highpass_[ch], st_ch.peak_index, region_.start_sample_,
                      region_.end_sample_);
    filter_delays_blocks_[ch] = st_ch.peak_index >> kBlockSizeLog2;
    UpdateFilterGain(h_highpass_[ch], &st_ch);
    st_ch.filter_length_blocks =
        filters_time_domain[ch].size() * kOneByBlockSize;

    // The detector is fed the render block aligned with this channel's delay.
    st_ch.consistent_estimate = st_ch.consistent_filter_detector.Detect(
        h_highpass_[ch], region_,
        render_buffer.Block(-filter_delays_blocks_[ch])[0], st_ch.peak_index,
        filter_delays_blocks_[ch]);
  }
}
// Updates the echo path gain estimate from the filter's peak magnitude. The
// gain only tracks downwards once the filter has had time to converge and is
// judged consistent; otherwise it can only grow.
void FilterAnalyzer::UpdateFilterGain(
    rtc::ArrayView<const float> filter_time_domain,
    FilterAnalysisState* st) {
  bool sufficient_time_to_converge =
      blocks_since_reset_ > 5 * kNumBlocksPerSecond;

  if (sufficient_time_to_converge && st->consistent_estimate) {
    st->gain = fabsf(filter_time_domain[st->peak_index]);
  } else {
    // TODO(peah): Verify whether this check against a float is ok.
    if (st->gain) {
      st->gain = std::max(st->gain, fabsf(filter_time_domain[st->peak_index]));
    }
  }

  // With a bounded ERL, a nonzero gain is never allowed below 0.01.
  if (bounded_erl_ && st->gain) {
    st->gain = std::max(st->gain, 0.01f);
  }
}
// High-pass filters the region under analysis of each channel's filter into
// h_highpass_, removing low-frequency components that would otherwise
// disturb the peak analysis.
void FilterAnalyzer::PreProcessFilters(
    rtc::ArrayView<const std::vector<float>> filters_time_domain) {
  for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
    RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
    RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());

    RTC_DCHECK_GE(h_highpass_[ch].capacity(), filters_time_domain[ch].size());
    h_highpass_[ch].resize(filters_time_domain[ch].size());
    // Minimum phase high-pass filter with cutoff frequency at about 600 Hz.
    constexpr std::array<float, 3> h = {
        {0.7929742f, -0.36072128f, -0.47047766f}};

    // Zero the region, then accumulate the convolution. The loop starts at
    // h.size() - 1 at the earliest so that k - j never underflows; samples
    // before that stay zero.
    std::fill(h_highpass_[ch].begin() + region_.start_sample_,
              h_highpass_[ch].begin() + region_.end_sample_ + 1, 0.f);
    for (size_t k = std::max(h.size() - 1, region_.start_sample_);
         k <= region_.end_sample_; ++k) {
      for (size_t j = 0; j < h.size(); ++j) {
        h_highpass_[ch][k] += filters_time_domain[ch][k - j] * h[j];
      }
    }
  }
}
// Collapses the analysis region so the next sweep restarts at sample 0.
void FilterAnalyzer::ResetRegion() {
  region_ = {0, 0};
}
// Advances the analysis region by one block worth of samples, wrapping back
// to the start of the filter after the last sample has been covered.
void FilterAnalyzer::SetRegionToAnalyze(size_t filter_size) {
  constexpr size_t kNumberBlocksToUpdate = 1;
  const size_t previous_end = region_.end_sample_;
  if (previous_end >= filter_size - 1) {
    region_.start_sample_ = 0;
  } else {
    region_.start_sample_ = previous_end + 1;
  }
  region_.end_sample_ =
      std::min(region_.start_sample_ + kNumberBlocksToUpdate * kBlockSize - 1,
               filter_size - 1);

  // Check range.
  RTC_DCHECK_LT(region_.start_sample_, filter_size);
  RTC_DCHECK_LT(region_.end_sample_, filter_size);
  RTC_DCHECK_LE(region_.start_sample_, region_.end_sample_);
}
FilterAnalyzer::ConsistentFilterDetector::ConsistentFilterDetector(
    const EchoCanceller3Config& config)
    // Threshold on per-block render energy (limit^2 scaled by the FFT half
    // length) above which a block counts as active render.
    : active_render_threshold_(config.render_levels.active_render_limit *
                               config.render_levels.active_render_limit *
                               kFftLengthBy2) {
  Reset();
}

void FilterAnalyzer::ConsistentFilterDetector::Reset() {
  significant_peak_ = false;
  filter_floor_accum_ = 0.f;
  filter_secondary_peak_ = 0.f;
  filter_floor_low_limit_ = 0;
  filter_floor_high_limit_ = 0;
  consistent_estimate_counter_ = 0;
  // Sentinel delay that cannot match any real delay, forcing the counter to
  // restart on the first Detect() call.
  consistent_delay_reference_ = -10;
}
// Incrementally (one region per call) accumulates the filter floor and
// secondary peak outside a window around the peak. Once a full sweep has
// completed, decides whether the peak is significant, and counts consecutive
// active-render blocks for which the delay estimate stays unchanged.
bool FilterAnalyzer::ConsistentFilterDetector::Detect(
    rtc::ArrayView<const float> filter_to_analyze,
    const FilterRegion& region,
    rtc::ArrayView<const std::vector<float>> x_block,
    size_t peak_index,
    int delay_blocks) {
  // A new sweep starts at region start 0: reset the accumulators and place
  // the floor window as [peak - 64, peak + 128], clamped at the edges.
  if (region.start_sample_ == 0) {
    filter_floor_accum_ = 0.f;
    filter_secondary_peak_ = 0.f;
    filter_floor_low_limit_ = peak_index < 64 ? 0 : peak_index - 64;
    filter_floor_high_limit_ =
        peak_index > filter_to_analyze.size() - 129 ? 0 : peak_index + 128;
  }

  // Accumulate over the part of the region below the window...
  for (size_t k = region.start_sample_;
       k < std::min(region.end_sample_ + 1, filter_floor_low_limit_); ++k) {
    float abs_h = fabsf(filter_to_analyze[k]);
    filter_floor_accum_ += abs_h;
    filter_secondary_peak_ = std::max(filter_secondary_peak_, abs_h);
  }

  // ...and over the part above it.
  for (size_t k = std::max(filter_floor_high_limit_, region.start_sample_);
       k <= region.end_sample_; ++k) {
    float abs_h = fabsf(filter_to_analyze[k]);
    filter_floor_accum_ += abs_h;
    filter_secondary_peak_ = std::max(filter_secondary_peak_, abs_h);
  }

  // At the end of a sweep, classify the peak: it must dominate both the
  // average floor and the largest out-of-window sample.
  if (region.end_sample_ == filter_to_analyze.size() - 1) {
    float filter_floor = filter_floor_accum_ /
                         (filter_floor_low_limit_ + filter_to_analyze.size() -
                          filter_floor_high_limit_);

    float abs_peak = fabsf(filter_to_analyze[peak_index]);
    significant_peak_ = abs_peak > 10.f * filter_floor &&
                        abs_peak > 2.f * filter_secondary_peak_;
  }

  if (significant_peak_) {
    // The consistency counter only advances on blocks with active render in
    // at least one channel, and restarts whenever the delay estimate moves.
    bool active_render_block = false;
    for (auto& x_channel : x_block) {
      const float x_energy = std::inner_product(
          x_channel.begin(), x_channel.end(), x_channel.begin(), 0.f);
      if (x_energy > active_render_threshold_) {
        active_render_block = true;
        break;
      }
    }

    if (consistent_delay_reference_ == delay_blocks) {
      if (active_render_block) {
        ++consistent_estimate_counter_;
      }
    } else {
      consistent_estimate_counter_ = 0;
      consistent_delay_reference_ = delay_blocks;
    }
  }
  // Consistent once the same delay has been seen for 1.5 seconds of active
  // render.
  return consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond;
}
} // namespace webrtc

View File

@ -0,0 +1,149 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
#include <stddef.h>
#include <array>
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
class ApmDataDumper;
class RenderBuffer;
// Class for analyzing the properties of an adaptive filter.
// Class for analyzing the properties of an adaptive filter.
class FilterAnalyzer {
 public:
  FilterAnalyzer(const EchoCanceller3Config& config,
                 size_t num_capture_channels);
  ~FilterAnalyzer();

  FilterAnalyzer(const FilterAnalyzer&) = delete;
  FilterAnalyzer& operator=(const FilterAnalyzer&) = delete;

  // Resets the analysis.
  void Reset();

  // Updates the estimates with new input data. |any_filter_consistent| is set
  // if any channel's filter is consistent; |max_echo_path_gain| receives the
  // maximum gain over all channels.
  void Update(rtc::ArrayView<const std::vector<float>> filters_time_domain,
              const RenderBuffer& render_buffer,
              bool* any_filter_consistent,
              float* max_echo_path_gain);

  // Returns the delay in blocks for each filter.
  rtc::ArrayView<const int> FilterDelaysBlocks() const {
    return filter_delays_blocks_;
  }

  // Returns the minimum delay of all filters in terms of blocks.
  int MinFilterDelayBlocks() const { return min_filter_delay_blocks_; }

  // Returns the number of blocks for the current used filter.
  int FilterLengthBlocks() const {
    return filter_analysis_states_[0].filter_length_blocks;
  }

  // Returns the preprocessed filter.
  rtc::ArrayView<const std::vector<float>> GetAdjustedFilters() const {
    return h_highpass_;
  }

  // Public for testing purposes only.
  void SetRegionToAnalyze(size_t filter_size);

 private:
  struct FilterAnalysisState;

  void AnalyzeRegion(
      rtc::ArrayView<const std::vector<float>> filters_time_domain,
      const RenderBuffer& render_buffer);

  void UpdateFilterGain(rtc::ArrayView<const float> filters_time_domain,
                        FilterAnalysisState* st);
  void PreProcessFilters(
      rtc::ArrayView<const std::vector<float>> filters_time_domain);

  void ResetRegion();

  // Sub-range of filter samples analyzed in one Update() call.
  struct FilterRegion {
    size_t start_sample_;
    size_t end_sample_;
  };

  // This class checks whether the shape of the impulse response has been
  // consistent over time.
  class ConsistentFilterDetector {
   public:
    explicit ConsistentFilterDetector(const EchoCanceller3Config& config);
    void Reset();
    bool Detect(rtc::ArrayView<const float> filter_to_analyze,
                const FilterRegion& region,
                rtc::ArrayView<const std::vector<float>> x_block,
                size_t peak_index,
                int delay_blocks);

   private:
    bool significant_peak_;
    float filter_floor_accum_;
    float filter_secondary_peak_;
    size_t filter_floor_low_limit_;
    size_t filter_floor_high_limit_;
    const float active_render_threshold_;
    size_t consistent_estimate_counter_ = 0;
    int consistent_delay_reference_ = -10;
  };

  // Per-capture-channel analysis results.
  struct FilterAnalysisState {
    explicit FilterAnalysisState(const EchoCanceller3Config& config)
        : filter_length_blocks(config.filter.refined_initial.length_blocks),
          consistent_filter_detector(config) {
      Reset(config.ep_strength.default_gain);
    }

    void Reset(float default_gain) {
      peak_index = 0;
      gain = default_gain;
      consistent_filter_detector.Reset();
    }

    float gain;
    size_t peak_index;
    int filter_length_blocks;
    bool consistent_estimate = false;
    ConsistentFilterDetector consistent_filter_detector;
  };

  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const bool bounded_erl_;
  const float default_gain_;
  // High-pass preprocessed filters, one per capture channel.
  std::vector<std::vector<float>> h_highpass_;

  size_t blocks_since_reset_ = 0;
  FilterRegion region_;
  std::vector<FilterAnalysisState> filter_analysis_states_;
  std::vector<int> filter_delays_blocks_;
  int min_filter_delay_blocks_ = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_

View File

@ -0,0 +1,88 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/frame_blocker.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Creates per-band, per-channel staging buffers, each pre-reserving one block
// so that InsertSubFrameAndExtractBlock never reallocates.
FrameBlocker::FrameBlocker(size_t num_bands, size_t num_channels)
    : num_bands_(num_bands),
      num_channels_(num_channels),
      buffer_(num_bands_, std::vector<std::vector<float>>(num_channels)) {
  RTC_DCHECK_LT(0, num_bands);
  RTC_DCHECK_LT(0, num_channels);
  for (size_t band = 0; band < buffer_.size(); ++band) {
    for (size_t ch = 0; ch < buffer_[band].size(); ++ch) {
      buffer_[band][ch].reserve(kBlockSize);
      RTC_DCHECK(buffer_[band][ch].empty());
    }
  }
}

FrameBlocker::~FrameBlocker() = default;
// Forms one 64-sample block per band/channel by prepending the buffered
// leftovers to the start of the 80-sample sub-frame; the unused tail of the
// sub-frame becomes the new buffered leftover.
void FrameBlocker::InsertSubFrameAndExtractBlock(
    const std::vector<std::vector<rtc::ArrayView<float>>>& sub_frame,
    std::vector<std::vector<std::vector<float>>>* block) {
  RTC_DCHECK(block);
  RTC_DCHECK_EQ(num_bands_, block->size());
  RTC_DCHECK_EQ(num_bands_, sub_frame.size());
  for (size_t band = 0; band < num_bands_; ++band) {
    RTC_DCHECK_EQ(num_channels_, (*block)[band].size());
    RTC_DCHECK_EQ(num_channels_, sub_frame[band].size());
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      // The buffer never holds more than 48 samples (kBlockSize - 16), so a
      // full block can always be completed from one 80-sample sub-frame.
      RTC_DCHECK_GE(kBlockSize - 16, buffer_[band][channel].size());
      RTC_DCHECK_EQ(kBlockSize, (*block)[band][channel].size());
      RTC_DCHECK_EQ(kSubFrameLength, sub_frame[band][channel].size());
      const int samples_to_block = kBlockSize - buffer_[band][channel].size();
      // Output block = buffered samples followed by the head of the sub-frame.
      (*block)[band][channel].clear();
      (*block)[band][channel].insert((*block)[band][channel].begin(),
                                     buffer_[band][channel].begin(),
                                     buffer_[band][channel].end());
      (*block)[band][channel].insert(
          (*block)[band][channel].begin() + buffer_[band][channel].size(),
          sub_frame[band][channel].begin(),
          sub_frame[band][channel].begin() + samples_to_block);
      // Keep the remaining tail of the sub-frame for the next block.
      buffer_[band][channel].clear();
      buffer_[band][channel].insert(
          buffer_[band][channel].begin(),
          sub_frame[band][channel].begin() + samples_to_block,
          sub_frame[band][channel].end());
    }
  }
}
// A full block is available when the staging buffer holds exactly one block
// of samples (all bands/channels fill in lockstep, so checking one suffices).
bool FrameBlocker::IsBlockAvailable() const {
  return buffer_[0][0].size() == kBlockSize;
}
// Moves a complete buffered block into |block| and empties the staging
// buffer. Must only be called when IsBlockAvailable() is true.
void FrameBlocker::ExtractBlock(
    std::vector<std::vector<std::vector<float>>>* block) {
  RTC_DCHECK(block);
  RTC_DCHECK_EQ(num_bands_, block->size());
  RTC_DCHECK(IsBlockAvailable());
  for (size_t band = 0; band < num_bands_; ++band) {
    RTC_DCHECK_EQ(num_channels_, (*block)[band].size());
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      RTC_DCHECK_EQ(kBlockSize, buffer_[band][channel].size());
      RTC_DCHECK_EQ(kBlockSize, (*block)[band][channel].size());
      (*block)[band][channel].clear();
      (*block)[band][channel].insert((*block)[band][channel].begin(),
                                     buffer_[band][channel].begin(),
                                     buffer_[band][channel].end());
      // clear() keeps the reserved capacity, so no reallocation occurs later.
      buffer_[band][channel].clear();
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_
#include <stddef.h>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
// Class for producing 64 sample multiband blocks from frames consisting of 2
// subframes of 80 samples.
// Class for producing 64 sample multiband blocks from frames consisting of 2
// subframes of 80 samples.
class FrameBlocker {
 public:
  FrameBlocker(size_t num_bands, size_t num_channels);
  ~FrameBlocker();
  FrameBlocker(const FrameBlocker&) = delete;
  FrameBlocker& operator=(const FrameBlocker&) = delete;

  // Inserts one 80 sample multiband subframe from the multiband frame and
  // extracts one 64 sample multiband block.
  void InsertSubFrameAndExtractBlock(
      const std::vector<std::vector<rtc::ArrayView<float>>>& sub_frame,
      std::vector<std::vector<std::vector<float>>>* block);
  // Reports whether a multiband block of 64 samples is available for
  // extraction.
  bool IsBlockAvailable() const;
  // Extracts a multiband block of 64 samples.
  void ExtractBlock(std::vector<std::vector<std::vector<float>>>* block);

 private:
  const size_t num_bands_;
  const size_t num_channels_;
  // Staging buffer holding leftover samples: buffer_[band][channel][sample].
  std::vector<std::vector<std::vector<float>>> buffer_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_

View File

@ -0,0 +1,200 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/fullband_erle_estimator.h"
#include <algorithm>
#include <memory>
#include <numeric>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"
namespace webrtc {
namespace {
// Additive offset used when taking log2 to avoid log2 of zero.
constexpr float kEpsilon = 1e-3f;
// Per-band render energy threshold that gates the fullband ERLE update.
constexpr float kX2BandEnergyThreshold = 44015068.0f;
// Number of blocks the time-domain ERLE is held before it leaks downwards.
constexpr int kBlocksToHoldErle = 100;
// Number of (Y2, E2) points accumulated per instantaneous ERLE estimate.
constexpr int kPointsToAccumulate = 6;
}  // namespace
FullBandErleEstimator::FullBandErleEstimator(
    const EchoCanceller3Config::Erle& config,
    size_t num_capture_channels)
    // The configured min/max ERLE bounds are stored in log2 domain.
    : min_erle_log2_(FastApproxLog2f(config.min + kEpsilon)),
      max_erle_lf_log2(FastApproxLog2f(config.max_l + kEpsilon)),
      hold_counters_time_domain_(num_capture_channels, 0),
      erle_time_domain_log2_(num_capture_channels, min_erle_log2_),
      instantaneous_erle_(num_capture_channels, ErleInstantaneous(config)),
      linear_filters_qualities_(num_capture_channels) {
  Reset();
}

FullBandErleEstimator::~FullBandErleEstimator() = default;
void FullBandErleEstimator::Reset() {
for (auto& instantaneous_erle_ch : instantaneous_erle_) {
instantaneous_erle_ch.Reset();
}
UpdateQualityEstimates();
std::fill(erle_time_domain_log2_.begin(), erle_time_domain_log2_.end(),
min_erle_log2_);
std::fill(hold_counters_time_domain_.begin(),
hold_counters_time_domain_.end(), 0);
}
// Updates the fullband (time-domain) ERLE per capture channel from the summed
// render (X2), capture (Y2) and subtractor-output (E2) spectra.
void FullBandErleEstimator::Update(
    rtc::ArrayView<const float> X2,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
    const std::vector<bool>& converged_filters) {
  for (size_t ch = 0; ch < Y2.size(); ++ch) {
    // Only update from channels whose linear filter has converged and only
    // when the render signal carries sufficient energy.
    if (converged_filters[ch]) {
      // Computes the fullband ERLE.
      const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);
      if (X2_sum > kX2BandEnergyThreshold * X2.size()) {
        const float Y2_sum =
            std::accumulate(Y2[ch].begin(), Y2[ch].end(), 0.0f);
        const float E2_sum =
            std::accumulate(E2[ch].begin(), E2[ch].end(), 0.0f);
        // A new instantaneous estimate refreshes the hold counter and is
        // leaked into the smoothed log2 ERLE, clamped to the config bounds.
        if (instantaneous_erle_[ch].Update(Y2_sum, E2_sum)) {
          hold_counters_time_domain_[ch] = kBlocksToHoldErle;
          erle_time_domain_log2_[ch] +=
              0.1f * ((instantaneous_erle_[ch].GetInstErleLog2().value()) -
                      erle_time_domain_log2_[ch]);
          erle_time_domain_log2_[ch] = rtc::SafeClamp(
              erle_time_domain_log2_[ch], min_erle_log2_, max_erle_lf_log2);
        }
      }
    }
    // The hold counter is decremented every block; once expired, the smoothed
    // ERLE decays towards its minimum and the accumulators are reset exactly
    // once (when the counter hits zero).
    --hold_counters_time_domain_[ch];
    if (hold_counters_time_domain_[ch] <= 0) {
      erle_time_domain_log2_[ch] =
          std::max(min_erle_log2_, erle_time_domain_log2_[ch] - 0.044f);
    }
    if (hold_counters_time_domain_[ch] == 0) {
      instantaneous_erle_[ch].ResetAccumulators();
    }
  }
  UpdateQualityEstimates();
}
// Dumps the smoothed fullband ERLE and (for channel 0 only) the
// instantaneous-estimator internals.
void FullBandErleEstimator::Dump(
    const std::unique_ptr<ApmDataDumper>& data_dumper) const {
  data_dumper->DumpRaw("aec3_fullband_erle_log2", FullbandErleLog2());
  instantaneous_erle_[0].Dump(data_dumper);
}

// Refreshes the cached per-channel linear-filter quality estimates.
void FullBandErleEstimator::UpdateQualityEstimates() {
  for (size_t ch = 0; ch < instantaneous_erle_.size(); ++ch) {
    linear_filters_qualities_[ch] =
        instantaneous_erle_[ch].GetQualityEstimate();
  }
}
FullBandErleEstimator::ErleInstantaneous::ErleInstantaneous(
    const EchoCanceller3Config::Erle& config)
    // Config flags controlling how the quality estimate is clamped in
    // GetQualityEstimate().
    : clamp_inst_quality_to_zero_(config.clamp_quality_estimate_to_zero),
      clamp_inst_quality_to_one_(config.clamp_quality_estimate_to_one) {
  Reset();
}

FullBandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default;
// Accumulates capture (Y2) and subtractor-output (E2) energies; every
// kPointsToAccumulate calls a new instantaneous log2 ERLE is produced.
// Returns true when the estimate was refreshed on this call.
bool FullBandErleEstimator::ErleInstantaneous::Update(const float Y2_sum,
                                                      const float E2_sum) {
  bool update_estimates = false;
  E2_acum_ += E2_sum;
  Y2_acum_ += Y2_sum;
  num_points_++;
  if (num_points_ == kPointsToAccumulate) {
    // Guard against division by zero; the accumulators are always restarted,
    // even when no estimate could be produced.
    if (E2_acum_ > 0.f) {
      update_estimates = true;
      erle_log2_ = FastApproxLog2f(Y2_acum_ / E2_acum_ + kEpsilon);
    }
    num_points_ = 0;
    E2_acum_ = 0.f;
    Y2_acum_ = 0.f;
  }

  if (update_estimates) {
    UpdateMaxMin();
    UpdateQualityEstimate();
  }
  return update_estimates;
}
void FullBandErleEstimator::ErleInstantaneous::Reset() {
ResetAccumulators();
max_erle_log2_ = -10.f; // -30 dB.
min_erle_log2_ = 33.f; // 100 dB.
inst_quality_estimate_ = 0.f;
}
// Clears the members related to a single instantaneous estimate; the max/min
// trackers are intentionally left untouched (see Reset() for a full reset).
void FullBandErleEstimator::ErleInstantaneous::ResetAccumulators() {
  erle_log2_ = absl::nullopt;
  inst_quality_estimate_ = 0.f;
  num_points_ = 0;
  E2_acum_ = 0.f;
  Y2_acum_ = 0.f;
}
// Dumps the instantaneous ERLE, the quality estimate and the max/min trackers.
// When no estimate is available, sentinel values (-10 log2 / 0 quality) are
// dumped instead so the traces stay aligned.
void FullBandErleEstimator::ErleInstantaneous::Dump(
    const std::unique_ptr<ApmDataDumper>& data_dumper) const {
  data_dumper->DumpRaw("aec3_fullband_erle_inst_log2",
                       erle_log2_ ? *erle_log2_ : -10.f);
  data_dumper->DumpRaw(
      "aec3_erle_instantaneous_quality",
      GetQualityEstimate() ? GetQualityEstimate().value() : 0.f);
  data_dumper->DumpRaw("aec3_fullband_erle_max_log2", max_erle_log2_);
  data_dumper->DumpRaw("aec3_fullband_erle_min_log2", min_erle_log2_);
}
// Tracks the running maximum and minimum of the instantaneous ERLE. The bound
// that is not hit by the current estimate decays slowly towards it, so the
// [min, max] range adapts over time.
void FullBandErleEstimator::ErleInstantaneous::UpdateMaxMin() {
  RTC_DCHECK(erle_log2_);
  const float current = erle_log2_.value();
  // Forget factor of 0.0004 corresponds to approximately 1 dB every 3 seconds.
  max_erle_log2_ =
      current > max_erle_log2_ ? current : max_erle_log2_ - 0.0004;
  min_erle_log2_ =
      current < min_erle_log2_ ? current : min_erle_log2_ + 0.0004;
}
void FullBandErleEstimator::ErleInstantaneous::UpdateQualityEstimate() {
const float alpha = 0.07f;
float quality_estimate = 0.f;
RTC_DCHECK(erle_log2_);
// TODO(peah): Currently, the estimate can become be less than 0; this should
// be corrected.
if (max_erle_log2_ > min_erle_log2_) {
quality_estimate = (erle_log2_.value() - min_erle_log2_) /
(max_erle_log2_ - min_erle_log2_);
}
if (quality_estimate > inst_quality_estimate_) {
inst_quality_estimate_ = quality_estimate;
} else {
inst_quality_estimate_ +=
alpha * (quality_estimate - inst_quality_estimate_);
}
}
} // namespace webrtc

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
#include <memory>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
namespace webrtc {
// Estimates the echo return loss enhancement using the energy of all the
// frequency bands.
class FullBandErleEstimator {
 public:
  FullBandErleEstimator(const EchoCanceller3Config::Erle& config,
                        size_t num_capture_channels);
  ~FullBandErleEstimator();
  // Resets the ERLE estimator.
  void Reset();

  // Updates the ERLE estimator with render (X2), capture (Y2) and error (E2)
  // spectra for all capture channels.
  void Update(rtc::ArrayView<const float> X2,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
              const std::vector<bool>& converged_filters);

  // Returns the fullband ERLE estimates in log2 units. The minimum across
  // channels is reported, which is the conservative choice.
  float FullbandErleLog2() const {
    float min_erle = erle_time_domain_log2_[0];
    for (size_t ch = 1; ch < erle_time_domain_log2_.size(); ++ch) {
      min_erle = std::min(min_erle, erle_time_domain_log2_[ch]);
    }
    return min_erle;
  }

  // Returns an estimation of the current linear filter quality. It returns a
  // float number between 0 and 1 mapping 1 to the highest possible quality.
  // Entries are absl::nullopt when no instantaneous estimate is available.
  rtc::ArrayView<const absl::optional<float>> GetInstLinearQualityEstimates()
      const {
    return linear_filters_qualities_;
  }

  void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;

 private:
  void UpdateQualityEstimates();

  // Per-channel helper that accumulates fullband energies and produces an
  // instantaneous ERLE estimate together with a quality indication.
  class ErleInstantaneous {
   public:
    explicit ErleInstantaneous(const EchoCanceller3Config::Erle& config);
    ~ErleInstantaneous();

    // Updates the estimator with a new point, returns true
    // if the instantaneous ERLE was updated due to having enough
    // points for performing the estimate.
    bool Update(const float Y2_sum, const float E2_sum);
    // Resets the instantaneous ERLE estimator to its initial state.
    void Reset();
    // Resets the members related with an instantaneous estimate.
    void ResetAccumulators();
    // Returns the instantaneous ERLE in log2 units.
    absl::optional<float> GetInstErleLog2() const { return erle_log2_; }
    // Gets an indication between 0 and 1 of the performance of the linear
    // filter for the current time instant. Clamping at either end is
    // controlled by the config flags captured at construction.
    absl::optional<float> GetQualityEstimate() const {
      if (erle_log2_) {
        float value = inst_quality_estimate_;
        if (clamp_inst_quality_to_zero_) {
          value = std::max(0.f, value);
        }
        if (clamp_inst_quality_to_one_) {
          value = std::min(1.f, value);
        }
        return absl::optional<float>(value);
      }
      return absl::nullopt;
    }
    void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;

   private:
    void UpdateMaxMin();
    void UpdateQualityEstimate();
    const bool clamp_inst_quality_to_zero_;
    const bool clamp_inst_quality_to_one_;
    absl::optional<float> erle_log2_;
    float inst_quality_estimate_;
    float max_erle_log2_;
    float min_erle_log2_;
    float Y2_acum_;
    float E2_acum_;
    int num_points_;
  };

  const float min_erle_log2_;
  // NOTE(review): missing trailing underscore on this member; renaming would
  // touch the .cc as well, so only flagging the inconsistency here.
  const float max_erle_lf_log2;
  std::vector<int> hold_counters_time_domain_;
  std::vector<float> erle_time_domain_log2_;
  std::vector<ErleInstantaneous> instantaneous_erle_;
  std::vector<absl::optional<float>> linear_filters_qualities_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_

View File

@ -0,0 +1,464 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/matched_filter.h"
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
#include "rtc_base/system/arch.h"
#if defined(WEBRTC_HAS_NEON)
#include <arm_neon.h>
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
#include <emmintrin.h>
#endif
#include <algorithm>
#include <cstddef>
#include <initializer_list>
#include <iterator>
#include <numeric>
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
namespace webrtc {
namespace aec3 {
#if defined(WEBRTC_HAS_NEON)
// NEON-optimized NLMS core: for each capture sample in y, applies the matched
// filter h to the circular render buffer x (starting at x_start_index and
// moving backwards per sample), accumulates the squared prediction error into
// *error_sum, and adapts h in place when the excitation energy exceeds
// x2_sum_threshold and no saturation is detected.
void MatchedFilterCore_NEON(size_t x_start_index,
                            float x2_sum_threshold,
                            float smoothing,
                            rtc::ArrayView<const float> x,
                            rtc::ArrayView<const float> y,
                            rtc::ArrayView<float> h,
                            bool* filters_updated,
                            float* error_sum) {
  const int h_size = static_cast<int>(h.size());
  const int x_size = static_cast<int>(x.size());
  // The 4-wide NEON loops require the filter length to be a multiple of 4.
  RTC_DCHECK_EQ(0, h_size % 4);

  // Process for all samples in the sub-block.
  for (size_t i = 0; i < y.size(); ++i) {
    // Apply the matched filter as filter * x, and compute x * x.
    RTC_DCHECK_GT(x_size, x_start_index);
    const float* x_p = &x[x_start_index];
    const float* h_p = &h[0];

    // Initialize values for the accumulation.
    float32x4_t s_128 = vdupq_n_f32(0);
    float32x4_t x2_sum_128 = vdupq_n_f32(0);
    float x2_sum = 0.f;
    float s = 0;

    // Compute loop chunk sizes until, and after, the wraparound of the circular
    // buffer for x.
    const int chunk1 =
        std::min(h_size, static_cast<int>(x_size - x_start_index));

    // Perform the loop in two chunks.
    const int chunk2 = h_size - chunk1;
    for (int limit : {chunk1, chunk2}) {
      // Perform 128 bit vector operations.
      const int limit_by_4 = limit >> 2;
      for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
        // Load the data into 128 bit vectors.
        const float32x4_t x_k = vld1q_f32(x_p);
        const float32x4_t h_k = vld1q_f32(h_p);
        // Compute and accumulate x * x and h * x.
        x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k);
        s_128 = vmlaq_f32(s_128, h_k, x_k);
      }

      // Perform non-vector operations for any remaining items.
      for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
        const float x_k = *x_p;
        x2_sum += x_k * x_k;
        s += *h_p * x_k;
      }

      // Wrap around to the start of the circular buffer for the second chunk.
      x_p = &x[0];
    }

    // Combine the accumulated vector and scalar values.
    float* v = reinterpret_cast<float*>(&x2_sum_128);
    x2_sum += v[0] + v[1] + v[2] + v[3];
    v = reinterpret_cast<float*>(&s_128);
    s += v[0] + v[1] + v[2] + v[3];

    // Compute the matched filter error.
    float e = y[i] - s;
    const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
    (*error_sum) += e * e;

    // Update the matched filter estimate in an NLMS manner.
    if (x2_sum > x2_sum_threshold && !saturation) {
      RTC_DCHECK_LT(0.f, x2_sum);
      const float alpha = smoothing * e / x2_sum;
      const float32x4_t alpha_128 = vmovq_n_f32(alpha);

      // filter = filter + smoothing * (y - filter * x) * x / x * x.
      float* h_p = &h[0];
      x_p = &x[x_start_index];

      // Perform the loop in two chunks.
      for (int limit : {chunk1, chunk2}) {
        // Perform 128 bit vector operations.
        const int limit_by_4 = limit >> 2;
        for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
          // Load the data into 128 bit vectors.
          float32x4_t h_k = vld1q_f32(h_p);
          const float32x4_t x_k = vld1q_f32(x_p);
          // Compute h = h + alpha * x.
          h_k = vmlaq_f32(h_k, alpha_128, x_k);

          // Store the result.
          vst1q_f32(h_p, h_k);
        }

        // Perform non-vector operations for any remaining items.
        for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
          *h_p += alpha * *x_p;
        }

        x_p = &x[0];
      }

      *filters_updated = true;
    }

    // Step one sample back in the circular render buffer.
    x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
  }
}
#endif
#if defined(WEBRTC_ARCH_X86_FAMILY)
// SSE2-optimized NLMS core: same contract as MatchedFilterCore_NEON above —
// filters the circular render buffer x with h for each sample of y,
// accumulates the squared error into *error_sum and adapts h in place when
// the excitation is strong enough and the capture is not saturated.
void MatchedFilterCore_SSE2(size_t x_start_index,
                            float x2_sum_threshold,
                            float smoothing,
                            rtc::ArrayView<const float> x,
                            rtc::ArrayView<const float> y,
                            rtc::ArrayView<float> h,
                            bool* filters_updated,
                            float* error_sum) {
  const int h_size = static_cast<int>(h.size());
  const int x_size = static_cast<int>(x.size());
  // The 4-wide SSE2 loops require the filter length to be a multiple of 4.
  RTC_DCHECK_EQ(0, h_size % 4);

  // Process for all samples in the sub-block.
  for (size_t i = 0; i < y.size(); ++i) {
    // Apply the matched filter as filter * x, and compute x * x.
    RTC_DCHECK_GT(x_size, x_start_index);
    const float* x_p = &x[x_start_index];
    const float* h_p = &h[0];

    // Initialize values for the accumulation.
    __m128 s_128 = _mm_set1_ps(0);
    __m128 x2_sum_128 = _mm_set1_ps(0);
    float x2_sum = 0.f;
    float s = 0;

    // Compute loop chunk sizes until, and after, the wraparound of the circular
    // buffer for x.
    const int chunk1 =
        std::min(h_size, static_cast<int>(x_size - x_start_index));

    // Perform the loop in two chunks.
    const int chunk2 = h_size - chunk1;
    for (int limit : {chunk1, chunk2}) {
      // Perform 128 bit vector operations.
      const int limit_by_4 = limit >> 2;
      for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
        // Load the data into 128 bit vectors.
        const __m128 x_k = _mm_loadu_ps(x_p);
        const __m128 h_k = _mm_loadu_ps(h_p);
        const __m128 xx = _mm_mul_ps(x_k, x_k);
        // Compute and accumulate x * x and h * x.
        x2_sum_128 = _mm_add_ps(x2_sum_128, xx);
        const __m128 hx = _mm_mul_ps(h_k, x_k);
        s_128 = _mm_add_ps(s_128, hx);
      }

      // Perform non-vector operations for any remaining items.
      for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
        const float x_k = *x_p;
        x2_sum += x_k * x_k;
        s += *h_p * x_k;
      }

      // Wrap around to the start of the circular buffer for the second chunk.
      x_p = &x[0];
    }

    // Combine the accumulated vector and scalar values.
    float* v = reinterpret_cast<float*>(&x2_sum_128);
    x2_sum += v[0] + v[1] + v[2] + v[3];
    v = reinterpret_cast<float*>(&s_128);
    s += v[0] + v[1] + v[2] + v[3];

    // Compute the matched filter error.
    float e = y[i] - s;
    const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
    (*error_sum) += e * e;

    // Update the matched filter estimate in an NLMS manner.
    if (x2_sum > x2_sum_threshold && !saturation) {
      RTC_DCHECK_LT(0.f, x2_sum);
      const float alpha = smoothing * e / x2_sum;
      const __m128 alpha_128 = _mm_set1_ps(alpha);

      // filter = filter + smoothing * (y - filter * x) * x / x * x.
      float* h_p = &h[0];
      x_p = &x[x_start_index];

      // Perform the loop in two chunks.
      for (int limit : {chunk1, chunk2}) {
        // Perform 128 bit vector operations.
        const int limit_by_4 = limit >> 2;
        for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
          // Load the data into 128 bit vectors.
          __m128 h_k = _mm_loadu_ps(h_p);
          const __m128 x_k = _mm_loadu_ps(x_p);

          // Compute h = h + alpha * x.
          const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k);
          h_k = _mm_add_ps(h_k, alpha_x);

          // Store the result.
          _mm_storeu_ps(h_p, h_k);
        }

        // Perform non-vector operations for any remaining items.
        for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
          *h_p += alpha * *x_p;
        }

        x_p = &x[0];
      }

      *filters_updated = true;
    }

    // Step one sample back in the circular render buffer.
    x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
  }
}
#endif
// Portable scalar NLMS core: for each capture sample in y, predicts it from
// the circular render buffer x using the filter h, accumulates the squared
// prediction error into *error_sum, and adapts h in place when the excitation
// energy exceeds x2_sum_threshold and the capture is not saturated.
void MatchedFilterCore(size_t x_start_index,
                       float x2_sum_threshold,
                       float smoothing,
                       rtc::ArrayView<const float> x,
                       rtc::ArrayView<const float> y,
                       rtc::ArrayView<float> h,
                       bool* filters_updated,
                       float* error_sum) {
  const size_t x_size = x.size();
  // Process for all samples in the sub-block.
  for (size_t i = 0; i < y.size(); ++i) {
    // Apply the matched filter as filter * x, and compute x * x.
    float x2_sum = 0.f;
    float prediction = 0.f;
    size_t read = x_start_index;
    for (size_t k = 0; k < h.size(); ++k) {
      const float x_k = x[read];
      x2_sum += x_k * x_k;
      prediction += h[k] * x_k;
      read = (read + 1 == x_size) ? 0 : read + 1;
    }

    // Compute the matched filter error.
    const float e = y[i] - prediction;
    const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
    (*error_sum) += e * e;

    // Update the matched filter estimate in an NLMS manner.
    if (x2_sum > x2_sum_threshold && !saturation) {
      RTC_DCHECK_LT(0.f, x2_sum);
      const float alpha = smoothing * e / x2_sum;
      // filter = filter + smoothing * (y - filter * x) * x / x * x.
      read = x_start_index;
      for (size_t k = 0; k < h.size(); ++k) {
        h[k] += alpha * x[read];
        read = (read + 1 == x_size) ? 0 : read + 1;
      }
      *filters_updated = true;
    }

    // Step one sample back in the circular render buffer.
    x_start_index = x_start_index == 0 ? x_size - 1 : x_start_index - 1;
  }
}
} // namespace aec3
// Constructs num_matched_filters correlators, each of length
// window_size_sub_blocks * sub_block_size samples, spaced
// alignment_shift_sub_blocks * sub_block_size samples apart in lag.
MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper,
                             Aec3Optimization optimization,
                             size_t sub_block_size,
                             size_t window_size_sub_blocks,
                             int num_matched_filters,
                             size_t alignment_shift_sub_blocks,
                             float excitation_limit,
                             float smoothing,
                             float matching_filter_threshold)
    : data_dumper_(data_dumper),
      optimization_(optimization),
      sub_block_size_(sub_block_size),
      filter_intra_lag_shift_(alignment_shift_sub_blocks * sub_block_size_),
      filters_(
          num_matched_filters,
          std::vector<float>(window_size_sub_blocks * sub_block_size_, 0.f)),
      lag_estimates_(num_matched_filters),
      filters_offsets_(num_matched_filters, 0),
      excitation_limit_(excitation_limit),
      smoothing_(smoothing),
      matching_filter_threshold_(matching_filter_threshold) {
  RTC_DCHECK(data_dumper);
  RTC_DCHECK_LT(0, window_size_sub_blocks);
  RTC_DCHECK((kBlockSize % sub_block_size) == 0);
  // The SIMD filter cores require the filter lengths to be multiples of 4.
  RTC_DCHECK((sub_block_size % 4) == 0);
}

MatchedFilter::~MatchedFilter() = default;
void MatchedFilter::Reset() {
for (auto& f : filters_) {
std::fill(f.begin(), f.end(), 0.f);
}
for (auto& l : lag_estimates_) {
l = MatchedFilter::LagEstimate();
}
}
// Runs every matched filter against the latest capture sub-block, using the
// filter core selected at construction, and refreshes the per-filter lag
// estimates. Each filter looks at the render buffer shifted by an additional
// filter_intra_lag_shift_ samples.
void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
                           rtc::ArrayView<const float> capture) {
  RTC_DCHECK_EQ(sub_block_size_, capture.size());
  auto& y = capture;

  // Minimum render energy required before the filters are allowed to adapt.
  const float x2_sum_threshold =
      filters_[0].size() * excitation_limit_ * excitation_limit_;

  // Apply all matched filters.
  size_t alignment_shift = 0;
  for (size_t n = 0; n < filters_.size(); ++n) {
    float error_sum = 0.f;
    bool filters_updated = false;

    // Starting read position in the circular render buffer for this filter's
    // alignment shift.
    size_t x_start_index =
        (render_buffer.read + alignment_shift + sub_block_size_ - 1) %
        render_buffer.buffer.size();

    switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
      case Aec3Optimization::kSse2:
        aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold,
                                     smoothing_, render_buffer.buffer, y,
                                     filters_[n], &filters_updated, &error_sum);
        break;
      case Aec3Optimization::kAvx2:
        aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold,
                                     smoothing_, render_buffer.buffer, y,
                                     filters_[n], &filters_updated, &error_sum);
        break;
#endif
#if defined(WEBRTC_HAS_NEON)
      case Aec3Optimization::kNeon:
        aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold,
                                     smoothing_, render_buffer.buffer, y,
                                     filters_[n], &filters_updated, &error_sum);
        break;
#endif
      default:
        aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing_,
                                render_buffer.buffer, y, filters_[n],
                                &filters_updated, &error_sum);
    }

    // Compute anchor for the matched filter error: the error a zero filter
    // would have produced (i.e. the capture energy).
    const float error_sum_anchor =
        std::inner_product(y.begin(), y.end(), y.begin(), 0.f);

    // Estimate the lag in the matched filter as the distance to the portion in
    // the filter that contributes the most to the matched filter output. This
    // is detected as the peak of the matched filter.
    const size_t lag_estimate = std::distance(
        filters_[n].begin(),
        std::max_element(
            filters_[n].begin(), filters_[n].end(),
            [](float a, float b) -> bool { return a * a < b * b; }));

    // Update the lag estimates for the matched filter. The estimate is deemed
    // reliable when the peak is away from the filter edges and the filter
    // reduced the error sufficiently below the anchor.
    lag_estimates_[n] = LagEstimate(
        error_sum_anchor - error_sum,
        (lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) &&
         error_sum < matching_filter_threshold_ * error_sum_anchor),
        lag_estimate + alignment_shift, filters_updated);

    // The dump names below are only enumerated up to 10 filters.
    RTC_DCHECK_GE(10, filters_.size());
    switch (n) {
      case 0:
        data_dumper_->DumpRaw("aec3_correlator_0_h", filters_[0]);
        break;
      case 1:
        data_dumper_->DumpRaw("aec3_correlator_1_h", filters_[1]);
        break;
      case 2:
        data_dumper_->DumpRaw("aec3_correlator_2_h", filters_[2]);
        break;
      case 3:
        data_dumper_->DumpRaw("aec3_correlator_3_h", filters_[3]);
        break;
      case 4:
        data_dumper_->DumpRaw("aec3_correlator_4_h", filters_[4]);
        break;
      case 5:
        data_dumper_->DumpRaw("aec3_correlator_5_h", filters_[5]);
        break;
      case 6:
        data_dumper_->DumpRaw("aec3_correlator_6_h", filters_[6]);
        break;
      case 7:
        data_dumper_->DumpRaw("aec3_correlator_7_h", filters_[7]);
        break;
      case 8:
        data_dumper_->DumpRaw("aec3_correlator_8_h", filters_[8]);
        break;
      case 9:
        data_dumper_->DumpRaw("aec3_correlator_9_h", filters_[9]);
        break;
      default:
        RTC_NOTREACHED();
    }

    alignment_shift += filter_intra_lag_shift_;
  }
}
// Logs, for each matched filter, the time span (in ms) of the render signal
// that the filter covers, relative to the applied shift.
void MatchedFilter::LogFilterProperties(int sample_rate_hz,
                                        size_t shift,
                                        size_t downsampling_factor) const {
  size_t alignment_shift = 0;
  // NOTE(review): the ms conversion assumes a 16 kHz rate and sample_rate_hz
  // is unused here — confirm this is intended.
  constexpr int kFsBy1000 = 16;
  for (size_t k = 0; k < filters_.size(); ++k) {
    // Start/end expressed in samples of the non-downsampled signal.
    int start = static_cast<int>(alignment_shift * downsampling_factor);
    int end = static_cast<int>((alignment_shift + filters_[k].size()) *
                               downsampling_factor);
    RTC_LOG(LS_VERBOSE) << "Filter " << k << ": start: "
                        << (start - static_cast<int>(shift)) / kFsBy1000
                        << " ms, end: "
                        << (end - static_cast<int>(shift)) / kFsBy1000
                        << " ms.";
    alignment_shift += filter_intra_lag_shift_;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,149 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
#include <stddef.h>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/system/arch.h"
namespace webrtc {
class ApmDataDumper;
struct DownsampledRenderBuffer;
namespace aec3 {

// The filter cores below share a common contract: x is the circular
// downsampled render buffer (x_start_index is the read position), y is the
// capture sub-block, h is the matched filter that is adapted in place,
// *filters_updated is set to true if any adaptation took place, and the
// squared prediction error is accumulated into *error_sum.

#if defined(WEBRTC_HAS_NEON)
// Filter core for the matched filter that is optimized for NEON.
void MatchedFilterCore_NEON(size_t x_start_index,
                            float x2_sum_threshold,
                            float smoothing,
                            rtc::ArrayView<const float> x,
                            rtc::ArrayView<const float> y,
                            rtc::ArrayView<float> h,
                            bool* filters_updated,
                            float* error_sum);
#endif

#if defined(WEBRTC_ARCH_X86_FAMILY)
// Filter core for the matched filter that is optimized for SSE2.
void MatchedFilterCore_SSE2(size_t x_start_index,
                            float x2_sum_threshold,
                            float smoothing,
                            rtc::ArrayView<const float> x,
                            rtc::ArrayView<const float> y,
                            rtc::ArrayView<float> h,
                            bool* filters_updated,
                            float* error_sum);

// Filter core for the matched filter that is optimized for AVX2.
void MatchedFilterCore_AVX2(size_t x_start_index,
                            float x2_sum_threshold,
                            float smoothing,
                            rtc::ArrayView<const float> x,
                            rtc::ArrayView<const float> y,
                            rtc::ArrayView<float> h,
                            bool* filters_updated,
                            float* error_sum);
#endif

// Filter core for the matched filter.
void MatchedFilterCore(size_t x_start_index,
                       float x2_sum_threshold,
                       float smoothing,
                       rtc::ArrayView<const float> x,
                       rtc::ArrayView<const float> y,
                       rtc::ArrayView<float> h,
                       bool* filters_updated,
                       float* error_sum);

}  // namespace aec3
// Produces recursively updated cross-correlation estimates for several signal
// shifts where the intra-shift spacing is uniform.
class MatchedFilter {
 public:
  // Stores properties for the lag estimate corresponding to a particular signal
  // shift.
  struct LagEstimate {
    LagEstimate() = default;
    LagEstimate(float accuracy, bool reliable, size_t lag, bool updated)
        : accuracy(accuracy), reliable(reliable), lag(lag), updated(updated) {}

    float accuracy = 0.f;
    bool reliable = false;
    size_t lag = 0;
    bool updated = false;
  };

  MatchedFilter(ApmDataDumper* data_dumper,
                Aec3Optimization optimization,
                size_t sub_block_size,
                size_t window_size_sub_blocks,
                int num_matched_filters,
                size_t alignment_shift_sub_blocks,
                float excitation_limit,
                float smoothing,
                float matching_filter_threshold);

  MatchedFilter() = delete;
  MatchedFilter(const MatchedFilter&) = delete;
  MatchedFilter& operator=(const MatchedFilter&) = delete;

  ~MatchedFilter();

  // Updates the correlation with the values in the capture buffer.
  void Update(const DownsampledRenderBuffer& render_buffer,
              rtc::ArrayView<const float> capture);

  // Resets the matched filter.
  void Reset();

  // Returns the current lag estimates.
  rtc::ArrayView<const MatchedFilter::LagEstimate> GetLagEstimates() const {
    return lag_estimates_;
  }

  // Returns the maximum filter lag.
  size_t GetMaxFilterLag() const {
    return filters_.size() * filter_intra_lag_shift_ + filters_[0].size();
  }

  // Log matched filter properties.
  void LogFilterProperties(int sample_rate_hz,
                           size_t shift,
                           size_t downsampling_factor) const;

 private:
  ApmDataDumper* const data_dumper_;
  const Aec3Optimization optimization_;
  const size_t sub_block_size_;
  // Lag spacing (in samples) between consecutive matched filters.
  const size_t filter_intra_lag_shift_;
  std::vector<std::vector<float>> filters_;
  std::vector<LagEstimate> lag_estimates_;
  std::vector<size_t> filters_offsets_;
  const float excitation_limit_;
  const float smoothing_;
  const float matching_filter_threshold_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_

View File

@ -0,0 +1,132 @@
/*
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/matched_filter.h"
#include <immintrin.h>
#include "rtc_base/checks.h"
namespace webrtc {
namespace aec3 {
// AVX2/FMA-optimized NLMS core: same contract as the SSE2/NEON variants —
// filters the circular render buffer x with h for each sample of y,
// accumulates the squared error into *error_sum and adapts h in place when
// the excitation is strong enough and the capture is not saturated.
void MatchedFilterCore_AVX2(size_t x_start_index,
                            float x2_sum_threshold,
                            float smoothing,
                            rtc::ArrayView<const float> x,
                            rtc::ArrayView<const float> y,
                            rtc::ArrayView<float> h,
                            bool* filters_updated,
                            float* error_sum) {
  const int h_size = static_cast<int>(h.size());
  const int x_size = static_cast<int>(x.size());
  // The 8-wide AVX2 loops require the filter length to be a multiple of 8.
  RTC_DCHECK_EQ(0, h_size % 8);

  // Process for all samples in the sub-block.
  for (size_t i = 0; i < y.size(); ++i) {
    // Apply the matched filter as filter * x, and compute x * x.
    RTC_DCHECK_GT(x_size, x_start_index);
    const float* x_p = &x[x_start_index];
    const float* h_p = &h[0];

    // Initialize values for the accumulation.
    __m256 s_256 = _mm256_set1_ps(0);
    __m256 x2_sum_256 = _mm256_set1_ps(0);
    float x2_sum = 0.f;
    float s = 0;

    // Compute loop chunk sizes until, and after, the wraparound of the circular
    // buffer for x.
    const int chunk1 =
        std::min(h_size, static_cast<int>(x_size - x_start_index));

    // Perform the loop in two chunks.
    const int chunk2 = h_size - chunk1;
    for (int limit : {chunk1, chunk2}) {
      // Perform 256 bit vector operations.
      const int limit_by_8 = limit >> 3;
      for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) {
        // Load the data into 256 bit vectors.
        __m256 x_k = _mm256_loadu_ps(x_p);
        __m256 h_k = _mm256_loadu_ps(h_p);
        // Compute and accumulate x * x and h * x.
        x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256);
        s_256 = _mm256_fmadd_ps(h_k, x_k, s_256);
      }

      // Perform non-vector operations for any remaining items.
      for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) {
        const float x_k = *x_p;
        x2_sum += x_k * x_k;
        s += *h_p * x_k;
      }

      // Wrap around to the start of the circular buffer for the second chunk.
      x_p = &x[0];
    }

    // Sum components together.
    __m128 x2_sum_128 = _mm_add_ps(_mm256_extractf128_ps(x2_sum_256, 0),
                                   _mm256_extractf128_ps(x2_sum_256, 1));
    __m128 s_128 = _mm_add_ps(_mm256_extractf128_ps(s_256, 0),
                              _mm256_extractf128_ps(s_256, 1));

    // Combine the accumulated vector and scalar values.
    float* v = reinterpret_cast<float*>(&x2_sum_128);
    x2_sum += v[0] + v[1] + v[2] + v[3];
    v = reinterpret_cast<float*>(&s_128);
    s += v[0] + v[1] + v[2] + v[3];

    // Compute the matched filter error.
    float e = y[i] - s;
    const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
    (*error_sum) += e * e;

    // Update the matched filter estimate in an NLMS manner.
    if (x2_sum > x2_sum_threshold && !saturation) {
      RTC_DCHECK_LT(0.f, x2_sum);
      const float alpha = smoothing * e / x2_sum;
      const __m256 alpha_256 = _mm256_set1_ps(alpha);

      // filter = filter + smoothing * (y - filter * x) * x / x * x.
      float* h_p = &h[0];
      x_p = &x[x_start_index];

      // Perform the loop in two chunks.
      for (int limit : {chunk1, chunk2}) {
        // Perform 256 bit vector operations.
        const int limit_by_8 = limit >> 3;
        for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) {
          // Load the data into 256 bit vectors.
          __m256 h_k = _mm256_loadu_ps(h_p);
          __m256 x_k = _mm256_loadu_ps(x_p);

          // Compute h = h + alpha * x.
          h_k = _mm256_fmadd_ps(x_k, alpha_256, h_k);

          // Store the result.
          _mm256_storeu_ps(h_p, h_k);
        }

        // Perform non-vector operations for any remaining items.
        for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) {
          *h_p += alpha * *x_p;
        }

        x_p = &x[0];
      }

      *filters_updated = true;
    }

    // Step one sample back in the circular render buffer.
    x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
  }
}
} // namespace aec3
} // namespace webrtc

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
#include <algorithm>
#include <iterator>
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Constructs the aggregator with a histogram able to hold every possible lag
// in [0, max_filter_lag].
MatchedFilterLagAggregator::MatchedFilterLagAggregator(
    ApmDataDumper* data_dumper,
    size_t max_filter_lag,
    const EchoCanceller3Config::Delay::DelaySelectionThresholds& thresholds)
    : data_dumper_(data_dumper),
      histogram_(max_filter_lag + 1, 0),
      thresholds_(thresholds) {
  RTC_DCHECK(data_dumper);
  // The initial threshold must not be stricter than the converged one.
  RTC_DCHECK_LE(thresholds_.initial, thresholds_.converged);
  histogram_data_.fill(0);
}

MatchedFilterLagAggregator::~MatchedFilterLagAggregator() = default;
// Clears the lag histogram and its backing ring buffer; a hard reset also
// forgets that a significant candidate was ever found.
void MatchedFilterLagAggregator::Reset(bool hard_reset) {
  histogram_.assign(histogram_.size(), 0);
  histogram_data_.fill(0);
  histogram_data_index_ = 0;
  if (hard_reset) {
    significant_candidate_found_ = false;
  }
}
// Folds the per-filter lag estimates into a single delay estimate by voting:
// the most accurate reliable estimate of this round is inserted into a sliding
// histogram, and the histogram peak is reported once it passes the configured
// thresholds. Returns absl::nullopt when no sufficiently supported candidate
// exists.
absl::optional<DelayEstimate> MatchedFilterLagAggregator::Aggregate(
    rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates) {
  // Choose the strongest lag estimate as the best one.
  float best_accuracy = 0.f;
  int best_lag_estimate_index = -1;
  for (size_t k = 0; k < lag_estimates.size(); ++k) {
    if (lag_estimates[k].updated && lag_estimates[k].reliable) {
      if (lag_estimates[k].accuracy > best_accuracy) {
        best_accuracy = lag_estimates[k].accuracy;
        best_lag_estimate_index = static_cast<int>(k);
      }
    }
  }

  // TODO(peah): Remove this logging once all development is done.
  data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_best_index",
                        best_lag_estimate_index);
  data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_histogram", histogram_);

  if (best_lag_estimate_index != -1) {
    // Replace the oldest entry of the sliding window in the histogram with the
    // new best lag: decrement the bin it occupied, overwrite it, and increment
    // the bin of the new lag.
    RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]);
    RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]);
    --histogram_[histogram_data_[histogram_data_index_]];

    histogram_data_[histogram_data_index_] =
        lag_estimates[best_lag_estimate_index].lag;

    RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]);
    RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]);
    ++histogram_[histogram_data_[histogram_data_index_]];

    histogram_data_index_ =
        (histogram_data_index_ + 1) % histogram_data_.size();

    // The candidate is the histogram peak.
    const int candidate =
        std::distance(histogram_.begin(),
                      std::max_element(histogram_.begin(), histogram_.end()));

    significant_candidate_found_ =
        significant_candidate_found_ ||
        histogram_[candidate] > thresholds_.converged;
    // Before any candidate has converged, the laxer initial threshold is
    // enough to report a coarse estimate.
    if (histogram_[candidate] > thresholds_.converged ||
        (histogram_[candidate] > thresholds_.initial &&
         !significant_candidate_found_)) {
      DelayEstimate::Quality quality = significant_candidate_found_
                                           ? DelayEstimate::Quality::kRefined
                                           : DelayEstimate::Quality::kCoarse;
      return DelayEstimate(quality, candidate);
    }
  }

  return absl::nullopt;
}
} // namespace webrtc

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
#include <vector>
#include "absl/types/optional.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/matched_filter.h"
namespace webrtc {
class ApmDataDumper;
// Aggregates lag estimates produced by the MatchedFilter class into a single
// reliable combined lag estimate.
class MatchedFilterLagAggregator {
 public:
  MatchedFilterLagAggregator(
      ApmDataDumper* data_dumper,
      size_t max_filter_lag,
      const EchoCanceller3Config::Delay::DelaySelectionThresholds& thresholds);

  MatchedFilterLagAggregator() = delete;
  MatchedFilterLagAggregator(const MatchedFilterLagAggregator&) = delete;
  MatchedFilterLagAggregator& operator=(const MatchedFilterLagAggregator&) =
      delete;

  ~MatchedFilterLagAggregator();

  // Resets the aggregator. A hard reset also clears the knowledge that a
  // significant candidate has previously been found.
  void Reset(bool hard_reset);

  // Aggregates the provided lag estimates; returns a delay estimate when a
  // sufficiently supported histogram candidate exists.
  absl::optional<DelayEstimate> Aggregate(
      rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates);

 private:
  ApmDataDumper* const data_dumper_;
  // Vote counts per lag value.
  std::vector<int> histogram_;
  // Sliding window of the most recent lags that populate the histogram.
  std::array<int, 250> histogram_data_;
  int histogram_data_index_ = 0;
  bool significant_candidate_found_ = false;
  const EchoCanceller3Config::Delay::DelaySelectionThresholds thresholds_;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_

View File

@ -0,0 +1,60 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/moving_average.h"
#include <algorithm>
#include <functional>
#include "rtc_base/checks.h"
namespace webrtc {
namespace aec3 {
// Constructs an averager over |mem_len| inputs of length |num_elem|.
// Only mem_len - 1 past inputs are stored; the current input is combined
// with the memory on each Average() call, hence mem_len_ = mem_len - 1.
MovingAverage::MovingAverage(size_t num_elem, size_t mem_len)
    : num_elem_(num_elem),
      mem_len_(mem_len - 1),
      scaling_(1.0f / static_cast<float>(mem_len)),
      memory_(num_elem * mem_len_, 0.f),
      mem_index_(0) {
  RTC_DCHECK(num_elem_ > 0);
  RTC_DCHECK(mem_len > 0);
}
MovingAverage::~MovingAverage() = default;
// Computes the element-wise average of |input| and the mem_len - 1 previously
// supplied inputs, writes it to |output|, and stores |input| in the memory.
void MovingAverage::Average(rtc::ArrayView<const float> input,
                            rtc::ArrayView<float> output) {
  RTC_DCHECK(input.size() == num_elem_);
  RTC_DCHECK(output.size() == num_elem_);

  // Start from the newest input, then accumulate each stored past input.
  std::copy(input.begin(), input.end(), output.begin());
  for (size_t offset = 0; offset < memory_.size(); offset += num_elem_) {
    for (size_t k = 0; k < num_elem_; ++k) {
      output[k] += memory_[offset + k];
    }
  }

  // Normalize by the averaging window length.
  for (size_t k = 0; k < num_elem_; ++k) {
    output[k] *= scaling_;
  }

  // Overwrite the oldest stored input with the current one.
  if (mem_len_ > 0) {
    std::copy(input.begin(), input.end(),
              memory_.begin() + mem_index_ * num_elem_);
    mem_index_ = (mem_index_ + 1) % mem_len_;
  }
}
} // namespace aec3
} // namespace webrtc

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
#define MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
#include <stddef.h>
#include <vector>
#include "api/array_view.h"
namespace webrtc {
namespace aec3 {
class MovingAverage {
 public:
  // Creates an instance of MovingAverage that accepts inputs of length num_elem
  // and averages over mem_len inputs.
  MovingAverage(size_t num_elem, size_t mem_len);
  ~MovingAverage();
  // Computes the average of input and mem_len-1 previous inputs and stores the
  // result in output.
  void Average(rtc::ArrayView<const float> input, rtc::ArrayView<float> output);
 private:
  const size_t num_elem_;       // Length of each input vector.
  const size_t mem_len_;        // Number of stored past inputs (mem_len - 1).
  const float scaling_;         // 1 / mem_len, applied after summation.
  std::vector<float> memory_;   // Flat ring of past inputs, num_elem_ apart.
  size_t mem_index_;            // Next write slot within memory_.
};
} // namespace aec3
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_

View File

@ -0,0 +1,42 @@
/*
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_
#include <vector>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
// Class for selecting whether the suppressor is in the nearend or echo state.
// Interface for selecting whether the suppressor is in the nearend or echo
// state, based on per-band spectra.
class NearendDetector {
 public:
  virtual ~NearendDetector() {}
  // Returns whether the current state is the nearend state.
  virtual bool IsNearendState() const = 0;
  // Updates the state selection based on latest spectral estimates.
  // |initial_state| indicates whether the suppressor is still in its
  // initial (startup) state.
  virtual void Update(
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          nearend_spectrum,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          residual_echo_spectrum,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          comfort_noise_spectrum,
      bool initial_state) = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_

View File

@ -0,0 +1,174 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/refined_filter_update_gain.h"
#include <algorithm>
#include <functional>
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/echo_path_variability.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
#include "modules/audio_processing/aec3/subtractor_output.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
constexpr float kHErrorInitial = 10000.f;
constexpr int kPoorExcitationCounterInitial = 1000;
} // namespace
// Shared across instances to give each ApmDataDumper a unique id.
int RefinedFilterUpdateGain::instance_count_ = 0;
// |config_change_duration_blocks| determines over how many blocks a
// SetConfig() change is smoothly interpolated.
RefinedFilterUpdateGain::RefinedFilterUpdateGain(
    const EchoCanceller3Config::Filter::RefinedConfiguration& config,
    size_t config_change_duration_blocks)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_change_duration_blocks_(
          static_cast<int>(config_change_duration_blocks)),
      poor_excitation_counter_(kPoorExcitationCounterInitial) {
  // Apply the initial config immediately (no interpolation).
  SetConfig(config, true);
  H_error_.fill(kHErrorInitial);
  RTC_DCHECK_LT(0, config_change_duration_blocks_);
  one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_;
}
RefinedFilterUpdateGain::~RefinedFilterUpdateGain() {}
// Reacts to a known echo path change by resetting the relevant adaptation
// state. Delay changes invalidate the filter error estimate; any non-gain
// change restarts the excitation and call counters.
void RefinedFilterUpdateGain::HandleEchoPathChange(
    const EchoPathVariability& echo_path_variability) {
  const bool delay_changed = echo_path_variability.delay_change !=
                             EchoPathVariability::DelayAdjustment::kNone;
  if (delay_changed) {
    H_error_.fill(kHErrorInitial);
  }

  if (echo_path_variability.gain_change) {
    // TODO(bugs.webrtc.org/9526) Handle gain changes.
  } else {
    poor_excitation_counter_ = kPoorExcitationCounterInitial;
    call_counter_ = 0;
  }
}
// Computes the NLMS-style adaptation gain |gain_fft| for the refined filter
// from the render power, the subtractor errors and the filter error estimate
// H_error_. The gain is zeroed when the render lacks excitation or the
// capture is saturated.
void RefinedFilterUpdateGain::Compute(
    const std::array<float, kFftLengthBy2Plus1>& render_power,
    const RenderSignalAnalyzer& render_signal_analyzer,
    const SubtractorOutput& subtractor_output,
    rtc::ArrayView<const float> erl,
    size_t size_partitions,
    bool saturated_capture_signal,
    FftData* gain_fft) {
  RTC_DCHECK(gain_fft);
  // Introducing shorter notation to improve readability.
  const FftData& E_refined = subtractor_output.E_refined;
  const auto& E2_refined = subtractor_output.E2_refined;
  const auto& E2_coarse = subtractor_output.E2_coarse;
  FftData* G = gain_fft;
  const auto& X2 = render_power;
  ++call_counter_;
  UpdateCurrentConfig();
  if (render_signal_analyzer.PoorSignalExcitation()) {
    poor_excitation_counter_ = 0;
  }
  // Do not update the filter if the render is not sufficiently excited.
  // The counter gates adaptation for size_partitions blocks after poor
  // excitation, and during the first size_partitions calls.
  if (++poor_excitation_counter_ < size_partitions ||
      saturated_capture_signal || call_counter_ <= size_partitions) {
    G->re.fill(0.f);
    G->im.fill(0.f);
  } else {
    // NOTE(review): this comment appears to describe the noise gate level
    // used below — confirm. Originally: corresponds to WGN of power -39 dBFS.
    std::array<float, kFftLengthBy2Plus1> mu;
    // mu = H_error / (0.5* H_error* X2 + n * E2).
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      if (X2[k] >= current_config_.noise_gate) {
        mu[k] = H_error_[k] /
                (0.5f * H_error_[k] * X2[k] + size_partitions * E2_refined[k]);
      } else {
        mu[k] = 0.f;
      }
    }
    // Avoid updating the filter close to narrow bands in the render signals.
    render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);
    // H_error = H_error - 0.5 * mu * X2 * H_error.
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      H_error_[k] -= 0.5f * mu[k] * X2[k] * H_error_[k];
    }
    // G = mu * E.
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      G->re[k] = mu[k] * E_refined.re[k];
      G->im[k] = mu[k] * E_refined.im[k];
    }
  }
  // H_error = H_error + factor * erl.
  // Leak the error estimate upwards; leak faster when the coarse filter
  // outperforms the refined one (divergence), then clamp to the configured
  // floor/ceiling.
  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
    if (E2_coarse[k] >= E2_refined[k]) {
      H_error_[k] += current_config_.leakage_converged * erl[k];
    } else {
      H_error_[k] += current_config_.leakage_diverged * erl[k];
    }
    H_error_[k] = std::max(H_error_[k], current_config_.error_floor);
    H_error_[k] = std::min(H_error_[k], current_config_.error_ceil);
  }
  data_dumper_->DumpRaw("aec3_refined_gain_H_error", H_error_);
}
// Moves current_config_ one step towards target_config_ by linear
// interpolation, over config_change_duration_blocks_ calls. No-op once the
// change counter has reached zero.
void RefinedFilterUpdateGain::UpdateCurrentConfig() {
  RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_);
  if (config_change_counter_ > 0) {
    if (--config_change_counter_ > 0) {
      auto average = [](float from, float to, float from_weight) {
        return from * from_weight + to * (1.f - from_weight);
      };
      // Weight of the old config; decreases linearly towards 0.
      float change_factor =
          config_change_counter_ * one_by_config_change_duration_blocks_;
      current_config_.leakage_converged =
          average(old_target_config_.leakage_converged,
                  target_config_.leakage_converged, change_factor);
      current_config_.leakage_diverged =
          average(old_target_config_.leakage_diverged,
                  target_config_.leakage_diverged, change_factor);
      current_config_.error_floor =
          average(old_target_config_.error_floor, target_config_.error_floor,
                  change_factor);
      current_config_.error_ceil =
          average(old_target_config_.error_ceil, target_config_.error_ceil,
                  change_factor);
      current_config_.noise_gate =
          average(old_target_config_.noise_gate, target_config_.noise_gate,
                  change_factor);
    } else {
      // Interpolation finished: snap to the target.
      current_config_ = old_target_config_ = target_config_;
    }
  }
  RTC_DCHECK_LE(0, config_change_counter_);
}
} // namespace webrtc

View File

@ -0,0 +1,89 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_
#define MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_
#include <stddef.h>
#include <array>
#include <memory>
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
namespace webrtc {
class AdaptiveFirFilter;
class ApmDataDumper;
struct EchoPathVariability;
struct FftData;
class RenderSignalAnalyzer;
struct SubtractorOutput;
// Provides functionality for computing the adaptive gain for the refined
// filter.
class RefinedFilterUpdateGain {
 public:
  RefinedFilterUpdateGain(
      const EchoCanceller3Config::Filter::RefinedConfiguration& config,
      size_t config_change_duration_blocks);
  ~RefinedFilterUpdateGain();
  RefinedFilterUpdateGain(const RefinedFilterUpdateGain&) = delete;
  RefinedFilterUpdateGain& operator=(const RefinedFilterUpdateGain&) = delete;
  // Takes action in the case of a known echo path change.
  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
  // Computes the gain.
  void Compute(const std::array<float, kFftLengthBy2Plus1>& render_power,
               const RenderSignalAnalyzer& render_signal_analyzer,
               const SubtractorOutput& subtractor_output,
               rtc::ArrayView<const float> erl,
               size_t size_partitions,
               bool saturated_capture_signal,
               FftData* gain_fft);
  // Sets a new config. With |immediate_effect| the config is applied at once;
  // otherwise it is interpolated over config_change_duration_blocks_ calls to
  // Compute() (via UpdateCurrentConfig()).
  void SetConfig(
      const EchoCanceller3Config::Filter::RefinedConfiguration& config,
      bool immediate_effect) {
    if (immediate_effect) {
      old_target_config_ = current_config_ = target_config_ = config;
      config_change_counter_ = 0;
    } else {
      old_target_config_ = current_config_;
      target_config_ = config;
      config_change_counter_ = config_change_duration_blocks_;
    }
  }
 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const int config_change_duration_blocks_;
  float one_by_config_change_duration_blocks_;
  // Config actually in effect; interpolated from old_target_config_ towards
  // target_config_ after a non-immediate SetConfig().
  EchoCanceller3Config::Filter::RefinedConfiguration current_config_;
  EchoCanceller3Config::Filter::RefinedConfiguration target_config_;
  EchoCanceller3Config::Filter::RefinedConfiguration old_target_config_;
  // Per-band estimate of the filter error power.
  std::array<float, kFftLengthBy2Plus1> H_error_;
  size_t poor_excitation_counter_;
  size_t call_counter_ = 0;
  int config_change_counter_ = 0;
  // Updates the current config towards the target config.
  void UpdateCurrentConfig();
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/render_buffer.h"
#include <algorithm>
#include <functional>
#include "modules/audio_processing/aec3/aec3_common.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Wraps the three parallel render buffers (time-domain blocks, spectra and
// FFTs). All three must have the same size and aligned read/write indices;
// this class does not take ownership of them.
RenderBuffer::RenderBuffer(BlockBuffer* block_buffer,
                           SpectrumBuffer* spectrum_buffer,
                           FftBuffer* fft_buffer)
    : block_buffer_(block_buffer),
      spectrum_buffer_(spectrum_buffer),
      fft_buffer_(fft_buffer) {
  RTC_DCHECK(block_buffer_);
  RTC_DCHECK(spectrum_buffer_);
  RTC_DCHECK(fft_buffer_);
  RTC_DCHECK_EQ(block_buffer_->buffer.size(), fft_buffer_->buffer.size());
  RTC_DCHECK_EQ(spectrum_buffer_->buffer.size(), fft_buffer_->buffer.size());
  RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read);
  RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write);
}
RenderBuffer::~RenderBuffer() = default;
void RenderBuffer::SpectralSum(
size_t num_spectra,
std::array<float, kFftLengthBy2Plus1>* X2) const {
X2->fill(0.f);
int position = spectrum_buffer_->read;
for (size_t j = 0; j < num_spectra; ++j) {
for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
std::transform(X2->begin(), X2->end(), channel_spectrum.begin(),
X2->begin(), std::plus<float>());
}
position = spectrum_buffer_->IncIndex(position);
}
}
// Computes two nested spectral sums in one pass: |X2_shorter| over the
// |num_spectra_shorter| most recent spectra, and |X2_longer| over the
// |num_spectra_longer| most recent. The shorter sum is reused as the starting
// point of the longer one.
void RenderBuffer::SpectralSums(
    size_t num_spectra_shorter,
    size_t num_spectra_longer,
    std::array<float, kFftLengthBy2Plus1>* X2_shorter,
    std::array<float, kFftLengthBy2Plus1>* X2_longer) const {
  RTC_DCHECK_LE(num_spectra_shorter, num_spectra_longer);
  X2_shorter->fill(0.f);
  int position = spectrum_buffer_->read;
  size_t j = 0;
  for (; j < num_spectra_shorter; ++j) {
    for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
      std::transform(X2_shorter->begin(), X2_shorter->end(),
                     channel_spectrum.begin(), X2_shorter->begin(),
                     std::plus<float>());
    }
    position = spectrum_buffer_->IncIndex(position);
  }
  // Seed the longer sum with the shorter one and continue accumulating.
  std::copy(X2_shorter->begin(), X2_shorter->end(), X2_longer->begin());
  for (; j < num_spectra_longer; ++j) {
    for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
      std::transform(X2_longer->begin(), X2_longer->end(),
                     channel_spectrum.begin(), X2_longer->begin(),
                     std::plus<float>());
    }
    position = spectrum_buffer_->IncIndex(position);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,116 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
#include <stddef.h>
#include <array>
#include <vector>
#include "api/array_view.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/block_buffer.h"
#include "modules/audio_processing/aec3/fft_buffer.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "rtc_base/checks.h"
namespace webrtc {
// Provides a buffer of the render data for the echo remover.
// Provides a read-only view over the render data (time-domain blocks, spectra
// and FFTs) for the echo remover. Does not own the underlying buffers.
class RenderBuffer {
 public:
  RenderBuffer(BlockBuffer* block_buffer,
               SpectrumBuffer* spectrum_buffer,
               FftBuffer* fft_buffer);
  RenderBuffer() = delete;
  RenderBuffer(const RenderBuffer&) = delete;
  RenderBuffer& operator=(const RenderBuffer&) = delete;
  ~RenderBuffer();
  // Get a block. |buffer_offset_blocks| is relative to the read position.
  const std::vector<std::vector<std::vector<float>>>& Block(
      int buffer_offset_blocks) const {
    int position =
        block_buffer_->OffsetIndex(block_buffer_->read, buffer_offset_blocks);
    return block_buffer_->buffer[position];
  }
  // Get the spectrum from one of the FFTs in the buffer.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Spectrum(
      int buffer_offset_ffts) const {
    int position = spectrum_buffer_->OffsetIndex(spectrum_buffer_->read,
                                                 buffer_offset_ffts);
    return spectrum_buffer_->buffer[position];
  }
  // Returns the circular fft buffer.
  rtc::ArrayView<const std::vector<FftData>> GetFftBuffer() const {
    return fft_buffer_->buffer;
  }
  // Returns the current position in the circular buffer.
  size_t Position() const {
    RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read);
    RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write);
    return fft_buffer_->read;
  }
  // Returns the sum of the spectrums for a certain number of FFTs.
  void SpectralSum(size_t num_spectra,
                   std::array<float, kFftLengthBy2Plus1>* X2) const;
  // Returns the sums of the spectrums for two numbers of FFTs.
  void SpectralSums(size_t num_spectra_shorter,
                    size_t num_spectra_longer,
                    std::array<float, kFftLengthBy2Plus1>* X2_shorter,
                    std::array<float, kFftLengthBy2Plus1>* X2_longer) const;
  // Gets the recent activity seen in the render signal.
  bool GetRenderActivity() const { return render_activity_; }
  // Specifies the recent activity seen in the render signal.
  void SetRenderActivity(bool activity) { render_activity_ = activity; }
  // Returns the headroom between the write and the read positions in the
  // buffer.
  int Headroom() const {
    // The write and read indices are decreased over time.
    int headroom =
        fft_buffer_->write < fft_buffer_->read
            ? fft_buffer_->read - fft_buffer_->write
            : fft_buffer_->size - fft_buffer_->write + fft_buffer_->read;
    RTC_DCHECK_LE(0, headroom);
    RTC_DCHECK_GE(fft_buffer_->size, headroom);
    return headroom;
  }
  // Returns a reference to the spectrum buffer.
  const SpectrumBuffer& GetSpectrumBuffer() const { return *spectrum_buffer_; }
  // Returns a reference to the block buffer.
  const BlockBuffer& GetBlockBuffer() const { return *block_buffer_; }
 private:
  const BlockBuffer* const block_buffer_;
  const SpectrumBuffer* const spectrum_buffer_;
  const FftBuffer* const fft_buffer_;
  bool render_activity_ = false;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_

View File

@ -0,0 +1,523 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/render_delay_buffer.h"
#include <string.h>
#include <algorithm>
#include <cmath>
#include <memory>
#include <numeric>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/aec3_fft.h"
#include "modules/audio_processing/aec3/alignment_mixer.h"
#include "modules/audio_processing/aec3/block_buffer.h"
#include "modules/audio_processing/aec3/decimator.h"
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include "modules/audio_processing/aec3/fft_buffer.h"
#include "modules/audio_processing/aec3/fft_data.h"
#include "modules/audio_processing/aec3/render_buffer.h"
#include "modules/audio_processing/aec3/spectrum_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"
#include "system_wrappers/include/field_trial.h"
namespace webrtc {
namespace {
// Returns true unless the corresponding field-trial kill switch is enabled.
bool UpdateCaptureCallCounterOnSkippedBlocks() {
  constexpr char kKillSwitch[] =
      "WebRTC-Aec3RenderBufferCallCounterUpdateKillSwitch";
  return !field_trial::IsEnabled(kKillSwitch);
}
// Buffers render blocks (plus their spectra, FFTs and a downsampled copy) and
// aligns the render read position with the capture stream according to an
// estimated or externally supplied delay.
class RenderDelayBufferImpl final : public RenderDelayBuffer {
 public:
  RenderDelayBufferImpl(const EchoCanceller3Config& config,
                        int sample_rate_hz,
                        size_t num_render_channels);
  RenderDelayBufferImpl() = delete;
  ~RenderDelayBufferImpl() override;
  void Reset() override;
  BufferingEvent Insert(
      const std::vector<std::vector<std::vector<float>>>& block) override;
  BufferingEvent PrepareCaptureProcessing() override;
  void HandleSkippedCaptureProcessing() override;
  bool AlignFromDelay(size_t delay) override;
  void AlignFromExternalDelay() override;
  size_t Delay() const override { return ComputeDelay(); }
  size_t MaxDelay() const override {
    return blocks_.buffer.size() - 1 - buffer_headroom_;
  }
  RenderBuffer* GetRenderBuffer() override { return &echo_remover_buffer_; }
  const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const override {
    return low_rate_;
  }
  int BufferLatency() const;
  void SetAudioBufferDelay(int delay_ms) override;
  bool HasReceivedBufferDelay() override;
 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const Aec3Optimization optimization_;
  const EchoCanceller3Config config_;
  const bool update_capture_call_counter_on_skipped_blocks_;
  // Linear gain applied to inserted render blocks (from dB config value).
  const float render_linear_amplitude_gain_;
  const rtc::LoggingSeverity delay_log_level_;
  size_t down_sampling_factor_;
  const int sub_block_size_;
  // The three parallel render buffers plus the low-rate (downsampled) buffer.
  BlockBuffer blocks_;
  SpectrumBuffer spectra_;
  FftBuffer ffts_;
  // Currently applied delay in blocks; unset until AlignFromDelay is called.
  absl::optional<size_t> delay_;
  RenderBuffer echo_remover_buffer_;
  DownsampledRenderBuffer low_rate_;
  AlignmentMixer render_mixer_;
  Decimator render_decimator_;
  const Aec3Fft fft_;
  std::vector<float> render_ds_;
  const int buffer_headroom_;
  // Render/capture call-jitter tracking state.
  bool last_call_was_render_ = false;
  int num_api_calls_in_a_row_ = 0;
  int max_observed_jitter_ = 1;
  int64_t capture_call_counter_ = 0;
  int64_t render_call_counter_ = 0;
  bool render_activity_ = false;
  size_t render_activity_counter_ = 0;
  // Delay reported by the audio buffer, in blocks.
  absl::optional<int> external_audio_buffer_delay_;
  bool external_audio_buffer_delay_verified_after_reset_ = false;
  size_t min_latency_blocks_ = 0;
  size_t excess_render_detection_counter_ = 0;
  int MapDelayToTotalDelay(size_t delay) const;
  int ComputeDelay() const;
  void ApplyTotalDelay(int delay);
  void InsertBlock(const std::vector<std::vector<std::vector<float>>>& block,
                   int previous_write);
  bool DetectActiveRender(rtc::ArrayView<const float> x) const;
  bool DetectExcessRenderBlocks();
  void IncrementWriteIndices();
  void IncrementLowRateReadIndices();
  void IncrementReadIndices();
  bool RenderOverrun();
  bool RenderUnderrun();
};
// Shared across instances to give each ApmDataDumper a unique id.
int RenderDelayBufferImpl::instance_count_ = 0;
// Sizes all buffers from the config and signal format, then performs an
// initial Reset() to establish the default delay.
RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
                                             int sample_rate_hz,
                                             size_t num_render_channels)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      optimization_(DetectOptimization()),
      config_(config),
      update_capture_call_counter_on_skipped_blocks_(
          UpdateCaptureCallCounterOnSkippedBlocks()),
      render_linear_amplitude_gain_(
          std::pow(10.0f, config_.render_levels.render_power_gain_db / 20.f)),
      delay_log_level_(config_.delay.log_warning_on_delay_changes
                           ? rtc::LS_WARNING
                           : rtc::LS_VERBOSE),
      down_sampling_factor_(config.delay.down_sampling_factor),
      sub_block_size_(static_cast<int>(down_sampling_factor_ > 0
                                           ? kBlockSize / down_sampling_factor_
                                           : kBlockSize)),
      blocks_(GetRenderDelayBufferSize(down_sampling_factor_,
                                       config.delay.num_filters,
                                       config.filter.refined.length_blocks),
              NumBandsForRate(sample_rate_hz),
              num_render_channels,
              kBlockSize),
      spectra_(blocks_.buffer.size(), num_render_channels),
      ffts_(blocks_.buffer.size(), num_render_channels),
      delay_(config_.delay.default_delay),
      echo_remover_buffer_(&blocks_, &spectra_, &ffts_),
      low_rate_(GetDownSampledBufferSize(down_sampling_factor_,
                                         config.delay.num_filters)),
      render_mixer_(num_render_channels, config.delay.render_alignment_mixing),
      render_decimator_(down_sampling_factor_),
      fft_(),
      render_ds_(sub_block_size_, 0.f),
      buffer_headroom_(config.filter.refined.length_blocks) {
  // The block, spectrum and fft buffers must stay in lockstep.
  RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
  RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
  for (size_t i = 0; i < blocks_.buffer.size(); ++i) {
    RTC_DCHECK_EQ(blocks_.buffer[i][0].size(), ffts_.buffer[i].size());
    RTC_DCHECK_EQ(spectra_.buffer[i].size(), ffts_.buffer[i].size());
  }
  Reset();
}
RenderDelayBufferImpl::~RenderDelayBufferImpl() = default;
// Resets the buffer delays and clears the reported delays. If an external
// audio buffer delay has been reported, it is used (minus a headroom) as the
// initial delay; otherwise the configured default delay is applied and the
// estimated delay is unset.
void RenderDelayBufferImpl::Reset() {
  last_call_was_render_ = false;
  num_api_calls_in_a_row_ = 1;
  min_latency_blocks_ = 0;
  excess_render_detection_counter_ = 0;
  // Initialize the read index to one sub-block before the write index.
  low_rate_.read = low_rate_.OffsetIndex(low_rate_.write, sub_block_size_);
  // Check for any external audio buffer delay and whether it is feasible.
  if (external_audio_buffer_delay_) {
    const int headroom = 2;
    size_t audio_buffer_delay_to_set;
    // Minimum delay is 1 (like the low-rate render buffer).
    if (*external_audio_buffer_delay_ <= headroom) {
      audio_buffer_delay_to_set = 1;
    } else {
      audio_buffer_delay_to_set = *external_audio_buffer_delay_ - headroom;
    }
    audio_buffer_delay_to_set = std::min(audio_buffer_delay_to_set, MaxDelay());
    // When an external delay estimate is available, use that delay as the
    // initial render buffer delay.
    ApplyTotalDelay(audio_buffer_delay_to_set);
    delay_ = ComputeDelay();
    external_audio_buffer_delay_verified_after_reset_ = false;
  } else {
    // If an external delay estimate is not available, use that delay as the
    // initial delay. Set the render buffer delays to the default delay.
    ApplyTotalDelay(config_.delay.default_delay);
    // Unset the delays which are set by AlignFromDelay.
    delay_ = absl::nullopt;
  }
}
// Inserts a new block into the render buffers. Returns kRenderOverrun when
// more render than capture data has arrived and the buffers had to be reset.
RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
    const std::vector<std::vector<std::vector<float>>>& block) {
  ++render_call_counter_;
  // Track the worst-case render/capture call jitter for logging.
  if (delay_) {
    if (!last_call_was_render_) {
      last_call_was_render_ = true;
      num_api_calls_in_a_row_ = 1;
    } else {
      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
        max_observed_jitter_ = num_api_calls_in_a_row_;
        RTC_LOG_V(delay_log_level_)
            << "New max number api jitter observed at render block "
            << render_call_counter_ << ": " << num_api_calls_in_a_row_
            << " blocks";
      }
    }
  }
  // Increase the write indices to where the new blocks should be written.
  const int previous_write = blocks_.write;
  IncrementWriteIndices();
  // Allow overrun and do a reset when render overrun occurrs due to more render
  // data being inserted than capture data is received.
  BufferingEvent event =
      RenderOverrun() ? BufferingEvent::kRenderOverrun : BufferingEvent::kNone;
  // Detect and update render activity. Only the first channel of the first
  // band is inspected.
  if (!render_activity_) {
    render_activity_counter_ += DetectActiveRender(block[0][0]) ? 1 : 0;
    render_activity_ = render_activity_counter_ >= 20;
  }
  // Insert the new render block into the specified position.
  InsertBlock(block, previous_write);
  if (event != BufferingEvent::kNone) {
    Reset();
  }
  return event;
}
// Keeps the capture call counter in sync when a capture block is skipped,
// unless disabled via the field-trial kill switch.
void RenderDelayBufferImpl::HandleSkippedCaptureProcessing() {
  if (update_capture_call_counter_on_skipped_blocks_) {
    ++capture_call_counter_;
  }
}
// Prepares the render buffers for processing another capture block: advances
// the read indices, handling render overrun (excess render blocks) and
// underrun, and publishes the render activity flag to the echo remover.
RenderDelayBuffer::BufferingEvent
RenderDelayBufferImpl::PrepareCaptureProcessing() {
  RenderDelayBuffer::BufferingEvent event = BufferingEvent::kNone;
  ++capture_call_counter_;
  // Track the worst-case render/capture call jitter for logging.
  if (delay_) {
    if (last_call_was_render_) {
      last_call_was_render_ = false;
      num_api_calls_in_a_row_ = 1;
    } else {
      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
        max_observed_jitter_ = num_api_calls_in_a_row_;
        RTC_LOG_V(delay_log_level_)
            << "New max number api jitter observed at capture block "
            << capture_call_counter_ << ": " << num_api_calls_in_a_row_
            << " blocks";
      }
    }
  }
  if (DetectExcessRenderBlocks()) {
    // Too many render blocks compared to capture blocks. Risk of delay ending
    // up before the filter used by the delay estimator.
    RTC_LOG_V(delay_log_level_)
        << "Excess render blocks detected at block " << capture_call_counter_;
    Reset();
    event = BufferingEvent::kRenderOverrun;
  } else if (RenderUnderrun()) {
    // Don't increment the read indices of the low rate buffer if there is a
    // render underrun.
    RTC_LOG_V(delay_log_level_)
        << "Render buffer underrun detected at block " << capture_call_counter_;
    IncrementReadIndices();
    // Incrementing the buffer index without increasing the low rate buffer
    // index means that the delay is reduced by one.
    if (delay_ && *delay_ > 0)
      delay_ = *delay_ - 1;
    event = BufferingEvent::kRenderUnderrun;
  } else {
    // Increment the read indices in the render buffers to point to the most
    // recent block to use in the capture processing.
    IncrementLowRateReadIndices();
    IncrementReadIndices();
  }
  echo_remover_buffer_.SetRenderActivity(render_activity_);
  // Reset the activity detection state once activity has been reported.
  if (render_activity_) {
    render_activity_counter_ = 0;
    render_activity_ = false;
  }
  return event;
}
// Sets the delay and returns a bool indicating whether the delay was changed.
// Must not be called when an external delay estimator is configured.
bool RenderDelayBufferImpl::AlignFromDelay(size_t delay) {
  RTC_DCHECK(!config_.delay.use_external_delay_estimator);
  // One-shot sanity log comparing the first internal estimate after a reset
  // against the externally reported audio buffer delay.
  if (!external_audio_buffer_delay_verified_after_reset_ &&
      external_audio_buffer_delay_ && delay_) {
    int difference = static_cast<int>(delay) - static_cast<int>(*delay_);
    RTC_LOG_V(delay_log_level_)
        << "Mismatch between first estimated delay after reset "
           "and externally reported audio buffer delay: "
        << difference << " blocks";
    external_audio_buffer_delay_verified_after_reset_ = true;
  }
  if (delay_ && *delay_ == delay) {
    return false;
  }
  delay_ = delay;
  // Compute the total delay and limit the delay to the allowed range.
  int total_delay = MapDelayToTotalDelay(*delay_);
  total_delay =
      std::min(MaxDelay(), static_cast<size_t>(std::max(total_delay, 0)));
  // Apply the delay to the buffers.
  ApplyTotalDelay(total_delay);
  return true;
}
// Stores an externally reported audio buffer delay, converted to blocks.
void RenderDelayBufferImpl::SetAudioBufferDelay(int delay_ms) {
  if (!external_audio_buffer_delay_) {
    RTC_LOG_V(delay_log_level_)
        << "Receiving a first externally reported audio buffer delay of "
        << delay_ms << " ms.";
  }
  // Convert delay from milliseconds to blocks (rounded down).
  // NOTE(review): assumes 4 ms per block — confirm against kBlockSize and the
  // band sample rate.
  external_audio_buffer_delay_ = delay_ms / 4;
}
// Returns whether an external audio buffer delay has ever been reported.
bool RenderDelayBufferImpl::HasReceivedBufferDelay() {
  return external_audio_buffer_delay_.has_value();
}
// Maps the externally computed delay to the delay used internally, by adding
// the current buffer latency.
int RenderDelayBufferImpl::MapDelayToTotalDelay(
    size_t external_delay_blocks) const {
  const int latency_blocks = BufferLatency();
  return latency_blocks + static_cast<int>(external_delay_blocks);
}
// Returns the delay (not including call jitter), derived from the distance
// between the spectrum buffer's read and write indices.
int RenderDelayBufferImpl::ComputeDelay() const {
  const int latency_blocks = BufferLatency();
  int internal_delay = spectra_.read >= spectra_.write
                           ? spectra_.read - spectra_.write
                           : spectra_.size + spectra_.read - spectra_.write;
  return internal_delay - latency_blocks;
}
// Set the read indices according to the delay.
// NOTE(review): the block buffer is offset by -delay while the spectrum and
// fft buffers use +delay — presumably their indices advance in opposite
// directions; confirm against BlockBuffer/SpectrumBuffer::OffsetIndex.
void RenderDelayBufferImpl::ApplyTotalDelay(int delay) {
  RTC_LOG_V(delay_log_level_)
      << "Applying total delay of " << delay << " blocks.";
  blocks_.read = blocks_.OffsetIndex(blocks_.write, -delay);
  spectra_.read = spectra_.OffsetIndex(spectra_.write, delay);
  ffts_.read = ffts_.OffsetIndex(ffts_.write, delay);
}
// Aligns the buffers using only the externally reported delay, corrected for
// the render/capture call-count difference and the configured headroom.
// Requires the external delay estimator to be enabled in the config.
void RenderDelayBufferImpl::AlignFromExternalDelay() {
  RTC_DCHECK(config_.delay.use_external_delay_estimator);
  if (external_audio_buffer_delay_) {
    const int64_t delay = render_call_counter_ - capture_call_counter_ +
                          *external_audio_buffer_delay_;
    // Convert the sample-domain headroom to blocks and subtract it.
    const int64_t delay_with_headroom =
        delay - config_.delay.delay_headroom_samples / kBlockSize;
    ApplyTotalDelay(delay_with_headroom);
  }
}
// Inserts a block into the render buffers: copies the time-domain data (with
// optional amplitude gain), produces the downsampled low-rate signal used for
// delay estimation, and computes the FFT and spectrum for band 0 of each
// channel. |previous_write| indexes the previously written block, needed for
// the padded FFT overlap.
void RenderDelayBufferImpl::InsertBlock(
    const std::vector<std::vector<std::vector<float>>>& block,
    int previous_write) {
  auto& b = blocks_;
  auto& lr = low_rate_;
  auto& ds = render_ds_;
  auto& f = ffts_;
  auto& s = spectra_;
  const size_t num_bands = b.buffer[b.write].size();
  const size_t num_render_channels = b.buffer[b.write][0].size();
  RTC_DCHECK_EQ(block.size(), b.buffer[b.write].size());
  for (size_t band = 0; band < num_bands; ++band) {
    RTC_DCHECK_EQ(block[band].size(), num_render_channels);
    RTC_DCHECK_EQ(b.buffer[b.write][band].size(), num_render_channels);
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      RTC_DCHECK_EQ(block[band][ch].size(), b.buffer[b.write][band][ch].size());
      std::copy(block[band][ch].begin(), block[band][ch].end(),
                b.buffer[b.write][band][ch].begin());
    }
  }
  // Apply the configured render amplitude gain, if any.
  if (render_linear_amplitude_gain_ != 1.f) {
    for (size_t band = 0; band < num_bands; ++band) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        // Use kBlockSize (== 64) rather than a magic number; it is the same
        // per-block sample count used for downmixed_render below.
        for (size_t k = 0; k < kBlockSize; ++k) {
          b.buffer[b.write][band][ch][k] *= render_linear_amplitude_gain_;
        }
      }
    }
  }
  // Downmix, decimate and store the low-rate signal (reversed, as expected by
  // the matched-filter delay estimator).
  std::array<float, kBlockSize> downmixed_render;
  render_mixer_.ProduceOutput(b.buffer[b.write][0], downmixed_render);
  render_decimator_.Decimate(downmixed_render, ds);
  data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
                        16000 / down_sampling_factor_, 1);
  std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
  // Compute the padded FFT and spectrum of band 0 for each channel.
  for (size_t channel = 0; channel < b.buffer[b.write][0].size(); ++channel) {
    fft_.PaddedFft(b.buffer[b.write][0][channel],
                   b.buffer[previous_write][0][channel],
                   &f.buffer[f.write][channel]);
    f.buffer[f.write][channel].Spectrum(optimization_,
                                        s.buffer[s.write][channel]);
  }
}
// Classifies the render sub-block as active when its energy exceeds the
// configured per-sample activity limit accumulated over kFftLengthBy2
// samples.
bool RenderDelayBufferImpl::DetectActiveRender(
    rtc::ArrayView<const float> x) const {
  const float limit = config_.render_levels.active_render_limit;
  const float threshold = (limit * limit) * kFftLengthBy2;
  const float energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
  return energy > threshold;
}
// Detects whether more render than capture blocks have been received over the
// last detection interval, in which case excess render blocks should be
// discarded to keep the buffers aligned.
bool RenderDelayBufferImpl::DetectExcessRenderBlocks() {
  bool excess_render_detected = false;
  const size_t latency_blocks = static_cast<size_t>(BufferLatency());
  // The recently seen minimum latency in blocks. Should be close to 0.
  min_latency_blocks_ = std::min(min_latency_blocks_, latency_blocks);
  // After processing a configurable number of blocks the minimum latency is
  // checked.
  if (++excess_render_detection_counter_ >=
      config_.buffering.excess_render_detection_interval_blocks) {
    // If the minimum latency is not lower than the threshold there have been
    // more render than capture frames.
    excess_render_detected = min_latency_blocks_ >
                             config_.buffering.max_allowed_excess_render_blocks;
    // Reset the counter and let the minimum latency be the current latency.
    min_latency_blocks_ = latency_blocks;
    excess_render_detection_counter_ = 0;
  }
  data_dumper_->DumpRaw("aec3_latency_blocks", latency_blocks);
  data_dumper_->DumpRaw("aec3_min_latency_blocks", min_latency_blocks_);
  data_dumper_->DumpRaw("aec3_excess_render_detected", excess_render_detected);
  return excess_render_detected;
}
// Computes the latency in the buffer (the number of unread sub-blocks).
int RenderDelayBufferImpl::BufferLatency() const {
const DownsampledRenderBuffer& l = low_rate_;
int latency_samples = (l.buffer.size() + l.read - l.write) % l.buffer.size();
int latency_blocks = latency_samples / sub_block_size_;
return latency_blocks;
}
// Increments the write indices for the render buffers.
void RenderDelayBufferImpl::IncrementWriteIndices() {
  // The low-rate buffer write index moves backwards by one sub-block; the
  // block buffer index moves forwards while the spectra and FFT indices move
  // backwards by one block.
  low_rate_.UpdateWriteIndex(-sub_block_size_);
  blocks_.IncWriteIndex();
  spectra_.DecWriteIndex();
  ffts_.DecWriteIndex();
}
// Increments the read indices of the low rate render buffers.
void RenderDelayBufferImpl::IncrementLowRateReadIndices() {
  // Negative update: the low-rate buffer indices move backwards, mirroring
  // the write index update in IncrementWriteIndices.
  low_rate_.UpdateReadIndex(-sub_block_size_);
}
// Increments the read indices for the render buffers.
void RenderDelayBufferImpl::IncrementReadIndices() {
  // Only advance while there is unread data; otherwise the read index would
  // overtake the write index.
  if (blocks_.read != blocks_.write) {
    blocks_.IncReadIndex();
    spectra_.DecReadIndex();
    ffts_.DecReadIndex();
  }
}
// Checks for a render buffer overrun.
bool RenderDelayBufferImpl::RenderOverrun() {
return low_rate_.read == low_rate_.write || blocks_.read == blocks_.write;
}
// Checks for a render buffer underrun: the low-rate buffer has no unread
// data left.
bool RenderDelayBufferImpl::RenderUnderrun() {
  return low_rate_.read == low_rate_.write;
}
} // namespace
// Factory method creating the default RenderDelayBuffer implementation.
// Ownership of the returned object passes to the caller.
RenderDelayBuffer* RenderDelayBuffer::Create(const EchoCanceller3Config& config,
                                             int sample_rate_hz,
                                             size_t num_render_channels) {
  auto* buffer =
      new RenderDelayBufferImpl(config, sample_rate_hz, num_render_channels);
  return buffer;
}
} // namespace webrtc

View File

@ -0,0 +1,86 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
#include <stddef.h>
#include <vector>
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include "modules/audio_processing/aec3/render_buffer.h"
namespace webrtc {
// Class for buffering the incoming render blocks such that these may be
// extracted with a specified delay.
// Class for buffering the incoming render blocks such that these may be
// extracted with a specified delay.
class RenderDelayBuffer {
 public:
  // Events that may occur while buffering render data.
  enum class BufferingEvent {
    kNone,
    kRenderUnderrun,
    kRenderOverrun,
    kApiCallSkew
  };

  // Creates the default implementation. Ownership passes to the caller.
  static RenderDelayBuffer* Create(const EchoCanceller3Config& config,
                                   int sample_rate_hz,
                                   size_t num_render_channels);
  virtual ~RenderDelayBuffer() = default;

  // Resets the buffer alignment.
  virtual void Reset() = 0;

  // Inserts a block into the buffer.
  virtual BufferingEvent Insert(
      const std::vector<std::vector<std::vector<float>>>& block) = 0;

  // Updates the buffers one step based on the specified buffer delay. Returns
  // an enum indicating whether there was a special event that occurred.
  virtual BufferingEvent PrepareCaptureProcessing() = 0;

  // Called on capture blocks where PrepareCaptureProcessing is not called.
  virtual void HandleSkippedCaptureProcessing() = 0;

  // Sets the buffer delay and returns a bool indicating whether the delay
  // changed.
  virtual bool AlignFromDelay(size_t delay) = 0;

  // Sets the buffer delay from the most recently reported external delay.
  virtual void AlignFromExternalDelay() = 0;

  // Gets the buffer delay.
  virtual size_t Delay() const = 0;

  // Gets the maximum supported buffer delay.
  virtual size_t MaxDelay() const = 0;

  // Returns the render buffer for the echo remover.
  virtual RenderBuffer* GetRenderBuffer() = 0;

  // Returns the downsampled render buffer.
  virtual const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const = 0;

  // Returns the maximum non-causal offset that can occur in the delay buffer.
  static int DelayEstimatorOffset(const EchoCanceller3Config& config);

  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;

  // Returns whether an external delay estimate has been reported via
  // SetAudioBufferDelay.
  virtual bool HasReceivedBufferDelay() = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_

View File

@ -0,0 +1,196 @@
/*
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "modules/audio_processing/aec3/render_delay_controller.h"
#include <stddef.h>
#include <algorithm>
#include <memory>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/aec3_common.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/atomic_ops.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
// Implementation of RenderDelayController. Wraps an EchoPathDelayEstimator
// and converts its sample-based estimates into render delay buffer delays
// expressed in blocks.
class RenderDelayControllerImpl final : public RenderDelayController {
 public:
  RenderDelayControllerImpl(const EchoCanceller3Config& config,
                            int sample_rate_hz,
                            size_t num_capture_channels);

  RenderDelayControllerImpl() = delete;
  RenderDelayControllerImpl(const RenderDelayControllerImpl&) = delete;
  RenderDelayControllerImpl& operator=(const RenderDelayControllerImpl&) =
      delete;

  ~RenderDelayControllerImpl() override;
  void Reset(bool reset_delay_confidence) override;
  void LogRenderCall() override;
  absl::optional<DelayEstimate> GetDelay(
      const DownsampledRenderBuffer& render_buffer,
      size_t render_delay_buffer_delay,
      const std::vector<std::vector<float>>& capture) override;
  bool HasClockdrift() const override;

 private:
  // Gives each instance a unique id for the data dumper.
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  // Hysteresis applied when the delay increases (in blocks).
  const int hysteresis_limit_blocks_;
  // Headroom subtracted from the estimated delay (in samples).
  const int delay_headroom_samples_;
  // Most recently computed render delay buffer delay (in blocks).
  absl::optional<DelayEstimate> delay_;
  EchoPathDelayEstimator delay_estimator_;
  RenderDelayControllerMetrics metrics_;
  // Most recently tracked delay estimate (in samples).
  absl::optional<DelayEstimate> delay_samples_;
  size_t capture_call_counter_ = 0;
  int delay_change_counter_ = 0;
  // Quality of the estimate used in the previous GetDelay() call.
  DelayEstimate::Quality last_delay_estimate_quality_;
};
// Converts an estimated delay in samples into a render delay buffer delay in
// blocks, subtracting the configured headroom and applying hysteresis against
// the previously used delay.
DelayEstimate ComputeBufferDelay(
    const absl::optional<DelayEstimate>& current_delay,
    int hysteresis_limit_blocks,
    int delay_headroom_samples,
    DelayEstimate estimated_delay) {
  // Subtract the delay headroom, clamping at zero.
  const int samples_with_headroom = std::max(
      static_cast<int>(estimated_delay.delay) - delay_headroom_samples, 0);

  // Quantize to whole blocks, rounding down.
  size_t delay_blocks = samples_with_headroom >> kBlockSizeLog2;

  // Apply hysteresis: keep the previous delay when the new one is only
  // slightly larger.
  if (current_delay) {
    const size_t previous_blocks = current_delay->delay;
    const bool within_hysteresis =
        delay_blocks > previous_blocks &&
        delay_blocks <= previous_blocks + hysteresis_limit_blocks;
    if (within_hysteresis) {
      delay_blocks = previous_blocks;
    }
  }

  DelayEstimate result = estimated_delay;
  result.delay = delay_blocks;
  return result;
}
int RenderDelayControllerImpl::instance_count_ = 0;

// Constructs the controller from the AEC3 configuration. The sample rate is
// validated and used only for logging the delay estimation properties.
RenderDelayControllerImpl::RenderDelayControllerImpl(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_capture_channels)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      hysteresis_limit_blocks_(
          static_cast<int>(config.delay.hysteresis_limit_blocks)),
      delay_headroom_samples_(config.delay.delay_headroom_samples),
      delay_estimator_(data_dumper_.get(), config, num_capture_channels),
      last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
  delay_estimator_.LogDelayEstimationProperties(sample_rate_hz, 0);
}
RenderDelayControllerImpl::~RenderDelayControllerImpl() = default;

// Resets the controller state. When `reset_delay_confidence` is true the
// behavior is as if the call was restarted: the stored estimate quality falls
// back to coarse.
void RenderDelayControllerImpl::Reset(bool reset_delay_confidence) {
  delay_ = absl::nullopt;
  delay_samples_ = absl::nullopt;
  delay_estimator_.Reset(reset_delay_confidence);
  delay_change_counter_ = 0;
  if (reset_delay_confidence) {
    last_delay_estimate_quality_ = DelayEstimate::Quality::kCoarse;
  }
}

// Render calls require no per-call bookkeeping in this implementation.
void RenderDelayControllerImpl::LogRenderCall() {}
// Runs the echo path delay estimator on the current capture block and, when
// an estimate is available, converts it into a render delay buffer delay (in
// blocks) with headroom and hysteresis applied.
absl::optional<DelayEstimate> RenderDelayControllerImpl::GetDelay(
    const DownsampledRenderBuffer& render_buffer,
    size_t render_delay_buffer_delay,
    const std::vector<std::vector<float>>& capture) {
  RTC_DCHECK_EQ(kBlockSize, capture[0].size());
  ++capture_call_counter_;

  auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture);

  if (delay_samples) {
    // Restart the settling counter whenever the estimate changes.
    if (!delay_samples_ || delay_samples->delay != delay_samples_->delay) {
      delay_change_counter_ = 0;
    }
    if (delay_samples_) {
      // Update the stored estimate in place, tracking for how many blocks it
      // has remained unchanged.
      delay_samples_->blocks_since_last_change =
          delay_samples_->delay == delay_samples->delay
              ? delay_samples_->blocks_since_last_change + 1
              : 0;
      delay_samples_->blocks_since_last_update = 0;
      delay_samples_->delay = delay_samples->delay;
      delay_samples_->quality = delay_samples->quality;
    } else {
      delay_samples_ = delay_samples;
    }
  } else {
    // No new estimate available; age the stored one.
    if (delay_samples_) {
      ++delay_samples_->blocks_since_last_change;
      ++delay_samples_->blocks_since_last_update;
    }
  }

  if (delay_change_counter_ < 2 * kNumBlocksPerSecond) {
    ++delay_change_counter_;
  }

  if (delay_samples_) {
    // Compute the render delay buffer delay. Hysteresis is applied only when
    // both the previous and the current estimates are of refined quality.
    const bool use_hysteresis =
        last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined &&
        delay_samples_->quality == DelayEstimate::Quality::kRefined;
    delay_ = ComputeBufferDelay(delay_,
                                use_hysteresis ? hysteresis_limit_blocks_ : 0,
                                delay_headroom_samples_, *delay_samples_);
    last_delay_estimate_quality_ = delay_samples_->quality;
  }

  metrics_.Update(delay_samples_ ? absl::optional<size_t>(delay_samples_->delay)
                                 : absl::nullopt,
                  delay_ ? delay_->delay : 0, 0, delay_estimator_.Clockdrift());

  data_dumper_->DumpRaw("aec3_render_delay_controller_delay",
                        delay_samples ? delay_samples->delay : 0);
  data_dumper_->DumpRaw("aec3_render_delay_controller_buffer_delay",
                        delay_ ? delay_->delay : 0);

  return delay_;
}
// Reports whether the delay estimator has detected clockdrift between the
// render and capture sides.
bool RenderDelayControllerImpl::HasClockdrift() const {
  const auto drift_level = delay_estimator_.Clockdrift();
  return drift_level != ClockdriftDetector::Level::kNone;
}
} // namespace
// Factory method creating the default RenderDelayController implementation.
// Ownership of the returned object passes to the caller.
RenderDelayController* RenderDelayController::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_capture_channels) {
  auto* controller = new RenderDelayControllerImpl(config, sample_rate_hz,
                                                   num_capture_channels);
  return controller;
}

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/echo_canceller3_config.h"
#include "modules/audio_processing/aec3/delay_estimate.h"
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
#include "modules/audio_processing/aec3/render_delay_buffer.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
namespace webrtc {
// Class for aligning the render and capture signal using a RenderDelayBuffer.
// Class for aligning the render and capture signal using a RenderDelayBuffer.
class RenderDelayController {
 public:
  // Creates the default implementation. Ownership passes to the caller.
  static RenderDelayController* Create(const EchoCanceller3Config& config,
                                       int sample_rate_hz,
                                       size_t num_capture_channels);
  virtual ~RenderDelayController() = default;

  // Resets the delay controller. If the delay confidence is reset, the reset
  // behavior is as if the call is restarted.
  virtual void Reset(bool reset_delay_confidence) = 0;

  // Logs a render call.
  virtual void LogRenderCall() = 0;

  // Aligns the render buffer content with the capture signal. Returns the
  // delay estimate (in blocks) when one is available.
  virtual absl::optional<DelayEstimate> GetDelay(
      const DownsampledRenderBuffer& render_buffer,
      size_t render_delay_buffer_delay,
      const std::vector<std::vector<float>>& capture) = 0;

  // Returns true if clockdrift has been detected.
  virtual bool HasClockdrift() const = 0;
};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_

Some files were not shown because too many files have changed in this diff Show More