Update to current webrtc library
This is from the upstream library commit id 3326535126e435f1ba647885ce43a8f0f3d317eb, corresponding to Chromium 88.0.4290.1.
This commit is contained in:
367
webrtc/modules/audio_processing/aec3/BUILD.gn
Normal file
367
webrtc/modules/audio_processing/aec3/BUILD.gn
Normal file
@ -0,0 +1,367 @@
|
||||
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
import("../../../webrtc.gni")
|
||||
|
||||
rtc_library("aec3") {
|
||||
visibility = [ "*" ]
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
sources = [
|
||||
"adaptive_fir_filter.cc",
|
||||
"adaptive_fir_filter_erl.cc",
|
||||
"aec3_common.cc",
|
||||
"aec3_fft.cc",
|
||||
"aec_state.cc",
|
||||
"aec_state.h",
|
||||
"alignment_mixer.cc",
|
||||
"alignment_mixer.h",
|
||||
"api_call_jitter_metrics.cc",
|
||||
"api_call_jitter_metrics.h",
|
||||
"block_buffer.cc",
|
||||
"block_delay_buffer.cc",
|
||||
"block_delay_buffer.h",
|
||||
"block_framer.cc",
|
||||
"block_framer.h",
|
||||
"block_processor.cc",
|
||||
"block_processor.h",
|
||||
"block_processor_metrics.cc",
|
||||
"block_processor_metrics.h",
|
||||
"clockdrift_detector.cc",
|
||||
"clockdrift_detector.h",
|
||||
"coarse_filter_update_gain.cc",
|
||||
"coarse_filter_update_gain.h",
|
||||
"comfort_noise_generator.cc",
|
||||
"comfort_noise_generator.h",
|
||||
"decimator.cc",
|
||||
"decimator.h",
|
||||
"delay_estimate.h",
|
||||
"dominant_nearend_detector.cc",
|
||||
"dominant_nearend_detector.h",
|
||||
"downsampled_render_buffer.cc",
|
||||
"downsampled_render_buffer.h",
|
||||
"echo_audibility.cc",
|
||||
"echo_audibility.h",
|
||||
"echo_canceller3.cc",
|
||||
"echo_canceller3.h",
|
||||
"echo_path_delay_estimator.cc",
|
||||
"echo_path_delay_estimator.h",
|
||||
"echo_path_variability.cc",
|
||||
"echo_path_variability.h",
|
||||
"echo_remover.cc",
|
||||
"echo_remover.h",
|
||||
"echo_remover_metrics.cc",
|
||||
"echo_remover_metrics.h",
|
||||
"erl_estimator.cc",
|
||||
"erl_estimator.h",
|
||||
"erle_estimator.cc",
|
||||
"erle_estimator.h",
|
||||
"fft_buffer.cc",
|
||||
"filter_analyzer.cc",
|
||||
"filter_analyzer.h",
|
||||
"frame_blocker.cc",
|
||||
"frame_blocker.h",
|
||||
"fullband_erle_estimator.cc",
|
||||
"fullband_erle_estimator.h",
|
||||
"matched_filter.cc",
|
||||
"matched_filter_lag_aggregator.cc",
|
||||
"matched_filter_lag_aggregator.h",
|
||||
"moving_average.cc",
|
||||
"moving_average.h",
|
||||
"nearend_detector.h",
|
||||
"refined_filter_update_gain.cc",
|
||||
"refined_filter_update_gain.h",
|
||||
"render_buffer.cc",
|
||||
"render_delay_buffer.cc",
|
||||
"render_delay_buffer.h",
|
||||
"render_delay_controller.cc",
|
||||
"render_delay_controller.h",
|
||||
"render_delay_controller_metrics.cc",
|
||||
"render_delay_controller_metrics.h",
|
||||
"render_signal_analyzer.cc",
|
||||
"render_signal_analyzer.h",
|
||||
"residual_echo_estimator.cc",
|
||||
"residual_echo_estimator.h",
|
||||
"reverb_decay_estimator.cc",
|
||||
"reverb_decay_estimator.h",
|
||||
"reverb_frequency_response.cc",
|
||||
"reverb_frequency_response.h",
|
||||
"reverb_model.cc",
|
||||
"reverb_model.h",
|
||||
"reverb_model_estimator.cc",
|
||||
"reverb_model_estimator.h",
|
||||
"signal_dependent_erle_estimator.cc",
|
||||
"signal_dependent_erle_estimator.h",
|
||||
"spectrum_buffer.cc",
|
||||
"stationarity_estimator.cc",
|
||||
"stationarity_estimator.h",
|
||||
"subband_erle_estimator.cc",
|
||||
"subband_erle_estimator.h",
|
||||
"subband_nearend_detector.cc",
|
||||
"subband_nearend_detector.h",
|
||||
"subtractor.cc",
|
||||
"subtractor.h",
|
||||
"subtractor_output.cc",
|
||||
"subtractor_output.h",
|
||||
"subtractor_output_analyzer.cc",
|
||||
"subtractor_output_analyzer.h",
|
||||
"suppression_filter.cc",
|
||||
"suppression_filter.h",
|
||||
"suppression_gain.cc",
|
||||
"suppression_gain.h",
|
||||
"transparent_mode.cc",
|
||||
"transparent_mode.h",
|
||||
]
|
||||
|
||||
defines = []
|
||||
if (rtc_build_with_neon && current_cpu != "arm64") {
|
||||
suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
|
||||
cflags = [ "-mfpu=neon" ]
|
||||
}
|
||||
|
||||
deps = [
|
||||
":adaptive_fir_filter",
|
||||
":adaptive_fir_filter_erl",
|
||||
":aec3_common",
|
||||
":aec3_fft",
|
||||
":fft_data",
|
||||
":matched_filter",
|
||||
":render_buffer",
|
||||
":vector_math",
|
||||
"..:apm_logging",
|
||||
"..:audio_buffer",
|
||||
"..:high_pass_filter",
|
||||
"../../../api:array_view",
|
||||
"../../../api/audio:aec3_config",
|
||||
"../../../api/audio:echo_control",
|
||||
"../../../common_audio:common_audio_c",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../../../rtc_base/experiments:field_trial_parser",
|
||||
"../../../rtc_base/system:arch",
|
||||
"../../../system_wrappers",
|
||||
"../../../system_wrappers:field_trial",
|
||||
"../../../system_wrappers:metrics",
|
||||
"../utility:cascaded_biquad_filter",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
|
||||
if (current_cpu == "x86" || current_cpu == "x64") {
|
||||
deps += [ ":aec3_avx2" ]
|
||||
}
|
||||
}
|
||||
|
||||
rtc_source_set("aec3_common") {
|
||||
sources = [ "aec3_common.h" ]
|
||||
}
|
||||
|
||||
rtc_source_set("aec3_fft") {
|
||||
sources = [ "aec3_fft.h" ]
|
||||
deps = [
|
||||
":aec3_common",
|
||||
":fft_data",
|
||||
"../../../api:array_view",
|
||||
"../../../common_audio/third_party/ooura:fft_size_128",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("render_buffer") {
|
||||
sources = [
|
||||
"block_buffer.h",
|
||||
"fft_buffer.h",
|
||||
"render_buffer.h",
|
||||
"spectrum_buffer.h",
|
||||
]
|
||||
deps = [
|
||||
":aec3_common",
|
||||
":fft_data",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("adaptive_fir_filter") {
|
||||
sources = [ "adaptive_fir_filter.h" ]
|
||||
deps = [
|
||||
":aec3_common",
|
||||
":aec3_fft",
|
||||
":fft_data",
|
||||
":render_buffer",
|
||||
"..:apm_logging",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("adaptive_fir_filter_erl") {
|
||||
sources = [ "adaptive_fir_filter_erl.h" ]
|
||||
deps = [
|
||||
":aec3_common",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("matched_filter") {
|
||||
sources = [ "matched_filter.h" ]
|
||||
deps = [
|
||||
":aec3_common",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("vector_math") {
|
||||
sources = [ "vector_math.h" ]
|
||||
deps = [
|
||||
":aec3_common",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("fft_data") {
|
||||
sources = [ "fft_data.h" ]
|
||||
deps = [
|
||||
":aec3_common",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
if (current_cpu == "x86" || current_cpu == "x64") {
|
||||
rtc_library("aec3_avx2") {
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
sources = [
|
||||
"adaptive_fir_filter_avx2.cc",
|
||||
"adaptive_fir_filter_erl_avx2.cc",
|
||||
"fft_data_avx2.cc",
|
||||
"matched_filter_avx2.cc",
|
||||
"vector_math_avx2.cc",
|
||||
]
|
||||
|
||||
if (is_win) {
|
||||
cflags = [ "/arch:AVX2" ]
|
||||
} else {
|
||||
cflags = [
|
||||
"-mavx2",
|
||||
"-mfma",
|
||||
]
|
||||
}
|
||||
|
||||
deps = [
|
||||
":adaptive_fir_filter",
|
||||
":adaptive_fir_filter_erl",
|
||||
":fft_data",
|
||||
":matched_filter",
|
||||
":vector_math",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base:checks",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
if (rtc_include_tests) {
|
||||
rtc_library("aec3_unittests") {
|
||||
testonly = true
|
||||
|
||||
configs += [ "..:apm_debug_dump" ]
|
||||
sources = [
|
||||
"mock/mock_block_processor.cc",
|
||||
"mock/mock_block_processor.h",
|
||||
"mock/mock_echo_remover.cc",
|
||||
"mock/mock_echo_remover.h",
|
||||
"mock/mock_render_delay_buffer.cc",
|
||||
"mock/mock_render_delay_buffer.h",
|
||||
"mock/mock_render_delay_controller.cc",
|
||||
"mock/mock_render_delay_controller.h",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":adaptive_fir_filter",
|
||||
":adaptive_fir_filter_erl",
|
||||
":aec3",
|
||||
":aec3_common",
|
||||
":aec3_fft",
|
||||
":fft_data",
|
||||
":matched_filter",
|
||||
":render_buffer",
|
||||
":vector_math",
|
||||
"..:apm_logging",
|
||||
"..:audio_buffer",
|
||||
"..:audio_processing",
|
||||
"..:audio_processing_unittests",
|
||||
"..:high_pass_filter",
|
||||
"../../../api:array_view",
|
||||
"../../../api/audio:aec3_config",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../../../rtc_base/system:arch",
|
||||
"../../../system_wrappers",
|
||||
"../../../test:field_trial",
|
||||
"../../../test:test_support",
|
||||
"../utility:cascaded_biquad_filter",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
|
||||
defines = []
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
sources += [
|
||||
"adaptive_fir_filter_erl_unittest.cc",
|
||||
"adaptive_fir_filter_unittest.cc",
|
||||
"aec3_fft_unittest.cc",
|
||||
"aec_state_unittest.cc",
|
||||
"alignment_mixer_unittest.cc",
|
||||
"api_call_jitter_metrics_unittest.cc",
|
||||
"block_delay_buffer_unittest.cc",
|
||||
"block_framer_unittest.cc",
|
||||
"block_processor_metrics_unittest.cc",
|
||||
"block_processor_unittest.cc",
|
||||
"clockdrift_detector_unittest.cc",
|
||||
"coarse_filter_update_gain_unittest.cc",
|
||||
"comfort_noise_generator_unittest.cc",
|
||||
"decimator_unittest.cc",
|
||||
"echo_canceller3_unittest.cc",
|
||||
"echo_path_delay_estimator_unittest.cc",
|
||||
"echo_path_variability_unittest.cc",
|
||||
"echo_remover_metrics_unittest.cc",
|
||||
"echo_remover_unittest.cc",
|
||||
"erl_estimator_unittest.cc",
|
||||
"erle_estimator_unittest.cc",
|
||||
"fft_data_unittest.cc",
|
||||
"filter_analyzer_unittest.cc",
|
||||
"frame_blocker_unittest.cc",
|
||||
"matched_filter_lag_aggregator_unittest.cc",
|
||||
"matched_filter_unittest.cc",
|
||||
"moving_average_unittest.cc",
|
||||
"refined_filter_update_gain_unittest.cc",
|
||||
"render_buffer_unittest.cc",
|
||||
"render_delay_buffer_unittest.cc",
|
||||
"render_delay_controller_metrics_unittest.cc",
|
||||
"render_delay_controller_unittest.cc",
|
||||
"render_signal_analyzer_unittest.cc",
|
||||
"residual_echo_estimator_unittest.cc",
|
||||
"reverb_model_estimator_unittest.cc",
|
||||
"signal_dependent_erle_estimator_unittest.cc",
|
||||
"subtractor_unittest.cc",
|
||||
"suppression_filter_unittest.cc",
|
||||
"suppression_gain_unittest.cc",
|
||||
"vector_math_unittest.cc",
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
740
webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
Normal file
740
webrtc/modules/audio_processing/aec3/adaptive_fir_filter.cc
Normal file
@ -0,0 +1,740 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
|
||||
|
||||
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the frequency response of the filter.
|
||||
void ComputeFrequencyResponse(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
|
||||
for (auto& H2_ch : *H2) {
|
||||
H2_ch.fill(0.f);
|
||||
}
|
||||
|
||||
const size_t num_render_channels = H[0].size();
|
||||
RTC_DCHECK_EQ(H.size(), H2->capacity());
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
|
||||
float tmp =
|
||||
H[p][ch].re[j] * H[p][ch].re[j] + H[p][ch].im[j] * H[p][ch].im[j];
|
||||
(*H2)[p][j] = std::max((*H2)[p][j], tmp);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
// Computes and stores the frequency response of the filter.
|
||||
void ComputeFrequencyResponse_Neon(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
|
||||
for (auto& H2_ch : *H2) {
|
||||
H2_ch.fill(0.f);
|
||||
}
|
||||
|
||||
const size_t num_render_channels = H[0].size();
|
||||
RTC_DCHECK_EQ(H.size(), H2->capacity());
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
for (size_t j = 0; j < kFftLengthBy2; j += 4) {
|
||||
const float32x4_t re = vld1q_f32(&H[p][ch].re[j]);
|
||||
const float32x4_t im = vld1q_f32(&H[p][ch].im[j]);
|
||||
float32x4_t H2_new = vmulq_f32(re, re);
|
||||
H2_new = vmlaq_f32(H2_new, im, im);
|
||||
float32x4_t H2_p_j = vld1q_f32(&(*H2)[p][j]);
|
||||
H2_p_j = vmaxq_f32(H2_p_j, H2_new);
|
||||
vst1q_f32(&(*H2)[p][j], H2_p_j);
|
||||
}
|
||||
float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
|
||||
H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
|
||||
(*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
// Computes and stores the frequency response of the filter.
|
||||
void ComputeFrequencyResponse_Sse2(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
|
||||
for (auto& H2_ch : *H2) {
|
||||
H2_ch.fill(0.f);
|
||||
}
|
||||
|
||||
const size_t num_render_channels = H[0].size();
|
||||
RTC_DCHECK_EQ(H.size(), H2->capacity());
|
||||
// constexpr __mmmask8 kMaxMask = static_cast<__mmmask8>(256u);
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
for (size_t j = 0; j < kFftLengthBy2; j += 4) {
|
||||
const __m128 re = _mm_loadu_ps(&H[p][ch].re[j]);
|
||||
const __m128 re2 = _mm_mul_ps(re, re);
|
||||
const __m128 im = _mm_loadu_ps(&H[p][ch].im[j]);
|
||||
const __m128 im2 = _mm_mul_ps(im, im);
|
||||
const __m128 H2_new = _mm_add_ps(re2, im2);
|
||||
__m128 H2_k_j = _mm_loadu_ps(&(*H2)[p][j]);
|
||||
H2_k_j = _mm_max_ps(H2_k_j, H2_new);
|
||||
_mm_storeu_ps(&(*H2)[p][j], H2_k_j);
|
||||
}
|
||||
float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
|
||||
H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
|
||||
(*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// Adapts the filter partitions as H(t+1)=H(t)+G(t)*conj(X(t)).
|
||||
void AdaptPartitions(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
size_t index = render_buffer.Position();
|
||||
const size_t num_render_channels = render_buffer_data[index].size();
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& X_p_ch = render_buffer_data[index][ch];
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
H_p_ch.re[k] += X_p_ch.re[k] * G.re[k] + X_p_ch.im[k] * G.im[k];
|
||||
H_p_ch.im[k] += X_p_ch.re[k] * G.im[k] - X_p_ch.im[k] * G.re[k];
|
||||
}
|
||||
}
|
||||
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
// Adapts the filter partitions. (Neon variant)
|
||||
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
const size_t num_render_channels = render_buffer_data[0].size();
|
||||
const size_t lim1 = std::min(
|
||||
render_buffer_data.size() - render_buffer.Position(), num_partitions);
|
||||
const size_t lim2 = num_partitions;
|
||||
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
|
||||
|
||||
size_t X_partition = render_buffer.Position();
|
||||
size_t limit = lim1;
|
||||
size_t p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
|
||||
const float32x4_t G_re = vld1q_f32(&G.re[k]);
|
||||
const float32x4_t G_im = vld1q_f32(&G.im[k]);
|
||||
const float32x4_t X_re = vld1q_f32(&X.re[k]);
|
||||
const float32x4_t X_im = vld1q_f32(&X.im[k]);
|
||||
const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
|
||||
const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
|
||||
const float32x4_t a = vmulq_f32(X_re, G_re);
|
||||
const float32x4_t e = vmlaq_f32(a, X_im, G_im);
|
||||
const float32x4_t c = vmulq_f32(X_re, G_im);
|
||||
const float32x4_t f = vmlsq_f32(c, X_im, G_re);
|
||||
const float32x4_t g = vaddq_f32(H_re, e);
|
||||
const float32x4_t h = vaddq_f32(H_im, f);
|
||||
vst1q_f32(&H_p_ch.re[k], g);
|
||||
vst1q_f32(&H_p_ch.im[k], h);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
|
||||
X_partition = render_buffer.Position();
|
||||
limit = lim1;
|
||||
p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
|
||||
H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
|
||||
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
|
||||
H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
|
||||
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
|
||||
}
|
||||
}
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
// Adapts the filter partitions. (SSE2 variant)
|
||||
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
const size_t num_render_channels = render_buffer_data[0].size();
|
||||
const size_t lim1 = std::min(
|
||||
render_buffer_data.size() - render_buffer.Position(), num_partitions);
|
||||
const size_t lim2 = num_partitions;
|
||||
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
|
||||
|
||||
size_t X_partition = render_buffer.Position();
|
||||
size_t limit = lim1;
|
||||
size_t p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
|
||||
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
|
||||
const __m128 G_re = _mm_loadu_ps(&G.re[k]);
|
||||
const __m128 G_im = _mm_loadu_ps(&G.im[k]);
|
||||
const __m128 X_re = _mm_loadu_ps(&X.re[k]);
|
||||
const __m128 X_im = _mm_loadu_ps(&X.im[k]);
|
||||
const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
|
||||
const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
|
||||
const __m128 a = _mm_mul_ps(X_re, G_re);
|
||||
const __m128 b = _mm_mul_ps(X_im, G_im);
|
||||
const __m128 c = _mm_mul_ps(X_re, G_im);
|
||||
const __m128 d = _mm_mul_ps(X_im, G_re);
|
||||
const __m128 e = _mm_add_ps(a, b);
|
||||
const __m128 f = _mm_sub_ps(c, d);
|
||||
const __m128 g = _mm_add_ps(H_re, e);
|
||||
const __m128 h = _mm_add_ps(H_im, f);
|
||||
_mm_storeu_ps(&H_p_ch.re[k], g);
|
||||
_mm_storeu_ps(&H_p_ch.im[k], h);
|
||||
}
|
||||
}
|
||||
}
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
|
||||
X_partition = render_buffer.Position();
|
||||
limit = lim1;
|
||||
p = 0;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
FftData& H_p_ch = (*H)[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
|
||||
H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
|
||||
X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
|
||||
H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
|
||||
X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
|
||||
}
|
||||
}
|
||||
|
||||
X_partition = 0;
|
||||
limit = lim2;
|
||||
} while (p < lim2);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Produces the filter output.
|
||||
void ApplyFilter(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S) {
|
||||
S->re.fill(0.f);
|
||||
S->im.fill(0.f);
|
||||
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
size_t index = render_buffer.Position();
|
||||
const size_t num_render_channels = render_buffer_data[index].size();
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(num_render_channels, H[p].size());
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& X_p_ch = render_buffer_data[index][ch];
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
S->re[k] += X_p_ch.re[k] * H_p_ch.re[k] - X_p_ch.im[k] * H_p_ch.im[k];
|
||||
S->im[k] += X_p_ch.re[k] * H_p_ch.im[k] + X_p_ch.im[k] * H_p_ch.re[k];
|
||||
}
|
||||
}
|
||||
index = index < (render_buffer_data.size() - 1) ? index + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
// Produces the filter output (Neon variant).
|
||||
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S) {
|
||||
// const RenderBuffer& render_buffer,
|
||||
// rtc::ArrayView<const FftData> H,
|
||||
// FftData* S) {
|
||||
RTC_DCHECK_GE(H.size(), H.size() - 1);
|
||||
S->Clear();
|
||||
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
const size_t num_render_channels = render_buffer_data[0].size();
|
||||
const size_t lim1 = std::min(
|
||||
render_buffer_data.size() - render_buffer.Position(), num_partitions);
|
||||
const size_t lim2 = num_partitions;
|
||||
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
|
||||
|
||||
size_t X_partition = render_buffer.Position();
|
||||
size_t p = 0;
|
||||
size_t limit = lim1;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
|
||||
const float32x4_t X_re = vld1q_f32(&X.re[k]);
|
||||
const float32x4_t X_im = vld1q_f32(&X.im[k]);
|
||||
const float32x4_t H_re = vld1q_f32(&H_p_ch.re[k]);
|
||||
const float32x4_t H_im = vld1q_f32(&H_p_ch.im[k]);
|
||||
const float32x4_t S_re = vld1q_f32(&S->re[k]);
|
||||
const float32x4_t S_im = vld1q_f32(&S->im[k]);
|
||||
const float32x4_t a = vmulq_f32(X_re, H_re);
|
||||
const float32x4_t e = vmlsq_f32(a, X_im, H_im);
|
||||
const float32x4_t c = vmulq_f32(X_re, H_im);
|
||||
const float32x4_t f = vmlaq_f32(c, X_im, H_re);
|
||||
const float32x4_t g = vaddq_f32(S_re, e);
|
||||
const float32x4_t h = vaddq_f32(S_im, f);
|
||||
vst1q_f32(&S->re[k], g);
|
||||
vst1q_f32(&S->im[k], h);
|
||||
}
|
||||
}
|
||||
}
|
||||
limit = lim2;
|
||||
X_partition = 0;
|
||||
} while (p < lim2);
|
||||
|
||||
X_partition = render_buffer.Position();
|
||||
p = 0;
|
||||
limit = lim1;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
|
||||
X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
|
||||
S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
|
||||
X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
|
||||
}
|
||||
}
|
||||
limit = lim2;
|
||||
X_partition = 0;
|
||||
} while (p < lim2);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
// Produces the filter output (SSE2 variant).
|
||||
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S) {
|
||||
// const RenderBuffer& render_buffer,
|
||||
// rtc::ArrayView<const FftData> H,
|
||||
// FftData* S) {
|
||||
RTC_DCHECK_GE(H.size(), H.size() - 1);
|
||||
S->re.fill(0.f);
|
||||
S->im.fill(0.f);
|
||||
|
||||
rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
|
||||
render_buffer.GetFftBuffer();
|
||||
const size_t num_render_channels = render_buffer_data[0].size();
|
||||
const size_t lim1 = std::min(
|
||||
render_buffer_data.size() - render_buffer.Position(), num_partitions);
|
||||
const size_t lim2 = num_partitions;
|
||||
constexpr size_t kNumFourBinBands = kFftLengthBy2 / 4;
|
||||
|
||||
size_t X_partition = render_buffer.Position();
|
||||
size_t p = 0;
|
||||
size_t limit = lim1;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
for (size_t k = 0, n = 0; n < kNumFourBinBands; ++n, k += 4) {
|
||||
const __m128 X_re = _mm_loadu_ps(&X.re[k]);
|
||||
const __m128 X_im = _mm_loadu_ps(&X.im[k]);
|
||||
const __m128 H_re = _mm_loadu_ps(&H_p_ch.re[k]);
|
||||
const __m128 H_im = _mm_loadu_ps(&H_p_ch.im[k]);
|
||||
const __m128 S_re = _mm_loadu_ps(&S->re[k]);
|
||||
const __m128 S_im = _mm_loadu_ps(&S->im[k]);
|
||||
const __m128 a = _mm_mul_ps(X_re, H_re);
|
||||
const __m128 b = _mm_mul_ps(X_im, H_im);
|
||||
const __m128 c = _mm_mul_ps(X_re, H_im);
|
||||
const __m128 d = _mm_mul_ps(X_im, H_re);
|
||||
const __m128 e = _mm_sub_ps(a, b);
|
||||
const __m128 f = _mm_add_ps(c, d);
|
||||
const __m128 g = _mm_add_ps(S_re, e);
|
||||
const __m128 h = _mm_add_ps(S_im, f);
|
||||
_mm_storeu_ps(&S->re[k], g);
|
||||
_mm_storeu_ps(&S->im[k], h);
|
||||
}
|
||||
}
|
||||
}
|
||||
limit = lim2;
|
||||
X_partition = 0;
|
||||
} while (p < lim2);
|
||||
|
||||
X_partition = render_buffer.Position();
|
||||
p = 0;
|
||||
limit = lim1;
|
||||
do {
|
||||
for (; p < limit; ++p, ++X_partition) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
const FftData& X = render_buffer_data[X_partition][ch];
|
||||
S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
|
||||
X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
|
||||
S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
|
||||
X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
|
||||
}
|
||||
}
|
||||
limit = lim2;
|
||||
X_partition = 0;
|
||||
} while (p < lim2);
|
||||
}
|
||||
#endif
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
namespace {
|
||||
|
||||
// Ensures that the newly added filter partitions after a size increase are set
|
||||
// to zero.
|
||||
void ZeroFilter(size_t old_size,
|
||||
size_t new_size,
|
||||
std::vector<std::vector<FftData>>* H) {
|
||||
RTC_DCHECK_GE(H->size(), old_size);
|
||||
RTC_DCHECK_GE(H->size(), new_size);
|
||||
|
||||
for (size_t p = old_size; p < new_size; ++p) {
|
||||
RTC_DCHECK_EQ((*H)[p].size(), (*H)[0].size());
|
||||
for (size_t ch = 0; ch < (*H)[0].size(); ++ch) {
|
||||
(*H)[p][ch].Clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
AdaptiveFirFilter::AdaptiveFirFilter(size_t max_size_partitions,
|
||||
size_t initial_size_partitions,
|
||||
size_t size_change_duration_blocks,
|
||||
size_t num_render_channels,
|
||||
Aec3Optimization optimization,
|
||||
ApmDataDumper* data_dumper)
|
||||
: data_dumper_(data_dumper),
|
||||
fft_(),
|
||||
optimization_(optimization),
|
||||
num_render_channels_(num_render_channels),
|
||||
max_size_partitions_(max_size_partitions),
|
||||
size_change_duration_blocks_(
|
||||
static_cast<int>(size_change_duration_blocks)),
|
||||
current_size_partitions_(initial_size_partitions),
|
||||
target_size_partitions_(initial_size_partitions),
|
||||
old_target_size_partitions_(initial_size_partitions),
|
||||
H_(max_size_partitions_, std::vector<FftData>(num_render_channels_)) {
|
||||
RTC_DCHECK(data_dumper_);
|
||||
RTC_DCHECK_GE(max_size_partitions, initial_size_partitions);
|
||||
|
||||
RTC_DCHECK_LT(0, size_change_duration_blocks_);
|
||||
one_by_size_change_duration_blocks_ = 1.f / size_change_duration_blocks_;
|
||||
|
||||
ZeroFilter(0, max_size_partitions_, &H_);
|
||||
|
||||
SetSizePartitions(current_size_partitions_, true);
|
||||
}
|
||||
|
||||
AdaptiveFirFilter::~AdaptiveFirFilter() = default;
|
||||
|
||||
void AdaptiveFirFilter::HandleEchoPathChange() {
|
||||
// TODO(peah): Check the value and purpose of the code below.
|
||||
ZeroFilter(current_size_partitions_, max_size_partitions_, &H_);
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::SetSizePartitions(size_t size, bool immediate_effect) {
|
||||
RTC_DCHECK_EQ(max_size_partitions_, H_.capacity());
|
||||
RTC_DCHECK_LE(size, max_size_partitions_);
|
||||
|
||||
target_size_partitions_ = std::min(max_size_partitions_, size);
|
||||
if (immediate_effect) {
|
||||
size_t old_size_partitions_ = current_size_partitions_;
|
||||
current_size_partitions_ = old_target_size_partitions_ =
|
||||
target_size_partitions_;
|
||||
ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
|
||||
|
||||
partition_to_constrain_ =
|
||||
std::min(partition_to_constrain_, current_size_partitions_ - 1);
|
||||
size_change_counter_ = 0;
|
||||
} else {
|
||||
size_change_counter_ = size_change_duration_blocks_;
|
||||
}
|
||||
}
|
||||
|
||||
void AdaptiveFirFilter::UpdateSize() {
|
||||
RTC_DCHECK_GE(size_change_duration_blocks_, size_change_counter_);
|
||||
size_t old_size_partitions_ = current_size_partitions_;
|
||||
if (size_change_counter_ > 0) {
|
||||
--size_change_counter_;
|
||||
|
||||
auto average = [](float from, float to, float from_weight) {
|
||||
return from * from_weight + to * (1.f - from_weight);
|
||||
};
|
||||
|
||||
float change_factor =
|
||||
size_change_counter_ * one_by_size_change_duration_blocks_;
|
||||
|
||||
current_size_partitions_ = average(old_target_size_partitions_,
|
||||
target_size_partitions_, change_factor);
|
||||
|
||||
partition_to_constrain_ =
|
||||
std::min(partition_to_constrain_, current_size_partitions_ - 1);
|
||||
} else {
|
||||
current_size_partitions_ = old_target_size_partitions_ =
|
||||
target_size_partitions_;
|
||||
}
|
||||
ZeroFilter(old_size_partitions_, current_size_partitions_, &H_);
|
||||
RTC_DCHECK_LE(0, size_change_counter_);
|
||||
}
|
||||
|
||||
// Produces the filter output S by applying the current filter partitions to
// the render signal. Dispatches to the SIMD implementation selected at
// startup; all variants compute the same result.
void AdaptiveFirFilter::Filter(const RenderBuffer& render_buffer,
                               FftData* S) const {
  RTC_DCHECK(S);
  switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::ApplyFilter_Sse2(render_buffer, current_size_partitions_, H_, S);
      break;
    case Aec3Optimization::kAvx2:
      aec3::ApplyFilter_Avx2(render_buffer, current_size_partitions_, H_, S);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::ApplyFilter_Neon(render_buffer, current_size_partitions_, H_, S);
      break;
#endif
    default:
      // Portable scalar fallback.
      aec3::ApplyFilter(render_buffer, current_size_partitions_, H_, S);
  }
}
|
||||
|
||||
// Adapts the filter with the gain G and advances the cyclic time-domain
// constraining by one partition. Overload without impulse-response output.
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
                              const FftData& G) {
  // Adapt the filter and update the filter size.
  AdaptAndUpdateSize(render_buffer, G);

  // Constrain the filter partitions in a cyclic manner.
  Constrain();
}
|
||||
|
||||
// Adapts the filter with the gain G, advances the cyclic time-domain
// constraining by one partition, and refreshes the corresponding segment of
// the externally stored impulse response estimate.
void AdaptiveFirFilter::Adapt(const RenderBuffer& render_buffer,
                              const FftData& G,
                              std::vector<float>* impulse_response) {
  // Adapt the filter and update the filter size.
  AdaptAndUpdateSize(render_buffer, G);

  // Constrain the filter partitions in a cyclic manner.
  ConstrainAndUpdateImpulseResponse(impulse_response);
}
|
||||
|
||||
// Computes the frequency responses H2 for the currently active filter
// partitions, dispatching to the SIMD variant selected at startup.
void AdaptiveFirFilter::ComputeFrequencyResponse(
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const {
  RTC_DCHECK_GE(max_size_partitions_, H2->capacity());

  // Capacity covers the maximum size, so this resize does not reallocate.
  H2->resize(current_size_partitions_);

  switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::ComputeFrequencyResponse_Sse2(current_size_partitions_, H_, H2);
      break;
    case Aec3Optimization::kAvx2:
      aec3::ComputeFrequencyResponse_Avx2(current_size_partitions_, H_, H2);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::ComputeFrequencyResponse_Neon(current_size_partitions_, H_, H2);
      break;
#endif
    default:
      // Portable scalar fallback.
      aec3::ComputeFrequencyResponse(current_size_partitions_, H_, H2);
  }
}
|
||||
|
||||
// Updates the filter size (continuing any in-progress size transition) and
// then adapts the active partitions with the gain G, dispatching to the SIMD
// variant selected at startup.
void AdaptiveFirFilter::AdaptAndUpdateSize(const RenderBuffer& render_buffer,
                                           const FftData& G) {
  // Update the filter size if needed.
  UpdateSize();

  // Adapt the filter.
  switch (optimization_) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::AdaptPartitions_Sse2(render_buffer, G, current_size_partitions_,
                                 &H_);
      break;
    case Aec3Optimization::kAvx2:
      aec3::AdaptPartitions_Avx2(render_buffer, G, current_size_partitions_,
                                 &H_);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::AdaptPartitions_Neon(render_buffer, G, current_size_partitions_,
                                 &H_);
      break;
#endif
    default:
      // Portable scalar fallback.
      aec3::AdaptPartitions(render_buffer, G, current_size_partitions_, &H_);
  }
}
|
||||
|
||||
// Constrains the partition of the frequency domain filter to be limited in
|
||||
// time via setting the relevant time-domain coefficients to zero and updates
|
||||
// the corresponding values in an externally stored impulse response estimate.
|
||||
void AdaptiveFirFilter::ConstrainAndUpdateImpulseResponse(
|
||||
std::vector<float>* impulse_response) {
|
||||
RTC_DCHECK_EQ(GetTimeDomainLength(max_size_partitions_),
|
||||
impulse_response->capacity());
|
||||
impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
|
||||
std::array<float, kFftLength> h;
|
||||
impulse_response->resize(GetTimeDomainLength(current_size_partitions_));
|
||||
std::fill(
|
||||
impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2,
|
||||
impulse_response->begin() + (partition_to_constrain_ + 1) * kFftLengthBy2,
|
||||
0.f);
|
||||
|
||||
for (size_t ch = 0; ch < num_render_channels_; ++ch) {
|
||||
fft_.Ifft(H_[partition_to_constrain_][ch], &h);
|
||||
|
||||
static constexpr float kScale = 1.0f / kFftLengthBy2;
|
||||
std::for_each(h.begin(), h.begin() + kFftLengthBy2,
|
||||
[](float& a) { a *= kScale; });
|
||||
std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);
|
||||
|
||||
if (ch == 0) {
|
||||
std::copy(
|
||||
h.begin(), h.begin() + kFftLengthBy2,
|
||||
impulse_response->begin() + partition_to_constrain_ * kFftLengthBy2);
|
||||
} else {
|
||||
for (size_t k = 0, j = partition_to_constrain_ * kFftLengthBy2;
|
||||
k < kFftLengthBy2; ++k, ++j) {
|
||||
if (fabsf((*impulse_response)[j]) < fabsf(h[k])) {
|
||||
(*impulse_response)[j] = h[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
|
||||
}
|
||||
|
||||
partition_to_constrain_ =
|
||||
partition_to_constrain_ < (current_size_partitions_ - 1)
|
||||
? partition_to_constrain_ + 1
|
||||
: 0;
|
||||
}
|
||||
|
||||
// Constrains a partition of the frequency domain filter to be limited in
// time via setting the relevant time-domain coefficients to zero. One
// partition is processed per call, cycling through the active partitions.
void AdaptiveFirFilter::Constrain() {
  std::array<float, kFftLength> h;
  for (size_t ch = 0; ch < num_render_channels_; ++ch) {
    fft_.Ifft(H_[partition_to_constrain_][ch], &h);

    // Scale the IFFT output and zero the second half of the time-domain
    // buffer to enforce a length-limited (linear-convolution) response.
    static constexpr float kScale = 1.0f / kFftLengthBy2;
    std::for_each(h.begin(), h.begin() + kFftLengthBy2,
                  [](float& a) { a *= kScale; });
    std::fill(h.begin() + kFftLengthBy2, h.end(), 0.f);

    // Transform the constrained response back to the frequency domain.
    fft_.Fft(&h, &H_[partition_to_constrain_][ch]);
  }

  // Advance to the next partition in a cyclic manner.
  partition_to_constrain_ =
      partition_to_constrain_ < (current_size_partitions_ - 1)
          ? partition_to_constrain_ + 1
          : 0;
}
|
||||
|
||||
void AdaptiveFirFilter::ScaleFilter(float factor) {
|
||||
for (auto& H_p : H_) {
|
||||
for (auto& H_p_ch : H_p) {
|
||||
for (auto& re : H_p_ch.re) {
|
||||
re *= factor;
|
||||
}
|
||||
for (auto& im : H_p_ch.im) {
|
||||
im *= factor;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Set the filter coefficients: copies the coefficients of |H| into this
// filter for the first min(current size, |num_partitions|) partitions;
// partitions beyond that are left untouched.
void AdaptiveFirFilter::SetFilter(size_t num_partitions,
                                  const std::vector<std::vector<FftData>>& H) {
  const size_t min_num_partitions =
      std::min(current_size_partitions_, num_partitions);
  for (size_t p = 0; p < min_num_partitions; ++p) {
    RTC_DCHECK_EQ(H_[p].size(), H[p].size());
    RTC_DCHECK_EQ(num_render_channels_, H_[p].size());

    for (size_t ch = 0; ch < num_render_channels_; ++ch) {
      std::copy(H[p][ch].re.begin(), H[p][ch].re.end(), H_[p][ch].re.begin());
      std::copy(H[p][ch].im.begin(), H[p][ch].im.end(), H_[p][ch].im.begin());
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
191
webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
Normal file
191
webrtc/modules/audio_processing/aec3/adaptive_fir_filter.h
Normal file
@ -0,0 +1,191 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
// Computes and stores the frequency response of the filter.
|
||||
void ComputeFrequencyResponse(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
void ComputeFrequencyResponse_Neon(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
void ComputeFrequencyResponse_Sse2(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
|
||||
|
||||
void ComputeFrequencyResponse_Avx2(
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>* H2);
|
||||
#endif
|
||||
|
||||
// Adapts the filter partitions.
|
||||
void AdaptPartitions(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H);
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
void AdaptPartitions_Neon(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H);
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
void AdaptPartitions_Sse2(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H);
|
||||
|
||||
void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
|
||||
const FftData& G,
|
||||
size_t num_partitions,
|
||||
std::vector<std::vector<FftData>>* H);
|
||||
#endif
|
||||
|
||||
// Produces the filter output.
|
||||
void ApplyFilter(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S);
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
void ApplyFilter_Neon(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S);
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
void ApplyFilter_Sse2(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S);
|
||||
|
||||
void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
|
||||
size_t num_partitions,
|
||||
const std::vector<std::vector<FftData>>& H,
|
||||
FftData* S);
|
||||
#endif
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
// Provides a frequency domain adaptive filter functionality.
class AdaptiveFirFilter {
 public:
  AdaptiveFirFilter(size_t max_size_partitions,
                    size_t initial_size_partitions,
                    size_t size_change_duration_blocks,
                    size_t num_render_channels,
                    Aec3Optimization optimization,
                    ApmDataDumper* data_dumper);

  ~AdaptiveFirFilter();

  AdaptiveFirFilter(const AdaptiveFirFilter&) = delete;
  AdaptiveFirFilter& operator=(const AdaptiveFirFilter&) = delete;

  // Produces the output of the filter.
  void Filter(const RenderBuffer& render_buffer, FftData* S) const;

  // Adapts the filter and updates an externally stored impulse response
  // estimate.
  void Adapt(const RenderBuffer& render_buffer,
             const FftData& G,
             std::vector<float>* impulse_response);

  // Adapts the filter.
  void Adapt(const RenderBuffer& render_buffer, const FftData& G);

  // Receives reports that known echo path changes have occurred and adjusts
  // the filter adaptation accordingly.
  void HandleEchoPathChange();

  // Returns the filter size (in partitions).
  size_t SizePartitions() const { return current_size_partitions_; }

  // Sets the filter size.
  void SetSizePartitions(size_t size, bool immediate_effect);

  // Computes the frequency responses for the filter partitions.
  void ComputeFrequencyResponse(
      std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) const;

  // Returns the maximum number of partitions for the filter.
  size_t max_filter_size_partitions() const { return max_size_partitions_; }

  // Dumps the first channel of every partition (real then imaginary parts)
  // to the data dumper for debugging.
  void DumpFilter(const char* name_frequency_domain) {
    for (size_t p = 0; p < max_size_partitions_; ++p) {
      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].re);
      data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].im);
    }
  }

  // Scale the filter impulse response and spectrum by a factor.
  void ScaleFilter(float factor);

  // Set the filter coefficients.
  void SetFilter(size_t num_partitions,
                 const std::vector<std::vector<FftData>>& H);

  // Gets the filter coefficients.
  const std::vector<std::vector<FftData>>& GetFilter() const { return H_; }

 private:
  // Adapts the filter and updates the filter size.
  void AdaptAndUpdateSize(const RenderBuffer& render_buffer, const FftData& G);

  // Constrain the filter partitions in a cyclic manner.
  void Constrain();
  // Constrains the filter in a cyclic manner and updates the corresponding
  // values in the supplied impulse response.
  void ConstrainAndUpdateImpulseResponse(std::vector<float>* impulse_response);

  // Gradually updates the current filter size towards the target size.
  void UpdateSize();

  ApmDataDumper* const data_dumper_;
  const Aec3Fft fft_;
  const Aec3Optimization optimization_;
  const size_t num_render_channels_;
  const size_t max_size_partitions_;
  const int size_change_duration_blocks_;
  // Reciprocal of size_change_duration_blocks_, precomputed for the size
  // interpolation in UpdateSize().
  float one_by_size_change_duration_blocks_;
  size_t current_size_partitions_;
  size_t target_size_partitions_;
  size_t old_target_size_partitions_;
  // Remaining blocks of an in-progress gradual size change (0 when idle).
  int size_change_counter_ = 0;
  // Frequency-domain filter coefficients, indexed [partition][channel].
  std::vector<std::vector<FftData>> H_;
  // Index of the next partition to be time-constrained (cycled per call).
  size_t partition_to_constrain_ = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_H_
|
187
webrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc
Normal file
187
webrtc/modules/audio_processing/aec3/adaptive_fir_filter_avx2.cc
Normal file
@ -0,0 +1,187 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the frequency response of the filter. For each
// partition p and bin j, H2[p][j] holds the maximum over the channels of
// |H[p][ch][j]|^2 (AVX2 variant, 8 bins per iteration).
void ComputeFrequencyResponse_Avx2(
    size_t num_partitions,
    const std::vector<std::vector<FftData>>& H,
    std::vector<std::array<float, kFftLengthBy2Plus1>>* H2) {
  for (auto& H2_ch : *H2) {
    H2_ch.fill(0.f);
  }

  const size_t num_render_channels = H[0].size();
  RTC_DCHECK_EQ(H.size(), H2->capacity());
  for (size_t p = 0; p < num_partitions; ++p) {
    RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
    for (size_t ch = 0; ch < num_render_channels; ++ch) {
      for (size_t j = 0; j < kFftLengthBy2; j += 8) {
        // re2 = re*re + im*im (fused multiply-add), i.e. squared magnitude.
        __m256 re = _mm256_loadu_ps(&H[p][ch].re[j]);
        __m256 re2 = _mm256_mul_ps(re, re);
        __m256 im = _mm256_loadu_ps(&H[p][ch].im[j]);
        re2 = _mm256_fmadd_ps(im, im, re2);
        __m256 H2_k_j = _mm256_loadu_ps(&(*H2)[p][j]);
        H2_k_j = _mm256_max_ps(H2_k_j, re2);
        _mm256_storeu_ps(&(*H2)[p][j], H2_k_j);
      }
      // Scalar handling of the last (Nyquist) bin, kFftLengthBy2.
      float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
                     H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
      (*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
    }
  }
}
|
||||
|
||||
// Adapts the filter partitions (AVX2 variant): accumulates X * conj-style
// products of the render spectra with the gain G into H. The circular render
// buffer is traversed in two passes (from Position() to the buffer end, then
// wrapping from index 0) via the lim1/lim2 + do/while structure.
void AdaptPartitions_Avx2(const RenderBuffer& render_buffer,
                          const FftData& G,
                          size_t num_partitions,
                          std::vector<std::vector<FftData>>* H) {
  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // lim1: partitions reachable before the circular buffer wraps.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;

  size_t X_partition = render_buffer.Position();
  size_t limit = lim1;
  size_t p = 0;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];

        // Vectorized update of bins [0, kFftLengthBy2), 8 bins at a time:
        // H_re += X_re*G_re + X_im*G_im; H_im += X_re*G_im - X_im*G_re.
        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
          const __m256 G_re = _mm256_loadu_ps(&G.re[k]);
          const __m256 G_im = _mm256_loadu_ps(&G.im[k]);
          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
          const __m256 a = _mm256_mul_ps(X_re, G_re);
          const __m256 b = _mm256_mul_ps(X_im, G_im);
          const __m256 c = _mm256_mul_ps(X_re, G_im);
          const __m256 d = _mm256_mul_ps(X_im, G_re);
          const __m256 e = _mm256_add_ps(a, b);
          const __m256 f = _mm256_sub_ps(c, d);
          const __m256 g = _mm256_add_ps(H_re, e);
          const __m256 h = _mm256_add_ps(H_im, f);
          _mm256_storeu_ps(&H_p_ch.re[k], g);
          _mm256_storeu_ps(&H_p_ch.im[k], h);
        }
      }
    }
    // Wrap around the circular buffer and continue with the remaining
    // partitions.
    X_partition = 0;
    limit = lim2;
  } while (p < lim2);

  // Second traversal: scalar update of the last (Nyquist) bin.
  X_partition = render_buffer.Position();
  limit = lim1;
  p = 0;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        FftData& H_p_ch = (*H)[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];

        H_p_ch.re[kFftLengthBy2] += X.re[kFftLengthBy2] * G.re[kFftLengthBy2] +
                                    X.im[kFftLengthBy2] * G.im[kFftLengthBy2];
        H_p_ch.im[kFftLengthBy2] += X.re[kFftLengthBy2] * G.im[kFftLengthBy2] -
                                    X.im[kFftLengthBy2] * G.re[kFftLengthBy2];
      }
    }

    X_partition = 0;
    limit = lim2;
  } while (p < lim2);
}
|
||||
|
||||
// Produces the filter output (AVX2 variant): S = sum over partitions of the
// complex product of the render spectra and the filter coefficients, summed
// over all render channels. The circular render buffer is traversed in two
// passes (pre-wrap, then wrapped) via the lim1/lim2 + do/while structure.
void ApplyFilter_Avx2(const RenderBuffer& render_buffer,
                      size_t num_partitions,
                      const std::vector<std::vector<FftData>>& H,
                      FftData* S) {
  // NOTE(review): this check is trivially true whenever H is non-empty (and
  // only fails for H.size() == 0 via size_t underflow). It looks like
  // RTC_DCHECK_GE(H.size(), num_partitions) may have been intended — confirm
  // against upstream before changing.
  RTC_DCHECK_GE(H.size(), H.size() - 1);
  S->re.fill(0.f);
  S->im.fill(0.f);

  rtc::ArrayView<const std::vector<FftData>> render_buffer_data =
      render_buffer.GetFftBuffer();
  const size_t num_render_channels = render_buffer_data[0].size();
  // lim1: partitions reachable before the circular buffer wraps.
  const size_t lim1 = std::min(
      render_buffer_data.size() - render_buffer.Position(), num_partitions);
  const size_t lim2 = num_partitions;
  constexpr size_t kNumEightBinBands = kFftLengthBy2 / 8;

  size_t X_partition = render_buffer.Position();
  size_t p = 0;
  size_t limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        // Vectorized complex multiply-accumulate over bins [0, kFftLengthBy2):
        // S_re += X_re*H_re - X_im*H_im; S_im += X_re*H_im + X_im*H_re.
        for (size_t k = 0, n = 0; n < kNumEightBinBands; ++n, k += 8) {
          const __m256 X_re = _mm256_loadu_ps(&X.re[k]);
          const __m256 X_im = _mm256_loadu_ps(&X.im[k]);
          const __m256 H_re = _mm256_loadu_ps(&H_p_ch.re[k]);
          const __m256 H_im = _mm256_loadu_ps(&H_p_ch.im[k]);
          const __m256 S_re = _mm256_loadu_ps(&S->re[k]);
          const __m256 S_im = _mm256_loadu_ps(&S->im[k]);
          const __m256 a = _mm256_mul_ps(X_re, H_re);
          const __m256 b = _mm256_mul_ps(X_im, H_im);
          const __m256 c = _mm256_mul_ps(X_re, H_im);
          const __m256 d = _mm256_mul_ps(X_im, H_re);
          const __m256 e = _mm256_sub_ps(a, b);
          const __m256 f = _mm256_add_ps(c, d);
          const __m256 g = _mm256_add_ps(S_re, e);
          const __m256 h = _mm256_add_ps(S_im, f);
          _mm256_storeu_ps(&S->re[k], g);
          _mm256_storeu_ps(&S->im[k], h);
        }
      }
    }
    // Wrap around the circular buffer and continue with the remaining
    // partitions.
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);

  // Second traversal: scalar accumulation of the last (Nyquist) bin.
  X_partition = render_buffer.Position();
  p = 0;
  limit = lim1;
  do {
    for (; p < limit; ++p, ++X_partition) {
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const FftData& H_p_ch = H[p][ch];
        const FftData& X = render_buffer_data[X_partition][ch];
        S->re[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] -
                                X.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
        S->im[kFftLengthBy2] += X.re[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2] +
                                X.im[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2];
      }
    }
    limit = lim2;
    X_partition = 0;
  } while (p < lim2);
}
|
||||
|
||||
} // namespace aec3
|
||||
} // namespace webrtc
|
102
webrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
Normal file
102
webrtc/modules/audio_processing/aec3/adaptive_fir_filter_erl.cc
Normal file
@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the echo return loss estimate of the filter, which is the
|
||||
// sum of the partition frequency responses.
|
||||
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl) {
|
||||
std::fill(erl.begin(), erl.end(), 0.f);
|
||||
for (auto& H2_j : H2) {
|
||||
std::transform(H2_j.begin(), H2_j.end(), erl.begin(), erl.begin(),
|
||||
std::plus<float>());
|
||||
}
|
||||
}
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses. NEON variant: 4 bins per
// iteration, with scalar handling of the last (Nyquist) bin.
void ErlComputer_NEON(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (auto& H2_j : H2) {
    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
      const float32x4_t H2_j_k = vld1q_f32(&H2_j[k]);
      float32x4_t erl_k = vld1q_f32(&erl[k]);
      erl_k = vaddq_f32(erl_k, H2_j_k);
      vst1q_f32(&erl[k], erl_k);
    }
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses. SSE2 variant: 4 bins per
// iteration, with scalar handling of the last (Nyquist) bin.
void ErlComputer_SSE2(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (auto& H2_j : H2) {
    for (size_t k = 0; k < kFftLengthBy2; k += 4) {
      const __m128 H2_j_k = _mm_loadu_ps(&H2_j[k]);
      __m128 erl_k = _mm_loadu_ps(&erl[k]);
      erl_k = _mm_add_ps(erl_k, H2_j_k);
      _mm_storeu_ps(&erl[k], erl_k);
    }
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
|
||||
#endif
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
// Computes the echo return loss (sum of the partition frequency responses)
// into erl, dispatching to the SIMD variant matching the given optimization.
void ComputeErl(const Aec3Optimization& optimization,
                const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
                rtc::ArrayView<float> erl) {
  RTC_DCHECK_EQ(kFftLengthBy2Plus1, erl.size());
  // Update the frequency response and echo return loss for the filter.
  switch (optimization) {
#if defined(WEBRTC_ARCH_X86_FAMILY)
    case Aec3Optimization::kSse2:
      aec3::ErlComputer_SSE2(H2, erl);
      break;
    case Aec3Optimization::kAvx2:
      aec3::ErlComputer_AVX2(H2, erl);
      break;
#endif
#if defined(WEBRTC_HAS_NEON)
    case Aec3Optimization::kNeon:
      aec3::ErlComputer_NEON(H2, erl);
      break;
#endif
    default:
      // Portable scalar fallback.
      aec3::ErlComputer(H2, erl);
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the echo return loss estimate of the filter, which is the
|
||||
// sum of the partition frequency responses.
|
||||
void ErlComputer(const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
void ErlComputer_NEON(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
void ErlComputer_SSE2(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
|
||||
void ErlComputer_AVX2(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
#endif
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
// Computes the echo return loss based on a frequency response.
|
||||
void ComputeErl(const Aec3Optimization& optimization,
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
|
||||
rtc::ArrayView<float> erl);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ADAPTIVE_FIR_FILTER_ERL_H_
|
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Computes and stores the echo return loss estimate of the filter, which is the
// sum of the partition frequency responses. AVX2 variant: 8 bins per
// iteration, with scalar handling of the last (Nyquist) bin.
void ErlComputer_AVX2(
    const std::vector<std::array<float, kFftLengthBy2Plus1>>& H2,
    rtc::ArrayView<float> erl) {
  std::fill(erl.begin(), erl.end(), 0.f);
  for (auto& H2_j : H2) {
    for (size_t k = 0; k < kFftLengthBy2; k += 8) {
      const __m256 H2_j_k = _mm256_loadu_ps(&H2_j[k]);
      __m256 erl_k = _mm256_loadu_ps(&erl[k]);
      erl_k = _mm256_add_ps(erl_k, H2_j_k);
      _mm256_storeu_ps(&erl[k], erl_k);
    }
    erl[kFftLengthBy2] += H2_j[kFftLengthBy2];
  }
}
|
||||
|
||||
} // namespace aec3
|
||||
} // namespace webrtc
|
58
webrtc/modules/audio_processing/aec3/aec3_common.cc
Normal file
58
webrtc/modules/audio_processing/aec3/aec3_common.cc
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"

#include <stdint.h>

#include <cstring>

#include "rtc_base/checks.h"
#include "rtc_base/system/arch.h"
#include "system_wrappers/include/cpu_features_wrapper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Selects the SIMD optimization to use for AEC3, preferring the widest
// instruction set detected at runtime (x86) or available at compile time
// (NEON); falls back to the portable scalar implementation.
Aec3Optimization DetectOptimization() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (GetCPUInfo(kAVX2) != 0) {
    return Aec3Optimization::kAvx2;
  } else if (GetCPUInfo(kSSE2) != 0) {
    return Aec3Optimization::kSse2;
  }
#endif

#if defined(WEBRTC_HAS_NEON)
  return Aec3Optimization::kNeon;
#endif

  return Aec3Optimization::kNone;
}
|
||||
|
||||
// Fast approximation of log2(in) for positive inputs, accurate to within a
// fraction of a log2 unit (exact at powers of two).
float FastApproxLog2f(const float in) {
  RTC_DCHECK_GT(in, .0f);
  // Read and interpret float as uint32_t and then cast to float.
  // This is done to extract the exponent (bits 30 - 23).
  // "Right shift" of the exponent is then performed by multiplying
  // with the constant (1/2^23). Finally, we subtract a constant to
  // remove the bias (https://en.wikipedia.org/wiki/Exponent_bias).
  // std::memcpy is used for the type punning: reading a union member other
  // than the one last written is undefined behavior in C++ (unlike C), and
  // memcpy compiles to the same single move instruction.
  uint32_t bit_pattern;
  std::memcpy(&bit_pattern, &in, sizeof(bit_pattern));
  float out = bit_pattern;
  out *= 1.1920929e-7f;  // 1/2^23
  out -= 126.942695f;    // Remove bias.
  return out;
}
|
||||
|
||||
// Converts a power quantity expressed in log2 units to decibels:
// dB = 10 * log10(x) = (10 * log10(2)) * log2(x).
float Log2TodB(const float in_log2) {
  constexpr double kTenTimesLog10Of2 = 3.0102999566398121;
  return kTenTimesLog10Of2 * in_log2;
}
|
||||
|
||||
} // namespace webrtc
|
114
webrtc/modules/audio_processing/aec3/aec3_common.h
Normal file
114
webrtc/modules/audio_processing/aec3/aec3_common.h
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Compiler-portable 16-byte alignment annotations for SIMD-friendly data.
#ifdef _MSC_VER /* visual c++ */
#define ALIGN16_BEG __declspec(align(16))
#define ALIGN16_END
#else /* gcc or icc */
#define ALIGN16_BEG
#define ALIGN16_END __attribute__((aligned(16)))
#endif

// The SIMD code path chosen at runtime (see DetectOptimization()).
enum class Aec3Optimization { kNone, kSse2, kAvx2, kNeon };
|
||||
|
||||
// AEC3 processes audio in blocks; 250 blocks per second.
constexpr int kNumBlocksPerSecond = 250;

// Metrics are reported every 10 seconds; collection stops a few blocks
// early to leave room for computing the metrics.
constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
constexpr int kMetricsComputationBlocks = 7;
constexpr int kMetricsCollectionBlocks =
    kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;

// Geometry of the 128-point FFT (65 unique bins for real input).
constexpr size_t kFftLengthBy2 = 64;
constexpr size_t kFftLengthBy2Plus1 = kFftLengthBy2 + 1;
constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
constexpr size_t kFftLength = 2 * kFftLengthBy2;
constexpr size_t kFftLengthBy2Log2 = 6;

// Capacity of the queue used to transfer render frames into the AEC.
constexpr int kRenderTransferQueueSizeFrames = 100;

// Fullband audio is split into at most 3 bands; a frame is 160 samples per
// band and is processed as two sub-frames.
constexpr size_t kMaxNumBands = 3;
constexpr size_t kFrameSize = 160;
constexpr size_t kSubFrameLength = kFrameSize / 2;

// One processing block equals half an FFT length.
constexpr size_t kBlockSize = kFftLengthBy2;
constexpr size_t kBlockSizeLog2 = kFftLengthBy2Log2;

constexpr size_t kExtendedBlockSize = 2 * kFftLengthBy2;
// Matched-filter geometry (in sub-blocks) used by the delay estimator; the
// alignment shift is 3/4 of the window so that consecutive filters overlap.
constexpr size_t kMatchedFilterWindowSizeSubBlocks = 32;
constexpr size_t kMatchedFilterAlignmentShiftSizeSubBlocks =
    kMatchedFilterWindowSizeSubBlocks * 3 / 4;
|
||||
|
||||
// TODO(peah): Integrate this with how it is done inside audio_processing_impl.
// Returns the number of frequency bands for a fullband sample rate
// (16/32/48 kHz map to 1/2/3 bands).
constexpr size_t NumBandsForRate(int sample_rate_hz) {
  return static_cast<size_t>(sample_rate_hz / 16000);
}
|
||||
|
||||
// Returns true for the fullband sample rates supported by AEC3.
constexpr bool ValidFullBandRate(int sample_rate_hz) {
  return sample_rate_hz == 16000 || sample_rate_hz == 32000 ||
         sample_rate_hz == 48000;
}
|
||||
|
||||
// Returns the time-domain length (in samples) of a filter that is
// |filter_length_blocks| blocks long.
constexpr int GetTimeDomainLength(int filter_length_blocks) {
  return filter_length_blocks * kFftLengthBy2;
}
|
||||
|
||||
// Returns the size (in samples) of the downsampled render buffer needed to
// host |num_matched_filters| overlapping matched filters at the given
// downsampling factor.
constexpr size_t GetDownSampledBufferSize(size_t down_sampling_factor,
                                          size_t num_matched_filters) {
  return kBlockSize / down_sampling_factor *
         (kMatchedFilterAlignmentShiftSizeSubBlocks * num_matched_filters +
          kMatchedFilterWindowSizeSubBlocks + 1);
}
|
||||
|
||||
// Returns the render delay buffer size in blocks: enough blocks to cover the
// matched-filter span plus the filter length and one extra block of margin.
constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor,
                                          size_t num_matched_filters,
                                          size_t filter_length_blocks) {
  return GetDownSampledBufferSize(down_sampling_factor, num_matched_filters) /
             (kBlockSize / down_sampling_factor) +
         filter_length_blocks + 1;
}
|
||||
|
||||
// Detects what kind of optimizations to use for the code.
Aec3Optimization DetectOptimization();

// Computes the log2 of the input in a fast and approximate manner.
float FastApproxLog2f(const float in);

// Returns dB from a power quantity expressed in log2.
float Log2TodB(const float in_log2);
|
||||
|
||||
// Compile-time consistency checks for the constants and helpers above.
static_assert(1 << kBlockSizeLog2 == kBlockSize,
              "Proper number of shifts for blocksize");

static_assert(1 << kFftLengthBy2Log2 == kFftLengthBy2,
              "Proper number of shifts for the fft length");

static_assert(1 == NumBandsForRate(16000), "Number of bands for 16 kHz");
static_assert(2 == NumBandsForRate(32000), "Number of bands for 32 kHz");
static_assert(3 == NumBandsForRate(48000), "Number of bands for 48 kHz");

static_assert(ValidFullBandRate(16000),
              "Test that 16 kHz is a valid sample rate");
static_assert(ValidFullBandRate(32000),
              "Test that 32 kHz is a valid sample rate");
static_assert(ValidFullBandRate(48000),
              "Test that 48 kHz is a valid sample rate");
static_assert(!ValidFullBandRate(8001),
              "Test that 8001 Hz is not a valid sample rate");
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_COMMON_H_
|
144
webrtc/modules/audio_processing/aec3/aec3_fft.cc
Normal file
144
webrtc/modules/audio_processing/aec3/aec3_fft.cc
Normal file
@ -0,0 +1,144 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <iterator>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/cpu_features_wrapper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// 64-point Hanning window applied to a single block before zero-padded FFTs.
const float kHanning64[kFftLengthBy2] = {
    0.f,         0.00248461f, 0.00991376f, 0.0222136f,  0.03926189f,
    0.06088921f, 0.08688061f, 0.11697778f, 0.15088159f, 0.1882551f,
    0.22872687f, 0.27189467f, 0.31732949f, 0.36457977f, 0.41317591f,
    0.46263495f, 0.51246535f, 0.56217185f, 0.61126047f, 0.65924333f,
    0.70564355f, 0.75f,       0.79187184f, 0.83084292f, 0.86652594f,
    0.89856625f, 0.92664544f, 0.95048443f, 0.96984631f, 0.98453864f,
    0.99441541f, 0.99937846f, 0.99937846f, 0.99441541f, 0.98453864f,
    0.96984631f, 0.95048443f, 0.92664544f, 0.89856625f, 0.86652594f,
    0.83084292f, 0.79187184f, 0.75f,       0.70564355f, 0.65924333f,
    0.61126047f, 0.56217185f, 0.51246535f, 0.46263495f, 0.41317591f,
    0.36457977f, 0.31732949f, 0.27189467f, 0.22872687f, 0.1882551f,
    0.15088159f, 0.11697778f, 0.08688061f, 0.06088921f, 0.03926189f,
    0.0222136f,  0.00991376f, 0.00248461f, 0.f};
|
||||
|
||||
// Hanning window from Matlab command win = sqrt(hanning(128)).
// Square-root windowing is applied both before the FFT and after the inverse
// FFT elsewhere in AEC3, so that overlap-add reconstructs the signal.
const float kSqrtHanning128[kFftLength] = {
    0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f,
    0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f,
    0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
    0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f,
    0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f,
    0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
    0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f,
    0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f,
    0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
    0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f,
    0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f,
    0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
    0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f,
    0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f,
    0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
    0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f,
    1.00000000000000f, 0.99969881869620f, 0.99879545620517f, 0.99729045667869f,
    0.99518472667220f, 0.99247953459871f, 0.98917650996478f, 0.98527764238894f,
    0.98078528040323f, 0.97570213003853f, 0.97003125319454f, 0.96377606579544f,
    0.95694033573221f, 0.94952818059304f, 0.94154406518302f, 0.93299279883474f,
    0.92387953251129f, 0.91420975570353f, 0.90398929312344f, 0.89322430119552f,
    0.88192126434835f, 0.87008699110871f, 0.85772861000027f, 0.84485356524971f,
    0.83146961230255f, 0.81758481315158f, 0.80320753148064f, 0.78834642762661f,
    0.77301045336274f, 0.75720884650648f, 0.74095112535496f, 0.72424708295147f,
    0.70710678118655f, 0.68954054473707f, 0.67155895484702f, 0.65317284295378f,
    0.63439328416365f, 0.61523159058063f, 0.59569930449243f, 0.57580819141785f,
    0.55557023301960f, 0.53499761988710f, 0.51410274419322f, 0.49289819222978f,
    0.47139673682600f, 0.44961132965461f, 0.42755509343028f, 0.40524131400499f,
    0.38268343236509f, 0.35989503653499f, 0.33688985339222f, 0.31368174039889f,
    0.29028467725446f, 0.26671275747490f, 0.24298017990326f, 0.21910124015687f,
    0.19509032201613f, 0.17096188876030f, 0.14673047445536f, 0.12241067519922f,
    0.09801714032956f, 0.07356456359967f, 0.04906767432742f, 0.02454122852291f};
|
||||
|
||||
// Returns true if the CPU reports SSE2 support; always false on non-x86
// builds.
bool IsSse2Available() {
#if defined(WEBRTC_ARCH_X86_FAMILY)
  return GetCPUInfo(kSSE2) != 0;
#else
  return false;
#endif
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Selects the SSE2-accelerated Ooura FFT implementation when available.
Aec3Fft::Aec3Fft() : ooura_fft_(IsSse2Available()) {}
|
||||
|
||||
// TODO(peah): Change x to be std::array once the rest of the code allows this.
|
||||
void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
|
||||
Window window,
|
||||
FftData* X) const {
|
||||
RTC_DCHECK(X);
|
||||
RTC_DCHECK_EQ(kFftLengthBy2, x.size());
|
||||
std::array<float, kFftLength> fft;
|
||||
std::fill(fft.begin(), fft.begin() + kFftLengthBy2, 0.f);
|
||||
switch (window) {
|
||||
case Window::kRectangular:
|
||||
std::copy(x.begin(), x.end(), fft.begin() + kFftLengthBy2);
|
||||
break;
|
||||
case Window::kHanning:
|
||||
std::transform(x.begin(), x.end(), std::begin(kHanning64),
|
||||
fft.begin() + kFftLengthBy2,
|
||||
[](float a, float b) { return a * b; });
|
||||
break;
|
||||
case Window::kSqrtHanning:
|
||||
RTC_NOTREACHED();
|
||||
break;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
}
|
||||
|
||||
Fft(&fft, X);
|
||||
}
|
||||
|
||||
// Forms the FFT input by concatenating the previous block |x_old| and the
// current block |x| (optionally windowed) and computes the transform into
// |X|. Note that |x_old| is read-only and is not updated here. kHanning is
// not supported for this variant.
void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x,
                        rtc::ArrayView<const float> x_old,
                        Window window,
                        FftData* X) const {
  RTC_DCHECK(X);
  RTC_DCHECK_EQ(kFftLengthBy2, x.size());
  RTC_DCHECK_EQ(kFftLengthBy2, x_old.size());
  std::array<float, kFftLength> fft;

  switch (window) {
    case Window::kRectangular:
      // First half: previous block; second half: current block.
      std::copy(x_old.begin(), x_old.end(), fft.begin());
      std::copy(x.begin(), x.end(), fft.begin() + x_old.size());
      break;
    case Window::kHanning:
      RTC_NOTREACHED();
      break;
    case Window::kSqrtHanning:
      // Apply the sqrt-Hanning window to both halves while copying.
      std::transform(x_old.begin(), x_old.end(), std::begin(kSqrtHanning128),
                     fft.begin(), std::multiplies<float>());
      std::transform(x.begin(), x.end(),
                     std::begin(kSqrtHanning128) + x_old.size(),
                     fft.begin() + x_old.size(), std::multiplies<float>());
      break;
    default:
      RTC_NOTREACHED();
  }

  Fft(&fft, X);
}
|
||||
|
||||
} // namespace webrtc
|
75
webrtc/modules/audio_processing/aec3/aec3_fft.h
Normal file
75
webrtc/modules/audio_processing/aec3/aec3_fft.h
Normal file
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/third_party/ooura/fft_size_128/ooura_fft.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Wrapper class that provides 128 point real valued FFT functionality with the
// FftData type.
class Aec3Fft {
 public:
  // Time-domain windows that may be applied before the transform.
  enum class Window { kRectangular, kHanning, kSqrtHanning };

  Aec3Fft();

  // Computes the FFT. Note that both the input and output are modified.
  void Fft(std::array<float, kFftLength>* x, FftData* X) const {
    RTC_DCHECK(x);
    RTC_DCHECK(X);
    ooura_fft_.Fft(x->data());
    X->CopyFromPackedArray(*x);
  }
  // Computes the inverse Fft.
  void Ifft(const FftData& X, std::array<float, kFftLength>* x) const {
    RTC_DCHECK(x);
    X.CopyToPackedArray(x);
    ooura_fft_.InverseFft(x->data());
  }

  // Windows the input with the chosen window (kRectangular or kHanning), adds
  // padding of kFftLengthBy2 initial zeros and then computes the Fft.
  void ZeroPaddedFft(rtc::ArrayView<const float> x,
                     Window window,
                     FftData* X) const;

  // Concatenates the kFftLengthBy2 values long x_old and x before computing
  // the Fft. Note that x_old is not modified by this call.
  void PaddedFft(rtc::ArrayView<const float> x,
                 rtc::ArrayView<const float> x_old,
                 FftData* X) const {
    PaddedFft(x, x_old, Window::kRectangular, X);
  }

  // Padded Fft using a time-domain window (kRectangular or kSqrtHanning).
  void PaddedFft(rtc::ArrayView<const float> x,
                 rtc::ArrayView<const float> x_old,
                 Window window,
                 FftData* X) const;

 private:
  const OouraFft ooura_fft_;

  RTC_DISALLOW_COPY_AND_ASSIGN(Aec3Fft);
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC3_FFT_H_
|
477
webrtc/modules/audio_processing/aec3/aec_state.cc
Normal file
477
webrtc/modules/audio_processing/aec3/aec_state.cc
Normal file
@ -0,0 +1,477 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Field-trial kill switches. Each returns whether the corresponding reset
// behavior is active; the trials exist to deactivate behavior that is
// enabled by default.
bool DeactivateInitialStateResetAtEchoPathChange() {
  return field_trial::IsEnabled(
      "WebRTC-Aec3DeactivateInitialStateResetKillSwitch");
}

bool FullResetAtEchoPathChange() {
  return !field_trial::IsEnabled("WebRTC-Aec3AecStateFullResetKillSwitch");
}

bool SubtractorAnalyzerResetAtEchoPathChange() {
  return !field_trial::IsEnabled(
      "WebRTC-Aec3AecStateSubtractorAnalyzerResetKillSwitch");
}
|
||||
|
||||
// Updates |reverb_model| with the (channel-averaged) render power one block
// further in the past than |delay_blocks| and writes the sum of the render
// power at the delay and the modeled reverb tail to |reverb_power_spectrum|.
void ComputeAvgRenderReverb(
    const SpectrumBuffer& spectrum_buffer,
    int delay_blocks,
    float reverb_decay,
    ReverbModel* reverb_model,
    rtc::ArrayView<float, kFftLengthBy2Plus1> reverb_power_spectrum) {
  RTC_DCHECK(reverb_model);
  const size_t num_render_channels = spectrum_buffer.buffer[0].size();
  // Index of the buffer entry |delay_blocks| behind the read position, and
  // of the entry one block further into the past.
  int idx_at_delay =
      spectrum_buffer.OffsetIndex(spectrum_buffer.read, delay_blocks);
  int idx_past = spectrum_buffer.IncIndex(idx_at_delay);

  std::array<float, kFftLengthBy2Plus1> X2_data;
  rtc::ArrayView<const float> X2;
  if (num_render_channels > 1) {
    // Averages the band-0 power spectra over all render channels.
    auto average_channels =
        [](size_t num_render_channels,
           rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
               spectrum_band_0,
           rtc::ArrayView<float, kFftLengthBy2Plus1> render_power) {
          std::fill(render_power.begin(), render_power.end(), 0.f);
          for (size_t ch = 0; ch < num_render_channels; ++ch) {
            for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
              render_power[k] += spectrum_band_0[ch][k];
            }
          }
          const float normalizer = 1.f / num_render_channels;
          for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
            render_power[k] *= normalizer;
          }
        };
    average_channels(num_render_channels, spectrum_buffer.buffer[idx_past],
                     X2_data);
    reverb_model->UpdateReverbNoFreqShaping(
        X2_data, /*power_spectrum_scaling=*/1.0f, reverb_decay);

    average_channels(num_render_channels, spectrum_buffer.buffer[idx_at_delay],
                     X2_data);
    X2 = X2_data;
  } else {
    // Single render channel: use the stored spectra directly.
    reverb_model->UpdateReverbNoFreqShaping(
        spectrum_buffer.buffer[idx_past][/*channel=*/0],
        /*power_spectrum_scaling=*/1.0f, reverb_decay);

    X2 = spectrum_buffer.buffer[idx_at_delay][/*channel=*/0];
  }

  rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
      reverb_model->reverb();
  for (size_t k = 0; k < X2.size(); ++k) {
    reverb_power_spectrum[k] = X2[k] + reverb_power[k];
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Incremented per constructed AecState to give each data dumper a unique id.
int AecState::instance_count_ = 0;
|
||||
|
||||
// Fills |residual_scaling| with the residual echo scaling produced by the
// echo audibility analysis. The filter is considered converged after more
// adaptation blocks when the conservative initial phase is configured.
void AecState::GetResidualEchoScaling(
    rtc::ArrayView<float> residual_scaling) const {
  const float convergence_threshold_blocks =
      config_.filter.conservative_initial_phase ? 1.5f * kNumBlocksPerSecond
                                                : 0.8f * kNumBlocksPerSecond;
  const bool filter_has_had_time_to_converge =
      strong_not_saturated_render_blocks_ >= convergence_threshold_blocks;
  echo_audibility_.GetResidualEchoScaling(filter_has_had_time_to_converge,
                                          residual_scaling);
}
|
||||
|
||||
// Returns a fixed ERLE uncertainty of 1 while the echo is saturated (the
// estimate cannot be trusted then); otherwise no uncertainty is reported.
absl::optional<float> AecState::ErleUncertainty() const {
  return SaturatedEcho() ? absl::optional<float>(1.f) : absl::nullopt;
}
|
||||
|
||||
// Constructs the AEC state and all sub-analyzers from |config|. The
// field-trial kill-switch flags are read once here and latched for the
// lifetime of the instance.
AecState::AecState(const EchoCanceller3Config& config,
                   size_t num_capture_channels)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_(config),
      num_capture_channels_(num_capture_channels),
      deactivate_initial_state_reset_at_echo_path_change_(
          DeactivateInitialStateResetAtEchoPathChange()),
      full_reset_at_echo_path_change_(FullResetAtEchoPathChange()),
      subtractor_analyzer_reset_at_echo_path_change_(
          SubtractorAnalyzerResetAtEchoPathChange()),
      initial_state_(config_),
      delay_state_(config_, num_capture_channels_),
      transparent_state_(TransparentMode::Create(config_)),
      filter_quality_state_(config_, num_capture_channels_),
      erl_estimator_(2 * kNumBlocksPerSecond),
      erle_estimator_(2 * kNumBlocksPerSecond, config_, num_capture_channels_),
      filter_analyzer_(config_, num_capture_channels_),
      echo_audibility_(
          config_.echo_audibility.use_stationarity_properties_at_init),
      reverb_model_estimator_(config_, num_capture_channels_),
      subtractor_output_analyzer_(num_capture_channels_) {}

AecState::~AecState() = default;
|
||||
|
||||
// Resets internal state according to the reported echo-path change: a delay
// change triggers a full reset, while a pure gain change only invalidates
// the ERLE estimate.
void AecState::HandleEchoPathChange(
    const EchoPathVariability& echo_path_variability) {
  const auto full_reset = [&]() {
    filter_analyzer_.Reset();
    capture_signal_saturation_ = false;
    strong_not_saturated_render_blocks_ = 0;
    blocks_with_active_render_ = 0;
    if (!deactivate_initial_state_reset_at_echo_path_change_) {
      initial_state_.Reset();
    }
    if (transparent_state_) {
      transparent_state_->Reset();
    }
    erle_estimator_.Reset(true);
    erl_estimator_.Reset();
    filter_quality_state_.Reset();
  };

  // TODO(peah): Refine the reset scheme according to the type of gain and
  // delay adjustment.

  if (full_reset_at_echo_path_change_ &&
      echo_path_variability.delay_change !=
          EchoPathVariability::DelayAdjustment::kNone) {
    full_reset();
  } else if (echo_path_variability.gain_change) {
    erle_estimator_.Reset(false);
  }
  if (subtractor_analyzer_reset_at_echo_path_change_) {
    subtractor_output_analyzer_.HandleEchoPathChange();
  }
}
|
||||
|
||||
// Per-block update of the overall AEC state: analyzes the subtractor output
// and the adaptive filters, tracks render activity, and refreshes the
// delay, ERL/ERLE, saturation, transparency, filter-quality and reverb
// estimates. The update order matters: later estimators consume the results
// of the earlier ones within the same block.
void AecState::Update(
    const absl::optional<DelayEstimate>& external_delay,
    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
        adaptive_filter_frequency_responses,
    rtc::ArrayView<const std::vector<float>> adaptive_filter_impulse_responses,
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
    rtc::ArrayView<const SubtractorOutput> subtractor_output) {
  RTC_DCHECK_EQ(num_capture_channels_, Y2.size());
  RTC_DCHECK_EQ(num_capture_channels_, subtractor_output.size());
  RTC_DCHECK_EQ(num_capture_channels_,
                adaptive_filter_frequency_responses.size());
  RTC_DCHECK_EQ(num_capture_channels_,
                adaptive_filter_impulse_responses.size());

  // Analyze the filter outputs and filters.
  bool any_filter_converged;
  bool all_filters_diverged;
  subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
                                     &all_filters_diverged);

  bool any_filter_consistent;
  float max_echo_path_gain;
  filter_analyzer_.Update(adaptive_filter_impulse_responses, render_buffer,
                          &any_filter_consistent, &max_echo_path_gain);

  // Estimate the direct path delay of the filter.
  if (config_.filter.use_linear_filter) {
    delay_state_.Update(filter_analyzer_.FilterDelaysBlocks(), external_delay,
                        strong_not_saturated_render_blocks_);
  }

  // Band-0 render block aligned with the capture signal.
  const std::vector<std::vector<float>>& aligned_render_block =
      render_buffer.Block(-delay_state_.MinDirectPathFilterDelay())[0];

  // Update render counters. Render is considered active as soon as one
  // channel exceeds the configured energy limit.
  bool active_render = false;
  for (size_t ch = 0; ch < aligned_render_block.size(); ++ch) {
    const float render_energy = std::inner_product(
        aligned_render_block[ch].begin(), aligned_render_block[ch].end(),
        aligned_render_block[ch].begin(), 0.f);
    if (render_energy > (config_.render_levels.active_render_limit *
                         config_.render_levels.active_render_limit) *
                            kFftLengthBy2) {
      active_render = true;
      break;
    }
  }
  blocks_with_active_render_ += active_render ? 1 : 0;
  strong_not_saturated_render_blocks_ +=
      active_render && !SaturatedCapture() ? 1 : 0;

  std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;

  ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
                         delay_state_.MinDirectPathFilterDelay(), ReverbDecay(),
                         &avg_render_reverb_, avg_render_spectrum_with_reverb);

  if (config_.echo_audibility.use_stationarity_properties) {
    // Update the echo audibility evaluator.
    echo_audibility_.Update(render_buffer, avg_render_reverb_.reverb(),
                            delay_state_.MinDirectPathFilterDelay(),
                            delay_state_.ExternalDelayReported());
  }

  // Update the ERL and ERLE measures.
  if (initial_state_.TransitionTriggered()) {
    erle_estimator_.Reset(false);
  }

  erle_estimator_.Update(render_buffer, adaptive_filter_frequency_responses,
                         avg_render_spectrum_with_reverb, Y2, E2_refined,
                         subtractor_output_analyzer_.ConvergedFilters());

  erl_estimator_.Update(
      subtractor_output_analyzer_.ConvergedFilters(),
      render_buffer.Spectrum(delay_state_.MinDirectPathFilterDelay()), Y2);

  // Detect and flag echo saturation.
  if (config_.ep_strength.echo_can_saturate) {
    saturation_detector_.Update(aligned_render_block, SaturatedCapture(),
                                UsableLinearEstimate(), subtractor_output,
                                max_echo_path_gain);
  } else {
    RTC_DCHECK(!saturation_detector_.SaturatedEcho());
  }

  // Update the decision on whether to use the initial state parameter set.
  initial_state_.Update(active_render, SaturatedCapture());

  // Detect whether the transparent mode should be activated.
  if (transparent_state_) {
    transparent_state_->Update(delay_state_.MinDirectPathFilterDelay(),
                               any_filter_consistent, any_filter_converged,
                               all_filters_diverged, active_render,
                               SaturatedCapture());
  }

  // Analyze the quality of the filter.
  filter_quality_state_.Update(active_render, TransparentModeActive(),
                               SaturatedCapture(), external_delay,
                               any_filter_converged);

  // Update the reverb estimate.
  const bool stationary_block =
      config_.echo_audibility.use_stationarity_properties &&
      echo_audibility_.IsBlockStationary();

  reverb_model_estimator_.Update(
      filter_analyzer_.GetAdjustedFilters(),
      adaptive_filter_frequency_responses,
      erle_estimator_.GetInstLinearQualityEstimates(),
      delay_state_.DirectPathFilterDelays(),
      filter_quality_state_.UsableLinearFilterOutputs(), stationary_block);

  // Debug dumping of the per-block state.
  erle_estimator_.Dump(data_dumper_);
  reverb_model_estimator_.Dump(data_dumper_.get());
  data_dumper_->DumpRaw("aec3_active_render", active_render);
  data_dumper_->DumpRaw("aec3_erl", Erl());
  data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
  data_dumper_->DumpRaw("aec3_erle", Erle()[0]);
  data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
  data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
  data_dumper_->DumpRaw("aec3_filter_delay",
                        filter_analyzer_.MinFilterDelayBlocks());

  data_dumper_->DumpRaw("aec3_any_filter_consistent", any_filter_consistent);
  data_dumper_->DumpRaw("aec3_initial_state",
                        initial_state_.InitialStateActive());
  data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
  data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
  data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
  data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);

  data_dumper_->DumpRaw("aec3_external_delay_avaliable",
                        external_delay ? 1 : 0);
  data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
                        GetReverbFrequencyResponse());
}
|
||||
|
||||
// Tracks whether the AEC still operates with the cautious initial-state
// parameter set after startup or a reset.
AecState::InitialState::InitialState(const EchoCanceller3Config& config)
    : conservative_initial_phase_(config.filter.conservative_initial_phase),
      initial_state_seconds_(config.filter.initial_state_seconds) {
  Reset();
}
|
||||
// Re-enters the initial state and restarts the render-adaptation counter.
// (The redundant injected-class-name qualification in the definition has
// been dropped for consistency with the other definitions in this file.)
void AecState::InitialState::Reset() {
  initial_state_ = true;
  strong_not_saturated_render_blocks_ = 0;
}
|
||||
void AecState::InitialState::InitialState::Update(bool active_render,
|
||||
bool saturated_capture) {
|
||||
strong_not_saturated_render_blocks_ +=
|
||||
active_render && !saturated_capture ? 1 : 0;
|
||||
|
||||
// Flag whether the initial state is still active.
|
||||
bool prev_initial_state = initial_state_;
|
||||
if (conservative_initial_phase_) {
|
||||
initial_state_ =
|
||||
strong_not_saturated_render_blocks_ < 5 * kNumBlocksPerSecond;
|
||||
} else {
|
||||
initial_state_ = strong_not_saturated_render_blocks_ <
|
||||
initial_state_seconds_ * kNumBlocksPerSecond;
|
||||
}
|
||||
|
||||
// Flag whether the transition from the initial state has started.
|
||||
transition_triggered_ = !initial_state_ && prev_initial_state;
|
||||
}
|
||||
|
||||
// Initializes every per-channel filter delay to the configured delay
// headroom, expressed in blocks.
AecState::FilterDelay::FilterDelay(const EchoCanceller3Config& config,
                                   size_t num_capture_channels)
    : delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
      filter_delays_blocks_(num_capture_channels, delay_headroom_blocks_),
      min_filter_delay_(delay_headroom_blocks_) {}
|
||||
|
||||
// Updates the per-channel direct-path delay estimates, falling back to the
// configured headroom while the adaptive filters may not yet have converged
// and an external delay has been reported.
void AecState::FilterDelay::Update(
    rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
    const absl::optional<DelayEstimate>& external_delay,
    size_t blocks_with_proper_filter_adaptation) {
  // Update the delay based on the external delay.
  if (external_delay &&
      (!external_delay_ || external_delay_->delay != external_delay->delay)) {
    external_delay_ = external_delay;
    external_delay_reported_ = true;
  }

  // Override the estimated delay if it is not certain that the filter has had
  // time to converge.
  const bool delay_estimator_may_not_have_converged =
      blocks_with_proper_filter_adaptation < 2 * kNumBlocksPerSecond;
  if (delay_estimator_may_not_have_converged && external_delay_) {
    // Use the headroom as the delay guess for all channels.
    const int delay_guess = delay_headroom_blocks_;
    std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(),
              delay_guess);
  } else {
    RTC_DCHECK_EQ(filter_delays_blocks_.size(),
                  analyzer_filter_delay_estimates_blocks.size());
    std::copy(analyzer_filter_delay_estimates_blocks.begin(),
              analyzer_filter_delay_estimates_blocks.end(),
              filter_delays_blocks_.begin());
  }

  // Cache the smallest delay over all channels.
  min_filter_delay_ = *std::min_element(filter_delays_blocks_.begin(),
                                        filter_delays_blocks_.end());
}
|
||||
|
||||
// Tracks one usable-linear-filter flag per capture channel; all flags start
// out false until enough adaptation has been observed.
AecState::FilteringQualityAnalyzer::FilteringQualityAnalyzer(
    const EchoCanceller3Config& config,
    size_t num_capture_channels)
    : use_linear_filter_(config.filter.use_linear_filter),
      usable_linear_filter_estimates_(num_capture_channels, false) {}
||||
|
||||
// Marks all linear filter estimates as unusable and restarts the per-reset
// adaptation counter; the since-start counter is intentionally kept.
void AecState::FilteringQualityAnalyzer::Reset() {
  std::fill(usable_linear_filter_estimates_.begin(),
            usable_linear_filter_estimates_.end(), false);
  overall_usable_linear_estimates_ = false;
  filter_update_blocks_since_reset_ = 0;
}
|
||||
|
||||
// Re-evaluates whether the linear filter output is usable, based on how much
// adaptation it has received since startup and since the last reset, on
// whether convergence or an external delay has been seen, and on whether
// transparent mode is active.
void AecState::FilteringQualityAnalyzer::Update(
    bool active_render,
    bool transparent_mode,
    bool saturated_capture,
    const absl::optional<DelayEstimate>& external_delay,
    bool any_filter_converged) {
  // Update blocks counter.
  const bool filter_update = active_render && !saturated_capture;
  filter_update_blocks_since_reset_ += filter_update ? 1 : 0;
  filter_update_blocks_since_start_ += filter_update ? 1 : 0;

  // Store convergence flag when observed.
  convergence_seen_ = convergence_seen_ || any_filter_converged;

  // Verify requirements for achieving a decent filter. The requirements for
  // filter adaptation at call startup are more restrictive than after an
  // in-call reset.
  const bool sufficient_data_to_converge_at_startup =
      filter_update_blocks_since_start_ > kNumBlocksPerSecond * 0.4f;
  const bool sufficient_data_to_converge_at_reset =
      sufficient_data_to_converge_at_startup &&
      filter_update_blocks_since_reset_ > kNumBlocksPerSecond * 0.2f;

  // The linear filter can only be used if it has had time to converge.
  overall_usable_linear_estimates_ = sufficient_data_to_converge_at_startup &&
                                     sufficient_data_to_converge_at_reset;

  // The linear filter can only be used if an external delay or convergence
  // have been identified.
  overall_usable_linear_estimates_ =
      overall_usable_linear_estimates_ && (external_delay || convergence_seen_);

  // If transparent mode is on, deactivate using the linear filter.
  overall_usable_linear_estimates_ =
      overall_usable_linear_estimates_ && !transparent_mode;

  if (use_linear_filter_) {
    std::fill(usable_linear_filter_estimates_.begin(),
              usable_linear_filter_estimates_.end(),
              overall_usable_linear_estimates_);
  }
}
|
||||
|
||||
// Decides whether the echo should be treated as saturated for this block.
// Saturation can only occur when the capture itself saturated; in that case
// either the linear echo estimates or a conservative render-based bound is
// inspected.
void AecState::SaturationDetector::Update(
    rtc::ArrayView<const std::vector<float>> x,
    bool saturated_capture,
    bool usable_linear_estimate,
    rtc::ArrayView<const SubtractorOutput> subtractor_output,
    float echo_path_gain) {
  saturated_echo_ = false;
  if (!saturated_capture) {
    return;
  }

  if (usable_linear_estimate) {
    // Declare saturation if any channel's linear echo estimate peaked above
    // the threshold.
    constexpr float kSaturationThreshold = 20000.f;
    for (const auto& channel_output : subtractor_output) {
      if (channel_output.s_refined_max_abs > kSaturationThreshold ||
          channel_output.s_coarse_max_abs > kSaturationThreshold) {
        saturated_echo_ = true;
      }
    }
  } else {
    // Without a usable linear estimate, bound the echo amplitude via the
    // render peak, the estimated echo path gain and a safety margin.
    float render_peak = 0.f;
    for (const auto& channel : x) {
      for (float sample : channel) {
        render_peak = std::max(render_peak, fabsf(sample));
      }
    }

    const float kMargin = 10.f;
    const float peak_echo_amplitude = render_peak * echo_path_gain * kMargin;
    saturated_echo_ = peak_echo_amplitude > 32000;
  }
}
|
||||
|
||||
} // namespace webrtc
|
294
webrtc/modules/audio_processing/aec3/aec_state.h
Normal file
294
webrtc/modules/audio_processing/aec3/aec_state.h
Normal file
@ -0,0 +1,294 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/echo_audibility.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/erl_estimator.h"
|
||||
#include "modules/audio_processing/aec3/erle_estimator.h"
|
||||
#include "modules/audio_processing/aec3/filter_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/reverb_model_estimator.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/transparent_mode.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
// Handles the state and the conditions for the echo removal functionality.
class AecState {
 public:
  AecState(const EchoCanceller3Config& config, size_t num_capture_channels);
  ~AecState();

  // Returns whether the echo subtractor can be used to determine the residual
  // echo.
  bool UsableLinearEstimate() const {
    return filter_quality_state_.LinearFilterUsable() &&
           config_.filter.use_linear_filter;
  }

  // Returns whether the echo subtractor output should be used as output.
  bool UseLinearFilterOutput() const {
    return filter_quality_state_.LinearFilterUsable() &&
           config_.filter.use_linear_filter;
  }

  // Returns whether the render signal is currently active.
  bool ActiveRender() const { return blocks_with_active_render_ > 200; }

  // Returns the appropriate scaling of the residual echo to match the
  // audibility.
  void GetResidualEchoScaling(rtc::ArrayView<float> residual_scaling) const;

  // Returns whether the stationary properties of the signals are used in the
  // aec.
  bool UseStationarityProperties() const {
    return config_.echo_audibility.use_stationarity_properties;
  }

  // Returns the ERLE.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
    return erle_estimator_.Erle();
  }

  // Returns an offset to apply to the estimation of the residual echo
  // computation. Returning nullopt means that no offset should be used, while
  // any other value will be applied as a multiplier to the estimated residual
  // echo.
  absl::optional<float> ErleUncertainty() const;

  // Returns the fullband ERLE estimate in log2 units.
  float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }

  // Returns the ERL.
  const std::array<float, kFftLengthBy2Plus1>& Erl() const {
    return erl_estimator_.Erl();
  }

  // Returns the time-domain ERL.
  float ErlTimeDomain() const { return erl_estimator_.ErlTimeDomain(); }

  // Returns the delay estimate based on the linear filter.
  int MinDirectPathFilterDelay() const {
    return delay_state_.MinDirectPathFilterDelay();
  }

  // Returns whether the capture signal is saturated.
  bool SaturatedCapture() const { return capture_signal_saturation_; }

  // Returns whether the echo signal is saturated.
  bool SaturatedEcho() const { return saturation_detector_.SaturatedEcho(); }

  // Updates the capture signal saturation.
  void UpdateCaptureSaturation(bool capture_signal_saturation) {
    capture_signal_saturation_ = capture_signal_saturation;
  }

  // Returns whether the transparent mode is active.
  bool TransparentModeActive() const {
    return transparent_state_ && transparent_state_->Active();
  }

  // Takes appropriate action at an echo path change.
  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);

  // Returns the decay factor for the echo reverberation.
  float ReverbDecay() const { return reverb_model_estimator_.ReverbDecay(); }

  // Returns the frequency response of the reverberant echo.
  rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
    return reverb_model_estimator_.GetReverbFrequencyResponse();
  }

  // Returns whether the transition for going out of the initial state has
  // been triggered.
  bool TransitionTriggered() const {
    return initial_state_.TransitionTriggered();
  }

  // Updates the aec state.
  // TODO(bugs.webrtc.org/10913): Compute multi-channel ERL.
  void Update(
      const absl::optional<DelayEstimate>& external_delay,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          adaptive_filter_frequency_responses,
      rtc::ArrayView<const std::vector<float>>
          adaptive_filter_impulse_responses,
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2_refined,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
      rtc::ArrayView<const SubtractorOutput> subtractor_output);

  // Returns filter length in blocks.
  int FilterLengthBlocks() const {
    // All filters have the same length, so arbitrarily return channel 0 length.
    return filter_analyzer_.FilterLengthBlocks();
  }

 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  const size_t num_capture_channels_;
  const bool deactivate_initial_state_reset_at_echo_path_change_;
  const bool full_reset_at_echo_path_change_;
  const bool subtractor_analyzer_reset_at_echo_path_change_;

  // Class for controlling the transition from the initial state, which in turn
  // controls when the filter parameters for the initial state should be used.
  class InitialState {
   public:
    explicit InitialState(const EchoCanceller3Config& config);
    // Resets the state to again begin in the initial state.
    void Reset();

    // Updates the state based on new data.
    void Update(bool active_render, bool saturated_capture);

    // Returns whether the initial state is active or not.
    bool InitialStateActive() const { return initial_state_; }

    // Returns whether the transition from the initial state was started.
    bool TransitionTriggered() const { return transition_triggered_; }

   private:
    const bool conservative_initial_phase_;
    const float initial_state_seconds_;
    bool transition_triggered_ = false;
    bool initial_state_ = true;
    size_t strong_not_saturated_render_blocks_ = 0;
  } initial_state_;

  // Class for choosing the direct-path delay relative to the beginning of the
  // filter, as well as any other data related to the delay used within
  // AecState.
  class FilterDelay {
   public:
    FilterDelay(const EchoCanceller3Config& config,
                size_t num_capture_channels);

    // Returns whether an external delay has been reported to the AecState (from
    // the delay estimator).
    bool ExternalDelayReported() const { return external_delay_reported_; }

    // Returns the delay in blocks relative to the beginning of the filter that
    // corresponds to the direct path of the echo.
    rtc::ArrayView<const int> DirectPathFilterDelays() const {
      return filter_delays_blocks_;
    }

    // Returns the minimum delay among the direct path delays relative to the
    // beginning of the filter.
    int MinDirectPathFilterDelay() const { return min_filter_delay_; }

    // Updates the delay estimates based on new data.
    void Update(
        rtc::ArrayView<const int> analyzer_filter_delay_estimates_blocks,
        const absl::optional<DelayEstimate>& external_delay,
        size_t blocks_with_proper_filter_adaptation);

   private:
    const int delay_headroom_blocks_;
    bool external_delay_reported_ = false;
    std::vector<int> filter_delays_blocks_;
    int min_filter_delay_;
    absl::optional<DelayEstimate> external_delay_;
  } delay_state_;

  // Classifier for toggling transparent mode when there is no echo.
  std::unique_ptr<TransparentMode> transparent_state_;

  // Class for analyzing how well the linear filter is, and can be expected to,
  // perform on the current signals. The purpose of this is to select the echo
  // suppression functionality as well as the input to the echo suppressor.
  class FilteringQualityAnalyzer {
   public:
    FilteringQualityAnalyzer(const EchoCanceller3Config& config,
                             size_t num_capture_channels);

    // Returns whether the linear filter can be used for the echo
    // canceller output.
    bool LinearFilterUsable() const { return overall_usable_linear_estimates_; }

    // Returns whether an individual filter output can be used for the echo
    // canceller output.
    const std::vector<bool>& UsableLinearFilterOutputs() const {
      return usable_linear_filter_estimates_;
    }

    // Resets the state of the analyzer.
    void Reset();

    // Updates the analysis based on new data.
    void Update(bool active_render,
                bool transparent_mode,
                bool saturated_capture,
                const absl::optional<DelayEstimate>& external_delay,
                bool any_filter_converged);

   private:
    const bool use_linear_filter_;
    bool overall_usable_linear_estimates_ = false;
    size_t filter_update_blocks_since_reset_ = 0;
    size_t filter_update_blocks_since_start_ = 0;
    bool convergence_seen_ = false;
    std::vector<bool> usable_linear_filter_estimates_;
  } filter_quality_state_;

  // Class for detecting whether the echo is to be considered to be
  // saturated.
  class SaturationDetector {
   public:
    // Returns whether the echo is to be considered saturated.
    bool SaturatedEcho() const { return saturated_echo_; }

    // Updates the detection decision based on new data.
    void Update(rtc::ArrayView<const std::vector<float>> x,
                bool saturated_capture,
                bool usable_linear_estimate,
                rtc::ArrayView<const SubtractorOutput> subtractor_output,
                float echo_path_gain);

   private:
    bool saturated_echo_ = false;
  } saturation_detector_;

  ErlEstimator erl_estimator_;
  ErleEstimator erle_estimator_;
  size_t strong_not_saturated_render_blocks_ = 0;
  size_t blocks_with_active_render_ = 0;
  bool capture_signal_saturation_ = false;
  FilterAnalyzer filter_analyzer_;
  EchoAudibility echo_audibility_;
  ReverbModelEstimator reverb_model_estimator_;
  ReverbModel avg_render_reverb_;
  SubtractorOutputAnalyzer subtractor_output_analyzer_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_AEC_STATE_H_
|
160
webrtc/modules/audio_processing/aec3/alignment_mixer.cc
Normal file
160
webrtc/modules/audio_processing/aec3/alignment_mixer.cc
Normal file
@ -0,0 +1,160 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/alignment_mixer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Maps the configuration flags onto a mixing strategy. Mono input always uses
// the fixed variant; for multi-channel input, downmixing takes precedence
// over adaptive selection (the two are mutually exclusive by contract).
AlignmentMixer::MixingVariant ChooseMixingVariant(bool downmix,
                                                  bool adaptive_selection,
                                                  int num_channels) {
  RTC_DCHECK(!(adaptive_selection && downmix));
  RTC_DCHECK_LT(0, num_channels);

  if (num_channels > 1) {
    if (downmix) {
      return AlignmentMixer::MixingVariant::kDownmix;
    }
    if (adaptive_selection) {
      return AlignmentMixer::MixingVariant::kAdaptive;
    }
  }
  return AlignmentMixer::MixingVariant::kFixed;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Convenience constructor: unpacks the alignment-mixing configuration and
// delegates to the fully parameterized constructor.
AlignmentMixer::AlignmentMixer(
    size_t num_channels,
    const EchoCanceller3Config::Delay::AlignmentMixing& config)
    : AlignmentMixer(num_channels,
                     config.downmix,
                     config.adaptive_selection,
                     config.activity_power_threshold,
                     config.prefer_first_two_channels) {}
|
||||
|
||||
// Constructs the mixer and chooses the mixing strategy from the flags.
// `activity_power_threshold` is scaled by the block size to form the
// per-block energy threshold used for declaring a channel "strong".
AlignmentMixer::AlignmentMixer(size_t num_channels,
                               bool downmix,
                               bool adaptive_selection,
                               float activity_power_threshold,
                               bool prefer_first_two_channels)
    : num_channels_(num_channels),
      one_by_num_channels_(1.f / num_channels_),
      excitation_energy_threshold_(kBlockSize * activity_power_threshold),
      prefer_first_two_channels_(prefer_first_two_channels),
      selection_variant_(
          ChooseMixingVariant(downmix, adaptive_selection, num_channels_)) {
  // Zero the counters unconditionally: the std::array member has no in-class
  // initializer, so leaving it untouched for the non-adaptive variants would
  // leave its contents indeterminate.
  std::fill(strong_block_counters_.begin(), strong_block_counters_.end(), 0);
  if (selection_variant_ == MixingVariant::kAdaptive) {
    // The per-channel smoothed energies are only needed when adaptively
    // selecting a channel.
    cumulative_energies_.resize(num_channels_);
    std::fill(cumulative_energies_.begin(), cumulative_energies_.end(), 0.f);
  }
}
|
||||
|
||||
// Produces a mono block in `y` from the multi-channel block `x`, using the
// mixing variant chosen at construction: averaging for kDownmix, channel 0
// for kFixed, and an adaptively selected channel for kAdaptive.
void AlignmentMixer::ProduceOutput(rtc::ArrayView<const std::vector<float>> x,
                                   rtc::ArrayView<float, kBlockSize> y) {
  RTC_DCHECK_EQ(x.size(), num_channels_);
  if (selection_variant_ == MixingVariant::kDownmix) {
    Downmix(x, y);
    return;
  }

  int ch = selection_variant_ == MixingVariant::kFixed ? 0 : SelectChannel(x);

  // `ch` is used as an index below, so it must be strictly smaller than the
  // channel count; the previous GE check also admitted ch == x.size().
  RTC_DCHECK_GT(x.size(), static_cast<size_t>(ch));
  std::copy(x[ch].begin(), x[ch].end(), y.begin());
}
|
||||
|
||||
// Averages all channels sample-wise into `y`.
void AlignmentMixer::Downmix(rtc::ArrayView<const std::vector<float>> x,
                             rtc::ArrayView<float, kBlockSize> y) const {
  RTC_DCHECK_EQ(x.size(), num_channels_);
  RTC_DCHECK_GE(num_channels_, 2);
  // Start from a copy of the first channel and accumulate the remaining
  // channels on top of it.
  std::copy(x[0].begin(), x[0].end(), y.begin());
  for (size_t channel = 1; channel < num_channels_; ++channel) {
    const std::vector<float>& samples = x[channel];
    for (size_t k = 0; k < kBlockSize; ++k) {
      y[k] += samples[k];
    }
  }

  // Scale the sum down to the average.
  for (size_t k = 0; k < kBlockSize; ++k) {
    y[k] *= one_by_num_channels_;
  }
}
|
||||
|
||||
// Adaptively selects the channel to use for alignment, based on smoothed
// per-channel energies. Favors the first two channels once either of them has
// shown strong activity for long enough, and only switches the selected
// channel when another channel is clearly (2x) stronger, to avoid toggling.
int AlignmentMixer::SelectChannel(rtc::ArrayView<const std::vector<float>> x) {
  RTC_DCHECK_EQ(x.size(), num_channels_);
  RTC_DCHECK_GE(num_channels_, 2);
  RTC_DCHECK_EQ(cumulative_energies_.size(), num_channels_);

  // Once channel 0 or 1 has been "strong" for at least half a second, the
  // analysis is restricted to those two channels.
  constexpr size_t kBlocksToChooseLeftOrRight =
      static_cast<size_t>(0.5f * kNumBlocksPerSecond);
  const bool good_signal_in_left_or_right =
      prefer_first_two_channels_ &&
      (strong_block_counters_[0] > kBlocksToChooseLeftOrRight ||
       strong_block_counters_[1] > kBlocksToChooseLeftOrRight);

  const int num_ch_to_analyze =
      good_signal_in_left_or_right ? 2 : num_channels_;

  // For the first 60 seconds energies are accumulated; afterwards they are
  // exponentially smoothed.
  constexpr int kNumBlocksBeforeEnergySmoothing = 60 * kNumBlocksPerSecond;
  ++block_counter_;

  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
    RTC_DCHECK_EQ(x[ch].size(), kBlockSize);
    // Energy of the current block for this channel.
    float x2_sum = 0.f;
    for (size_t i = 0; i < kBlockSize; ++i) {
      x2_sum += x[ch][i] * x[ch][i];
    }

    // Strong-activity counting is only done for the first two channels.
    if (ch < 2 && x2_sum > excitation_energy_threshold_) {
      ++strong_block_counters_[ch];
    }

    if (block_counter_ <= kNumBlocksBeforeEnergySmoothing) {
      cumulative_energies_[ch] += x2_sum;
    } else {
      // One-pole smoothing with a ~10 second time constant.
      constexpr float kSmoothing = 1.f / (10 * kNumBlocksPerSecond);
      cumulative_energies_[ch] +=
          kSmoothing * (x2_sum - cumulative_energies_[ch]);
    }
  }

  // Normalize the accumulated energies once, so that from now on the values
  // are on the same scale as the smoothed per-block energies.
  if (block_counter_ == kNumBlocksBeforeEnergySmoothing) {
    constexpr float kOneByNumBlocksBeforeEnergySmoothing =
        1.f / kNumBlocksBeforeEnergySmoothing;
    for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
      cumulative_energies_[ch] *= kOneByNumBlocksBeforeEnergySmoothing;
    }
  }

  // Find the strongest channel among those analyzed.
  int strongest_ch = 0;
  for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
    if (cumulative_energies_[ch] > cumulative_energies_[strongest_ch]) {
      strongest_ch = ch;
    }
  }

  // Switch channel only when forced back to the first two channels, or when
  // the strongest channel is at least twice as strong as the current one.
  if ((good_signal_in_left_or_right && selected_channel_ > 1) ||
      cumulative_energies_[strongest_ch] >
          2.f * cumulative_energies_[selected_channel_]) {
    selected_channel_ = strongest_ch;
  }

  return selected_channel_;
}
|
||||
|
||||
} // namespace webrtc
|
58
webrtc/modules/audio_processing/aec3/alignment_mixer.h
Normal file
58
webrtc/modules/audio_processing/aec3/alignment_mixer.h
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Performs channel conversion to mono for the purpose of providing a decent
// mono input for the delay estimation. This is achieved by analyzing all
// incoming channels and producing one single-channel output.
class AlignmentMixer {
 public:
  AlignmentMixer(size_t num_channels,
                 const EchoCanceller3Config::Delay::AlignmentMixing& config);

  // NOTE(review): this parameter is named `activity_power_threshold` in the
  // .cc definition; the two names refer to the same quantity.
  AlignmentMixer(size_t num_channels,
                 bool downmix,
                 bool adaptive_selection,
                 float excitation_limit,
                 bool prefer_first_two_channels);

  // Produces one mono block `y` from the multi-channel block `x`.
  void ProduceOutput(rtc::ArrayView<const std::vector<float>> x,
                     rtc::ArrayView<float, kBlockSize> y);

  enum class MixingVariant { kDownmix, kAdaptive, kFixed };

 private:
  const size_t num_channels_;
  const float one_by_num_channels_;
  // Per-block energy threshold for declaring a channel "strong".
  const float excitation_energy_threshold_;
  const bool prefer_first_two_channels_;
  const MixingVariant selection_variant_;
  // Number of "strong" blocks seen on channels 0 and 1.
  std::array<size_t, 2> strong_block_counters_;
  // Smoothed per-channel energies (adaptive variant only).
  std::vector<float> cumulative_energies_;
  int selected_channel_ = 0;
  size_t block_counter_ = 0;

  void Downmix(const rtc::ArrayView<const std::vector<float>> x,
               rtc::ArrayView<float, kBlockSize> y) const;
  int SelectChannel(rtc::ArrayView<const std::vector<float>> x);
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ALIGNMENT_MIXER_H_
|
121
webrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc
Normal file
121
webrtc/modules/audio_processing/aec3/api_call_jitter_metrics.cc
Normal file
@ -0,0 +1,121 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <limits>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Returns true exactly when a full reporting interval (10 seconds of frames
// at 100 frames per second) has elapsed since the last report.
bool TimeToReportMetrics(int frames_since_last_report) {
  constexpr int kFramesPerSecond = 100;
  constexpr int kReportingPeriodSeconds = 10;
  return frames_since_last_report ==
         kReportingPeriodSeconds * kFramesPerSecond;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Starts with an empty range: max_ below and min_ above any possible
// observation, so the first Update() sets both.
ApiCallJitterMetrics::Jitter::Jitter()
    : max_(0), min_(std::numeric_limits<int>::max()) {}
|
||||
|
||||
// Widens the observed [min, max] range to include `num_api_calls_in_a_row`.
void ApiCallJitterMetrics::Jitter::Update(int num_api_calls_in_a_row) {
  if (num_api_calls_in_a_row < min_) {
    min_ = num_api_calls_in_a_row;
  }
  if (num_api_calls_in_a_row > max_) {
    max_ = num_api_calls_in_a_row;
  }
}
|
||||
|
||||
// Empties the observed range so that the next Update() starts fresh.
void ApiCallJitterMetrics::Jitter::Reset() {
  max_ = 0;
  min_ = std::numeric_limits<int>::max();
}
|
||||
|
||||
// Returns the metrics collection to its initial state.
void ApiCallJitterMetrics::Reset() {
  num_api_calls_in_a_row_ = 0;
  frames_since_last_report_ = 0;
  last_call_was_render_ = false;
  proper_call_observed_ = false;
  render_jitter_.Reset();
  capture_jitter_.Reset();
}
|
||||
|
||||
void ApiCallJitterMetrics::ReportRenderCall() {
|
||||
if (!last_call_was_render_) {
|
||||
// If the previous call was a capture and a proper call has been observed
|
||||
// (containing both render and capture data), storing the last number of
|
||||
// capture calls into the metrics.
|
||||
if (proper_call_observed_) {
|
||||
capture_jitter_.Update(num_api_calls_in_a_row_);
|
||||
}
|
||||
|
||||
// Reset the call counter to start counting render calls.
|
||||
num_api_calls_in_a_row_ = 0;
|
||||
}
|
||||
++num_api_calls_in_a_row_;
|
||||
last_call_was_render_ = true;
|
||||
}
|
||||
|
||||
// Registers one capture API call, updates the jitter statistics and, once per
// reporting interval, emits the jitter histograms and restarts the
// collection.
void ApiCallJitterMetrics::ReportCaptureCall() {
  if (last_call_was_render_) {
    // If the previous call was a render and a proper call has been observed
    // (containing both render and capture data), store the last number of
    // render calls into the metrics.
    if (proper_call_observed_) {
      render_jitter_.Update(num_api_calls_in_a_row_);
    }
    // Reset the call counter to start counting capture calls.
    num_api_calls_in_a_row_ = 0;

    // If this statement is reached, at least one render and one capture call
    // have been observed.
    proper_call_observed_ = true;
  }
  ++num_api_calls_in_a_row_;
  last_call_was_render_ = false;

  // Only report and update jitter metrics when a proper call, containing
  // both render and capture data, has been observed.
  if (proper_call_observed_ &&
      TimeToReportMetrics(++frames_since_last_report_)) {
    // Report jitter, where the basic unit is frames.
    constexpr int kMaxJitterToReport = 50;

    // Report max and min jitter for render and capture, in units of 20 ms.
    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MaxRenderJitter",
        std::min(kMaxJitterToReport, render_jitter().max()), 1,
        kMaxJitterToReport, kMaxJitterToReport);
    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MinRenderJitter",
        std::min(kMaxJitterToReport, render_jitter().min()), 1,
        kMaxJitterToReport, kMaxJitterToReport);

    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MaxCaptureJitter",
        std::min(kMaxJitterToReport, capture_jitter().max()), 1,
        kMaxJitterToReport, kMaxJitterToReport);
    RTC_HISTOGRAM_COUNTS_LINEAR(
        "WebRTC.Audio.EchoCanceller.MinCaptureJitter",
        std::min(kMaxJitterToReport, capture_jitter().min()), 1,
        kMaxJitterToReport, kMaxJitterToReport);

    frames_since_last_report_ = 0;
    Reset();
  }
}
|
||||
|
||||
// Returns whether the next ReportCaptureCall() will reach the reporting
// interval and emit the histograms (used by tests).
bool ApiCallJitterMetrics::WillReportMetricsAtNextCapture() const {
  return TimeToReportMetrics(frames_since_last_report_ + 1);
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores data for reporting metrics on the API call jitter.
class ApiCallJitterMetrics {
 public:
  // Tracks the minimum and maximum observed lengths of runs of same-type
  // (render-only or capture-only) API calls.
  class Jitter {
   public:
    Jitter();
    void Update(int num_api_calls_in_a_row);
    void Reset();

    int min() const { return min_; }
    int max() const { return max_; }

   private:
    int max_;
    int min_;
  };

  ApiCallJitterMetrics() { Reset(); }

  // Update metrics for render API call.
  void ReportRenderCall();

  // Update and periodically report metrics for capture API call.
  void ReportCaptureCall();

  // Methods used only for testing.
  const Jitter& render_jitter() const { return render_jitter_; }
  const Jitter& capture_jitter() const { return capture_jitter_; }
  bool WillReportMetricsAtNextCapture() const;

 private:
  void Reset();

  Jitter render_jitter_;
  Jitter capture_jitter_;

  // Length of the current run of same-type calls.
  int num_api_calls_in_a_row_ = 0;
  // Capture frames seen since the last histogram report.
  int frames_since_last_report_ = 0;
  bool last_call_was_render_ = false;
  // True once at least one render and one capture call have both been seen.
  bool proper_call_observed_ = false;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_API_CALL_JITTER_METRICS_H_
|
39
webrtc/modules/audio_processing/aec3/block_buffer.cc
Normal file
39
webrtc/modules/audio_processing/aec3/block_buffer.cc
Normal file
@ -0,0 +1,39 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Allocates a zero-initialized circular buffer of `size` blocks, each holding
// `num_bands` x `num_channels` x `frame_length` samples.
BlockBuffer::BlockBuffer(size_t size,
                         size_t num_bands,
                         size_t num_channels,
                         size_t frame_length)
    : size(static_cast<int>(size)),
      buffer(size,
             std::vector<std::vector<std::vector<float>>>(
                 num_bands,
                 std::vector<std::vector<float>>(
                     num_channels,
                     std::vector<float>(frame_length, 0.f)))) {
  // No explicit zero-fill is needed here: the innermost vectors above are
  // constructed with the fill value 0.f, so every sample already starts at
  // zero. The previous triple-nested std::fill loop was redundant.
}

BlockBuffer::~BlockBuffer() = default;
|
||||
|
||||
} // namespace webrtc
|
62
webrtc/modules/audio_processing/aec3/block_buffer.h
Normal file
62
webrtc/modules/audio_processing/aec3/block_buffer.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Struct for bundling a circular buffer of two dimensional vector objects
|
||||
// together with the read and write indices.
|
||||
struct BlockBuffer {
|
||||
BlockBuffer(size_t size,
|
||||
size_t num_bands,
|
||||
size_t num_channels,
|
||||
size_t frame_length);
|
||||
~BlockBuffer();
|
||||
|
||||
int IncIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index < size - 1 ? index + 1 : 0;
|
||||
}
|
||||
|
||||
int DecIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index > 0 ? index - 1 : size - 1;
|
||||
}
|
||||
|
||||
int OffsetIndex(int index, int offset) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
RTC_DCHECK_GE(size, offset);
|
||||
return (size + index + offset) % size;
|
||||
}
|
||||
|
||||
void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
|
||||
void IncWriteIndex() { write = IncIndex(write); }
|
||||
void DecWriteIndex() { write = DecIndex(write); }
|
||||
void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
|
||||
void IncReadIndex() { read = IncIndex(read); }
|
||||
void DecReadIndex() { read = DecIndex(read); }
|
||||
|
||||
const int size;
|
||||
std::vector<std::vector<std::vector<std::vector<float>>>> buffer;
|
||||
int write = 0;
|
||||
int read = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_BUFFER_H_
|
62
webrtc/modules/audio_processing/aec3/block_delay_buffer.cc
Normal file
62
webrtc/modules/audio_processing/aec3/block_delay_buffer.cc
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/block_delay_buffer.h"
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs a per-channel, per-band delay line of `delay_samples` samples,
// initialized to zeros so the first `delay_samples` output samples are
// silence.
BlockDelayBuffer::BlockDelayBuffer(size_t num_channels,
                                   size_t num_bands,
                                   size_t frame_length,
                                   size_t delay_samples)
    : frame_length_(frame_length),
      delay_(delay_samples),
      buf_(num_channels,
           std::vector<std::vector<float>>(num_bands,
                                           std::vector<float>(delay_, 0.f))) {}

BlockDelayBuffer::~BlockDelayBuffer() = default;
|
||||
|
||||
// Delays the samples in `frame` in place by `delay_` samples, using the
// internal circular delay line: each incoming sample is swapped with the
// sample stored `delay_` samples earlier.
void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) {
  RTC_DCHECK_EQ(buf_.size(), frame->num_channels());
  // A zero delay leaves the signal untouched.
  if (delay_ == 0) {
    return;
  }

  const size_t num_bands = buf_[0].size();
  const size_t num_channels = buf_.size();

  // All bands and channels start from the same position in the circular
  // delay line.
  const size_t i_start = last_insert_;
  size_t i = 0;
  for (size_t ch = 0; ch < num_channels; ++ch) {
    RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands());
    RTC_DCHECK_EQ(buf_[ch].size(), num_bands);
    rtc::ArrayView<float* const> frame_ch(frame->split_bands(ch), num_bands);

    for (size_t band = 0; band < num_bands; ++band) {
      RTC_DCHECK_EQ(delay_, buf_[ch][band].size());
      i = i_start;

      for (size_t k = 0; k < frame_length_; ++k) {
        // Swap the incoming sample with the stored (delayed) one.
        const float tmp = buf_[ch][band][i];
        buf_[ch][band][i] = frame_ch[band][k];
        frame_ch[band][k] = tmp;

        // Advance within the circular delay line, wrapping at `delay_`.
        i = i < delay_ - 1 ? i + 1 : 0;
      }
    }
  }

  // Every band/channel advanced by the same amount, so the final `i` is the
  // starting position for the next frame.
  last_insert_ = i;
}
|
||||
|
||||
} // namespace webrtc
|
43
webrtc/modules/audio_processing/aec3/block_delay_buffer.h
Normal file
43
webrtc/modules/audio_processing/aec3/block_delay_buffer.h
Normal file
@ -0,0 +1,43 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for applying a fixed delay to the samples in a signal partitioned
// using the audiobuffer band-splitting scheme.
class BlockDelayBuffer {
 public:
  BlockDelayBuffer(size_t num_channels,
                   size_t num_bands,
                   size_t frame_length,
                   size_t delay_samples);
  ~BlockDelayBuffer();

  // Delays the samples in `frame` in place by the delay specified at
  // construction.
  void DelaySignal(AudioBuffer* frame);

 private:
  // Number of samples processed per band and channel in each call.
  const size_t frame_length_;
  // The delay, in samples.
  const size_t delay_;
  // Circular delay line, indexed as [channel][band][sample].
  std::vector<std::vector<std::vector<float>>> buf_;
  // Position in the delay line where the next frame starts reading/writing.
  size_t last_insert_ = 0;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_DELAY_BUFFER_H_
|
86
webrtc/modules/audio_processing/aec3/block_framer.cc
Normal file
86
webrtc/modules/audio_processing/aec3/block_framer.cc
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/block_framer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs a framer for `num_bands` bands and `num_channels` channels.
// The internal buffer holds the leftover samples between block insertions and
// subframe extractions.
BlockFramer::BlockFramer(size_t num_bands, size_t num_channels)
    : num_bands_(num_bands),
      num_channels_(num_channels),
      buffer_(num_bands_,
              std::vector<std::vector<float>>(
                  num_channels,
                  std::vector<float>(kBlockSize, 0.f))) {
  RTC_DCHECK_LT(0, num_bands);
  RTC_DCHECK_LT(0, num_channels);
}

BlockFramer::~BlockFramer() = default;
|
||||
|
||||
// All the constants are chosen so that the buffer is either empty or has enough
|
||||
// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to
|
||||
// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need
|
||||
// to be called in the correct order.
|
||||
void BlockFramer::InsertBlock(
|
||||
const std::vector<std::vector<std::vector<float>>>& block) {
|
||||
RTC_DCHECK_EQ(num_bands_, block.size());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, block[band].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_EQ(kBlockSize, block[band][channel].size());
|
||||
RTC_DCHECK_EQ(0, buffer_[band][channel].size());
|
||||
|
||||
buffer_[band][channel].insert(buffer_[band][channel].begin(),
|
||||
block[band][channel].begin(),
|
||||
block[band][channel].end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Inserts a 64 sample block and fills `sub_frame` with an 80 sample subframe:
// first the buffered leftover samples, then the leading samples of `block`.
// The remaining samples of `block` are kept in the buffer for the next call.
void BlockFramer::InsertBlockAndExtractSubFrame(
    const std::vector<std::vector<std::vector<float>>>& block,
    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame) {
  RTC_DCHECK(sub_frame);
  RTC_DCHECK_EQ(num_bands_, block.size());
  RTC_DCHECK_EQ(num_bands_, sub_frame->size());
  for (size_t band = 0; band < num_bands_; ++band) {
    RTC_DCHECK_EQ(num_channels_, block[band].size());
    RTC_DCHECK_EQ(num_channels_, (*sub_frame)[0].size());
    for (size_t channel = 0; channel < num_channels_; ++channel) {
      // Buffered samples plus the new block must suffice for one subframe.
      RTC_DCHECK_LE(kSubFrameLength,
                    buffer_[band][channel].size() + kBlockSize);
      RTC_DCHECK_EQ(kBlockSize, block[band][channel].size());
      RTC_DCHECK_GE(kBlockSize, buffer_[band][channel].size());
      RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[band][channel].size());

      // Number of samples taken from the new block to complete the subframe.
      const int samples_to_frame =
          kSubFrameLength - buffer_[band][channel].size();
      // Output the buffered leftovers first, then the start of the new block.
      std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
                (*sub_frame)[band][channel].begin());
      std::copy(
          block[band][channel].begin(),
          block[band][channel].begin() + samples_to_frame,
          (*sub_frame)[band][channel].begin() + buffer_[band][channel].size());
      // Store the unconsumed tail of the block for the next call.
      buffer_[band][channel].clear();
      buffer_[band][channel].insert(
          buffer_[band][channel].begin(),
          block[band][channel].begin() + samples_to_frame,
          block[band][channel].end());
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
48
webrtc/modules/audio_processing/aec3/block_framer.h
Normal file
48
webrtc/modules/audio_processing/aec3/block_framer.h
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for producing frames consisting of 2 subframes of 80 samples each
// from 64 sample blocks. The class is designed to work together with the
// FrameBlocker class which performs the reverse conversion. Used together
// with that, this class produces output frames at the same rate as frames
// are received by the FrameBlocker class. Note that the internal buffers
// will overrun if any other rate of packet insertion is used.
class BlockFramer {
 public:
  BlockFramer(size_t num_bands, size_t num_channels);
  ~BlockFramer();
  BlockFramer(const BlockFramer&) = delete;
  BlockFramer& operator=(const BlockFramer&) = delete;

  // Adds a 64 sample block into the data that will form the next output frame.
  void InsertBlock(const std::vector<std::vector<std::vector<float>>>& block);
  // Adds a 64 sample block and extracts an 80 sample subframe.
  void InsertBlockAndExtractSubFrame(
      const std::vector<std::vector<std::vector<float>>>& block,
      std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame);

 private:
  const size_t num_bands_;
  const size_t num_channels_;
  // Leftover samples, indexed as [band][channel][sample].
  std::vector<std::vector<std::vector<float>>> buffer_;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_FRAMER_H_
|
292
webrtc/modules/audio_processing/aec3/block_processor.cc
Normal file
292
webrtc/modules/audio_processing/aec3/block_processor.cc
Normal file
@ -0,0 +1,292 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/block_processor.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block_processor_metrics.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/echo_remover.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Identifies the API call type in the debug call-order dumps.
enum class BlockProcessorApiCall { kCapture, kRender };

// Implementation of BlockProcessor that wires together the render delay
// buffer, the render delay controller and the echo remover.
class BlockProcessorImpl final : public BlockProcessor {
 public:
  BlockProcessorImpl(const EchoCanceller3Config& config,
                     int sample_rate_hz,
                     size_t num_render_channels,
                     size_t num_capture_channels,
                     std::unique_ptr<RenderDelayBuffer> render_buffer,
                     std::unique_ptr<RenderDelayController> delay_controller,
                     std::unique_ptr<EchoRemover> echo_remover);

  BlockProcessorImpl() = delete;

  ~BlockProcessorImpl() override;

  void ProcessCapture(
      bool echo_path_gain_change,
      bool capture_signal_saturation,
      std::vector<std::vector<std::vector<float>>>* linear_output,
      std::vector<std::vector<std::vector<float>>>* capture_block) override;

  void BufferRender(
      const std::vector<std::vector<std::vector<float>>>& block) override;

  void UpdateEchoLeakageStatus(bool leakage_detected) override;

  void GetMetrics(EchoControl::Metrics* metrics) const override;

  void SetAudioBufferDelay(int delay_ms) override;

 private:
  // Shared across instances to give each data dumper a unique id.
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  // Set once capture processing has started after render data arrived.
  bool capture_properly_started_ = false;
  // Set once the first render block has been buffered.
  bool render_properly_started_ = false;
  const size_t sample_rate_hz_;
  std::unique_ptr<RenderDelayBuffer> render_buffer_;
  // May be null when an external delay estimator is used (see config).
  std::unique_ptr<RenderDelayController> delay_controller_;
  std::unique_ptr<EchoRemover> echo_remover_;
  BlockProcessorMetrics metrics_;
  // Buffering event reported by the most recent render insertion.
  RenderDelayBuffer::BufferingEvent render_event_;
  size_t capture_call_counter_ = 0;
  absl::optional<DelayEstimate> estimated_delay_;
};
|
||||
|
||||
int BlockProcessorImpl::instance_count_ = 0;

// Takes ownership of the injected components. `delay_controller` may be null
// when an external delay estimator is used.
BlockProcessorImpl::BlockProcessorImpl(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer,
    std::unique_ptr<RenderDelayController> delay_controller,
    std::unique_ptr<EchoRemover> echo_remover)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_(config),
      sample_rate_hz_(sample_rate_hz),
      render_buffer_(std::move(render_buffer)),
      delay_controller_(std::move(delay_controller)),
      echo_remover_(std::move(echo_remover)),
      render_event_(RenderDelayBuffer::BufferingEvent::kNone) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
}

BlockProcessorImpl::~BlockProcessorImpl() = default;
|
||||
|
||||
// Processes one 64 sample capture block: aligns the render buffer with the
// capture signal (either via the delay controller or an externally provided
// delay) and runs the echo remover on the block in place. Does nothing until
// render data has arrived.
void BlockProcessorImpl::ProcessCapture(
    bool echo_path_gain_change,
    bool capture_signal_saturation,
    std::vector<std::vector<std::vector<float>>>* linear_output,
    std::vector<std::vector<std::vector<float>>>* capture_block) {
  RTC_DCHECK(capture_block);
  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->size());
  RTC_DCHECK_EQ(kBlockSize, (*capture_block)[0][0].size());

  capture_call_counter_++;

  data_dumper_->DumpRaw("aec3_processblock_call_order",
                        static_cast<int>(BlockProcessorApiCall::kCapture));
  data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize,
                        &(*capture_block)[0][0][0], 16000, 1);

  if (render_properly_started_) {
    // Reset the buffering and delay control on the very first capture call
    // after render data has started flowing.
    if (!capture_properly_started_) {
      capture_properly_started_ = true;
      render_buffer_->Reset();
      if (delay_controller_)
        delay_controller_->Reset(true);
    }
  } else {
    // If no render data has yet arrived, do not process the capture signal.
    render_buffer_->HandleSkippedCaptureProcessing();
    return;
  }

  EchoPathVariability echo_path_variability(
      echo_path_gain_change, EchoPathVariability::DelayAdjustment::kNone,
      false);

  // A render overrun invalidates the buffered alignment; flag a buffer flush
  // and reset the delay controller.
  if (render_event_ == RenderDelayBuffer::BufferingEvent::kRenderOverrun &&
      render_properly_started_) {
    echo_path_variability.delay_change =
        EchoPathVariability::DelayAdjustment::kBufferFlush;
    if (delay_controller_)
      delay_controller_->Reset(true);
    RTC_LOG(LS_WARNING) << "Reset due to render buffer overrun at block "
                        << capture_call_counter_;
  }
  render_event_ = RenderDelayBuffer::BufferingEvent::kNone;

  // Update the render buffers with any newly arrived render blocks and prepare
  // the render buffers for reading the render data corresponding to the current
  // capture block.
  RenderDelayBuffer::BufferingEvent buffer_event =
      render_buffer_->PrepareCaptureProcessing();
  // Reset the delay controller at render buffer underrun.
  if (buffer_event == RenderDelayBuffer::BufferingEvent::kRenderUnderrun) {
    if (delay_controller_)
      delay_controller_->Reset(false);
  }

  data_dumper_->DumpWav("aec3_processblock_capture_input2", kBlockSize,
                        &(*capture_block)[0][0][0], 16000, 1);

  bool has_delay_estimator = !config_.delay.use_external_delay_estimator;
  if (has_delay_estimator) {
    RTC_DCHECK(delay_controller_);
    // Compute and apply the render delay required to achieve proper signal
    // alignment.
    estimated_delay_ = delay_controller_->GetDelay(
        render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
        (*capture_block)[0]);

    if (estimated_delay_) {
      bool delay_change =
          render_buffer_->AlignFromDelay(estimated_delay_->delay);
      if (delay_change) {
        rtc::LoggingSeverity log_level =
            config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
                                                       : rtc::LS_INFO;
        RTC_LOG_V(log_level) << "Delay changed to " << estimated_delay_->delay
                             << " at block " << capture_call_counter_;
        echo_path_variability.delay_change =
            EchoPathVariability::DelayAdjustment::kNewDetectedDelay;
      }
    }

    echo_path_variability.clock_drift = delay_controller_->HasClockdrift();

  } else {
    render_buffer_->AlignFromExternalDelay();
  }

  // Remove the echo from the capture signal. With an external delay estimator
  // this is skipped until a buffer delay has actually been received.
  if (has_delay_estimator || render_buffer_->HasReceivedBufferDelay()) {
    echo_remover_->ProcessCapture(
        echo_path_variability, capture_signal_saturation, estimated_delay_,
        render_buffer_->GetRenderBuffer(), linear_output, capture_block);
  }

  // Update the metrics.
  metrics_.UpdateCapture(false);
}
|
||||
|
||||
// Buffers one 64 sample render block and records whether the insertion caused
// a buffering event (over-/underrun) in the metrics.
void BlockProcessorImpl::BufferRender(
    const std::vector<std::vector<std::vector<float>>>& block) {
  RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.size());
  RTC_DCHECK_EQ(kBlockSize, block[0][0].size());
  data_dumper_->DumpRaw("aec3_processblock_call_order",
                        static_cast<int>(BlockProcessorApiCall::kRender));
  data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize,
                        &block[0][0][0], 16000, 1);
  data_dumper_->DumpWav("aec3_processblock_render_input2", kBlockSize,
                        &block[0][0][0], 16000, 1);

  // The buffering event is consumed by the next ProcessCapture call.
  render_event_ = render_buffer_->Insert(block);

  metrics_.UpdateRender(render_event_ !=
                        RenderDelayBuffer::BufferingEvent::kNone);

  render_properly_started_ = true;
  if (delay_controller_)
    delay_controller_->LogRenderCall();
}
|
||||
|
||||
// Forwards the externally detected echo leakage status to the echo remover.
void BlockProcessorImpl::UpdateEchoLeakageStatus(bool leakage_detected) {
  echo_remover_->UpdateEchoLeakageStatus(leakage_detected);
}
|
||||
|
||||
// Fills `metrics` from the echo remover and adds the current delay estimate,
// converted from blocks to milliseconds.
void BlockProcessorImpl::GetMetrics(EchoControl::Metrics* metrics) const {
  echo_remover_->GetMetrics(metrics);
  // One 64 sample block corresponds to 4 ms at the 16 kHz band rate.
  constexpr int block_size_ms = 4;
  absl::optional<size_t> delay = render_buffer_->Delay();
  metrics->delay_ms = delay ? static_cast<int>(*delay) * block_size_ms : 0;
}
|
||||
|
||||
// Forwards an externally provided audio buffer delay to the render buffer.
void BlockProcessorImpl::SetAudioBufferDelay(int delay_ms) {
  render_buffer_->SetAudioBufferDelay(delay_ms);
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Creates a BlockProcessor with default-constructed components. Delegates to
// the overload taking a render buffer so that the construction of the delay
// controller and echo remover lives in a single place; the previous version
// duplicated that construction verbatim.
BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config,
                                       int sample_rate_hz,
                                       size_t num_render_channels,
                                       size_t num_capture_channels) {
  std::unique_ptr<RenderDelayBuffer> render_buffer(
      RenderDelayBuffer::Create(config, sample_rate_hz, num_render_channels));
  return Create(config, sample_rate_hz, num_render_channels,
                num_capture_channels, std::move(render_buffer));
}
|
||||
|
||||
// Creates a BlockProcessor using the injected render buffer; the delay
// controller (omitted when an external delay estimator is configured) and the
// echo remover are default-constructed. Only used for testing purposes.
BlockProcessor* BlockProcessor::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer) {
  std::unique_ptr<RenderDelayController> delay_controller;
  if (!config.delay.use_external_delay_estimator) {
    delay_controller.reset(RenderDelayController::Create(config, sample_rate_hz,
                                                         num_capture_channels));
  }
  std::unique_ptr<EchoRemover> echo_remover(EchoRemover::Create(
      config, sample_rate_hz, num_render_channels, num_capture_channels));
  return Create(config, sample_rate_hz, num_render_channels,
                num_capture_channels, std::move(render_buffer),
                std::move(delay_controller), std::move(echo_remover));
}
|
||||
|
||||
// Creates a BlockProcessor from fully injected components. Only used for
// testing purposes.
BlockProcessor* BlockProcessor::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_render_channels,
    size_t num_capture_channels,
    std::unique_ptr<RenderDelayBuffer> render_buffer,
    std::unique_ptr<RenderDelayController> delay_controller,
    std::unique_ptr<EchoRemover> echo_remover) {
  return new BlockProcessorImpl(config, sample_rate_hz, num_render_channels,
                                num_capture_channels, std::move(render_buffer),
                                std::move(delay_controller),
                                std::move(echo_remover));
}
|
||||
|
||||
} // namespace webrtc
|
76
webrtc/modules/audio_processing/aec3/block_processor.h
Normal file
76
webrtc/modules/audio_processing/aec3/block_processor.h
Normal file
@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/echo_remover.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for performing echo cancellation on 64 sample blocks of audio data.
class BlockProcessor {
 public:
  // Creates a BlockProcessor with default-constructed internal components.
  // The returned pointer is owned by the caller.
  static BlockProcessor* Create(const EchoCanceller3Config& config,
                                int sample_rate_hz,
                                size_t num_render_channels,
                                size_t num_capture_channels);
  // Only used for testing purposes.
  static BlockProcessor* Create(
      const EchoCanceller3Config& config,
      int sample_rate_hz,
      size_t num_render_channels,
      size_t num_capture_channels,
      std::unique_ptr<RenderDelayBuffer> render_buffer);
  // Only used for testing purposes: allows injecting all components.
  static BlockProcessor* Create(
      const EchoCanceller3Config& config,
      int sample_rate_hz,
      size_t num_render_channels,
      size_t num_capture_channels,
      std::unique_ptr<RenderDelayBuffer> render_buffer,
      std::unique_ptr<RenderDelayController> delay_controller,
      std::unique_ptr<EchoRemover> echo_remover);

  virtual ~BlockProcessor() = default;

  // Get current metrics.
  virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;

  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;

  // Processes a block of capture data.
  virtual void ProcessCapture(
      bool echo_path_gain_change,
      bool capture_signal_saturation,
      std::vector<std::vector<std::vector<float>>>* linear_output,
      std::vector<std::vector<std::vector<float>>>* capture_block) = 0;

  // Buffers a block of render data supplied by a FrameBlocker object.
  virtual void BufferRender(
      const std::vector<std::vector<std::vector<float>>>& render_block) = 0;

  // Reports whether echo leakage has been detected in the echo canceller
  // output.
  virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_H_
|
104
webrtc/modules/audio_processing/aec3/block_processor_metrics.cc
Normal file
104
webrtc/modules/audio_processing/aec3/block_processor_metrics.cc
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/block_processor_metrics.h"
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Histogram buckets for the number of render buffer underruns observed during
// one reporting interval.
enum class RenderUnderrunCategory {
  kNone,
  kFew,
  kSeveral,
  kMany,
  kConstant,
  kNumCategories
};

// Histogram buckets for the number of render buffer overruns observed during
// one reporting interval.
enum class RenderOverrunCategory {
  kNone,
  kFew,
  kSeveral,
  kMany,
  kConstant,
  kNumCategories
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Records one capture call and, every kMetricsReportingIntervalBlocks calls,
// reports the accumulated under-/overrun counts as histogram categories and
// resets the interval counters.
void BlockProcessorMetrics::UpdateCapture(bool underrun) {
  ++capture_block_counter_;
  if (underrun) {
    ++render_buffer_underruns_;
  }

  if (capture_block_counter_ == kMetricsReportingIntervalBlocks) {
    metrics_reported_ = true;

    // Bucket the underrun count; the order of the comparisons matters since
    // the categories overlap (e.g. kConstant takes precedence over kMany).
    RenderUnderrunCategory underrun_category;
    if (render_buffer_underruns_ == 0) {
      underrun_category = RenderUnderrunCategory::kNone;
    } else if (render_buffer_underruns_ > (capture_block_counter_ >> 1)) {
      underrun_category = RenderUnderrunCategory::kConstant;
    } else if (render_buffer_underruns_ > 100) {
      underrun_category = RenderUnderrunCategory::kMany;
    } else if (render_buffer_underruns_ > 10) {
      underrun_category = RenderUnderrunCategory::kSeveral;
    } else {
      underrun_category = RenderUnderrunCategory::kFew;
    }
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.EchoCanceller.RenderUnderruns",
        static_cast<int>(underrun_category),
        static_cast<int>(RenderUnderrunCategory::kNumCategories));

    // Bucket the overrun count relative to the number of render calls.
    RenderOverrunCategory overrun_category;
    if (render_buffer_overruns_ == 0) {
      overrun_category = RenderOverrunCategory::kNone;
    } else if (render_buffer_overruns_ > (buffer_render_calls_ >> 1)) {
      overrun_category = RenderOverrunCategory::kConstant;
    } else if (render_buffer_overruns_ > 100) {
      overrun_category = RenderOverrunCategory::kMany;
    } else if (render_buffer_overruns_ > 10) {
      overrun_category = RenderOverrunCategory::kSeveral;
    } else {
      overrun_category = RenderOverrunCategory::kFew;
    }
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.EchoCanceller.RenderOverruns",
        static_cast<int>(overrun_category),
        static_cast<int>(RenderOverrunCategory::kNumCategories));

    ResetMetrics();
    capture_block_counter_ = 0;
  } else {
    metrics_reported_ = false;
  }
}
|
||||
|
||||
void BlockProcessorMetrics::UpdateRender(bool overrun) {
|
||||
++buffer_render_calls_;
|
||||
if (overrun) {
|
||||
++render_buffer_overruns_;
|
||||
}
|
||||
}
|
||||
|
||||
// Clears the interval counters ahead of the next reporting period.
void BlockProcessorMetrics::ResetMetrics() {
  buffer_render_calls_ = 0;
  render_buffer_overruns_ = 0;
  render_buffer_underruns_ = 0;
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,47 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
||||
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Handles the reporting of metrics for the block_processor.
class BlockProcessorMetrics {
 public:
  BlockProcessorMetrics() = default;

  // Updates the metric with new capture data.
  void UpdateCapture(bool underrun);

  // Updates the metric with new render data.
  void UpdateRender(bool overrun);

  // Returns true if the metrics have just been reported, otherwise false.
  bool MetricsReported() { return metrics_reported_; }

 private:
  // Resets the metrics.
  void ResetMetrics();

  // Number of capture blocks processed; reset to 0 when metrics are reported.
  int capture_block_counter_ = 0;
  // Cleared when no report happens in a capture update; presumably set on the
  // reporting path in UpdateCapture — confirm against the full .cc file.
  bool metrics_reported_ = false;
  // Underrun/overrun/call counters accumulated between reports and cleared by
  // ResetMetrics().
  int render_buffer_underruns_ = 0;
  int render_buffer_overruns_ = 0;
  int buffer_render_calls_ = 0;

  RTC_DISALLOW_COPY_AND_ASSIGN(BlockProcessorMetrics);
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
61
webrtc/modules/audio_processing/aec3/clockdrift_detector.cc
Normal file
61
webrtc/modules/audio_processing/aec3/clockdrift_detector.cc
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/clockdrift_detector.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Starts with an all-zero delay history and no detected clockdrift.
ClockdriftDetector::ClockdriftDetector()
    : level_(Level::kNone), stability_counter_(0) {
  delay_history_ = {};
}

ClockdriftDetector::~ClockdriftDetector() = default;
|
||||
|
||||
void ClockdriftDetector::Update(int delay_estimate) {
|
||||
if (delay_estimate == delay_history_[0]) {
|
||||
// Reset clockdrift level if delay estimate is stable for 7500 blocks (30
|
||||
// seconds).
|
||||
if (++stability_counter_ > 7500)
|
||||
level_ = Level::kNone;
|
||||
return;
|
||||
}
|
||||
|
||||
stability_counter_ = 0;
|
||||
const int d1 = delay_history_[0] - delay_estimate;
|
||||
const int d2 = delay_history_[1] - delay_estimate;
|
||||
const int d3 = delay_history_[2] - delay_estimate;
|
||||
|
||||
// Patterns recognized as positive clockdrift:
|
||||
// [x-3], x-2, x-1, x.
|
||||
// [x-3], x-1, x-2, x.
|
||||
const bool probable_drift_up =
|
||||
(d1 == -1 && d2 == -2) || (d1 == -2 && d2 == -1);
|
||||
const bool drift_up = probable_drift_up && d3 == -3;
|
||||
|
||||
// Patterns recognized as negative clockdrift:
|
||||
// [x+3], x+2, x+1, x.
|
||||
// [x+3], x+1, x+2, x.
|
||||
const bool probable_drift_down = (d1 == 1 && d2 == 2) || (d1 == 2 && d2 == 1);
|
||||
const bool drift_down = probable_drift_down && d3 == 3;
|
||||
|
||||
// Set clockdrift level.
|
||||
if (drift_up || drift_down) {
|
||||
level_ = Level::kVerified;
|
||||
} else if ((probable_drift_up || probable_drift_down) &&
|
||||
level_ == Level::kNone) {
|
||||
level_ = Level::kProbable;
|
||||
}
|
||||
|
||||
// Shift delay history one step.
|
||||
delay_history_[2] = delay_history_[1];
|
||||
delay_history_[1] = delay_history_[0];
|
||||
delay_history_[0] = delay_estimate;
|
||||
}
|
||||
} // namespace webrtc
|
40
webrtc/modules/audio_processing/aec3/clockdrift_detector.h
Normal file
40
webrtc/modules/audio_processing/aec3/clockdrift_detector.h
Normal file
@ -0,0 +1,40 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
struct DownsampledRenderBuffer;
|
||||
struct EchoCanceller3Config;
|
||||
|
||||
// Detects clockdrift by analyzing the estimated delay.
class ClockdriftDetector {
 public:
  // kProbable: a drift-like change pattern was observed over the two most
  // recent history entries; kVerified: the pattern also matched the oldest
  // entry (see Update() in the .cc file).
  enum class Level { kNone, kProbable, kVerified, kNumCategories };
  ClockdriftDetector();
  ~ClockdriftDetector();
  // Feeds the latest delay estimate to the detector.
  void Update(int delay_estimate);
  // Returns the currently detected clockdrift level.
  Level ClockdriftLevel() const { return level_; }

 private:
  // The three most recent distinct delay estimates, newest first.
  std::array<int, 3> delay_history_;
  Level level_;
  // Number of consecutive Update() calls with an unchanged delay estimate.
  size_t stability_counter_;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_CLOCKDRIFT_DETECTOR_H_
|
@ -0,0 +1,103 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores the configured transition length and applies the initial config
// immediately.
CoarseFilterUpdateGain::CoarseFilterUpdateGain(
    const EchoCanceller3Config::Filter::CoarseConfiguration& config,
    size_t config_change_duration_blocks)
    : config_change_duration_blocks_(
          static_cast<int>(config_change_duration_blocks)) {
  SetConfig(config, /*immediate_effect=*/true);
  // The reciprocal is precomputed for the per-block cross-fade in
  // UpdateCurrentConfig(); the DCHECK guards the division.
  RTC_DCHECK_LT(0, config_change_duration_blocks_);
  one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_;
}
|
||||
|
||||
// Restarts the adaptation hold-back counters so that the filter is not
// updated until the render signal has been sufficiently exciting again
// after the echo path change (see the gating in Compute()).
void CoarseFilterUpdateGain::HandleEchoPathChange() {
  call_counter_ = 0;
  poor_signal_excitation_counter_ = 0;
}
|
||||
|
||||
void CoarseFilterUpdateGain::Compute(
|
||||
const std::array<float, kFftLengthBy2Plus1>& render_power,
|
||||
const RenderSignalAnalyzer& render_signal_analyzer,
|
||||
const FftData& E_coarse,
|
||||
size_t size_partitions,
|
||||
bool saturated_capture_signal,
|
||||
FftData* G) {
|
||||
RTC_DCHECK(G);
|
||||
++call_counter_;
|
||||
|
||||
UpdateCurrentConfig();
|
||||
|
||||
if (render_signal_analyzer.PoorSignalExcitation()) {
|
||||
poor_signal_excitation_counter_ = 0;
|
||||
}
|
||||
|
||||
// Do not update the filter if the render is not sufficiently excited.
|
||||
if (++poor_signal_excitation_counter_ < size_partitions ||
|
||||
saturated_capture_signal || call_counter_ <= size_partitions) {
|
||||
G->re.fill(0.f);
|
||||
G->im.fill(0.f);
|
||||
return;
|
||||
}
|
||||
|
||||
// Compute mu.
|
||||
std::array<float, kFftLengthBy2Plus1> mu;
|
||||
const auto& X2 = render_power;
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
if (X2[k] > current_config_.noise_gate) {
|
||||
mu[k] = current_config_.rate / X2[k];
|
||||
} else {
|
||||
mu[k] = 0.f;
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid updating the filter close to narrow bands in the render signals.
|
||||
render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);
|
||||
|
||||
// G = mu * E * X2.
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
G->re[k] = mu[k] * E_coarse.re[k];
|
||||
G->im[k] = mu[k] * E_coarse.im[k];
|
||||
}
|
||||
}
|
||||
|
||||
void CoarseFilterUpdateGain::UpdateCurrentConfig() {
|
||||
RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_);
|
||||
if (config_change_counter_ > 0) {
|
||||
if (--config_change_counter_ > 0) {
|
||||
auto average = [](float from, float to, float from_weight) {
|
||||
return from * from_weight + to * (1.f - from_weight);
|
||||
};
|
||||
|
||||
float change_factor =
|
||||
config_change_counter_ * one_by_config_change_duration_blocks_;
|
||||
|
||||
current_config_.rate =
|
||||
average(old_target_config_.rate, target_config_.rate, change_factor);
|
||||
current_config_.noise_gate =
|
||||
average(old_target_config_.noise_gate, target_config_.noise_gate,
|
||||
change_factor);
|
||||
} else {
|
||||
current_config_ = old_target_config_ = target_config_;
|
||||
}
|
||||
}
|
||||
RTC_DCHECK_LE(0, config_change_counter_);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,74 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Provides functionality for computing the fixed gain for the coarse filter.
class CoarseFilterUpdateGain {
 public:
  explicit CoarseFilterUpdateGain(
      const EchoCanceller3Config::Filter::CoarseConfiguration& config,
      size_t config_change_duration_blocks);

  // Takes action in the case of a known echo path change.
  void HandleEchoPathChange();

  // Computes the gain G from the render power, the coarse-filter error
  // spectrum E_coarse, and the analyzer state. G is zeroed while adaptation
  // is held back (poor excitation, saturation, or startup).
  void Compute(const std::array<float, kFftLengthBy2Plus1>& render_power,
               const RenderSignalAnalyzer& render_signal_analyzer,
               const FftData& E_coarse,
               size_t size_partitions,
               bool saturated_capture_signal,
               FftData* G);

  // Sets a new config. With immediate_effect the new config is applied at
  // once; otherwise it is cross-faded in over config_change_duration_blocks.
  void SetConfig(
      const EchoCanceller3Config::Filter::CoarseConfiguration& config,
      bool immediate_effect) {
    if (immediate_effect) {
      old_target_config_ = current_config_ = target_config_ = config;
      config_change_counter_ = 0;
    } else {
      old_target_config_ = current_config_;
      target_config_ = config;
      config_change_counter_ = config_change_duration_blocks_;
    }
  }

 private:
  // Config currently in effect, the config being transitioned to, and the
  // config the transition started from.
  EchoCanceller3Config::Filter::CoarseConfiguration current_config_;
  EchoCanceller3Config::Filter::CoarseConfiguration target_config_;
  EchoCanceller3Config::Filter::CoarseConfiguration old_target_config_;
  const int config_change_duration_blocks_;
  // Precomputed reciprocal used for the per-block cross-fade.
  float one_by_config_change_duration_blocks_;
  // TODO(peah): Check whether this counter should instead be initialized to a
  // large value.
  size_t poor_signal_excitation_counter_ = 0;
  size_t call_counter_ = 0;
  // Remaining blocks in the current config transition; 0 when settled.
  int config_change_counter_ = 0;

  void UpdateCurrentConfig();
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_COARSE_FILTER_UPDATE_GAIN_H_
|
186
webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc
Normal file
186
webrtc/modules/audio_processing/aec3/comfort_noise_generator.cc
Normal file
@ -0,0 +1,186 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
|
||||
|
||||
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <cstdint>
|
||||
#include <functional>
|
||||
#include <numeric>
|
||||
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "modules/audio_processing/aec3/vector_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Computes the noise floor value that matches a WGN input of noise_floor_dbfs.
float GetNoiseFloorFactor(float noise_floor_dbfs) {
  // Offset of a full-scale 16-bit signal: kdBfsNormalization =
  // 20.f*log10(32768.f).
  constexpr float kdBfsNormalization = 90.30899869919436f;
  // Power-domain dB conversion, hence the factor 0.1 in the exponent.
  const float exponent = (kdBfsNormalization + noise_floor_dbfs) * 0.1f;
  return 64.f * std::pow(10.f, exponent);
}
|
||||
|
||||
// Table of sqrt(2) * sin(2*pi*i/32).
constexpr float kSqrt2Sin[32] = {
    +0.0000000f, +0.2758994f, +0.5411961f, +0.7856950f, +1.0000000f,
    +1.1758756f, +1.3065630f, +1.3870398f, +1.4142136f, +1.3870398f,
    +1.3065630f, +1.1758756f, +1.0000000f, +0.7856950f, +0.5411961f,
    +0.2758994f, +0.0000000f, -0.2758994f, -0.5411961f, -0.7856950f,
    -1.0000000f, -1.1758756f, -1.3065630f, -1.3870398f, -1.4142136f,
    -1.3870398f, -1.3065630f, -1.1758756f, -1.0000000f, -0.7856950f,
    -0.5411961f, -0.2758994f};

// Generates random-phase comfort noise shaped by the power spectrum N2 in
// the lower band, and with a flat level in the upper band.
void GenerateComfortNoise(Aec3Optimization optimization,
                          const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise) {
  FftData* N_low = lower_band_noise;
  FftData* N_high = upper_band_noise;

  // Compute square root spectrum (magnitudes from powers).
  std::array<float, kFftLengthBy2Plus1> N;
  std::copy(N2.begin(), N2.end(), N.begin());
  aec3::VectorMath(optimization).Sqrt(N);

  // Compute the noise level for the upper bands as the average magnitude of
  // the upper half of the lower-band spectrum.
  constexpr float kOneByNumBands = 1.f / (kFftLengthBy2Plus1 / 2 + 1);
  constexpr int kFftLengthBy2Plus1By2 = kFftLengthBy2Plus1 / 2;
  const float high_band_noise_level =
      std::accumulate(N.begin() + kFftLengthBy2Plus1By2, N.end(), 0.f) *
      kOneByNumBands;

  // The analysis and synthesis windowing cause loss of power when
  // cross-fading the noise where frames are completely uncorrelated
  // (generated with random phase), hence the factor sqrt(2) baked into the
  // kSqrt2Sin table.
  // This is not the case for the speech signal where the input is overlapping
  // (strong correlation).
  N_low->re[0] = N_low->re[kFftLengthBy2] = N_high->re[0] =
      N_high->re[kFftLengthBy2] = 0.f;
  for (size_t k = 1; k < kFftLengthBy2; k++) {
    constexpr int kIndexMask = 32 - 1;
    // Generate a random 31-bit integer (linear congruential generator).
    seed[0] = (seed[0] * 69069 + 1) & (0x80000000 - 1);
    // Convert to a 5-bit index into the sine table.
    int i = seed[0] >> 26;

    // x = sqrt(2) * sin(a).
    const float x = kSqrt2Sin[i];
    // y = sqrt(2) * cos(a) = sqrt(2) * sin(a + pi/2): the table is read with
    // an offset of 8 entries, i.e. a quarter period.
    const float y = kSqrt2Sin[(i + 8) & kIndexMask];

    // Form low-frequency noise via spectral shaping.
    N_low->re[k] = N[k] * x;
    N_low->im[k] = N[k] * y;

    // Form the high-frequency noise via simple levelling.
    N_high->re[k] = high_band_noise_level * x;
    N_high->im[k] = high_band_noise_level * y;
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Sets up per-channel spectra. N2_initial_ exists only during the startup
// phase and is dropped by Compute() after enough updates.
ComfortNoiseGenerator::ComfortNoiseGenerator(const EchoCanceller3Config& config,
                                             Aec3Optimization optimization,
                                             size_t num_capture_channels)
    : optimization_(optimization),
      seed_(42),
      num_capture_channels_(num_capture_channels),
      noise_floor_(GetNoiseFloorFactor(config.comfort_noise.noise_floor_dbfs)),
      N2_initial_(
          std::make_unique<std::vector<std::array<float, kFftLengthBy2Plus1>>>(
              num_capture_channels_)),
      Y2_smoothed_(num_capture_channels_),
      N2_(num_capture_channels_) {
  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    (*N2_initial_)[ch].fill(0.f);
    Y2_smoothed_[ch].fill(0.f);
    // N2_ starts high and is only ever lowered (or slowly raised) by the
    // min-tracking update in Compute().
    N2_[ch].fill(1.0e6f);
  }
}

ComfortNoiseGenerator::~ComfortNoiseGenerator() = default;
|
||||
|
||||
// Updates the per-channel background-noise spectrum estimate from the capture
// spectrum (skipped while the capture is saturated) and renders comfort noise
// for both bands.
void ComfortNoiseGenerator::Compute(
    bool saturated_capture,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        capture_spectrum,
    rtc::ArrayView<FftData> lower_band_noise,
    rtc::ArrayView<FftData> upper_band_noise) {
  const auto& Y2 = capture_spectrum;

  if (!saturated_capture) {
    // Smooth Y2 with a one-pole filter (10% of the new value per block).
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      std::transform(Y2_smoothed_[ch].begin(), Y2_smoothed_[ch].end(),
                     Y2[ch].begin(), Y2_smoothed_[ch].begin(),
                     [](float a, float b) { return a + 0.1f * (b - a); });
    }

    if (N2_counter_ > 50) {
      // Update N2 from Y2_smoothed: track downwards quickly, and let the
      // estimate drift slowly upwards (factor 1.0002) so it can recover.
      for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
        std::transform(N2_[ch].begin(), N2_[ch].end(), Y2_smoothed_[ch].begin(),
                       N2_[ch].begin(), [](float a, float b) {
                         return b < a ? (0.9f * b + 0.1f * a) * 1.0002f
                                      : a * 1.0002f;
                       });
      }
    }

    if (N2_initial_) {
      // The startup estimate is used for the first 1000 unsaturated blocks,
      // then discarded.
      if (++N2_counter_ == 1000) {
        N2_initial_.reset();
      } else {
        // Compute the N2_initial from N2: only move slowly upwards towards
        // larger N2 values.
        for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
          std::transform(N2_[ch].begin(), N2_[ch].end(),
                         (*N2_initial_)[ch].begin(), (*N2_initial_)[ch].begin(),
                         [](float a, float b) {
                           return a > b ? b + 0.001f * (a - b) : a;
                         });
        }
      }
    }

    // Clamp both estimates to the configured noise floor.
    for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
      for (auto& n : N2_[ch]) {
        n = std::max(n, noise_floor_);
      }
      if (N2_initial_) {
        for (auto& n : (*N2_initial_)[ch]) {
          n = std::max(n, noise_floor_);
        }
      }
    }
  }

  // Choose N2 estimate to use: the startup estimate while it still exists.
  const auto& N2 = N2_initial_ ? (*N2_initial_) : N2_;

  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    GenerateComfortNoise(optimization_, N2[ch], &seed_, &lower_band_noise[ch],
                         &upper_band_noise[ch]);
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
#if defined(WEBRTC_ARCH_X86_FAMILY)

// NOTE(review): no definitions for these declarations are visible in the
// accompanying .cc file (which uses an internal GenerateComfortNoise
// instead) — confirm whether they are stale and can be removed.
void EstimateComfortNoise_SSE2(const std::array<float, kFftLengthBy2Plus1>& N2,
                               uint32_t* seed,
                               FftData* lower_band_noise,
                               FftData* upper_band_noise);
#endif
void EstimateComfortNoise(const std::array<float, kFftLengthBy2Plus1>& N2,
                          uint32_t* seed,
                          FftData* lower_band_noise,
                          FftData* upper_band_noise);

}  // namespace aec3
|
||||
|
||||
// Generates the comfort noise.
class ComfortNoiseGenerator {
 public:
  ComfortNoiseGenerator(const EchoCanceller3Config& config,
                        Aec3Optimization optimization,
                        size_t num_capture_channels);
  ComfortNoiseGenerator() = delete;
  ~ComfortNoiseGenerator();
  ComfortNoiseGenerator(const ComfortNoiseGenerator&) = delete;

  // Computes the comfort noise for the lower and upper bands and updates the
  // background-noise estimate from the capture spectrum (unless saturated).
  void Compute(bool saturated_capture,
               rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                   capture_spectrum,
               rtc::ArrayView<FftData> lower_band_noise,
               rtc::ArrayView<FftData> upper_band_noise);

  // Returns the estimate of the background noise spectrum.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> NoiseSpectrum()
      const {
    return N2_;
  }

 private:
  const Aec3Optimization optimization_;
  // State of the random generator used for the noise phase.
  uint32_t seed_;
  const size_t num_capture_channels_;
  // Lower clamp for the noise estimates, derived from the config.
  const float noise_floor_;
  // Startup-phase estimate; non-null only during the initial blocks.
  std::unique_ptr<std::vector<std::array<float, kFftLengthBy2Plus1>>>
      N2_initial_;
  // Smoothed capture spectrum and noise spectrum estimate, per channel.
  std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_smoothed_;
  std::vector<std::array<float, kFftLengthBy2Plus1>> N2_;
  // Number of unsaturated Compute() calls during the startup phase.
  int N2_counter_ = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_COMFORT_NOISE_GENERATOR_H_
|
91
webrtc/modules/audio_processing/aec3/decimator.cc
Normal file
91
webrtc/modules/audio_processing/aec3/decimator.cc
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/decimator.h"
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {

// Anti-aliasing low-pass for down-sampling by 2.
// signal.butter(2, 3400/8000.0, 'lowpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS2() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f},
      {{-1.f, 0.f}, {0.13833231f, 0.40743176f}, 0.22711796393486466f}};
}

// Anti-aliasing low-pass for down-sampling by 4.
// signal.ellip(6, 1, 40, 1800/8000, btype='lowpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetLowPassFilterDS4() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{-0.08873842f, 0.99605496f}, {0.75916227f, 0.23841065f}, 0.26250696827f},
      {{0.62273832f, 0.78243018f}, {0.74892112f, 0.5410152f}, 0.26250696827f},
      {{0.71107693f, 0.70311421f}, {0.74895534f, 0.63924616f}, 0.26250696827f}};
}

// Band-pass used when down-sampling by 8.
// signal.cheby1(1, 6, [1000/8000, 2000/8000], btype='bandpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetBandPassFilterDS8() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true},
      {{1.f, 0.f}, {0.7601815f, 0.46423542f}, 0.10330478266505948f, true}};
}

// Noise-reduction high-pass applied before decimation.
// signal.butter(2, 1000/8000.0, 'highpass', analog=False)
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetHighPassFilter() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{
      {{1.f, 0.f}, {0.72712179f, 0.21296904f}, 0.7570763753338849f}};
}

// Empty parameter list, i.e. no filtering.
const std::vector<CascadedBiQuadFilter::BiQuadParam> GetPassThroughFilter() {
  return std::vector<CascadedBiQuadFilter::BiQuadParam>{};
}
}  // namespace
|
||||
|
||||
// Selects the anti-aliasing and noise-reduction filters matching the
// down-sampling factor; only factors 2, 4 and 8 are supported.
Decimator::Decimator(size_t down_sampling_factor)
    : down_sampling_factor_(down_sampling_factor),
      anti_aliasing_filter_(down_sampling_factor == 8
                                ? GetBandPassFilterDS8()
                                : (down_sampling_factor == 4
                                       ? GetLowPassFilterDS4()
                                       : GetLowPassFilterDS2())),
      noise_reduction_filter_(down_sampling_factor == 8
                                  ? GetPassThroughFilter()
                                  : GetHighPassFilter()) {
  RTC_DCHECK(down_sampling_factor_ == 2 || down_sampling_factor_ == 4 ||
             down_sampling_factor_ == 8);
}
|
||||
|
||||
// Downsamples one block: band-limits the input, suppresses near-end noise,
// then keeps every down_sampling_factor_-th sample.
void Decimator::Decimate(rtc::ArrayView<const float> in,
                         rtc::ArrayView<float> out) {
  RTC_DCHECK_EQ(kBlockSize, in.size());
  RTC_DCHECK_EQ(kBlockSize / down_sampling_factor_, out.size());
  std::array<float, kBlockSize> filtered;

  // Limit the frequency content of the signal to avoid aliasing.
  anti_aliasing_filter_.Process(in, filtered);

  // Reduce the impact of near-end noise.
  noise_reduction_filter_.Process(filtered);

  // Downsample the signal.
  size_t read_index = 0;
  for (size_t j = 0; j < out.size(); ++j) {
    RTC_DCHECK_GT(kBlockSize, read_index);
    out[j] = filtered[read_index];
    read_index += down_sampling_factor_;
  }
}
|
||||
|
||||
} // namespace webrtc
|
41
webrtc/modules/audio_processing/aec3/decimator.h
Normal file
41
webrtc/modules/audio_processing/aec3/decimator.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Provides functionality for decimating a signal.
class Decimator {
 public:
  // down_sampling_factor must be 2, 4 or 8 (DCHECKed in the constructor).
  explicit Decimator(size_t down_sampling_factor);

  // Downsamples the signal. in must hold kBlockSize samples and out
  // kBlockSize / down_sampling_factor samples.
  void Decimate(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);

 private:
  const size_t down_sampling_factor_;
  // Band-limits the input before decimation to avoid aliasing.
  CascadedBiQuadFilter anti_aliasing_filter_;
  // High-pass (or pass-through for factor 8) reducing near-end noise.
  CascadedBiQuadFilter noise_reduction_filter_;

  RTC_DISALLOW_COPY_AND_ASSIGN(Decimator);
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DECIMATOR_H_
|
31
webrtc/modules/audio_processing/aec3/delay_estimate.h
Normal file
31
webrtc/modules/audio_processing/aec3/delay_estimate.h
Normal file
@ -0,0 +1,31 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores delay_estimates.
// NOTE(review): size_t is used without an explicit <stddef.h> include in this
// header; it appears to rely on a transitive include — confirm.
struct DelayEstimate {
  enum class Quality { kCoarse, kRefined };

  DelayEstimate(Quality quality, size_t delay)
      : quality(quality), delay(delay) {}

  Quality quality;
  size_t delay;
  // Block counters aged externally; presumably maintained by the code that
  // tracks the estimate — verify against the delay estimator.
  size_t blocks_since_last_change = 0;
  size_t blocks_since_last_update = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
@ -0,0 +1,75 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/dominant_nearend_detector.h"
|
||||
|
||||
#include <numeric>
|
||||
|
||||
namespace webrtc {
|
||||
// Copies the detection thresholds from the config and sets up per-channel
// trigger/hold counters.
DominantNearendDetector::DominantNearendDetector(
    const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
    size_t num_capture_channels)
    : enr_threshold_(config.enr_threshold),
      enr_exit_threshold_(config.enr_exit_threshold),
      snr_threshold_(config.snr_threshold),
      hold_duration_(config.hold_duration),
      trigger_threshold_(config.trigger_threshold),
      use_during_initial_phase_(config.use_during_initial_phase),
      num_capture_channels_(num_capture_channels),
      trigger_counters_(num_capture_channels_),
      hold_counters_(num_capture_channels_) {}
|
||||
|
||||
// Updates the nearend/echo state decision from the latest spectral estimates.
// The detector flags nearend when any channel's low-frequency nearend energy
// sufficiently dominates both the residual echo and the comfort noise.
void DominantNearendDetector::Update(
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        nearend_spectrum,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        residual_echo_spectrum,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        comfort_noise_spectrum,
    bool initial_state) {
  nearend_state_ = false;

  // Energy over bins 1..15, i.e. the low-frequency part of the spectrum.
  auto low_frequency_energy = [](rtc::ArrayView<const float> spectrum) {
    RTC_DCHECK_LE(16, spectrum.size());
    return std::accumulate(spectrum.begin() + 1, spectrum.begin() + 16, 0.f);
  };

  for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
    const float ne_sum = low_frequency_energy(nearend_spectrum[ch]);
    const float echo_sum = low_frequency_energy(residual_echo_spectrum[ch]);
    const float noise_sum = low_frequency_energy(comfort_noise_spectrum[ch]);

    // Detect strong active nearend if the nearend is sufficiently stronger than
    // the echo and the nearend noise.
    if ((!initial_state || use_during_initial_phase_) &&
        echo_sum < enr_threshold_ * ne_sum &&
        ne_sum > snr_threshold_ * noise_sum) {
      if (++trigger_counters_[ch] >= trigger_threshold_) {
        // After a period of strong active nearend activity, flag nearend mode.
        hold_counters_[ch] = hold_duration_;
        trigger_counters_[ch] = trigger_threshold_;
      }
    } else {
      // Forget previously detected strong active nearend activity.
      trigger_counters_[ch] = std::max(0, trigger_counters_[ch] - 1);
    }

    // Exit nearend-state early at strong echo.
    if (echo_sum > enr_exit_threshold_ * ne_sum &&
        echo_sum > snr_threshold_ * noise_sum) {
      hold_counters_[ch] = 0;
    }

    // Remain in any nearend mode for a certain duration.
    hold_counters_[ch] = std::max(0, hold_counters_[ch] - 1);
    nearend_state_ = nearend_state_ || hold_counters_[ch] > 0;
  }
}
||||
} // namespace webrtc
|
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/nearend_detector.h"
|
||||
|
||||
namespace webrtc {
|
||||
// Class for selecting whether the suppressor is in the nearend or echo state.
// Decides per frame whether low-frequency nearend energy dominates the
// residual echo; the decision is sticky via per-channel trigger/hold counters.
class DominantNearendDetector : public NearendDetector {
 public:
  DominantNearendDetector(
      const EchoCanceller3Config::Suppressor::DominantNearendDetection& config,
      size_t num_capture_channels);

  // Returns whether the current state is the nearend state.
  bool IsNearendState() const override { return nearend_state_; }

  // Updates the state selection based on latest spectral estimates.
  void Update(rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  nearend_spectrum,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  residual_echo_spectrum,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  comfort_noise_spectrum,
              bool initial_state) override;

 private:
  // Thresholds and durations copied from the config at construction.
  const float enr_threshold_;
  const float enr_exit_threshold_;
  const float snr_threshold_;
  const int hold_duration_;
  const int trigger_threshold_;
  const bool use_during_initial_phase_;
  const size_t num_capture_channels_;

  // True while any channel's hold counter is active.
  bool nearend_state_ = false;
  // Per-channel counters: consecutive strong-nearend frames, and remaining
  // frames to stay flagged as nearend.
  std::vector<int> trigger_counters_;
  std::vector<int> hold_counters_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOMINANT_NEAREND_DETECTOR_H_
|
@ -0,0 +1,25 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates a zero-initialized circular buffer of `downsampled_buffer_size`
// samples. The vector fill-constructor already sets every element to 0.f,
// so the previous explicit std::fill of the buffer was redundant and has
// been removed.
DownsampledRenderBuffer::DownsampledRenderBuffer(size_t downsampled_buffer_size)
    : size(static_cast<int>(downsampled_buffer_size)),
      buffer(downsampled_buffer_size, 0.f) {}
|
||||
|
||||
// Out-of-line defaulted destructor, anchored in this translation unit.
DownsampledRenderBuffer::~DownsampledRenderBuffer() = default;
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Holds the circular buffer of the downsampled render data.
// `write` and `read` are indices into `buffer`; the Inc/Dec/Offset helpers
// wrap them around the buffer size.
struct DownsampledRenderBuffer {
  explicit DownsampledRenderBuffer(size_t downsampled_buffer_size);
  ~DownsampledRenderBuffer();

  // Returns the index one step after `index`, wrapping to 0 at the end.
  int IncIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index < size - 1 ? index + 1 : 0;
  }

  // Returns the index one step before `index`, wrapping to size - 1 at 0.
  int DecIndex(int index) const {
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return index > 0 ? index - 1 : size - 1;
  }

  // Returns `index` advanced by `offset` modulo the buffer size.
  // NOTE(review): the modulo form assumes size + index + offset >= 0, i.e.
  // negative offsets no larger in magnitude than size + index — confirm at
  // call sites.
  int OffsetIndex(int index, int offset) const {
    RTC_DCHECK_GE(buffer.size(), offset);
    RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
    return (size + index + offset) % size;
  }

  // Convenience wrappers that move the write/read indices in place.
  void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
  void IncWriteIndex() { write = IncIndex(write); }
  void DecWriteIndex() { write = DecIndex(write); }
  void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
  void IncReadIndex() { read = IncIndex(read); }
  void DecReadIndex() { read = DecIndex(read); }

  const int size;              // Number of samples in the circular buffer.
  std::vector<float> buffer;   // Sample storage; length equals `size`.
  int write = 0;               // Next write position.
  int read = 0;                // Next read position.
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_DOWNSAMPLED_RENDER_BUFFER_H_
|
118
webrtc/modules/audio_processing/aec3/echo_audibility.cc
Normal file
118
webrtc/modules/audio_processing/aec3/echo_audibility.cc
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/echo_audibility.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs the estimator in its reset state.
// `use_render_stationarity_at_init` allows stationarity-based residual echo
// scaling to be applied before an external delay has been seen (see Update()
// and GetResidualEchoScaling()).
EchoAudibility::EchoAudibility(bool use_render_stationarity_at_init)
    : use_render_stationarity_at_init_(use_render_stationarity_at_init) {
  Reset();
}
|
||||
// Out-of-line defaulted destructor, anchored in this translation unit.
EchoAudibility::~EchoAudibility() = default;
||||
|
||||
void EchoAudibility::Update(const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const float> average_reverb,
|
||||
int delay_blocks,
|
||||
bool external_delay_seen) {
|
||||
UpdateRenderNoiseEstimator(render_buffer.GetSpectrumBuffer(),
|
||||
render_buffer.GetBlockBuffer(),
|
||||
external_delay_seen);
|
||||
|
||||
if (external_delay_seen || use_render_stationarity_at_init_) {
|
||||
UpdateRenderStationarityFlags(render_buffer, average_reverb, delay_blocks);
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the estimator to its initial state: no render activity observed,
// no previous spectrum write position recorded, stationarity state cleared.
void EchoAudibility::Reset() {
  non_zero_render_seen_ = false;
  render_spectrum_write_prev_ = absl::nullopt;
  render_stationarity_.Reset();
}
|
||||
|
||||
void EchoAudibility::UpdateRenderStationarityFlags(
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const float> average_reverb,
|
||||
int min_channel_delay_blocks) {
|
||||
const SpectrumBuffer& spectrum_buffer = render_buffer.GetSpectrumBuffer();
|
||||
int idx_at_delay = spectrum_buffer.OffsetIndex(spectrum_buffer.read,
|
||||
min_channel_delay_blocks);
|
||||
|
||||
int num_lookahead = render_buffer.Headroom() - min_channel_delay_blocks + 1;
|
||||
num_lookahead = std::max(0, num_lookahead);
|
||||
|
||||
render_stationarity_.UpdateStationarityFlags(spectrum_buffer, average_reverb,
|
||||
idx_at_delay, num_lookahead);
|
||||
}
|
||||
|
||||
// Feeds into the render noise estimator all spectra written to the buffer
// since the previous call. Traversal order and the bookkeeping of
// render_spectrum_write_prev_ are load-bearing here.
void EchoAudibility::UpdateRenderNoiseEstimator(
    const SpectrumBuffer& spectrum_buffer,
    const BlockBuffer& block_buffer,
    bool external_delay_seen) {
  // First call after Reset(): only record the current write positions so the
  // next call has a starting point; nothing to feed yet.
  if (!render_spectrum_write_prev_) {
    render_spectrum_write_prev_ = spectrum_buffer.write;
    render_block_write_prev_ = block_buffer.write;
    return;
  }
  int render_spectrum_write_current = spectrum_buffer.write;
  // Until an external delay has been reported, gate the estimator on the
  // render signal actually carrying energy (latches true once seen).
  if (!non_zero_render_seen_ && !external_delay_seen) {
    non_zero_render_seen_ = !IsRenderTooLow(block_buffer);
  }
  if (non_zero_render_seen_) {
    // Walk backwards from the previous write position down to (excluding)
    // the current one, feeding each new spectrum to the noise estimator.
    for (int idx = render_spectrum_write_prev_.value();
         idx != render_spectrum_write_current;
         idx = spectrum_buffer.DecIndex(idx)) {
      render_stationarity_.UpdateNoiseEstimator(spectrum_buffer.buffer[idx]);
    }
  }
  render_spectrum_write_prev_ = render_spectrum_write_current;
}
|
||||
|
||||
// Returns true if the render signal contains just close to zero values in
// the blocks written since the previous call. Also advances
// render_block_write_prev_ to the current write position as a side effect.
bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) {
  const int num_render_channels =
      static_cast<int>(block_buffer.buffer[0][0].size());
  bool too_low = false;
  const int render_block_write_current = block_buffer.write;
  // No new blocks since the last call counts as too low.
  if (render_block_write_current == render_block_write_prev_) {
    too_low = true;
  } else {
    // Scan each new block (band 0 only) for its peak absolute sample value
    // across channels.
    for (int idx = render_block_write_prev_; idx != render_block_write_current;
         idx = block_buffer.IncIndex(idx)) {
      float max_abs_over_channels = 0.f;
      for (int ch = 0; ch < num_render_channels; ++ch) {
        auto block = block_buffer.buffer[idx][0][ch];
        auto r = std::minmax_element(block.cbegin(), block.cend());
        float max_abs_channel =
            std::max(std::fabs(*r.first), std::fabs(*r.second));
        max_abs_over_channels =
            std::max(max_abs_over_channels, max_abs_channel);
      }
      if (max_abs_over_channels < 10.f) {
        too_low = true;  // Discards all blocks if one of them is too low.
        break;
      }
    }
  }
  render_block_write_prev_ = render_block_write_current;
  return too_low;
}
|
||||
|
||||
} // namespace webrtc
|
86
webrtc/modules/audio_processing/aec3/echo_audibility.h
Normal file
86
webrtc/modules/audio_processing/aec3/echo_audibility.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Estimates from the render signal whether the echo is currently audible,
// and exposes per-band residual echo scaling based on render stationarity.
class EchoAudibility {
 public:
  explicit EchoAudibility(bool use_render_stationarity_at_init);
  ~EchoAudibility();

  EchoAudibility(const EchoAudibility&) = delete;
  EchoAudibility& operator=(const EchoAudibility&) = delete;

  // Feed new render data to the echo audibility estimator.
  void Update(const RenderBuffer& render_buffer,
              rtc::ArrayView<const float> average_reverb,
              int min_channel_delay_blocks,
              bool external_delay_seen);
  // Get the residual echo scaling: bands whose render signal is stationary
  // get scaling 0 (no residual echo assumed), all others get 1.
  void GetResidualEchoScaling(bool filter_has_had_time_to_converge,
                              rtc::ArrayView<float> residual_scaling) const {
    for (size_t band = 0; band < residual_scaling.size(); ++band) {
      if (render_stationarity_.IsBandStationary(band) &&
          (filter_has_had_time_to_converge ||
           use_render_stationarity_at_init_)) {
        residual_scaling[band] = 0.f;
      } else {
        residual_scaling[band] = 1.0f;
      }
    }
  }

  // Returns true if the current render block is estimated as stationary.
  bool IsBlockStationary() const {
    return render_stationarity_.IsBlockStationary();
  }

 private:
  // Reset the EchoAudibility class.
  void Reset();

  // Updates the render stationarity flags for the current frame.
  void UpdateRenderStationarityFlags(const RenderBuffer& render_buffer,
                                     rtc::ArrayView<const float> average_reverb,
                                     int delay_blocks);

  // Updates the noise estimator with the new render data since the previous
  // call to this method.
  void UpdateRenderNoiseEstimator(const SpectrumBuffer& spectrum_buffer,
                                  const BlockBuffer& block_buffer,
                                  bool external_delay_seen);

  // Returns a bool being true if the render signal contains just close to zero
  // values.
  bool IsRenderTooLow(const BlockBuffer& block_buffer);

  // Spectrum-buffer write position seen at the previous update; nullopt
  // until the first update after Reset().
  absl::optional<int> render_spectrum_write_prev_;
  // Block-buffer write position seen at the previous update.
  int render_block_write_prev_;
  // Latched flag: set once non-negligible render energy has been observed.
  bool non_zero_render_seen_;
  const bool use_render_stationarity_at_init_;
  StationarityEstimator render_stationarity_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_AUDIBILITY_H_
|
868
webrtc/modules/audio_processing/aec3/echo_canceller3.cc
Normal file
868
webrtc/modules/audio_processing/aec3/echo_canceller3.cc
Normal file
@ -0,0 +1,868 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/echo_canceller3.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/high_pass_filter.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/experiments/field_trial_parser.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Distinguishes the two EchoCanceller3 API call directions (capture vs.
// render). NOTE(review): consumers of this enum are outside this chunk.
enum class EchoCanceller3ApiCall { kCapture, kRender };
|
||||
|
||||
// Returns true if any capture sample is at or beyond +/-32700, i.e. close to
// the int16 full-scale limit, indicating a (near-)saturated microphone
// signal. Rewritten with std::any_of (the file already includes <algorithm>)
// instead of the hand-rolled find loop.
bool DetectSaturation(rtc::ArrayView<const float> y) {
  return std::any_of(y.begin(), y.end(), [](float y_k) {
    return y_k >= 32700.0f || y_k <= -32700.0f;
  });
}
|
||||
|
||||
// Retrieves a value from a field trial if it is available. If no value is
|
||||
// present, the default value is returned. If the retrieved value is beyond the
|
||||
// specified limits, the default value is returned instead.
|
||||
void RetrieveFieldTrialValue(const char* trial_name,
|
||||
float min,
|
||||
float max,
|
||||
float* value_to_update) {
|
||||
const std::string field_trial_str = field_trial::FindFullName(trial_name);
|
||||
|
||||
FieldTrialParameter<double> field_trial_param(/*key=*/"", *value_to_update);
|
||||
|
||||
ParseFieldTrial({&field_trial_param}, field_trial_str);
|
||||
float field_trial_value = static_cast<float>(field_trial_param.Get());
|
||||
|
||||
if (field_trial_value >= min && field_trial_value <= max) {
|
||||
*value_to_update = field_trial_value;
|
||||
}
|
||||
}
|
||||
|
||||
void RetrieveFieldTrialValue(const char* trial_name,
|
||||
int min,
|
||||
int max,
|
||||
int* value_to_update) {
|
||||
const std::string field_trial_str = field_trial::FindFullName(trial_name);
|
||||
|
||||
FieldTrialParameter<int> field_trial_param(/*key=*/"", *value_to_update);
|
||||
|
||||
ParseFieldTrial({&field_trial_param}, field_trial_str);
|
||||
float field_trial_value = field_trial_param.Get();
|
||||
|
||||
if (field_trial_value >= min && field_trial_value <= max) {
|
||||
*value_to_update = field_trial_value;
|
||||
}
|
||||
}
|
||||
|
||||
// Points *sub_frame_view at sub-frame `sub_frame_index` (0 or 1) of each
// band/channel in `frame`, without copying any audio data.
// Fix: removed the tautological RTC_DCHECK_LE(0, sub_frame_index) — the
// index is a size_t and is always >= 0 (typical -Wtype-limits warning); the
// sibling vector-based overload carries no such check either.
void FillSubFrameView(
    AudioBuffer* frame,
    size_t sub_frame_index,
    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
  RTC_DCHECK_GE(1, sub_frame_index);
  RTC_DCHECK_EQ(frame->num_bands(), sub_frame_view->size());
  RTC_DCHECK_EQ(frame->num_channels(), (*sub_frame_view)[0].size());
  for (size_t band = 0; band < sub_frame_view->size(); ++band) {
    for (size_t channel = 0; channel < (*sub_frame_view)[0].size(); ++channel) {
      (*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
          &frame->split_bands(channel)[band][sub_frame_index * kSubFrameLength],
          kSubFrameLength);
    }
  }
}
|
||||
|
||||
void FillSubFrameView(
|
||||
std::vector<std::vector<std::vector<float>>>* frame,
|
||||
size_t sub_frame_index,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
|
||||
RTC_DCHECK_GE(1, sub_frame_index);
|
||||
RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
|
||||
RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
|
||||
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][channel][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Runs one capture sub-frame through the AEC pipeline: view the sub-frame,
// block it, process it, and re-frame the processed (and optionally the
// linear-filter) output. Call order between the blocker, processor and
// framers is strict. The linear-output path is active only when
// `linear_output` is non-null; its companion pointers must then be non-null.
void ProcessCaptureFrameContent(
    AudioBuffer* linear_output,
    AudioBuffer* capture,
    bool level_change,
    bool saturated_microphone_signal,
    size_t sub_frame_index,
    FrameBlocker* capture_blocker,
    BlockFramer* linear_output_framer,
    BlockFramer* output_framer,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* linear_output_block,
    std::vector<std::vector<rtc::ArrayView<float>>>*
        linear_output_sub_frame_view,
    std::vector<std::vector<std::vector<float>>>* capture_block,
    std::vector<std::vector<rtc::ArrayView<float>>>* capture_sub_frame_view) {
  // Aim the capture sub-frame views at the requested sub-frame.
  FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view);

  if (linear_output) {
    RTC_DCHECK(linear_output_framer);
    RTC_DCHECK(linear_output_block);
    RTC_DCHECK(linear_output_sub_frame_view);
    FillSubFrameView(linear_output, sub_frame_index,
                     linear_output_sub_frame_view);
  }

  // Blocker -> processor -> framer; the processed block is written back into
  // the capture sub-frame views in place.
  capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view,
                                                 capture_block);
  block_processor->ProcessCapture(level_change, saturated_microphone_signal,
                                  linear_output_block, capture_block);
  output_framer->InsertBlockAndExtractSubFrame(*capture_block,
                                               capture_sub_frame_view);

  if (linear_output) {
    RTC_DCHECK(linear_output_framer);
    linear_output_framer->InsertBlockAndExtractSubFrame(
        *linear_output_block, linear_output_sub_frame_view);
  }
}
|
||||
|
||||
// Processes any leftover block that the capture blocker has accumulated after
// the whole-sub-frame passes (frame size is not always a multiple of the
// block size). No-op when no complete block is available.
void ProcessRemainingCaptureFrameContent(
    bool level_change,
    bool saturated_microphone_signal,
    FrameBlocker* capture_blocker,
    BlockFramer* linear_output_framer,
    BlockFramer* output_framer,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* linear_output_block,
    std::vector<std::vector<std::vector<float>>>* block) {
  if (!capture_blocker->IsBlockAvailable()) {
    return;
  }

  capture_blocker->ExtractBlock(block);
  block_processor->ProcessCapture(level_change, saturated_microphone_signal,
                                  linear_output_block, block);
  output_framer->InsertBlock(*block);

  // The linear-output path is optional; mirror the processed block into it
  // only when a framer was provided.
  if (linear_output_framer) {
    RTC_DCHECK(linear_output_block);
    linear_output_framer->InsertBlock(*linear_output_block);
  }
}
|
||||
|
||||
// Buffers one render sub-frame: views the sub-frame, converts it to a block,
// and hands the block to the block processor's render buffer. The three
// calls must run in this order.
void BufferRenderFrameContent(
    std::vector<std::vector<std::vector<float>>>* render_frame,
    size_t sub_frame_index,
    FrameBlocker* render_blocker,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* block,
    std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
  FillSubFrameView(render_frame, sub_frame_index, sub_frame_view);
  render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
  block_processor->BufferRender(*block);
}
|
||||
|
||||
// Buffers any leftover render block the blocker has accumulated after the
// whole-sub-frame passes; no-op when no complete block is available.
void BufferRemainingRenderFrameContent(
    FrameBlocker* render_blocker,
    BlockProcessor* block_processor,
    std::vector<std::vector<std::vector<float>>>* block) {
  if (render_blocker->IsBlockAvailable()) {
    render_blocker->ExtractBlock(block);
    block_processor->BufferRender(*block);
  }
}
|
||||
|
||||
void CopyBufferIntoFrame(const AudioBuffer& buffer,
|
||||
size_t num_bands,
|
||||
size_t num_channels,
|
||||
std::vector<std::vector<std::vector<float>>>* frame) {
|
||||
RTC_DCHECK_EQ(num_bands, frame->size());
|
||||
RTC_DCHECK_EQ(num_channels, (*frame)[0].size());
|
||||
RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, (*frame)[0][0].size());
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
for (size_t channel = 0; channel < num_channels; ++channel) {
|
||||
rtc::ArrayView<const float> buffer_view(
|
||||
&buffer.split_bands_const(channel)[band][0],
|
||||
AudioBuffer::kSplitBandSize);
|
||||
std::copy(buffer_view.begin(), buffer_view.end(),
|
||||
(*frame)[band][channel].begin());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// TODO(webrtc:5298): Move this to a separate file.
|
||||
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
EchoCanceller3Config adjusted_cfg = config;
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
|
||||
adjusted_cfg.suppressor.high_bands_suppression
|
||||
.anti_howling_activation_threshold = 25.f;
|
||||
adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 0.01f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseShortConfigChangeDuration")) {
|
||||
adjusted_cfg.filter.config_change_duration_blocks = 10;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseZeroInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 0.f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot1SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .1f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot2SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .2f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot3SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .3f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot6SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .6f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseDot9SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = .9f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3Use1Dot2SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 1.2f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3Use1Dot6SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 1.6f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3Use2Dot0SecondsInitialStateDuration")) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 2.0f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
|
||||
adjusted_cfg.ep_strength.echo_can_saturate = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseDot2ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.2f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot3ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.3f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot4ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.4f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot5ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.5f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot6ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.6f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot7ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.7f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot8ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.8f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
|
||||
// Two blocks headroom.
|
||||
adjusted_cfg.delay.delay_headroom_samples = kBlockSize * 2;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToZeroKillSwitch")) {
|
||||
adjusted_cfg.erle.clamp_quality_estimate_to_zero = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ClampInstQualityToOneKillSwitch")) {
|
||||
adjusted_cfg.erle.clamp_quality_estimate_to_one = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3OnsetDetectionKillSwitch")) {
|
||||
adjusted_cfg.erle.onset_detection = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceRenderDelayEstimationDownmixing")) {
|
||||
adjusted_cfg.delay.render_alignment_mixing.downmix = true;
|
||||
adjusted_cfg.delay.render_alignment_mixing.adaptive_selection = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceCaptureDelayEstimationDownmixing")) {
|
||||
adjusted_cfg.delay.capture_alignment_mixing.downmix = true;
|
||||
adjusted_cfg.delay.capture_alignment_mixing.adaptive_selection = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceCaptureDelayEstimationLeftRightPrioritization")) {
|
||||
adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
|
||||
true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-"
|
||||
"Aec3RenderDelayEstimationLeftRightPrioritizationKillSwitch")) {
|
||||
adjusted_cfg.delay.capture_alignment_mixing.prefer_first_two_channels =
|
||||
false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3VerySensitiveDominantNearendActivation")) {
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.75f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3TransparentAntiHowlingGain")) {
|
||||
adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain = 1.f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNormalSuppressorTuning")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent = 0.4f;
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress = 0.5f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNearendSuppressorTuning")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent = 1.29f;
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress = 1.3f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNormalSuppressorHfTuning")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent = 0.3f;
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress = 0.4f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceMoreTransparentNearendSuppressorHfTuning")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent = 1.09f;
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress = 1.1f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceRapidlyAdjustingNormalSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.max_inc_factor = 2.5f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceRapidlyAdjustingNearendSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor = 2.5f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceSlowlyAdjustingNormalSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf = .2f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceSlowlyAdjustingNearendSuppressorTunings")) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
|
||||
adjusted_cfg.echo_audibility.use_stationarity_properties = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceStationarityPropertiesAtInit")) {
|
||||
adjusted_cfg.echo_audibility.use_stationarity_properties_at_init = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EnforceLowActiveRenderLimit")) {
|
||||
adjusted_cfg.render_levels.active_render_limit = 50.f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3EnforceVeryLowActiveRenderLimit")) {
|
||||
adjusted_cfg.render_levels.active_render_limit = 30.f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3NonlinearModeReverbKillSwitch")) {
|
||||
adjusted_cfg.echo_model.model_reverb_in_nonlinear_mode = false;
|
||||
}
|
||||
|
||||
// Field-trial based override for the whole suppressor tuning.
|
||||
const std::string suppressor_tuning_override_trial_name =
|
||||
field_trial::FindFullName("WebRTC-Aec3SuppressorTuningOverride");
|
||||
|
||||
FieldTrialParameter<double> nearend_tuning_mask_lf_enr_transparent(
|
||||
"nearend_tuning_mask_lf_enr_transparent",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
|
||||
FieldTrialParameter<double> nearend_tuning_mask_lf_enr_suppress(
|
||||
"nearend_tuning_mask_lf_enr_suppress",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
|
||||
FieldTrialParameter<double> nearend_tuning_mask_hf_enr_transparent(
|
||||
"nearend_tuning_mask_hf_enr_transparent",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
|
||||
FieldTrialParameter<double> nearend_tuning_mask_hf_enr_suppress(
|
||||
"nearend_tuning_mask_hf_enr_suppress",
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
|
||||
FieldTrialParameter<double> nearend_tuning_max_inc_factor(
|
||||
"nearend_tuning_max_inc_factor",
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
|
||||
FieldTrialParameter<double> nearend_tuning_max_dec_factor_lf(
|
||||
"nearend_tuning_max_dec_factor_lf",
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
|
||||
FieldTrialParameter<double> normal_tuning_mask_lf_enr_transparent(
|
||||
"normal_tuning_mask_lf_enr_transparent",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
|
||||
FieldTrialParameter<double> normal_tuning_mask_lf_enr_suppress(
|
||||
"normal_tuning_mask_lf_enr_suppress",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
|
||||
FieldTrialParameter<double> normal_tuning_mask_hf_enr_transparent(
|
||||
"normal_tuning_mask_hf_enr_transparent",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
|
||||
FieldTrialParameter<double> normal_tuning_mask_hf_enr_suppress(
|
||||
"normal_tuning_mask_hf_enr_suppress",
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
|
||||
FieldTrialParameter<double> normal_tuning_max_inc_factor(
|
||||
"normal_tuning_max_inc_factor",
|
||||
adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
|
||||
FieldTrialParameter<double> normal_tuning_max_dec_factor_lf(
|
||||
"normal_tuning_max_dec_factor_lf",
|
||||
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
|
||||
FieldTrialParameter<double> dominant_nearend_detection_enr_threshold(
|
||||
"dominant_nearend_detection_enr_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
|
||||
FieldTrialParameter<double> dominant_nearend_detection_enr_exit_threshold(
|
||||
"dominant_nearend_detection_enr_exit_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
|
||||
FieldTrialParameter<double> dominant_nearend_detection_snr_threshold(
|
||||
"dominant_nearend_detection_snr_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
|
||||
FieldTrialParameter<int> dominant_nearend_detection_hold_duration(
|
||||
"dominant_nearend_detection_hold_duration",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
|
||||
FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
|
||||
"dominant_nearend_detection_trigger_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
|
||||
FieldTrialParameter<double> ep_strength_default_len(
|
||||
"ep_strength_default_len", adjusted_cfg.ep_strength.default_len);
|
||||
|
||||
ParseFieldTrial(
|
||||
{&nearend_tuning_mask_lf_enr_transparent,
|
||||
&nearend_tuning_mask_lf_enr_suppress,
|
||||
&nearend_tuning_mask_hf_enr_transparent,
|
||||
&nearend_tuning_mask_hf_enr_suppress, &nearend_tuning_max_inc_factor,
|
||||
&nearend_tuning_max_dec_factor_lf,
|
||||
&normal_tuning_mask_lf_enr_transparent,
|
||||
&normal_tuning_mask_lf_enr_suppress,
|
||||
&normal_tuning_mask_hf_enr_transparent,
|
||||
&normal_tuning_mask_hf_enr_suppress, &normal_tuning_max_inc_factor,
|
||||
&normal_tuning_max_dec_factor_lf,
|
||||
&dominant_nearend_detection_enr_threshold,
|
||||
&dominant_nearend_detection_enr_exit_threshold,
|
||||
&dominant_nearend_detection_snr_threshold,
|
||||
&dominant_nearend_detection_hold_duration,
|
||||
&dominant_nearend_detection_trigger_threshold, &ep_strength_default_len},
|
||||
suppressor_tuning_override_trial_name);
|
||||
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
|
||||
static_cast<float>(nearend_tuning_mask_lf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress =
|
||||
static_cast<float>(nearend_tuning_mask_lf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent =
|
||||
static_cast<float>(nearend_tuning_mask_hf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress =
|
||||
static_cast<float>(nearend_tuning_mask_hf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_inc_factor =
|
||||
static_cast<float>(nearend_tuning_max_inc_factor.Get());
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf =
|
||||
static_cast<float>(nearend_tuning_max_dec_factor_lf.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent =
|
||||
static_cast<float>(normal_tuning_mask_lf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress =
|
||||
static_cast<float>(normal_tuning_mask_lf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent =
|
||||
static_cast<float>(normal_tuning_mask_hf_enr_transparent.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress =
|
||||
static_cast<float>(normal_tuning_mask_hf_enr_suppress.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.max_inc_factor =
|
||||
static_cast<float>(normal_tuning_max_inc_factor.Get());
|
||||
adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf =
|
||||
static_cast<float>(normal_tuning_max_dec_factor_lf.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold =
|
||||
static_cast<float>(dominant_nearend_detection_enr_threshold.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold =
|
||||
static_cast<float>(dominant_nearend_detection_enr_exit_threshold.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold =
|
||||
static_cast<float>(dominant_nearend_detection_snr_threshold.Get());
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration =
|
||||
dominant_nearend_detection_hold_duration.Get();
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
|
||||
dominant_nearend_detection_trigger_threshold.Get();
|
||||
adjusted_cfg.ep_strength.default_len =
|
||||
static_cast<float>(ep_strength_default_len.Get());
|
||||
|
||||
// Field trial-based overrides of individual suppressor parameters.
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendLfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendLfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendHfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendHfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.mask_hf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendMaxIncFactorOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.max_inc_factor);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNearendMaxDecFactorLfOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf);
|
||||
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalLfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalLfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_lf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalHfMaskTransparentOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_transparent);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalHfMaskSuppressOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.mask_hf.enr_suppress);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalMaxIncFactorOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.max_inc_factor);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorNormalMaxDecFactorLfOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.normal_tuning.max_dec_factor_lf);
|
||||
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendEnrThresholdOverride", 0.f, 100.f,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendEnrExitThresholdOverride", 0.f,
|
||||
100.f,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.enr_exit_threshold);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendSnrThresholdOverride", 0.f, 100.f,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.snr_threshold);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendHoldDurationOverride", 0, 1000,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.hold_duration);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorDominantNearendTriggerThresholdOverride", 0, 1000,
|
||||
&adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
|
||||
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
|
||||
|
||||
RetrieveFieldTrialValue("WebRTC-Aec3SuppressorEpStrengthDefaultLenOverride",
|
||||
-1.f, 1.f, &adjusted_cfg.ep_strength.default_len);
|
||||
|
||||
return adjusted_cfg;
|
||||
}
|
||||
|
||||
// Helper class that receives 10 ms render (far-end) frames and transfers them
// to the capture side via the render SwapQueue. Instances are only accessed
// from the render thread; the queue provides the cross-thread hand-over.
class EchoCanceller3::RenderWriter {
 public:
  RenderWriter(ApmDataDumper* data_dumper,
               SwapQueue<std::vector<std::vector<std::vector<float>>>,
                         Aec3RenderQueueItemVerifier>* render_transfer_queue,
               size_t num_bands,
               size_t num_channels);

  RenderWriter() = delete;
  RenderWriter(const RenderWriter&) = delete;
  RenderWriter& operator=(const RenderWriter&) = delete;

  ~RenderWriter();
  // Copies |input| into an internal frame, high-pass filters the lowest band,
  // and inserts the result into the render transfer queue.
  void Insert(const AudioBuffer& input);

 private:
  ApmDataDumper* data_dumper_;
  const size_t num_bands_;
  const size_t num_channels_;
  HighPassFilter high_pass_filter_;
  // Reusable staging frame (bands x channels x samples) swapped into the
  // queue on every Insert() call.
  std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
  SwapQueue<std::vector<std::vector<std::vector<float>>>,
            Aec3RenderQueueItemVerifier>* render_transfer_queue_;
};
|
||||
|
||||
// Constructs the writer and preallocates the staging frame so that Insert()
// performs no allocations on the real-time render path.
EchoCanceller3::RenderWriter::RenderWriter(
    ApmDataDumper* data_dumper,
    SwapQueue<std::vector<std::vector<std::vector<float>>>,
              Aec3RenderQueueItemVerifier>* render_transfer_queue,
    size_t num_bands,
    size_t num_channels)
    : data_dumper_(data_dumper),
      num_bands_(num_bands),
      num_channels_(num_channels),
      // The filter is created for 16 kHz operation on the lowest band.
      high_pass_filter_(16000, num_channels),
      render_queue_input_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      render_transfer_queue_(render_transfer_queue) {
  RTC_DCHECK(data_dumper);
}
|
||||
|
||||
// Out-of-line destructor; all members clean up via RAII.
EchoCanceller3::RenderWriter::~RenderWriter() = default;
|
||||
|
||||
// Copies one 10 ms render frame into the staging buffer, high-pass filters
// the lowest band, and hands the frame to the capture side via the queue.
void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, input.num_frames_per_band());
  RTC_DCHECK_EQ(num_bands_, input.num_bands());
  RTC_DCHECK_EQ(num_channels_, input.num_channels());

  // TODO(bugs.webrtc.org/8759) Temporary work-around.
  // Release-build guard for the band-count mismatch that the DCHECK above
  // catches in debug builds.
  if (num_bands_ != input.num_bands())
    return;

  // Only channel 0 of the lowest band is dumped here.
  data_dumper_->DumpWav("aec3_render_input", AudioBuffer::kSplitBandSize,
                        &input.split_bands_const(0)[0][0], 16000, 1);

  CopyBufferIntoFrame(input, num_bands_, num_channels_,
                      &render_queue_input_frame_);
  // High-pass filter only the lowest band (index 0).
  high_pass_filter_.Process(&render_queue_input_frame_[0]);

  // A failed Insert means the queue is full; the frame is then intentionally
  // dropped (the cast documents that the return value is ignored).
  static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
}
|
||||
|
||||
// Instance counter used to give each ApmDataDumper a unique id.
int EchoCanceller3::instance_count_ = 0;
|
||||
|
||||
// Normal c-tor: adjusts the supplied config and delegates to the testing
// c-tor, supplying a default-created BlockProcessor.
// NOTE(review): AdjustConfig(config) is evaluated twice here (once for the
// delegated config, once for BlockProcessor::Create). The results should be
// identical, but the field-trial parsing work is duplicated — confirm whether
// a single evaluation is feasible without changing the delegation structure.
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                               int sample_rate_hz,
                               size_t num_render_channels,
                               size_t num_capture_channels)
    : EchoCanceller3(AdjustConfig(config),
                     sample_rate_hz,
                     num_render_channels,
                     num_capture_channels,
                     std::unique_ptr<BlockProcessor>(
                         BlockProcessor::Create(AdjustConfig(config),
                                                sample_rate_hz,
                                                num_render_channels,
                                                num_capture_channels))) {}
|
||||
// Testing c-tor; does the actual construction. All frame/block buffers are
// preallocated here so that the processing calls are allocation-free.
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
                               int sample_rate_hz,
                               size_t num_render_channels,
                               size_t num_capture_channels,
                               std::unique_ptr<BlockProcessor> block_processor)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_(config),
      sample_rate_hz_(sample_rate_hz),
      num_bands_(NumBandsForRate(sample_rate_hz_)),
      num_render_channels_(num_render_channels),
      num_capture_channels_(num_capture_channels),
      output_framer_(num_bands_, num_capture_channels_),
      capture_blocker_(num_bands_, num_capture_channels_),
      render_blocker_(num_bands_, num_render_channels_),
      // The queue is sized in frames; each slot holds a full
      // bands x channels x samples render frame.
      render_transfer_queue_(
          kRenderTransferQueueSizeFrames,
          std::vector<std::vector<std::vector<float>>>(
              num_bands_,
              std::vector<std::vector<float>>(
                  num_render_channels_,
                  std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
          Aec3RenderQueueItemVerifier(num_bands_,
                                      num_render_channels_,
                                      AudioBuffer::kSplitBandSize)),
      block_processor_(std::move(block_processor)),
      render_queue_output_frame_(
          num_bands_,
          std::vector<std::vector<float>>(
              num_render_channels_,
              std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
      render_block_(
          num_bands_,
          std::vector<std::vector<float>>(num_render_channels_,
                                          std::vector<float>(kBlockSize, 0.f))),
      capture_block_(
          num_bands_,
          std::vector<std::vector<float>>(num_capture_channels_,
                                          std::vector<float>(kBlockSize, 0.f))),
      render_sub_frame_view_(
          num_bands_,
          std::vector<rtc::ArrayView<float>>(num_render_channels_)),
      capture_sub_frame_view_(
          num_bands_,
          std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));

  // Optional fixed delay applied to the capture signal before processing.
  if (config_.delay.fixed_capture_delay_samples > 0) {
    block_delay_buffer_.reset(new BlockDelayBuffer(
        num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
        config_.delay.fixed_capture_delay_samples));
  }

  render_writer_.reset(new RenderWriter(data_dumper_.get(),
                                        &render_transfer_queue_, num_bands_,
                                        num_render_channels_));

  RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
  RTC_DCHECK_GE(kMaxNumBands, num_bands_);

  // Allocate the linear filter output plumbing only when the config asks for
  // it; the 3-argument ProcessCapture() relies on this being set.
  if (config_.filter.export_linear_aec_output) {
    linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_));
    linear_output_block_ =
        std::make_unique<std::vector<std::vector<std::vector<float>>>>(
            1, std::vector<std::vector<float>>(
                   num_capture_channels_, std::vector<float>(kBlockSize, 0.f)));
    linear_output_sub_frame_view_ =
        std::vector<std::vector<rtc::ArrayView<float>>>(
            1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
  }
}
|
||||
|
||||
// Out-of-line destructor; all members clean up via RAII.
EchoCanceller3::~EchoCanceller3() = default;
|
||||
|
||||
void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
|
||||
|
||||
RTC_DCHECK_EQ(render.num_channels(), num_render_channels_);
|
||||
data_dumper_->DumpRaw("aec3_call_order",
|
||||
static_cast<int>(EchoCanceller3ApiCall::kRender));
|
||||
|
||||
return render_writer_->Insert(render);
|
||||
}
|
||||
|
||||
void EchoCanceller3::AnalyzeCapture(const AudioBuffer& capture) {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
data_dumper_->DumpWav("aec3_capture_analyze_input", capture.num_frames(),
|
||||
capture.channels_const()[0], sample_rate_hz_, 1);
|
||||
saturated_microphone_signal_ = false;
|
||||
for (size_t channel = 0; channel < capture.num_channels(); ++channel) {
|
||||
saturated_microphone_signal_ |=
|
||||
DetectSaturation(rtc::ArrayView<const float>(
|
||||
capture.channels_const()[channel], capture.num_frames()));
|
||||
if (saturated_microphone_signal_) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Convenience overload that processes the capture signal without retrieving
// the linear AEC output.
void EchoCanceller3::ProcessCapture(AudioBuffer* capture, bool level_change) {
  ProcessCapture(capture, nullptr, level_change);
}
|
||||
|
||||
// Processes one 10 ms split-band capture frame: empties the render queue,
// then runs echo cancellation over the frame's two 5 ms sub-frames plus any
// remaining buffered block content. Optionally also produces the linear
// filter output in |linear_output| (requires export_linear_aec_output in the
// config). The statement order here is load-bearing: render data must be
// buffered before the capture sub-frames are processed.
void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
                                    AudioBuffer* linear_output,
                                    bool level_change) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  RTC_DCHECK(capture);
  RTC_DCHECK_EQ(num_bands_, capture->num_bands());
  RTC_DCHECK_EQ(AudioBuffer::kSplitBandSize, capture->num_frames_per_band());
  RTC_DCHECK_EQ(capture->num_channels(), num_capture_channels_);
  data_dumper_->DumpRaw("aec3_call_order",
                        static_cast<int>(EchoCanceller3ApiCall::kCapture));

  // Requesting the linear output without having configured it at
  // construction is a caller error.
  if (linear_output && !linear_output_framer_) {
    RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
                         "properly configuring AEC3.";
    RTC_NOTREACHED();
  }

  // Report capture call in the metrics and periodically update API call
  // metrics.
  api_call_metrics_.ReportCaptureCall();

  // Optionally delay the capture signal.
  if (config_.delay.fixed_capture_delay_samples > 0) {
    RTC_DCHECK(block_delay_buffer_);
    block_delay_buffer_->DelaySignal(capture);
  }

  rtc::ArrayView<float> capture_lower_band = rtc::ArrayView<float>(
      &capture->split_bands(0)[0][0], AudioBuffer::kSplitBandSize);

  data_dumper_->DumpWav("aec3_capture_input", capture_lower_band, 16000, 1);

  // Transfer any pending render frames to the block processor before
  // processing the capture signal.
  EmptyRenderQueue();

  // Process sub-frame 0 of the 10 ms frame.
  ProcessCaptureFrameContent(linear_output, capture, level_change,
                             saturated_microphone_signal_, 0, &capture_blocker_,
                             linear_output_framer_.get(), &output_framer_,
                             block_processor_.get(), linear_output_block_.get(),
                             &linear_output_sub_frame_view_, &capture_block_,
                             &capture_sub_frame_view_);

  // Process sub-frame 1.
  ProcessCaptureFrameContent(linear_output, capture, level_change,
                             saturated_microphone_signal_, 1, &capture_blocker_,
                             linear_output_framer_.get(), &output_framer_,
                             block_processor_.get(), linear_output_block_.get(),
                             &linear_output_sub_frame_view_, &capture_block_,
                             &capture_sub_frame_view_);

  // Process any block content that did not align with the sub-frame
  // boundaries.
  ProcessRemainingCaptureFrameContent(
      level_change, saturated_microphone_signal_, &capture_blocker_,
      linear_output_framer_.get(), &output_framer_, block_processor_.get(),
      linear_output_block_.get(), &capture_block_);

  data_dumper_->DumpWav("aec3_capture_output", AudioBuffer::kSplitBandSize,
                        &capture->split_bands(0)[0][0], 16000, 1);
}
|
||||
|
||||
// Collects the current echo-canceller metrics from the block processor.
EchoControl::Metrics EchoCanceller3::GetMetrics() const {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  Metrics current_metrics;
  block_processor_->GetMetrics(&current_metrics);
  return current_metrics;
}
|
||||
|
||||
// Forwards an externally provided audio buffer delay estimate (in ms) to the
// block processor.
void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
  RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
  block_processor_->SetAudioBufferDelay(delay_ms);
}
|
||||
|
||||
// AEC3 always reports itself as actively processing; there is no passive
// pass-through mode in this implementation.
bool EchoCanceller3::ActiveProcessing() const {
  return true;
}
|
||||
|
||||
// Produces a default configuration tuned for the given channel counts.
// |num_capture_channels| is currently unused; only the render channel count
// affects the defaults.
EchoCanceller3Config EchoCanceller3::CreateDefaultConfig(
    size_t num_render_channels,
    size_t num_capture_channels) {
  EchoCanceller3Config cfg;
  if (num_render_channels > 1) {
    // Use shorter and more rapidly adapting coarse filter to compensate for
    // the increased number of total filter parameters to adapt.
    cfg.filter.coarse.length_blocks = 11;
    cfg.filter.coarse.rate = 0.95f;
    cfg.filter.coarse_initial.length_blocks = 11;
    cfg.filter.coarse_initial.rate = 0.95f;

    // Use more conservative suppressor behavior for non-nearend speech.
    cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
    cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
  }
  return cfg;
}
|
||||
|
||||
void EchoCanceller3::EmptyRenderQueue() {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
bool frame_to_buffer =
|
||||
render_transfer_queue_.Remove(&render_queue_output_frame_);
|
||||
while (frame_to_buffer) {
|
||||
// Report render call in the metrics.
|
||||
api_call_metrics_.ReportRenderCall();
|
||||
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_,
|
||||
block_processor_.get(), &render_block_,
|
||||
&render_sub_frame_view_);
|
||||
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_,
|
||||
block_processor_.get(), &render_block_,
|
||||
&render_sub_frame_view_);
|
||||
|
||||
BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(),
|
||||
&render_block_);
|
||||
|
||||
frame_to_buffer =
|
||||
render_transfer_queue_.Remove(&render_queue_output_frame_);
|
||||
}
|
||||
}
|
||||
} // namespace webrtc
|
196
webrtc/modules/audio_processing/aec3/echo_canceller3.h
Normal file
196
webrtc/modules/audio_processing/aec3/echo_canceller3.h
Normal file
@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/api_call_jitter_metrics.h"
|
||||
#include "modules/audio_processing/aec3/block_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/block_framer.h"
|
||||
#include "modules/audio_processing/aec3/block_processor.h"
|
||||
#include "modules/audio_processing/aec3/frame_blocker.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/race_checker.h"
|
||||
#include "rtc_base/swap_queue.h"
|
||||
#include "rtc_base/thread_annotations.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Method for adjusting config parameter dependencies.
|
||||
// Only to be used externally to AEC3 for testing purposes.
|
||||
// TODO(webrtc:5298): Move this to a separate file.
|
||||
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config);
|
||||
|
||||
// Functor for verifying the invariance of the frames being put into the render
|
||||
// queue.
|
||||
class Aec3RenderQueueItemVerifier {
 public:
  Aec3RenderQueueItemVerifier(size_t num_bands,
                              size_t num_channels,
                              size_t frame_length)
      : num_bands_(num_bands),
        num_channels_(num_channels),
        frame_length_(frame_length) {}

  // Returns true iff |v| has exactly the expected
  // bands x channels x samples shape.
  bool operator()(const std::vector<std::vector<std::vector<float>>>& v) const {
    bool valid = v.size() == num_bands_;
    for (size_t band = 0; valid && band < v.size(); ++band) {
      valid = v[band].size() == num_channels_;
      for (size_t ch = 0; valid && ch < v[band].size(); ++ch) {
        valid = v[band][ch].size() == frame_length_;
      }
    }
    return valid;
  }

 private:
  const size_t num_bands_;
  const size_t num_channels_;
  const size_t frame_length_;
};
|
||||
|
||||
// Main class for the echo canceller3.
|
||||
// It does 4 things:
|
||||
// -Receives 10 ms frames of band-split audio.
|
||||
// -Provides the lower level echo canceller functionality with
|
||||
// blocks of 64 samples of audio data.
|
||||
// -Partially handles the jitter in the render and capture API
|
||||
// call sequence.
|
||||
//
|
||||
// The class is supposed to be used in a non-concurrent manner apart from the
|
||||
// AnalyzeRender call which can be called concurrently with the other methods.
|
||||
class EchoCanceller3 : public EchoControl {
 public:
  // Normal c-tor to use.
  EchoCanceller3(const EchoCanceller3Config& config,
                 int sample_rate_hz,
                 size_t num_render_channels,
                 size_t num_capture_channels);
  // Testing c-tor that is used only for testing purposes.
  EchoCanceller3(const EchoCanceller3Config& config,
                 int sample_rate_hz,
                 size_t num_render_channels,
                 size_t num_capture_channels,
                 std::unique_ptr<BlockProcessor> block_processor);
  ~EchoCanceller3() override;
  EchoCanceller3(const EchoCanceller3&) = delete;
  EchoCanceller3& operator=(const EchoCanceller3&) = delete;

  // Analyzes and stores an internal copy of the split-band domain render
  // signal.
  void AnalyzeRender(AudioBuffer* render) override { AnalyzeRender(*render); }
  // Analyzes the full-band domain capture signal to detect signal saturation.
  void AnalyzeCapture(AudioBuffer* capture) override {
    AnalyzeCapture(*capture);
  }
  // Processes the split-band domain capture signal in order to remove any echo
  // present in the signal.
  void ProcessCapture(AudioBuffer* capture, bool level_change) override;
  // As above, but also returns the linear filter output.
  void ProcessCapture(AudioBuffer* capture,
                      AudioBuffer* linear_output,
                      bool level_change) override;
  // Collect current metrics from the echo canceller.
  Metrics GetMetrics() const override;
  // Provides an optional external estimate of the audio buffer delay.
  void SetAudioBufferDelay(int delay_ms) override;

  // Always returns true; AEC3 has no passive mode.
  bool ActiveProcessing() const override;

  // Signals whether an external detector has detected echo leakage from the
  // echo canceller.
  // Note that in the case echo leakage has been flagged, it should be unflagged
  // once it is no longer occurring.
  void UpdateEchoLeakageStatus(bool leakage_detected) {
    RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
    block_processor_->UpdateEchoLeakageStatus(leakage_detected);
  }

  // Produces a default configuration that is suitable for a certain combination
  // of render and capture channels.
  static EchoCanceller3Config CreateDefaultConfig(size_t num_render_channels,
                                                  size_t num_capture_channels);

 private:
  class RenderWriter;

  // Empties the render SwapQueue.
  void EmptyRenderQueue();

  // Analyzes and stores an internal copy of the split-band domain render
  // signal.
  void AnalyzeRender(const AudioBuffer& render);
  // Analyzes the full-band domain capture signal to detect signal saturation.
  void AnalyzeCapture(const AudioBuffer& capture);

  rtc::RaceChecker capture_race_checker_;
  rtc::RaceChecker render_race_checker_;

  // State that is accessed by the AnalyzeRender call.
  std::unique_ptr<RenderWriter> render_writer_
      RTC_GUARDED_BY(render_race_checker_);

  // State that may be accessed by the capture thread.
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const EchoCanceller3Config config_;
  const int sample_rate_hz_;
  const int num_bands_;
  const size_t num_render_channels_;
  const size_t num_capture_channels_;
  std::unique_ptr<BlockFramer> linear_output_framer_
      RTC_GUARDED_BY(capture_race_checker_);
  BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
  FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
  FrameBlocker render_blocker_ RTC_GUARDED_BY(capture_race_checker_);
  // Queue transferring render frames from the render thread to the capture
  // thread; the only member deliberately not guarded by a race checker.
  SwapQueue<std::vector<std::vector<std::vector<float>>>,
            Aec3RenderQueueItemVerifier>
      render_transfer_queue_;
  std::unique_ptr<BlockProcessor> block_processor_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<std::vector<float>>> render_queue_output_frame_
      RTC_GUARDED_BY(capture_race_checker_);
  bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) =
      false;
  std::vector<std::vector<std::vector<float>>> render_block_
      RTC_GUARDED_BY(capture_race_checker_);
  // Allocated only when config_.filter.export_linear_aec_output is set.
  std::unique_ptr<std::vector<std::vector<std::vector<float>>>>
      linear_output_block_ RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<std::vector<float>>> capture_block_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> render_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> linear_output_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  std::vector<std::vector<rtc::ArrayView<float>>> capture_sub_frame_view_
      RTC_GUARDED_BY(capture_race_checker_);
  // Allocated only when config_.delay.fixed_capture_delay_samples > 0.
  std::unique_ptr<BlockDelayBuffer> block_delay_buffer_
      RTC_GUARDED_BY(capture_race_checker_);
  ApiCallJitterMetrics api_call_metrics_ RTC_GUARDED_BY(capture_race_checker_);
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_CANCELLER3_H_
|
@ -0,0 +1,125 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs the estimator from the config: capture channels are downmixed,
// decimated by the configured factor, and fed to a matched filter whose lag
// estimates are aggregated into a single delay estimate.
EchoPathDelayEstimator::EchoPathDelayEstimator(
    ApmDataDumper* data_dumper,
    const EchoCanceller3Config& config,
    size_t num_capture_channels)
    : data_dumper_(data_dumper),
      down_sampling_factor_(config.delay.down_sampling_factor),
      // Guard against division by zero for a zero factor (also DCHECKed in
      // the body below).
      sub_block_size_(down_sampling_factor_ != 0
                          ? kBlockSize / down_sampling_factor_
                          : kBlockSize),
      capture_mixer_(num_capture_channels,
                     config.delay.capture_alignment_mixing),
      capture_decimator_(down_sampling_factor_),
      matched_filter_(
          data_dumper_,
          DetectOptimization(),
          sub_block_size_,
          kMatchedFilterWindowSizeSubBlocks,
          config.delay.num_filters,
          kMatchedFilterAlignmentShiftSizeSubBlocks,
          // A separate excitation limit applies at downsampling factor 8.
          config.delay.down_sampling_factor == 8
              ? config.render_levels.poor_excitation_render_limit_ds8
              : config.render_levels.poor_excitation_render_limit,
          config.delay.delay_estimate_smoothing,
          config.delay.delay_candidate_detection_threshold),
      matched_filter_lag_aggregator_(data_dumper_,
                                     matched_filter_.GetMaxFilterLag(),
                                     config.delay.delay_selection_thresholds) {
  RTC_DCHECK(data_dumper);
  RTC_DCHECK(down_sampling_factor_ > 0);
}
|
||||
|
||||
EchoPathDelayEstimator::~EchoPathDelayEstimator() = default;
|
||||
|
||||
void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
|
||||
Reset(true, reset_delay_confidence);
|
||||
}
|
||||
|
||||
absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
|
||||
const DownsampledRenderBuffer& render_buffer,
|
||||
const std::vector<std::vector<float>>& capture) {
|
||||
RTC_DCHECK_EQ(kBlockSize, capture[0].size());
|
||||
|
||||
std::array<float, kBlockSize> downsampled_capture_data;
|
||||
rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
|
||||
sub_block_size_);
|
||||
|
||||
std::array<float, kBlockSize> downmixed_capture;
|
||||
capture_mixer_.ProduceOutput(capture, downmixed_capture);
|
||||
capture_decimator_.Decimate(downmixed_capture, downsampled_capture);
|
||||
data_dumper_->DumpWav("aec3_capture_decimator_output",
|
||||
downsampled_capture.size(), downsampled_capture.data(),
|
||||
16000 / down_sampling_factor_, 1);
|
||||
matched_filter_.Update(render_buffer, downsampled_capture);
|
||||
|
||||
absl::optional<DelayEstimate> aggregated_matched_filter_lag =
|
||||
matched_filter_lag_aggregator_.Aggregate(
|
||||
matched_filter_.GetLagEstimates());
|
||||
|
||||
// Run clockdrift detection.
|
||||
if (aggregated_matched_filter_lag &&
|
||||
(*aggregated_matched_filter_lag).quality ==
|
||||
DelayEstimate::Quality::kRefined)
|
||||
clockdrift_detector_.Update((*aggregated_matched_filter_lag).delay);
|
||||
|
||||
// TODO(peah): Move this logging outside of this class once EchoCanceller3
|
||||
// development is done.
|
||||
data_dumper_->DumpRaw(
|
||||
"aec3_echo_path_delay_estimator_delay",
|
||||
aggregated_matched_filter_lag
|
||||
? static_cast<int>(aggregated_matched_filter_lag->delay *
|
||||
down_sampling_factor_)
|
||||
: -1);
|
||||
|
||||
// Return the detected delay in samples as the aggregated matched filter lag
|
||||
// compensated by the down sampling factor for the signal being correlated.
|
||||
if (aggregated_matched_filter_lag) {
|
||||
aggregated_matched_filter_lag->delay *= down_sampling_factor_;
|
||||
}
|
||||
|
||||
if (old_aggregated_lag_ && aggregated_matched_filter_lag &&
|
||||
old_aggregated_lag_->delay == aggregated_matched_filter_lag->delay) {
|
||||
++consistent_estimate_counter_;
|
||||
} else {
|
||||
consistent_estimate_counter_ = 0;
|
||||
}
|
||||
old_aggregated_lag_ = aggregated_matched_filter_lag;
|
||||
constexpr size_t kNumBlocksPerSecondBy2 = kNumBlocksPerSecond / 2;
|
||||
if (consistent_estimate_counter_ > kNumBlocksPerSecondBy2) {
|
||||
Reset(false, false);
|
||||
}
|
||||
|
||||
return aggregated_matched_filter_lag;
|
||||
}
|
||||
|
||||
void EchoPathDelayEstimator::Reset(bool reset_lag_aggregator,
|
||||
bool reset_delay_confidence) {
|
||||
if (reset_lag_aggregator) {
|
||||
matched_filter_lag_aggregator_.Reset(reset_delay_confidence);
|
||||
}
|
||||
matched_filter_.Reset();
|
||||
old_aggregated_lag_ = absl::nullopt;
|
||||
consistent_estimate_counter_ = 0;
|
||||
}
|
||||
} // namespace webrtc
|
@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/alignment_mixer.h"
|
||||
#include "modules/audio_processing/aec3/clockdrift_detector.h"
|
||||
#include "modules/audio_processing/aec3/decimator.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
struct DownsampledRenderBuffer;
|
||||
struct EchoCanceller3Config;
|
||||
|
||||
// Estimates the delay of the echo path.
|
||||
class EchoPathDelayEstimator {
|
||||
public:
|
||||
EchoPathDelayEstimator(ApmDataDumper* data_dumper,
|
||||
const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels);
|
||||
~EchoPathDelayEstimator();
|
||||
|
||||
// Resets the estimation. If the delay confidence is reset, the reset behavior
|
||||
// is as if the call is restarted.
|
||||
void Reset(bool reset_delay_confidence);
|
||||
|
||||
// Produce a delay estimate if such is avaliable.
|
||||
absl::optional<DelayEstimate> EstimateDelay(
|
||||
const DownsampledRenderBuffer& render_buffer,
|
||||
const std::vector<std::vector<float>>& capture);
|
||||
|
||||
// Log delay estimator properties.
|
||||
void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
|
||||
matched_filter_.LogFilterProperties(sample_rate_hz, shift,
|
||||
down_sampling_factor_);
|
||||
}
|
||||
|
||||
// Returns the level of detected clockdrift.
|
||||
ClockdriftDetector::Level Clockdrift() const {
|
||||
return clockdrift_detector_.ClockdriftLevel();
|
||||
}
|
||||
|
||||
private:
|
||||
ApmDataDumper* const data_dumper_;
|
||||
const size_t down_sampling_factor_;
|
||||
const size_t sub_block_size_;
|
||||
AlignmentMixer capture_mixer_;
|
||||
Decimator capture_decimator_;
|
||||
MatchedFilter matched_filter_;
|
||||
MatchedFilterLagAggregator matched_filter_lag_aggregator_;
|
||||
absl::optional<DelayEstimate> old_aggregated_lag_;
|
||||
size_t consistent_estimate_counter_ = 0;
|
||||
ClockdriftDetector clockdrift_detector_;
|
||||
|
||||
// Internal reset method with more granularity.
|
||||
void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(EchoPathDelayEstimator);
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_DELAY_ESTIMATOR_H_
|
@ -0,0 +1,22 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
EchoPathVariability::EchoPathVariability(bool gain_change,
|
||||
DelayAdjustment delay_change,
|
||||
bool clock_drift)
|
||||
: gain_change(gain_change),
|
||||
delay_change(delay_change),
|
||||
clock_drift(clock_drift) {}
|
||||
|
||||
} // namespace webrtc
|
37
webrtc/modules/audio_processing/aec3/echo_path_variability.h
Normal file
37
webrtc/modules/audio_processing/aec3/echo_path_variability.h
Normal file
@ -0,0 +1,37 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
struct EchoPathVariability {
|
||||
enum class DelayAdjustment {
|
||||
kNone,
|
||||
kBufferFlush,
|
||||
kNewDetectedDelay
|
||||
};
|
||||
|
||||
EchoPathVariability(bool gain_change,
|
||||
DelayAdjustment delay_change,
|
||||
bool clock_drift);
|
||||
|
||||
bool AudioPathChanged() const {
|
||||
return gain_change || delay_change != DelayAdjustment::kNone;
|
||||
}
|
||||
bool gain_change;
|
||||
DelayAdjustment delay_change;
|
||||
bool clock_drift;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_PATH_VARIABILITY_H_
|
500
webrtc/modules/audio_processing/aec3/echo_remover.cc
Normal file
500
webrtc/modules/audio_processing/aec3/echo_remover.cc
Normal file
@ -0,0 +1,500 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/echo_remover.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/comfort_noise_generator.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/residual_echo_estimator.h"
|
||||
#include "modules/audio_processing/aec3/subtractor.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output.h"
|
||||
#include "modules/audio_processing/aec3/suppression_filter.h"
|
||||
#include "modules/audio_processing/aec3/suppression_gain.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Maximum number of channels for which the capture channel data is stored on
|
||||
// the stack. If the number of channels are larger than this, they are stored
|
||||
// using scratch memory that is pre-allocated on the heap. The reason for this
|
||||
// partitioning is not to waste heap space for handling the more common numbers
|
||||
// of channels, while at the same time not limiting the support for higher
|
||||
// numbers of channels by enforcing the capture channel data to be stored on the
|
||||
// stack using a fixed maximum value.
|
||||
constexpr size_t kMaxNumChannelsOnStack = 2;
|
||||
|
||||
// Chooses the number of channels to store on the heap when that is required due
|
||||
// to the number of capture channels being larger than the pre-defined number
|
||||
// of channels to store on the stack.
|
||||
size_t NumChannelsOnHeap(size_t num_capture_channels) {
|
||||
return num_capture_channels > kMaxNumChannelsOnStack ? num_capture_channels
|
||||
: 0;
|
||||
}
|
||||
|
||||
void LinearEchoPower(const FftData& E,
|
||||
const FftData& Y,
|
||||
std::array<float, kFftLengthBy2Plus1>* S2) {
|
||||
for (size_t k = 0; k < E.re.size(); ++k) {
|
||||
(*S2)[k] = (Y.re[k] - E.re[k]) * (Y.re[k] - E.re[k]) +
|
||||
(Y.im[k] - E.im[k]) * (Y.im[k] - E.im[k]);
|
||||
}
|
||||
}
|
||||
|
||||
// Fades between two input signals using a fix-sized transition.
|
||||
void SignalTransition(rtc::ArrayView<const float> from,
|
||||
rtc::ArrayView<const float> to,
|
||||
rtc::ArrayView<float> out) {
|
||||
if (from == to) {
|
||||
RTC_DCHECK_EQ(to.size(), out.size());
|
||||
std::copy(to.begin(), to.end(), out.begin());
|
||||
} else {
|
||||
constexpr size_t kTransitionSize = 30;
|
||||
constexpr float kOneByTransitionSizePlusOne = 1.f / (kTransitionSize + 1);
|
||||
|
||||
RTC_DCHECK_EQ(from.size(), to.size());
|
||||
RTC_DCHECK_EQ(from.size(), out.size());
|
||||
RTC_DCHECK_LE(kTransitionSize, out.size());
|
||||
|
||||
for (size_t k = 0; k < kTransitionSize; ++k) {
|
||||
float a = (k + 1) * kOneByTransitionSizePlusOne;
|
||||
out[k] = a * to[k] + (1.f - a) * from[k];
|
||||
}
|
||||
|
||||
std::copy(to.begin() + kTransitionSize, to.end(),
|
||||
out.begin() + kTransitionSize);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes a windowed (square root Hanning) padded FFT and updates the related
|
||||
// memory.
|
||||
void WindowedPaddedFft(const Aec3Fft& fft,
|
||||
rtc::ArrayView<const float> v,
|
||||
rtc::ArrayView<float> v_old,
|
||||
FftData* V) {
|
||||
fft.PaddedFft(v, v_old, Aec3Fft::Window::kSqrtHanning, V);
|
||||
std::copy(v.begin(), v.end(), v_old.begin());
|
||||
}
|
||||
|
||||
// Class for removing the echo from the capture signal.
|
||||
class EchoRemoverImpl final : public EchoRemover {
|
||||
public:
|
||||
EchoRemoverImpl(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels);
|
||||
~EchoRemoverImpl() override;
|
||||
EchoRemoverImpl(const EchoRemoverImpl&) = delete;
|
||||
EchoRemoverImpl& operator=(const EchoRemoverImpl&) = delete;
|
||||
|
||||
void GetMetrics(EchoControl::Metrics* metrics) const override;
|
||||
|
||||
// Removes the echo from a block of samples from the capture signal. The
|
||||
// supplied render signal is assumed to be pre-aligned with the capture
|
||||
// signal.
|
||||
void ProcessCapture(
|
||||
EchoPathVariability echo_path_variability,
|
||||
bool capture_signal_saturation,
|
||||
const absl::optional<DelayEstimate>& external_delay,
|
||||
RenderBuffer* render_buffer,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture) override;
|
||||
|
||||
// Updates the status on whether echo leakage is detected in the output of the
|
||||
// echo remover.
|
||||
void UpdateEchoLeakageStatus(bool leakage_detected) override {
|
||||
echo_leakage_detected_ = leakage_detected;
|
||||
}
|
||||
|
||||
private:
|
||||
// Selects which of the coarse and refined linear filter outputs that is most
|
||||
// appropriate to pass to the suppressor and forms the linear filter output by
|
||||
// smoothly transition between those.
|
||||
void FormLinearFilterOutput(const SubtractorOutput& subtractor_output,
|
||||
rtc::ArrayView<float> output);
|
||||
|
||||
static int instance_count_;
|
||||
const EchoCanceller3Config config_;
|
||||
const Aec3Fft fft_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const Aec3Optimization optimization_;
|
||||
const int sample_rate_hz_;
|
||||
const size_t num_render_channels_;
|
||||
const size_t num_capture_channels_;
|
||||
const bool use_coarse_filter_output_;
|
||||
Subtractor subtractor_;
|
||||
SuppressionGain suppression_gain_;
|
||||
ComfortNoiseGenerator cng_;
|
||||
SuppressionFilter suppression_filter_;
|
||||
RenderSignalAnalyzer render_signal_analyzer_;
|
||||
ResidualEchoEstimator residual_echo_estimator_;
|
||||
bool echo_leakage_detected_ = false;
|
||||
AecState aec_state_;
|
||||
EchoRemoverMetrics metrics_;
|
||||
std::vector<std::array<float, kFftLengthBy2>> e_old_;
|
||||
std::vector<std::array<float, kFftLengthBy2>> y_old_;
|
||||
size_t block_counter_ = 0;
|
||||
int gain_change_hangover_ = 0;
|
||||
bool refined_filter_output_last_selected_ = true;
|
||||
|
||||
std::vector<std::array<float, kFftLengthBy2>> e_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
|
||||
std::vector<FftData> Y_heap_;
|
||||
std::vector<FftData> E_heap_;
|
||||
std::vector<FftData> comfort_noise_heap_;
|
||||
std::vector<FftData> high_band_comfort_noise_heap_;
|
||||
std::vector<SubtractorOutput> subtractor_output_heap_;
|
||||
};
|
||||
|
||||
int EchoRemoverImpl::instance_count_ = 0;
|
||||
|
||||
EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels)
|
||||
: config_(config),
|
||||
fft_(),
|
||||
data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
optimization_(DetectOptimization()),
|
||||
sample_rate_hz_(sample_rate_hz),
|
||||
num_render_channels_(num_render_channels),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
use_coarse_filter_output_(
|
||||
config_.filter.enable_coarse_filter_output_usage),
|
||||
subtractor_(config,
|
||||
num_render_channels_,
|
||||
num_capture_channels_,
|
||||
data_dumper_.get(),
|
||||
optimization_),
|
||||
suppression_gain_(config_,
|
||||
optimization_,
|
||||
sample_rate_hz,
|
||||
num_capture_channels),
|
||||
cng_(config_, optimization_, num_capture_channels_),
|
||||
suppression_filter_(optimization_,
|
||||
sample_rate_hz_,
|
||||
num_capture_channels_),
|
||||
render_signal_analyzer_(config_),
|
||||
residual_echo_estimator_(config_, num_render_channels),
|
||||
aec_state_(config_, num_capture_channels_),
|
||||
e_old_(num_capture_channels_, {0.f}),
|
||||
y_old_(num_capture_channels_, {0.f}),
|
||||
e_heap_(NumChannelsOnHeap(num_capture_channels_), {0.f}),
|
||||
Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
E_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
high_band_comfort_noise_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
subtractor_output_heap_(NumChannelsOnHeap(num_capture_channels_)) {
|
||||
RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
|
||||
}
|
||||
|
||||
EchoRemoverImpl::~EchoRemoverImpl() = default;
|
||||
|
||||
void EchoRemoverImpl::GetMetrics(EchoControl::Metrics* metrics) const {
|
||||
// Echo return loss (ERL) is inverted to go from gain to attenuation.
|
||||
metrics->echo_return_loss = -10.0 * std::log10(aec_state_.ErlTimeDomain());
|
||||
metrics->echo_return_loss_enhancement =
|
||||
Log2TodB(aec_state_.FullBandErleLog2());
|
||||
}
|
||||
|
||||
void EchoRemoverImpl::ProcessCapture(
|
||||
EchoPathVariability echo_path_variability,
|
||||
bool capture_signal_saturation,
|
||||
const absl::optional<DelayEstimate>& external_delay,
|
||||
RenderBuffer* render_buffer,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture) {
|
||||
++block_counter_;
|
||||
const std::vector<std::vector<std::vector<float>>>& x =
|
||||
render_buffer->Block(0);
|
||||
std::vector<std::vector<std::vector<float>>>* y = capture;
|
||||
RTC_DCHECK(render_buffer);
|
||||
RTC_DCHECK(y);
|
||||
RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_));
|
||||
RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_));
|
||||
RTC_DCHECK_EQ(x[0].size(), num_render_channels_);
|
||||
RTC_DCHECK_EQ((*y)[0].size(), num_capture_channels_);
|
||||
RTC_DCHECK_EQ(x[0][0].size(), kBlockSize);
|
||||
RTC_DCHECK_EQ((*y)[0][0].size(), kBlockSize);
|
||||
|
||||
// Stack allocated data to use when the number of channels is low.
|
||||
std::array<std::array<float, kFftLengthBy2>, kMaxNumChannelsOnStack> e_stack;
|
||||
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
|
||||
Y2_stack;
|
||||
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
|
||||
E2_stack;
|
||||
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
|
||||
R2_stack;
|
||||
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
|
||||
S2_linear_stack;
|
||||
std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
|
||||
std::array<FftData, kMaxNumChannelsOnStack> E_stack;
|
||||
std::array<FftData, kMaxNumChannelsOnStack> comfort_noise_stack;
|
||||
std::array<FftData, kMaxNumChannelsOnStack> high_band_comfort_noise_stack;
|
||||
std::array<SubtractorOutput, kMaxNumChannelsOnStack> subtractor_output_stack;
|
||||
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2>> e(e_stack.data(),
|
||||
num_capture_channels_);
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> Y2(
|
||||
Y2_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> E2(
|
||||
E2_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
|
||||
R2_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
|
||||
S2_linear_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<FftData> E(E_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<FftData> comfort_noise(comfort_noise_stack.data(),
|
||||
num_capture_channels_);
|
||||
rtc::ArrayView<FftData> high_band_comfort_noise(
|
||||
high_band_comfort_noise_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<SubtractorOutput> subtractor_output(
|
||||
subtractor_output_stack.data(), num_capture_channels_);
|
||||
if (NumChannelsOnHeap(num_capture_channels_) > 0) {
|
||||
// If the stack-allocated space is too small, use the heap for storing the
|
||||
// microphone data.
|
||||
e = rtc::ArrayView<std::array<float, kFftLengthBy2>>(e_heap_.data(),
|
||||
num_capture_channels_);
|
||||
Y2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
|
||||
Y2_heap_.data(), num_capture_channels_);
|
||||
E2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
|
||||
E2_heap_.data(), num_capture_channels_);
|
||||
R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
|
||||
R2_heap_.data(), num_capture_channels_);
|
||||
S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
|
||||
S2_linear_heap_.data(), num_capture_channels_);
|
||||
Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_);
|
||||
E = rtc::ArrayView<FftData>(E_heap_.data(), num_capture_channels_);
|
||||
comfort_noise = rtc::ArrayView<FftData>(comfort_noise_heap_.data(),
|
||||
num_capture_channels_);
|
||||
high_band_comfort_noise = rtc::ArrayView<FftData>(
|
||||
high_band_comfort_noise_heap_.data(), num_capture_channels_);
|
||||
subtractor_output = rtc::ArrayView<SubtractorOutput>(
|
||||
subtractor_output_heap_.data(), num_capture_channels_);
|
||||
}
|
||||
|
||||
data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize,
|
||||
&(*y)[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize,
|
||||
&x[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_capture_input", (*y)[0][0]);
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_render_input", x[0][0]);
|
||||
|
||||
aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
|
||||
|
||||
if (echo_path_variability.AudioPathChanged()) {
|
||||
// Ensure that the gain change is only acted on once per frame.
|
||||
if (echo_path_variability.gain_change) {
|
||||
if (gain_change_hangover_ == 0) {
|
||||
constexpr int kMaxBlocksPerFrame = 3;
|
||||
gain_change_hangover_ = kMaxBlocksPerFrame;
|
||||
rtc::LoggingSeverity log_level =
|
||||
config_.delay.log_warning_on_delay_changes ? rtc::LS_WARNING
|
||||
: rtc::LS_VERBOSE;
|
||||
RTC_LOG_V(log_level)
|
||||
<< "Gain change detected at block " << block_counter_;
|
||||
} else {
|
||||
echo_path_variability.gain_change = false;
|
||||
}
|
||||
}
|
||||
|
||||
subtractor_.HandleEchoPathChange(echo_path_variability);
|
||||
aec_state_.HandleEchoPathChange(echo_path_variability);
|
||||
|
||||
if (echo_path_variability.delay_change !=
|
||||
EchoPathVariability::DelayAdjustment::kNone) {
|
||||
suppression_gain_.SetInitialState(true);
|
||||
}
|
||||
}
|
||||
if (gain_change_hangover_ > 0) {
|
||||
--gain_change_hangover_;
|
||||
}
|
||||
|
||||
// Analyze the render signal.
|
||||
render_signal_analyzer_.Update(*render_buffer,
|
||||
aec_state_.MinDirectPathFilterDelay());
|
||||
|
||||
// State transition.
|
||||
if (aec_state_.TransitionTriggered()) {
|
||||
subtractor_.ExitInitialState();
|
||||
suppression_gain_.SetInitialState(false);
|
||||
}
|
||||
|
||||
// Perform linear echo cancellation.
|
||||
subtractor_.Process(*render_buffer, (*y)[0], render_signal_analyzer_,
|
||||
aec_state_, subtractor_output);
|
||||
|
||||
// Compute spectra.
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
FormLinearFilterOutput(subtractor_output[ch], e[ch]);
|
||||
WindowedPaddedFft(fft_, (*y)[0][ch], y_old_[ch], &Y[ch]);
|
||||
WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]);
|
||||
LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]);
|
||||
Y[ch].Spectrum(optimization_, Y2[ch]);
|
||||
E[ch].Spectrum(optimization_, E2[ch]);
|
||||
}
|
||||
|
||||
// Optionally return the linear filter output.
|
||||
if (linear_output) {
|
||||
RTC_DCHECK_GE(1, linear_output->size());
|
||||
RTC_DCHECK_EQ(num_capture_channels_, linear_output[0].size());
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
RTC_DCHECK_EQ(kBlockSize, (*linear_output)[0][ch].size());
|
||||
std::copy(e[ch].begin(), e[ch].end(), (*linear_output)[0][ch].begin());
|
||||
}
|
||||
}
|
||||
|
||||
// Update the AEC state information.
|
||||
aec_state_.Update(external_delay, subtractor_.FilterFrequencyResponses(),
|
||||
subtractor_.FilterImpulseResponses(), *render_buffer, E2,
|
||||
Y2, subtractor_output);
|
||||
|
||||
// Choose the linear output.
|
||||
const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;
|
||||
|
||||
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &(*y)[0][0][0], 16000,
|
||||
1);
|
||||
data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);
|
||||
|
||||
// Estimate the residual echo power.
|
||||
residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
|
||||
R2);
|
||||
|
||||
// Estimate the comfort noise.
|
||||
cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
|
||||
high_band_comfort_noise);
|
||||
|
||||
// Suppressor nearend estimate.
|
||||
if (aec_state_.UsableLinearEstimate()) {
|
||||
// E2 is bound by Y2.
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
|
||||
E2[ch].begin(),
|
||||
[](float a, float b) { return std::min(a, b); });
|
||||
}
|
||||
}
|
||||
const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
|
||||
|
||||
// Suppressor echo estimate.
|
||||
const auto& echo_spectrum =
|
||||
aec_state_.UsableLinearEstimate() ? S2_linear : R2;
|
||||
|
||||
// Compute preferred gains.
|
||||
float high_bands_gain;
|
||||
std::array<float, kFftLengthBy2Plus1> G;
|
||||
suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2,
|
||||
cng_.NoiseSpectrum(), render_signal_analyzer_,
|
||||
aec_state_, x, &high_bands_gain, &G);
|
||||
|
||||
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
|
||||
high_bands_gain, Y_fft, y);
|
||||
|
||||
// Update the metrics.
|
||||
metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G);
|
||||
|
||||
// Debug outputs for the purpose of development and analysis.
|
||||
data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
|
||||
&subtractor_output[0].s_refined[0], 16000, 1);
|
||||
data_dumper_->DumpRaw("aec3_output", (*y)[0][0]);
|
||||
data_dumper_->DumpRaw("aec3_narrow_render",
|
||||
render_signal_analyzer_.NarrowPeakBand() ? 1 : 0);
|
||||
data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]);
|
||||
data_dumper_->DumpRaw("aec3_suppressor_gain", G);
|
||||
data_dumper_->DumpWav("aec3_output",
|
||||
rtc::ArrayView<const float>(&(*y)[0][0][0], kBlockSize),
|
||||
16000, 1);
|
||||
data_dumper_->DumpRaw("aec3_using_subtractor_output[0]",
|
||||
aec_state_.UseLinearFilterOutput() ? 1 : 0);
|
||||
data_dumper_->DumpRaw("aec3_E2", E2[0]);
|
||||
data_dumper_->DumpRaw("aec3_S2_linear", S2_linear[0]);
|
||||
data_dumper_->DumpRaw("aec3_Y2", Y2[0]);
|
||||
data_dumper_->DumpRaw(
|
||||
"aec3_X2", render_buffer->Spectrum(
|
||||
aec_state_.MinDirectPathFilterDelay())[/*channel=*/0]);
|
||||
data_dumper_->DumpRaw("aec3_R2", R2[0]);
|
||||
data_dumper_->DumpRaw("aec3_filter_delay",
|
||||
aec_state_.MinDirectPathFilterDelay());
|
||||
data_dumper_->DumpRaw("aec3_capture_saturation",
|
||||
aec_state_.SaturatedCapture() ? 1 : 0);
|
||||
}
|
||||
|
||||
void EchoRemoverImpl::FormLinearFilterOutput(
|
||||
const SubtractorOutput& subtractor_output,
|
||||
rtc::ArrayView<float> output) {
|
||||
RTC_DCHECK_EQ(subtractor_output.e_refined.size(), output.size());
|
||||
RTC_DCHECK_EQ(subtractor_output.e_coarse.size(), output.size());
|
||||
bool use_refined_output = true;
|
||||
if (use_coarse_filter_output_) {
|
||||
// As the output of the refined adaptive filter generally should be better
|
||||
// than the coarse filter output, add a margin and threshold for when
|
||||
// choosing the coarse filter output.
|
||||
if (subtractor_output.e2_coarse < 0.9f * subtractor_output.e2_refined &&
|
||||
subtractor_output.y2 > 30.f * 30.f * kBlockSize &&
|
||||
(subtractor_output.s2_refined > 60.f * 60.f * kBlockSize ||
|
||||
subtractor_output.s2_coarse > 60.f * 60.f * kBlockSize)) {
|
||||
use_refined_output = false;
|
||||
} else {
|
||||
// If the refined filter is diverged, choose the filter output that has
|
||||
// the lowest power.
|
||||
if (subtractor_output.e2_coarse < subtractor_output.e2_refined &&
|
||||
subtractor_output.y2 < subtractor_output.e2_refined) {
|
||||
use_refined_output = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
SignalTransition(refined_filter_output_last_selected_
|
||||
? subtractor_output.e_refined
|
||||
: subtractor_output.e_coarse,
|
||||
use_refined_output ? subtractor_output.e_refined
|
||||
: subtractor_output.e_coarse,
|
||||
output);
|
||||
refined_filter_output_last_selected_ = use_refined_output;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Factory for the concrete echo remover. The caller takes ownership of the
// returned raw pointer and is responsible for deleting it.
EchoRemover* EchoRemover::Create(const EchoCanceller3Config& config,
                                 int sample_rate_hz,
                                 size_t num_render_channels,
                                 size_t num_capture_channels) {
  return new EchoRemoverImpl(config, sample_rate_hz, num_render_channels,
                             num_capture_channels);
}
|
||||
|
||||
} // namespace webrtc
|
55
webrtc/modules/audio_processing/aec3/echo_remover.h
Normal file
55
webrtc/modules/audio_processing/aec3/echo_remover.h
Normal file
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for removing the echo from the capture signal. Pure interface;
// concrete instances are obtained via Create().
class EchoRemover {
 public:
  // Creates an echo remover. The caller takes ownership of the returned raw
  // pointer.
  static EchoRemover* Create(const EchoCanceller3Config& config,
                             int sample_rate_hz,
                             size_t num_render_channels,
                             size_t num_capture_channels);
  virtual ~EchoRemover() = default;

  // Get current metrics.
  virtual void GetMetrics(EchoControl::Metrics* metrics) const = 0;

  // Removes the echo from a block of samples from the capture signal. The
  // supplied render signal is assumed to be pre-aligned with the capture
  // signal. |linear_output| optionally receives the linear filter output;
  // |capture| carries the capture signal (bands x channels x samples layout
  // implied by the nested vectors — confirm against the implementation).
  virtual void ProcessCapture(
      EchoPathVariability echo_path_variability,
      bool capture_signal_saturation,
      const absl::optional<DelayEstimate>& external_delay,
      RenderBuffer* render_buffer,
      std::vector<std::vector<std::vector<float>>>* linear_output,
      std::vector<std::vector<std::vector<float>>>* capture) = 0;

  // Updates the status on whether echo leakage is detected in the output of
  // the echo remover.
  virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_H_
|
246
webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc
Normal file
246
webrtc/modules/audio_processing/aec3/echo_remover_metrics.cc
Normal file
@ -0,0 +1,246 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/echo_remover_metrics.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr float kOneByMetricsCollectionBlocks = 1.f / kMetricsCollectionBlocks;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Default-constructs a metric with zeroed sum, floor and ceil.
EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {}
// Constructs a metric with explicit initial sum and floor/ceil sentinels.
EchoRemoverMetrics::DbMetric::DbMetric(float sum_value,
                                       float floor_value,
                                       float ceil_value)
    : sum_value(sum_value), floor_value(floor_value), ceil_value(ceil_value) {}
|
||||
|
||||
// Accumulates |value| into the running sum and widens the observed
// [floor, ceil] range to include it.
void EchoRemoverMetrics::DbMetric::Update(float value) {
  floor_value = std::min(floor_value, value);
  ceil_value = std::max(ceil_value, value);
  sum_value += value;
}
|
||||
|
||||
// Records |value| as the instantaneous value (replacing, not accumulating,
// the sum) and widens the observed [floor, ceil] range to include it.
void EchoRemoverMetrics::DbMetric::UpdateInstant(float value) {
  floor_value = std::min(floor_value, value);
  ceil_value = std::max(ceil_value, value);
  sum_value = value;
}
|
||||
|
||||
// Starts from a clean metrics state so the first collection interval is
// well-defined.
EchoRemoverMetrics::EchoRemoverMetrics() {
  ResetMetrics();
}
|
||||
|
||||
// Restores all accumulators so a new kMetricsCollectionBlocks-long
// collection interval can start.
void EchoRemoverMetrics::ResetMetrics() {
  // ERL statistics: floor starts high (min-tracked downwards) and ceil low
  // (max-tracked upwards).
  erl_.fill(DbMetric(0.f, 10000.f, 0.000f));
  erl_time_domain_ = DbMetric(0.f, 10000.f, 0.000f);
  // ERLE statistics: note the floor starts at 0 and the ceil at 1000 here.
  erle_.fill(DbMetric(0.f, 0.f, 1000.f));
  erle_time_domain_ = DbMetric(0.f, 0.f, 1000.f);
  active_render_count_ = 0;
  saturated_capture_ = false;
}
|
||||
|
||||
// Collects per-block statistics from |aec_state| for kMetricsCollectionBlocks
// blocks, then reports them as UMA histograms spread over the following
// blocks (one histogram group per block) to amortize the logarithm cost.
// Note: |comfort_noise_spectrum| and |suppressor_gain| are currently unused.
void EchoRemoverMetrics::Update(
    const AecState& aec_state,
    const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
    const std::array<float, kFftLengthBy2Plus1>& suppressor_gain) {
  metrics_reported_ = false;
  if (++block_counter_ <= kMetricsCollectionBlocks) {
    // Collection phase: fold the current AEC state into the accumulators.
    aec3::UpdateDbMetric(aec_state.Erl(), &erl_);
    erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain());
    // Only channel 0 of the ERLE estimate is tracked.
    aec3::UpdateDbMetric(aec_state.Erle()[0], &erle_);
    erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2());
    active_render_count_ += (aec_state.ActiveRender() ? 1 : 0);
    saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture();
  } else {
    // Report the metrics over several frames in order to lower the impact of
    // the logarithms involved on the computational complexity.
    constexpr int kMetricsCollectionBlocksBy2 = kMetricsCollectionBlocks / 2;
    switch (block_counter_) {
      case kMetricsCollectionBlocks + 1:
        // ERLE, lower band: average / max / min over the interval.
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand0.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f,
                                                kOneByMetricsCollectionBlocks,
                                                erle_[0].sum_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand0.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[0].ceil_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand0.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[0].floor_value),
            0, 19, 20);
        break;
      case kMetricsCollectionBlocks + 2:
        // ERLE, upper band.
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand1.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f,
                                                kOneByMetricsCollectionBlocks,
                                                erle_[1].sum_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand1.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[1].ceil_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErleBand1.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
                                                erle_[1].floor_value),
            0, 19, 20);
        break;
      case kMetricsCollectionBlocks + 3:
        // ERL, lower band.
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand0.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f,
                                                kOneByMetricsCollectionBlocks,
                                                erl_[0].sum_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand0.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[0].ceil_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand0.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[0].floor_value),
            0, 59, 30);
        break;
      case kMetricsCollectionBlocks + 4:
        // ERL, upper band.
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand1.Average",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f,
                                                kOneByMetricsCollectionBlocks,
                                                erl_[1].sum_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand1.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[1].ceil_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.ErlBand1.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_[1].floor_value),
            0, 59, 30);
        break;
      case kMetricsCollectionBlocks + 5:
        // Boolean/state metrics. ActiveRender is reported as "mostly active"
        // (active for more than half of the collection interval).
        RTC_HISTOGRAM_BOOLEAN(
            "WebRTC.Audio.EchoCanceller.UsableLinearEstimate",
            static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0));
        RTC_HISTOGRAM_BOOLEAN(
            "WebRTC.Audio.EchoCanceller.ActiveRender",
            static_cast<int>(
                active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0));
        RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay",
                                    aec_state.MinDirectPathFilterDelay(), 0, 30,
                                    31);
        RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation",
                              static_cast<int>(saturated_capture_ ? 1 : 0));
        break;
      case kMetricsCollectionBlocks + 6:
        // Fullband ERL (instantaneous value / max / min).
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erl.Value",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_time_domain_.sum_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erl.Max",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_time_domain_.ceil_value),
            0, 59, 30);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erl.Min",
            aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
                                                erl_time_domain_.floor_value),
            0, 59, 30);
        break;
      case kMetricsCollectionBlocks + 7:
        // Fullband ERLE; the final reporting block also restarts collection.
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erle.Value",
            aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
                                                erle_time_domain_.sum_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erle.Max",
            aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
                                                erle_time_domain_.ceil_value),
            0, 19, 20);
        RTC_HISTOGRAM_COUNTS_LINEAR(
            "WebRTC.Audio.EchoCanceller.Erle.Min",
            aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
                                                erle_time_domain_.floor_value),
            0, 19, 20);
        metrics_reported_ = true;
        RTC_DCHECK_EQ(kMetricsReportingIntervalBlocks, block_counter_);
        block_counter_ = 0;
        ResetMetrics();
        break;
      default:
        RTC_NOTREACHED();
        break;
    }
  }
}
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
|
||||
std::array<EchoRemoverMetrics::DbMetric, 2>* statistic) {
|
||||
RTC_DCHECK(statistic);
|
||||
// Truncation is intended in the band width computation.
|
||||
constexpr int kNumBands = 2;
|
||||
constexpr int kBandWidth = 65 / kNumBands;
|
||||
constexpr float kOneByBandWidth = 1.f / kBandWidth;
|
||||
RTC_DCHECK_EQ(kNumBands, statistic->size());
|
||||
RTC_DCHECK_EQ(65, value.size());
|
||||
for (size_t k = 0; k < statistic->size(); ++k) {
|
||||
float average_band =
|
||||
std::accumulate(value.begin() + kBandWidth * k,
|
||||
value.begin() + kBandWidth * (k + 1), 0.f) *
|
||||
kOneByBandWidth;
|
||||
(*statistic)[k].Update(average_band);
|
||||
}
|
||||
}
|
||||
|
||||
int TransformDbMetricForReporting(bool negate,
|
||||
float min_value,
|
||||
float max_value,
|
||||
float offset,
|
||||
float scaling,
|
||||
float value) {
|
||||
float new_value = 10.f * std::log10(value * scaling + 1e-10f) + offset;
|
||||
if (negate) {
|
||||
new_value = -new_value;
|
||||
}
|
||||
return static_cast<int>(rtc::SafeClamp(new_value, min_value, max_value));
|
||||
}
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
} // namespace webrtc
|
81
webrtc/modules/audio_processing/aec3/echo_remover_metrics.h
Normal file
81
webrtc/modules/audio_processing/aec3/echo_remover_metrics.h
Normal file
@ -0,0 +1,81 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Handles the reporting of metrics for the echo remover.
class EchoRemoverMetrics {
 public:
  // Running statistic over dB-domain values: a sum (or most recent value,
  // see UpdateInstant) together with the observed minimum and maximum.
  struct DbMetric {
    DbMetric();
    DbMetric(float sum_value, float floor_value, float ceil_value);
    // Adds |value| to the sum and widens the [floor, ceil] range.
    void Update(float value);
    // Replaces the sum with |value| and widens the [floor, ceil] range.
    void UpdateInstant(float value);
    float sum_value;    // Accumulated (or most recent) value.
    float floor_value;  // Smallest value seen.
    float ceil_value;   // Largest value seen.
  };

  EchoRemoverMetrics();

  // Updates the metric with new data.
  void Update(
      const AecState& aec_state,
      const std::array<float, kFftLengthBy2Plus1>& comfort_noise_spectrum,
      const std::array<float, kFftLengthBy2Plus1>& suppressor_gain);

  // Returns true if the metrics have just been reported, otherwise false.
  bool MetricsReported() { return metrics_reported_; }

 private:
  // Resets the metrics.
  void ResetMetrics();

  int block_counter_ = 0;  // Position within the collection/report cycle.
  std::array<DbMetric, 2> erl_;  // Banded ERL statistics (2 bands).
  DbMetric erl_time_domain_;  // Fullband ERL statistic.
  std::array<DbMetric, 2> erle_;  // Banded ERLE statistics (2 bands).
  DbMetric erle_time_domain_;  // Fullband ERLE statistic.
  int active_render_count_ = 0;  // Blocks with active render in the interval.
  bool saturated_capture_ = false;  // Whether any capture block saturated.
  bool metrics_reported_ = false;  // Set on the block that emits the report.

  RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverMetrics);
};
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
// Updates a banded metric of type DbMetric with the values in the supplied
|
||||
// array.
|
||||
void UpdateDbMetric(const std::array<float, kFftLengthBy2Plus1>& value,
|
||||
std::array<EchoRemoverMetrics::DbMetric, 2>* statistic);
|
||||
|
||||
// Transforms a DbMetric from the linear domain into the logarithmic domain.
|
||||
int TransformDbMetricForReporting(bool negate,
|
||||
float min_value,
|
||||
float max_value,
|
||||
float offset,
|
||||
float scaling,
|
||||
float value);
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ECHO_REMOVER_METRICS_H_
|
146
webrtc/modules/audio_processing/aec3/erl_estimator.cc
Normal file
146
webrtc/modules/audio_processing/aec3/erl_estimator.cc
Normal file
@ -0,0 +1,146 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/erl_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <numeric>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr float kMinErl = 0.01f;
|
||||
constexpr float kMaxErl = 1000.f;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Initializes all per-bin and fullband ERL estimates to the most pessimistic
// value (kMaxErl) with no hold time.
// NOTE(review): the parameter's trailing underscore and the member's
// double underscore ("startup_phase_length_blocks__") are naming warts;
// identifiers containing "__" are reserved by the C++ standard. Renaming
// requires a coordinated header change upstream.
ErlEstimator::ErlEstimator(size_t startup_phase_length_blocks_)
    : startup_phase_length_blocks__(startup_phase_length_blocks_) {
  erl_.fill(kMaxErl);
  hold_counters_.fill(0);
  erl_time_domain_ = kMaxErl;
  hold_counter_time_domain_ = 0;
}

ErlEstimator::~ErlEstimator() = default;
|
||||
|
||||
// Restarts the startup phase. Note: only the block counter is reset; the
// ERL estimates themselves are kept.
void ErlEstimator::Reset() {
  blocks_since_reset_ = 0;
}
|
||||
|
||||
// Updates the per-bin and fullband ERL estimates from the render and capture
// spectra, using only channels whose adaptive filter has converged. ERL
// estimates decrease quickly (tracking a new minimum) and, after a 1000-block
// hold period, are doubled per block to recover, capped at kMaxErl.
void ErlEstimator::Update(
    const std::vector<bool>& converged_filters,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> render_spectra,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        capture_spectra) {
  const size_t num_capture_channels = converged_filters.size();
  RTC_DCHECK_EQ(capture_spectra.size(), num_capture_channels);

  // Corresponds to WGN of power -46 dBFS.
  constexpr float kX2Min = 44015068.0f;

  const auto first_converged_iter =
      std::find(converged_filters.begin(), converged_filters.end(), true);
  const bool any_filter_converged =
      first_converged_iter != converged_filters.end();

  // Skip updating during the startup phase or when no filter has converged.
  if (++blocks_since_reset_ < startup_phase_length_blocks__ ||
      !any_filter_converged) {
    return;
  }

  // Use the maximum spectrum across capture and the maximum across render.
  // (The capture maximum is held by value, the render maximum by view.)
  std::array<float, kFftLengthBy2Plus1> max_capture_spectrum_data;
  std::array<float, kFftLengthBy2Plus1> max_capture_spectrum =
      capture_spectra[/*channel=*/0];
  if (num_capture_channels > 1) {
    // Initialize using the first channel with a converged filter.
    const size_t first_converged =
        std::distance(converged_filters.begin(), first_converged_iter);
    RTC_DCHECK_GE(first_converged, 0);
    RTC_DCHECK_LT(first_converged, num_capture_channels);
    max_capture_spectrum_data = capture_spectra[first_converged];

    for (size_t ch = first_converged + 1; ch < num_capture_channels; ++ch) {
      if (!converged_filters[ch]) {
        continue;
      }
      // Per-bin maximum over the remaining converged channels.
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        max_capture_spectrum_data[k] =
            std::max(max_capture_spectrum_data[k], capture_spectra[ch][k]);
      }
    }
    max_capture_spectrum = max_capture_spectrum_data;
  }

  const size_t num_render_channels = render_spectra.size();
  std::array<float, kFftLengthBy2Plus1> max_render_spectrum_data;
  rtc::ArrayView<const float, kFftLengthBy2Plus1> max_render_spectrum =
      render_spectra[/*channel=*/0];
  if (num_render_channels > 1) {
    // Per-bin maximum over all render channels.
    std::copy(render_spectra[0].begin(), render_spectra[0].end(),
              max_render_spectrum_data.begin());
    for (size_t ch = 1; ch < num_render_channels; ++ch) {
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        max_render_spectrum_data[k] =
            std::max(max_render_spectrum_data[k], render_spectra[ch][k]);
      }
    }
    max_render_spectrum = max_render_spectrum_data;
  }

  const auto& X2 = max_render_spectrum;
  const auto& Y2 = max_capture_spectrum;

  // Update the estimates in a maximum statistics manner. The edge bins
  // (0 and kFftLengthBy2) are filled in from their neighbors below;
  // hold_counters_[k - 1] corresponds to erl_[k].
  for (size_t k = 1; k < kFftLengthBy2; ++k) {
    if (X2[k] > kX2Min) {
      const float new_erl = Y2[k] / X2[k];
      if (new_erl < erl_[k]) {
        hold_counters_[k - 1] = 1000;
        erl_[k] += 0.1f * (new_erl - erl_[k]);
        erl_[k] = std::max(erl_[k], kMinErl);
      }
    }
  }

  // Decay: once a bin's hold counter has expired, let its ERL double per
  // block up to kMaxErl.
  std::for_each(hold_counters_.begin(), hold_counters_.end(),
                [](int& a) { --a; });
  std::transform(hold_counters_.begin(), hold_counters_.end(), erl_.begin() + 1,
                 erl_.begin() + 1, [](int a, float b) {
                   return a > 0 ? b : std::min(kMaxErl, 2.f * b);
                 });

  // Mirror the edge bins from their nearest estimated neighbors.
  erl_[0] = erl_[1];
  erl_[kFftLengthBy2] = erl_[kFftLengthBy2 - 1];

  // Compute ERL over all frequency bins.
  const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);

  if (X2_sum > kX2Min * X2.size()) {
    const float Y2_sum = std::accumulate(Y2.begin(), Y2.end(), 0.0f);
    const float new_erl = Y2_sum / X2_sum;
    if (new_erl < erl_time_domain_) {
      hold_counter_time_domain_ = 1000;
      erl_time_domain_ += 0.1f * (new_erl - erl_time_domain_);
      erl_time_domain_ = std::max(erl_time_domain_, kMinErl);
    }
  }

  // Same hold/decay behavior for the fullband estimate.
  --hold_counter_time_domain_;
  erl_time_domain_ = (hold_counter_time_domain_ > 0)
                         ? erl_time_domain_
                         : std::min(kMaxErl, 2.f * erl_time_domain_);
}
|
||||
|
||||
} // namespace webrtc
|
57
webrtc/modules/audio_processing/aec3/erl_estimator.h
Normal file
57
webrtc/modules/audio_processing/aec3/erl_estimator.h
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Estimates the echo return loss based on the signal spectra.
class ErlEstimator {
 public:
  // |startup_phase_length_blocks_| is the number of blocks after Reset()
  // during which Update() makes no changes to the estimates.
  explicit ErlEstimator(size_t startup_phase_length_blocks_);
  ~ErlEstimator();

  // Resets the ERL estimation.
  void Reset();

  // Updates the ERL estimate.
  void Update(const std::vector<bool>& converged_filters,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  render_spectra,
              rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
                  capture_spectra);

  // Returns the most recent ERL estimate.
  const std::array<float, kFftLengthBy2Plus1>& Erl() const { return erl_; }
  float ErlTimeDomain() const { return erl_time_domain_; }

 private:
  // NOTE(review): identifiers containing a double underscore are reserved by
  // the C++ standard; this member (and the constructor parameter's trailing
  // underscore) should be renamed in a coordinated upstream change.
  const size_t startup_phase_length_blocks__;
  std::array<float, kFftLengthBy2Plus1> erl_;  // Per-bin ERL estimates.
  // Hold counters for bins 1..kFftLengthBy2-1 (entry k-1 pairs with erl_[k]).
  std::array<int, kFftLengthBy2Minus1> hold_counters_;
  float erl_time_domain_;  // Fullband ERL estimate.
  int hold_counter_time_domain_;
  size_t blocks_since_reset_ = 0;  // Startup-phase progress counter.
  RTC_DISALLOW_COPY_AND_ASSIGN(ErlEstimator);
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ERL_ESTIMATOR_H_
|
86
webrtc/modules/audio_processing/aec3/erle_estimator.cc
Normal file
86
webrtc/modules/audio_processing/aec3/erle_estimator.cc
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/erle_estimator.h"
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Builds the composite ERLE estimator. The signal-dependent estimator is
// only instantiated when the config requests more than one filter section.
ErleEstimator::ErleEstimator(size_t startup_phase_length_blocks,
                             const EchoCanceller3Config& config,
                             size_t num_capture_channels)
    : startup_phase_length_blocks_(startup_phase_length_blocks),
      fullband_erle_estimator_(config.erle, num_capture_channels),
      subband_erle_estimator_(config, num_capture_channels) {
  if (config.erle.num_sections > 1) {
    signal_dependent_erle_estimator_ =
        std::make_unique<SignalDependentErleEstimator>(config,
                                                       num_capture_channels);
  }
  // Full reset, including restarting the startup phase.
  Reset(true);
}

ErleEstimator::~ErleEstimator() = default;
|
||||
|
||||
// Resets all sub-estimators; when |delay_change| is true the startup phase is
// restarted as well, so Update() stays inert for the configured interval.
void ErleEstimator::Reset(bool delay_change) {
  fullband_erle_estimator_.Reset();
  subband_erle_estimator_.Reset();
  if (signal_dependent_erle_estimator_) {
    signal_dependent_erle_estimator_->Reset();
  }
  if (delay_change) {
    blocks_since_reset_ = 0;
  }
}
|
||||
|
||||
// Feeds one block of spectra to the subband, signal-dependent (optional) and
// fullband ERLE estimators. Does nothing during the startup phase.
void ErleEstimator::Update(
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
        filter_frequency_responses,
    rtc::ArrayView<const float, kFftLengthBy2Plus1>
        avg_render_spectrum_with_reverb,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> capture_spectra,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
        subtractor_spectra,
    const std::vector<bool>& converged_filters) {
  // All per-channel views must agree on the number of capture channels.
  RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(), capture_spectra.size());
  RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(),
                subtractor_spectra.size());
  // Conventional AEC naming: X2 = render, Y2 = capture, E2 = error spectra.
  const auto& X2_reverb = avg_render_spectrum_with_reverb;
  const auto& Y2 = capture_spectra;
  const auto& E2 = subtractor_spectra;

  if (++blocks_since_reset_ < startup_phase_length_blocks_) {
    return;
  }

  subband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);

  if (signal_dependent_erle_estimator_) {
    // Refines the subband estimate using the filter frequency responses.
    signal_dependent_erle_estimator_->Update(
        render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
        subband_erle_estimator_.Erle(), converged_filters);
  }

  fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
}
|
||||
|
||||
// Forwards debug dumping to each sub-estimator (the signal-dependent one
// only when instantiated).
void ErleEstimator::Dump(
    const std::unique_ptr<ApmDataDumper>& data_dumper) const {
  fullband_erle_estimator_.Dump(data_dumper);
  subband_erle_estimator_.Dump(data_dumper);
  if (signal_dependent_erle_estimator_) {
    signal_dependent_erle_estimator_->Dump(data_dumper);
  }
}
|
||||
|
||||
} // namespace webrtc
|
99
webrtc/modules/audio_processing/aec3/erle_estimator.h
Normal file
99
webrtc/modules/audio_processing/aec3/erle_estimator.h
Normal file
@ -0,0 +1,99 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/fullband_erle_estimator.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h"
|
||||
#include "modules/audio_processing/aec3/subband_erle_estimator.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Estimates the echo return loss enhancement. One estimate is done per subband
// and another one is done using the aggreation of energy over all the subbands.
class ErleEstimator {
 public:
  // |startup_phase_length_blocks| is the number of blocks after Reset(true)
  // during which Update() makes no changes to the estimates.
  ErleEstimator(size_t startup_phase_length_blocks,
                const EchoCanceller3Config& config,
                size_t num_capture_channels);
  ~ErleEstimator();

  // Resets the fullband ERLE estimator and the subbands ERLE estimators.
  void Reset(bool delay_change);

  // Updates the ERLE estimates.
  void Update(
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          filter_frequency_responses,
      rtc::ArrayView<const float, kFftLengthBy2Plus1>
          avg_render_spectrum_with_reverb,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          capture_spectra,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          subtractor_spectra,
      const std::vector<bool>& converged_filters);

  // Returns the most recent subband ERLE estimates. Prefers the
  // signal-dependent estimate when that estimator is instantiated.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
    return signal_dependent_erle_estimator_
               ? signal_dependent_erle_estimator_->Erle()
               : subband_erle_estimator_.Erle();
  }

  // Returns the subband ERLE that are estimated during onsets (only used for
  // testing).
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleOnsets()
      const {
    return subband_erle_estimator_.ErleOnsets();
  }

  // Returns the fullband ERLE estimate.
  float FullbandErleLog2() const {
    return fullband_erle_estimator_.FullbandErleLog2();
  }

  // Returns an estimation of the current linear filter quality based on the
  // current and past fullband ERLE estimates. The returned value is a float
  // vector with content between 0 and 1 where 1 indicates that, at this current
  // time instant, the linear filter is reaching its maximum subtraction
  // performance.
  rtc::ArrayView<const absl::optional<float>> GetInstLinearQualityEstimates()
      const {
    return fullband_erle_estimator_.GetInstLinearQualityEstimates();
  }

  void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;

 private:
  const size_t startup_phase_length_blocks_;
  FullBandErleEstimator fullband_erle_estimator_;
  SubbandErleEstimator subband_erle_estimator_;
  // Only instantiated when config.erle.num_sections > 1 (see constructor).
  std::unique_ptr<SignalDependentErleEstimator>
      signal_dependent_erle_estimator_;
  size_t blocks_since_reset_ = 0;  // Startup-phase progress counter.
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_ERLE_ESTIMATOR_H_
|
27
webrtc/modules/audio_processing/aec3/fft_buffer.cc
Normal file
27
webrtc/modules/audio_processing/aec3/fft_buffer.cc
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/fft_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
FftBuffer::FftBuffer(size_t size, size_t num_channels)
|
||||
: size(static_cast<int>(size)),
|
||||
buffer(size, std::vector<FftData>(num_channels)) {
|
||||
for (auto& block : buffer) {
|
||||
for (auto& channel_fft_data : block) {
|
||||
channel_fft_data.Clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FftBuffer::~FftBuffer() = default;
|
||||
|
||||
} // namespace webrtc
|
60
webrtc/modules/audio_processing/aec3/fft_buffer.h
Normal file
60
webrtc/modules/audio_processing/aec3/fft_buffer.h
Normal file
@ -0,0 +1,60 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Struct for bundling a circular buffer of FftData objects together with the
|
||||
// read and write indices.
|
||||
struct FftBuffer {
|
||||
FftBuffer(size_t size, size_t num_channels);
|
||||
~FftBuffer();
|
||||
|
||||
int IncIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index < size - 1 ? index + 1 : 0;
|
||||
}
|
||||
|
||||
int DecIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index > 0 ? index - 1 : size - 1;
|
||||
}
|
||||
|
||||
int OffsetIndex(int index, int offset) const {
|
||||
RTC_DCHECK_GE(buffer.size(), offset);
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return (size + index + offset) % size;
|
||||
}
|
||||
|
||||
void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
|
||||
void IncWriteIndex() { write = IncIndex(write); }
|
||||
void DecWriteIndex() { write = DecIndex(write); }
|
||||
void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
|
||||
void IncReadIndex() { read = IncIndex(read); }
|
||||
void DecReadIndex() { read = DecIndex(read); }
|
||||
|
||||
const int size;
|
||||
std::vector<std::vector<FftData>> buffer;
|
||||
int write = 0;
|
||||
int read = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_BUFFER_H_
|
104
webrtc/modules/audio_processing/aec3/fft_data.h
Normal file
104
webrtc/modules/audio_processing/aec3/fft_data.h
Normal file
@ -0,0 +1,104 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
|
||||
|
||||
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Struct that holds imaginary data produced from 128 point real-valued FFTs.
|
||||
struct FftData {
|
||||
// Copies the data in src.
|
||||
void Assign(const FftData& src) {
|
||||
std::copy(src.re.begin(), src.re.end(), re.begin());
|
||||
std::copy(src.im.begin(), src.im.end(), im.begin());
|
||||
im[0] = im[kFftLengthBy2] = 0;
|
||||
}
|
||||
|
||||
// Clears all the imaginary.
|
||||
void Clear() {
|
||||
re.fill(0.f);
|
||||
im.fill(0.f);
|
||||
}
|
||||
|
||||
// Computes the power spectrum of the data.
|
||||
void SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const;
|
||||
|
||||
// Computes the power spectrum of the data.
|
||||
void Spectrum(Aec3Optimization optimization,
|
||||
rtc::ArrayView<float> power_spectrum) const {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
|
||||
switch (optimization) {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
case Aec3Optimization::kSse2: {
|
||||
constexpr int kNumFourBinBands = kFftLengthBy2 / 4;
|
||||
constexpr int kLimit = kNumFourBinBands * 4;
|
||||
for (size_t k = 0; k < kLimit; k += 4) {
|
||||
const __m128 r = _mm_loadu_ps(&re[k]);
|
||||
const __m128 i = _mm_loadu_ps(&im[k]);
|
||||
const __m128 ii = _mm_mul_ps(i, i);
|
||||
const __m128 rr = _mm_mul_ps(r, r);
|
||||
const __m128 rrii = _mm_add_ps(rr, ii);
|
||||
_mm_storeu_ps(&power_spectrum[k], rrii);
|
||||
}
|
||||
power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
|
||||
im[kFftLengthBy2] * im[kFftLengthBy2];
|
||||
} break;
|
||||
case Aec3Optimization::kAvx2:
|
||||
SpectrumAVX2(power_spectrum);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
std::transform(re.begin(), re.end(), im.begin(), power_spectrum.begin(),
|
||||
[](float a, float b) { return a * a + b * b; });
|
||||
}
|
||||
}
|
||||
|
||||
// Copy the data from an interleaved array.
|
||||
void CopyFromPackedArray(const std::array<float, kFftLength>& v) {
|
||||
re[0] = v[0];
|
||||
re[kFftLengthBy2] = v[1];
|
||||
im[0] = im[kFftLengthBy2] = 0;
|
||||
for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
|
||||
re[k] = v[j++];
|
||||
im[k] = v[j++];
|
||||
}
|
||||
}
|
||||
|
||||
// Copies the data into an interleaved array.
|
||||
void CopyToPackedArray(std::array<float, kFftLength>* v) const {
|
||||
RTC_DCHECK(v);
|
||||
(*v)[0] = re[0];
|
||||
(*v)[1] = re[kFftLengthBy2];
|
||||
for (size_t k = 1, j = 2; k < kFftLengthBy2; ++k) {
|
||||
(*v)[j++] = re[k];
|
||||
(*v)[j++] = im[k];
|
||||
}
|
||||
}
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> re;
|
||||
std::array<float, kFftLengthBy2Plus1> im;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_FFT_DATA_H_
|
33
webrtc/modules/audio_processing/aec3/fft_data_avx2.cc
Normal file
33
webrtc/modules/audio_processing/aec3/fft_data_avx2.cc
Normal file
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "api/array_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Computes the power spectrum of the data.
|
||||
void FftData::SpectrumAVX2(rtc::ArrayView<float> power_spectrum) const {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, power_spectrum.size());
|
||||
for (size_t k = 0; k < kFftLengthBy2; k += 8) {
|
||||
__m256 r = _mm256_loadu_ps(&re[k]);
|
||||
__m256 i = _mm256_loadu_ps(&im[k]);
|
||||
__m256 ii = _mm256_mul_ps(i, i);
|
||||
ii = _mm256_fmadd_ps(r, r, ii);
|
||||
_mm256_storeu_ps(&power_spectrum[k], ii);
|
||||
}
|
||||
power_spectrum[kFftLengthBy2] = re[kFftLengthBy2] * re[kFftLengthBy2] +
|
||||
im[kFftLengthBy2] * im[kFftLengthBy2];
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
280
webrtc/modules/audio_processing/aec3/filter_analyzer.cc
Normal file
280
webrtc/modules/audio_processing/aec3/filter_analyzer.cc
Normal file
@ -0,0 +1,280 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/filter_analyzer.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <numeric>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
size_t FindPeakIndex(rtc::ArrayView<const float> filter_time_domain,
|
||||
size_t peak_index_in,
|
||||
size_t start_sample,
|
||||
size_t end_sample) {
|
||||
size_t peak_index_out = peak_index_in;
|
||||
float max_h2 =
|
||||
filter_time_domain[peak_index_out] * filter_time_domain[peak_index_out];
|
||||
for (size_t k = start_sample; k <= end_sample; ++k) {
|
||||
float tmp = filter_time_domain[k] * filter_time_domain[k];
|
||||
if (tmp > max_h2) {
|
||||
peak_index_out = k;
|
||||
max_h2 = tmp;
|
||||
}
|
||||
}
|
||||
|
||||
return peak_index_out;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
int FilterAnalyzer::instance_count_ = 0;
|
||||
|
||||
FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
bounded_erl_(config.ep_strength.bounded_erl),
|
||||
default_gain_(config.ep_strength.default_gain),
|
||||
h_highpass_(num_capture_channels,
|
||||
std::vector<float>(
|
||||
GetTimeDomainLength(config.filter.refined.length_blocks),
|
||||
0.f)),
|
||||
filter_analysis_states_(num_capture_channels,
|
||||
FilterAnalysisState(config)),
|
||||
filter_delays_blocks_(num_capture_channels, 0) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
FilterAnalyzer::~FilterAnalyzer() = default;
|
||||
|
||||
void FilterAnalyzer::Reset() {
|
||||
blocks_since_reset_ = 0;
|
||||
ResetRegion();
|
||||
for (auto& state : filter_analysis_states_) {
|
||||
state.Reset(default_gain_);
|
||||
}
|
||||
std::fill(filter_delays_blocks_.begin(), filter_delays_blocks_.end(), 0);
|
||||
}
|
||||
|
||||
void FilterAnalyzer::Update(
|
||||
rtc::ArrayView<const std::vector<float>> filters_time_domain,
|
||||
const RenderBuffer& render_buffer,
|
||||
bool* any_filter_consistent,
|
||||
float* max_echo_path_gain) {
|
||||
RTC_DCHECK(any_filter_consistent);
|
||||
RTC_DCHECK(max_echo_path_gain);
|
||||
RTC_DCHECK_EQ(filters_time_domain.size(), filter_analysis_states_.size());
|
||||
RTC_DCHECK_EQ(filters_time_domain.size(), h_highpass_.size());
|
||||
|
||||
++blocks_since_reset_;
|
||||
SetRegionToAnalyze(filters_time_domain[0].size());
|
||||
AnalyzeRegion(filters_time_domain, render_buffer);
|
||||
|
||||
// Aggregate the results for all capture channels.
|
||||
auto& st_ch0 = filter_analysis_states_[0];
|
||||
*any_filter_consistent = st_ch0.consistent_estimate;
|
||||
*max_echo_path_gain = st_ch0.gain;
|
||||
min_filter_delay_blocks_ = filter_delays_blocks_[0];
|
||||
for (size_t ch = 1; ch < filters_time_domain.size(); ++ch) {
|
||||
auto& st_ch = filter_analysis_states_[ch];
|
||||
*any_filter_consistent =
|
||||
*any_filter_consistent || st_ch.consistent_estimate;
|
||||
*max_echo_path_gain = std::max(*max_echo_path_gain, st_ch.gain);
|
||||
min_filter_delay_blocks_ =
|
||||
std::min(min_filter_delay_blocks_, filter_delays_blocks_[ch]);
|
||||
}
|
||||
}
|
||||
|
||||
void FilterAnalyzer::AnalyzeRegion(
|
||||
rtc::ArrayView<const std::vector<float>> filters_time_domain,
|
||||
const RenderBuffer& render_buffer) {
|
||||
// Preprocess the filter to avoid issues with low-frequency components in the
|
||||
// filter.
|
||||
PreProcessFilters(filters_time_domain);
|
||||
data_dumper_->DumpRaw("aec3_linear_filter_processed_td", h_highpass_[0]);
|
||||
|
||||
constexpr float kOneByBlockSize = 1.f / kBlockSize;
|
||||
for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
|
||||
RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
|
||||
RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());
|
||||
|
||||
auto& st_ch = filter_analysis_states_[ch];
|
||||
RTC_DCHECK_EQ(h_highpass_[ch].size(), filters_time_domain[ch].size());
|
||||
RTC_DCHECK_GT(h_highpass_[ch].size(), 0);
|
||||
st_ch.peak_index = std::min(st_ch.peak_index, h_highpass_[ch].size() - 1);
|
||||
|
||||
st_ch.peak_index =
|
||||
FindPeakIndex(h_highpass_[ch], st_ch.peak_index, region_.start_sample_,
|
||||
region_.end_sample_);
|
||||
filter_delays_blocks_[ch] = st_ch.peak_index >> kBlockSizeLog2;
|
||||
UpdateFilterGain(h_highpass_[ch], &st_ch);
|
||||
st_ch.filter_length_blocks =
|
||||
filters_time_domain[ch].size() * kOneByBlockSize;
|
||||
|
||||
st_ch.consistent_estimate = st_ch.consistent_filter_detector.Detect(
|
||||
h_highpass_[ch], region_,
|
||||
render_buffer.Block(-filter_delays_blocks_[ch])[0], st_ch.peak_index,
|
||||
filter_delays_blocks_[ch]);
|
||||
}
|
||||
}
|
||||
|
||||
void FilterAnalyzer::UpdateFilterGain(
|
||||
rtc::ArrayView<const float> filter_time_domain,
|
||||
FilterAnalysisState* st) {
|
||||
bool sufficient_time_to_converge =
|
||||
blocks_since_reset_ > 5 * kNumBlocksPerSecond;
|
||||
|
||||
if (sufficient_time_to_converge && st->consistent_estimate) {
|
||||
st->gain = fabsf(filter_time_domain[st->peak_index]);
|
||||
} else {
|
||||
// TODO(peah): Verify whether this check against a float is ok.
|
||||
if (st->gain) {
|
||||
st->gain = std::max(st->gain, fabsf(filter_time_domain[st->peak_index]));
|
||||
}
|
||||
}
|
||||
|
||||
if (bounded_erl_ && st->gain) {
|
||||
st->gain = std::max(st->gain, 0.01f);
|
||||
}
|
||||
}
|
||||
|
||||
void FilterAnalyzer::PreProcessFilters(
|
||||
rtc::ArrayView<const std::vector<float>> filters_time_domain) {
|
||||
for (size_t ch = 0; ch < filters_time_domain.size(); ++ch) {
|
||||
RTC_DCHECK_LT(region_.start_sample_, filters_time_domain[ch].size());
|
||||
RTC_DCHECK_LT(region_.end_sample_, filters_time_domain[ch].size());
|
||||
|
||||
RTC_DCHECK_GE(h_highpass_[ch].capacity(), filters_time_domain[ch].size());
|
||||
h_highpass_[ch].resize(filters_time_domain[ch].size());
|
||||
// Minimum phase high-pass filter with cutoff frequency at about 600 Hz.
|
||||
constexpr std::array<float, 3> h = {
|
||||
{0.7929742f, -0.36072128f, -0.47047766f}};
|
||||
|
||||
std::fill(h_highpass_[ch].begin() + region_.start_sample_,
|
||||
h_highpass_[ch].begin() + region_.end_sample_ + 1, 0.f);
|
||||
for (size_t k = std::max(h.size() - 1, region_.start_sample_);
|
||||
k <= region_.end_sample_; ++k) {
|
||||
for (size_t j = 0; j < h.size(); ++j) {
|
||||
h_highpass_[ch][k] += filters_time_domain[ch][k - j] * h[j];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void FilterAnalyzer::ResetRegion() {
|
||||
region_.start_sample_ = 0;
|
||||
region_.end_sample_ = 0;
|
||||
}
|
||||
|
||||
void FilterAnalyzer::SetRegionToAnalyze(size_t filter_size) {
|
||||
constexpr size_t kNumberBlocksToUpdate = 1;
|
||||
auto& r = region_;
|
||||
r.start_sample_ = r.end_sample_ >= filter_size - 1 ? 0 : r.end_sample_ + 1;
|
||||
r.end_sample_ =
|
||||
std::min(r.start_sample_ + kNumberBlocksToUpdate * kBlockSize - 1,
|
||||
filter_size - 1);
|
||||
|
||||
// Check range.
|
||||
RTC_DCHECK_LT(r.start_sample_, filter_size);
|
||||
RTC_DCHECK_LT(r.end_sample_, filter_size);
|
||||
RTC_DCHECK_LE(r.start_sample_, r.end_sample_);
|
||||
}
|
||||
|
||||
FilterAnalyzer::ConsistentFilterDetector::ConsistentFilterDetector(
|
||||
const EchoCanceller3Config& config)
|
||||
: active_render_threshold_(config.render_levels.active_render_limit *
|
||||
config.render_levels.active_render_limit *
|
||||
kFftLengthBy2) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
void FilterAnalyzer::ConsistentFilterDetector::Reset() {
|
||||
significant_peak_ = false;
|
||||
filter_floor_accum_ = 0.f;
|
||||
filter_secondary_peak_ = 0.f;
|
||||
filter_floor_low_limit_ = 0;
|
||||
filter_floor_high_limit_ = 0;
|
||||
consistent_estimate_counter_ = 0;
|
||||
consistent_delay_reference_ = -10;
|
||||
}
|
||||
|
||||
bool FilterAnalyzer::ConsistentFilterDetector::Detect(
|
||||
rtc::ArrayView<const float> filter_to_analyze,
|
||||
const FilterRegion& region,
|
||||
rtc::ArrayView<const std::vector<float>> x_block,
|
||||
size_t peak_index,
|
||||
int delay_blocks) {
|
||||
if (region.start_sample_ == 0) {
|
||||
filter_floor_accum_ = 0.f;
|
||||
filter_secondary_peak_ = 0.f;
|
||||
filter_floor_low_limit_ = peak_index < 64 ? 0 : peak_index - 64;
|
||||
filter_floor_high_limit_ =
|
||||
peak_index > filter_to_analyze.size() - 129 ? 0 : peak_index + 128;
|
||||
}
|
||||
|
||||
for (size_t k = region.start_sample_;
|
||||
k < std::min(region.end_sample_ + 1, filter_floor_low_limit_); ++k) {
|
||||
float abs_h = fabsf(filter_to_analyze[k]);
|
||||
filter_floor_accum_ += abs_h;
|
||||
filter_secondary_peak_ = std::max(filter_secondary_peak_, abs_h);
|
||||
}
|
||||
|
||||
for (size_t k = std::max(filter_floor_high_limit_, region.start_sample_);
|
||||
k <= region.end_sample_; ++k) {
|
||||
float abs_h = fabsf(filter_to_analyze[k]);
|
||||
filter_floor_accum_ += abs_h;
|
||||
filter_secondary_peak_ = std::max(filter_secondary_peak_, abs_h);
|
||||
}
|
||||
|
||||
if (region.end_sample_ == filter_to_analyze.size() - 1) {
|
||||
float filter_floor = filter_floor_accum_ /
|
||||
(filter_floor_low_limit_ + filter_to_analyze.size() -
|
||||
filter_floor_high_limit_);
|
||||
|
||||
float abs_peak = fabsf(filter_to_analyze[peak_index]);
|
||||
significant_peak_ = abs_peak > 10.f * filter_floor &&
|
||||
abs_peak > 2.f * filter_secondary_peak_;
|
||||
}
|
||||
|
||||
if (significant_peak_) {
|
||||
bool active_render_block = false;
|
||||
for (auto& x_channel : x_block) {
|
||||
const float x_energy = std::inner_product(
|
||||
x_channel.begin(), x_channel.end(), x_channel.begin(), 0.f);
|
||||
if (x_energy > active_render_threshold_) {
|
||||
active_render_block = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (consistent_delay_reference_ == delay_blocks) {
|
||||
if (active_render_block) {
|
||||
++consistent_estimate_counter_;
|
||||
}
|
||||
} else {
|
||||
consistent_estimate_counter_ = 0;
|
||||
consistent_delay_reference_ = delay_blocks;
|
||||
}
|
||||
}
|
||||
return consistent_estimate_counter_ > 1.5f * kNumBlocksPerSecond;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
149
webrtc/modules/audio_processing/aec3/filter_analyzer.h
Normal file
149
webrtc/modules/audio_processing/aec3/filter_analyzer.h
Normal file
@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
class RenderBuffer;
|
||||
|
||||
// Class for analyzing the properties of an adaptive filter.
|
||||
class FilterAnalyzer {
|
||||
public:
|
||||
FilterAnalyzer(const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels);
|
||||
~FilterAnalyzer();
|
||||
|
||||
FilterAnalyzer(const FilterAnalyzer&) = delete;
|
||||
FilterAnalyzer& operator=(const FilterAnalyzer&) = delete;
|
||||
|
||||
// Resets the analysis.
|
||||
void Reset();
|
||||
|
||||
// Updates the estimates with new input data.
|
||||
void Update(rtc::ArrayView<const std::vector<float>> filters_time_domain,
|
||||
const RenderBuffer& render_buffer,
|
||||
bool* any_filter_consistent,
|
||||
float* max_echo_path_gain);
|
||||
|
||||
// Returns the delay in blocks for each filter.
|
||||
rtc::ArrayView<const int> FilterDelaysBlocks() const {
|
||||
return filter_delays_blocks_;
|
||||
}
|
||||
|
||||
// Returns the minimum delay of all filters in terms of blocks.
|
||||
int MinFilterDelayBlocks() const { return min_filter_delay_blocks_; }
|
||||
|
||||
// Returns the number of blocks for the current used filter.
|
||||
int FilterLengthBlocks() const {
|
||||
return filter_analysis_states_[0].filter_length_blocks;
|
||||
}
|
||||
|
||||
// Returns the preprocessed filter.
|
||||
rtc::ArrayView<const std::vector<float>> GetAdjustedFilters() const {
|
||||
return h_highpass_;
|
||||
}
|
||||
|
||||
// Public for testing purposes only.
|
||||
void SetRegionToAnalyze(size_t filter_size);
|
||||
|
||||
private:
|
||||
struct FilterAnalysisState;
|
||||
|
||||
void AnalyzeRegion(
|
||||
rtc::ArrayView<const std::vector<float>> filters_time_domain,
|
||||
const RenderBuffer& render_buffer);
|
||||
|
||||
void UpdateFilterGain(rtc::ArrayView<const float> filters_time_domain,
|
||||
FilterAnalysisState* st);
|
||||
void PreProcessFilters(
|
||||
rtc::ArrayView<const std::vector<float>> filters_time_domain);
|
||||
|
||||
void ResetRegion();
|
||||
|
||||
struct FilterRegion {
|
||||
size_t start_sample_;
|
||||
size_t end_sample_;
|
||||
};
|
||||
|
||||
// This class checks whether the shape of the impulse response has been
|
||||
// consistent over time.
|
||||
class ConsistentFilterDetector {
|
||||
public:
|
||||
explicit ConsistentFilterDetector(const EchoCanceller3Config& config);
|
||||
void Reset();
|
||||
bool Detect(rtc::ArrayView<const float> filter_to_analyze,
|
||||
const FilterRegion& region,
|
||||
rtc::ArrayView<const std::vector<float>> x_block,
|
||||
size_t peak_index,
|
||||
int delay_blocks);
|
||||
|
||||
private:
|
||||
bool significant_peak_;
|
||||
float filter_floor_accum_;
|
||||
float filter_secondary_peak_;
|
||||
size_t filter_floor_low_limit_;
|
||||
size_t filter_floor_high_limit_;
|
||||
const float active_render_threshold_;
|
||||
size_t consistent_estimate_counter_ = 0;
|
||||
int consistent_delay_reference_ = -10;
|
||||
};
|
||||
|
||||
struct FilterAnalysisState {
|
||||
explicit FilterAnalysisState(const EchoCanceller3Config& config)
|
||||
: filter_length_blocks(config.filter.refined_initial.length_blocks),
|
||||
consistent_filter_detector(config) {
|
||||
Reset(config.ep_strength.default_gain);
|
||||
}
|
||||
|
||||
void Reset(float default_gain) {
|
||||
peak_index = 0;
|
||||
gain = default_gain;
|
||||
consistent_filter_detector.Reset();
|
||||
}
|
||||
|
||||
float gain;
|
||||
size_t peak_index;
|
||||
int filter_length_blocks;
|
||||
bool consistent_estimate = false;
|
||||
ConsistentFilterDetector consistent_filter_detector;
|
||||
};
|
||||
|
||||
static int instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const bool bounded_erl_;
|
||||
const float default_gain_;
|
||||
std::vector<std::vector<float>> h_highpass_;
|
||||
|
||||
size_t blocks_since_reset_ = 0;
|
||||
FilterRegion region_;
|
||||
|
||||
std::vector<FilterAnalysisState> filter_analysis_states_;
|
||||
std::vector<int> filter_delays_blocks_;
|
||||
|
||||
int min_filter_delay_blocks_ = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_FILTER_ANALYZER_H_
|
88
webrtc/modules/audio_processing/aec3/frame_blocker.cc
Normal file
88
webrtc/modules/audio_processing/aec3/frame_blocker.cc
Normal file
@ -0,0 +1,88 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/frame_blocker.h"
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
FrameBlocker::FrameBlocker(size_t num_bands, size_t num_channels)
|
||||
: num_bands_(num_bands),
|
||||
num_channels_(num_channels),
|
||||
buffer_(num_bands_, std::vector<std::vector<float>>(num_channels)) {
|
||||
RTC_DCHECK_LT(0, num_bands);
|
||||
RTC_DCHECK_LT(0, num_channels);
|
||||
for (auto& band : buffer_) {
|
||||
for (auto& channel : band) {
|
||||
channel.reserve(kBlockSize);
|
||||
RTC_DCHECK(channel.empty());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
FrameBlocker::~FrameBlocker() = default;
|
||||
|
||||
void FrameBlocker::InsertSubFrameAndExtractBlock(
|
||||
const std::vector<std::vector<rtc::ArrayView<float>>>& sub_frame,
|
||||
std::vector<std::vector<std::vector<float>>>* block) {
|
||||
RTC_DCHECK(block);
|
||||
RTC_DCHECK_EQ(num_bands_, block->size());
|
||||
RTC_DCHECK_EQ(num_bands_, sub_frame.size());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, (*block)[band].size());
|
||||
RTC_DCHECK_EQ(num_channels_, sub_frame[band].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_GE(kBlockSize - 16, buffer_[band][channel].size());
|
||||
RTC_DCHECK_EQ(kBlockSize, (*block)[band][channel].size());
|
||||
RTC_DCHECK_EQ(kSubFrameLength, sub_frame[band][channel].size());
|
||||
const int samples_to_block = kBlockSize - buffer_[band][channel].size();
|
||||
(*block)[band][channel].clear();
|
||||
(*block)[band][channel].insert((*block)[band][channel].begin(),
|
||||
buffer_[band][channel].begin(),
|
||||
buffer_[band][channel].end());
|
||||
(*block)[band][channel].insert(
|
||||
(*block)[band][channel].begin() + buffer_[band][channel].size(),
|
||||
sub_frame[band][channel].begin(),
|
||||
sub_frame[band][channel].begin() + samples_to_block);
|
||||
buffer_[band][channel].clear();
|
||||
buffer_[band][channel].insert(
|
||||
buffer_[band][channel].begin(),
|
||||
sub_frame[band][channel].begin() + samples_to_block,
|
||||
sub_frame[band][channel].end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool FrameBlocker::IsBlockAvailable() const {
|
||||
return kBlockSize == buffer_[0][0].size();
|
||||
}
|
||||
|
||||
void FrameBlocker::ExtractBlock(
|
||||
std::vector<std::vector<std::vector<float>>>* block) {
|
||||
RTC_DCHECK(block);
|
||||
RTC_DCHECK_EQ(num_bands_, block->size());
|
||||
RTC_DCHECK(IsBlockAvailable());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, (*block)[band].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_EQ(kBlockSize, buffer_[band][channel].size());
|
||||
RTC_DCHECK_EQ(kBlockSize, (*block)[band][channel].size());
|
||||
(*block)[band][channel].clear();
|
||||
(*block)[band][channel].insert((*block)[band][channel].begin(),
|
||||
buffer_[band][channel].begin(),
|
||||
buffer_[band][channel].end());
|
||||
buffer_[band][channel].clear();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
50
webrtc/modules/audio_processing/aec3/frame_blocker.h
Normal file
50
webrtc/modules/audio_processing/aec3/frame_blocker.h
Normal file
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for producing 64 sample multiband blocks from frames consisting of 2
|
||||
// subframes of 80 samples.
|
||||
class FrameBlocker {
|
||||
public:
|
||||
FrameBlocker(size_t num_bands, size_t num_channels);
|
||||
~FrameBlocker();
|
||||
FrameBlocker(const FrameBlocker&) = delete;
|
||||
FrameBlocker& operator=(const FrameBlocker&) = delete;
|
||||
|
||||
// Inserts one 80 sample multiband subframe from the multiband frame and
|
||||
// extracts one 64 sample multiband block.
|
||||
void InsertSubFrameAndExtractBlock(
|
||||
const std::vector<std::vector<rtc::ArrayView<float>>>& sub_frame,
|
||||
std::vector<std::vector<std::vector<float>>>* block);
|
||||
// Reports whether a multiband block of 64 samples is available for
|
||||
// extraction.
|
||||
bool IsBlockAvailable() const;
|
||||
// Extracts a multiband block of 64 samples.
|
||||
void ExtractBlock(std::vector<std::vector<std::vector<float>>>* block);
|
||||
|
||||
private:
|
||||
const size_t num_bands_;
|
||||
const size_t num_channels_;
|
||||
std::vector<std::vector<std::vector<float>>> buffer_;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_FRAME_BLOCKER_H_
|
200
webrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc
Normal file
200
webrtc/modules/audio_processing/aec3/fullband_erle_estimator.cc
Normal file
@ -0,0 +1,200 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/fullband_erle_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
constexpr float kEpsilon = 1e-3f;
|
||||
constexpr float kX2BandEnergyThreshold = 44015068.0f;
|
||||
constexpr int kBlocksToHoldErle = 100;
|
||||
constexpr int kPointsToAccumulate = 6;
|
||||
} // namespace
|
||||
|
||||
FullBandErleEstimator::FullBandErleEstimator(
|
||||
const EchoCanceller3Config::Erle& config,
|
||||
size_t num_capture_channels)
|
||||
: min_erle_log2_(FastApproxLog2f(config.min + kEpsilon)),
|
||||
max_erle_lf_log2(FastApproxLog2f(config.max_l + kEpsilon)),
|
||||
hold_counters_time_domain_(num_capture_channels, 0),
|
||||
erle_time_domain_log2_(num_capture_channels, min_erle_log2_),
|
||||
instantaneous_erle_(num_capture_channels, ErleInstantaneous(config)),
|
||||
linear_filters_qualities_(num_capture_channels) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
FullBandErleEstimator::~FullBandErleEstimator() = default;
|
||||
|
||||
void FullBandErleEstimator::Reset() {
|
||||
for (auto& instantaneous_erle_ch : instantaneous_erle_) {
|
||||
instantaneous_erle_ch.Reset();
|
||||
}
|
||||
|
||||
UpdateQualityEstimates();
|
||||
std::fill(erle_time_domain_log2_.begin(), erle_time_domain_log2_.end(),
|
||||
min_erle_log2_);
|
||||
std::fill(hold_counters_time_domain_.begin(),
|
||||
hold_counters_time_domain_.end(), 0);
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::Update(
|
||||
rtc::ArrayView<const float> X2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
|
||||
const std::vector<bool>& converged_filters) {
|
||||
for (size_t ch = 0; ch < Y2.size(); ++ch) {
|
||||
if (converged_filters[ch]) {
|
||||
// Computes the fullband ERLE.
|
||||
const float X2_sum = std::accumulate(X2.begin(), X2.end(), 0.0f);
|
||||
if (X2_sum > kX2BandEnergyThreshold * X2.size()) {
|
||||
const float Y2_sum =
|
||||
std::accumulate(Y2[ch].begin(), Y2[ch].end(), 0.0f);
|
||||
const float E2_sum =
|
||||
std::accumulate(E2[ch].begin(), E2[ch].end(), 0.0f);
|
||||
if (instantaneous_erle_[ch].Update(Y2_sum, E2_sum)) {
|
||||
hold_counters_time_domain_[ch] = kBlocksToHoldErle;
|
||||
erle_time_domain_log2_[ch] +=
|
||||
0.1f * ((instantaneous_erle_[ch].GetInstErleLog2().value()) -
|
||||
erle_time_domain_log2_[ch]);
|
||||
erle_time_domain_log2_[ch] = rtc::SafeClamp(
|
||||
erle_time_domain_log2_[ch], min_erle_log2_, max_erle_lf_log2);
|
||||
}
|
||||
}
|
||||
}
|
||||
--hold_counters_time_domain_[ch];
|
||||
if (hold_counters_time_domain_[ch] <= 0) {
|
||||
erle_time_domain_log2_[ch] =
|
||||
std::max(min_erle_log2_, erle_time_domain_log2_[ch] - 0.044f);
|
||||
}
|
||||
if (hold_counters_time_domain_[ch] == 0) {
|
||||
instantaneous_erle_[ch].ResetAccumulators();
|
||||
}
|
||||
}
|
||||
|
||||
UpdateQualityEstimates();
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::Dump(
|
||||
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
|
||||
data_dumper->DumpRaw("aec3_fullband_erle_log2", FullbandErleLog2());
|
||||
instantaneous_erle_[0].Dump(data_dumper);
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::UpdateQualityEstimates() {
|
||||
for (size_t ch = 0; ch < instantaneous_erle_.size(); ++ch) {
|
||||
linear_filters_qualities_[ch] =
|
||||
instantaneous_erle_[ch].GetQualityEstimate();
|
||||
}
|
||||
}
|
||||
|
||||
FullBandErleEstimator::ErleInstantaneous::ErleInstantaneous(
|
||||
const EchoCanceller3Config::Erle& config)
|
||||
: clamp_inst_quality_to_zero_(config.clamp_quality_estimate_to_zero),
|
||||
clamp_inst_quality_to_one_(config.clamp_quality_estimate_to_one) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
FullBandErleEstimator::ErleInstantaneous::~ErleInstantaneous() = default;
|
||||
|
||||
bool FullBandErleEstimator::ErleInstantaneous::Update(const float Y2_sum,
|
||||
const float E2_sum) {
|
||||
bool update_estimates = false;
|
||||
E2_acum_ += E2_sum;
|
||||
Y2_acum_ += Y2_sum;
|
||||
num_points_++;
|
||||
if (num_points_ == kPointsToAccumulate) {
|
||||
if (E2_acum_ > 0.f) {
|
||||
update_estimates = true;
|
||||
erle_log2_ = FastApproxLog2f(Y2_acum_ / E2_acum_ + kEpsilon);
|
||||
}
|
||||
num_points_ = 0;
|
||||
E2_acum_ = 0.f;
|
||||
Y2_acum_ = 0.f;
|
||||
}
|
||||
|
||||
if (update_estimates) {
|
||||
UpdateMaxMin();
|
||||
UpdateQualityEstimate();
|
||||
}
|
||||
return update_estimates;
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::ErleInstantaneous::Reset() {
|
||||
ResetAccumulators();
|
||||
max_erle_log2_ = -10.f; // -30 dB.
|
||||
min_erle_log2_ = 33.f; // 100 dB.
|
||||
inst_quality_estimate_ = 0.f;
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::ErleInstantaneous::ResetAccumulators() {
|
||||
erle_log2_ = absl::nullopt;
|
||||
inst_quality_estimate_ = 0.f;
|
||||
num_points_ = 0;
|
||||
E2_acum_ = 0.f;
|
||||
Y2_acum_ = 0.f;
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::ErleInstantaneous::Dump(
|
||||
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
|
||||
data_dumper->DumpRaw("aec3_fullband_erle_inst_log2",
|
||||
erle_log2_ ? *erle_log2_ : -10.f);
|
||||
data_dumper->DumpRaw(
|
||||
"aec3_erle_instantaneous_quality",
|
||||
GetQualityEstimate() ? GetQualityEstimate().value() : 0.f);
|
||||
data_dumper->DumpRaw("aec3_fullband_erle_max_log2", max_erle_log2_);
|
||||
data_dumper->DumpRaw("aec3_fullband_erle_min_log2", min_erle_log2_);
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::ErleInstantaneous::UpdateMaxMin() {
|
||||
RTC_DCHECK(erle_log2_);
|
||||
if (erle_log2_.value() > max_erle_log2_) {
|
||||
max_erle_log2_ = erle_log2_.value();
|
||||
} else {
|
||||
max_erle_log2_ -= 0.0004; // Forget factor, approx 1dB every 3 sec.
|
||||
}
|
||||
|
||||
if (erle_log2_.value() < min_erle_log2_) {
|
||||
min_erle_log2_ = erle_log2_.value();
|
||||
} else {
|
||||
min_erle_log2_ += 0.0004; // Forget factor, approx 1dB every 3 sec.
|
||||
}
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::ErleInstantaneous::UpdateQualityEstimate() {
|
||||
const float alpha = 0.07f;
|
||||
float quality_estimate = 0.f;
|
||||
RTC_DCHECK(erle_log2_);
|
||||
// TODO(peah): Currently, the estimate can become be less than 0; this should
|
||||
// be corrected.
|
||||
if (max_erle_log2_ > min_erle_log2_) {
|
||||
quality_estimate = (erle_log2_.value() - min_erle_log2_) /
|
||||
(max_erle_log2_ - min_erle_log2_);
|
||||
}
|
||||
if (quality_estimate > inst_quality_estimate_) {
|
||||
inst_quality_estimate_ = quality_estimate;
|
||||
} else {
|
||||
inst_quality_estimate_ +=
|
||||
alpha * (quality_estimate - inst_quality_estimate_);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
118
webrtc/modules/audio_processing/aec3/fullband_erle_estimator.h
Normal file
118
webrtc/modules/audio_processing/aec3/fullband_erle_estimator.h
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Estimates the echo return loss enhancement using the energy of all the
|
||||
// freuquency bands.
|
||||
class FullBandErleEstimator {
|
||||
public:
|
||||
FullBandErleEstimator(const EchoCanceller3Config::Erle& config,
|
||||
size_t num_capture_channels);
|
||||
~FullBandErleEstimator();
|
||||
// Resets the ERLE estimator.
|
||||
void Reset();
|
||||
|
||||
// Updates the ERLE estimator.
|
||||
void Update(rtc::ArrayView<const float> X2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
|
||||
const std::vector<bool>& converged_filters);
|
||||
|
||||
// Returns the fullband ERLE estimates in log2 units.
|
||||
float FullbandErleLog2() const {
|
||||
float min_erle = erle_time_domain_log2_[0];
|
||||
for (size_t ch = 1; ch < erle_time_domain_log2_.size(); ++ch) {
|
||||
min_erle = std::min(min_erle, erle_time_domain_log2_[ch]);
|
||||
}
|
||||
return min_erle;
|
||||
}
|
||||
|
||||
// Returns an estimation of the current linear filter quality. It returns a
|
||||
// float number between 0 and 1 mapping 1 to the highest possible quality.
|
||||
rtc::ArrayView<const absl::optional<float>> GetInstLinearQualityEstimates()
|
||||
const {
|
||||
return linear_filters_qualities_;
|
||||
}
|
||||
|
||||
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
|
||||
|
||||
private:
|
||||
void UpdateQualityEstimates();
|
||||
|
||||
class ErleInstantaneous {
|
||||
public:
|
||||
explicit ErleInstantaneous(const EchoCanceller3Config::Erle& config);
|
||||
~ErleInstantaneous();
|
||||
|
||||
// Updates the estimator with a new point, returns true
|
||||
// if the instantaneous ERLE was updated due to having enough
|
||||
// points for performing the estimate.
|
||||
bool Update(const float Y2_sum, const float E2_sum);
|
||||
// Resets the instantaneous ERLE estimator to its initial state.
|
||||
void Reset();
|
||||
// Resets the members related with an instantaneous estimate.
|
||||
void ResetAccumulators();
|
||||
// Returns the instantaneous ERLE in log2 units.
|
||||
absl::optional<float> GetInstErleLog2() const { return erle_log2_; }
|
||||
// Gets an indication between 0 and 1 of the performance of the linear
|
||||
// filter for the current time instant.
|
||||
absl::optional<float> GetQualityEstimate() const {
|
||||
if (erle_log2_) {
|
||||
float value = inst_quality_estimate_;
|
||||
if (clamp_inst_quality_to_zero_) {
|
||||
value = std::max(0.f, value);
|
||||
}
|
||||
if (clamp_inst_quality_to_one_) {
|
||||
value = std::min(1.f, value);
|
||||
}
|
||||
return absl::optional<float>(value);
|
||||
}
|
||||
return absl::nullopt;
|
||||
}
|
||||
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
|
||||
|
||||
private:
|
||||
void UpdateMaxMin();
|
||||
void UpdateQualityEstimate();
|
||||
const bool clamp_inst_quality_to_zero_;
|
||||
const bool clamp_inst_quality_to_one_;
|
||||
absl::optional<float> erle_log2_;
|
||||
float inst_quality_estimate_;
|
||||
float max_erle_log2_;
|
||||
float min_erle_log2_;
|
||||
float Y2_acum_;
|
||||
float E2_acum_;
|
||||
int num_points_;
|
||||
};
|
||||
|
||||
const float min_erle_log2_;
|
||||
const float max_erle_lf_log2;
|
||||
std::vector<int> hold_counters_time_domain_;
|
||||
std::vector<float> erle_time_domain_log2_;
|
||||
std::vector<ErleInstantaneous> instantaneous_erle_;
|
||||
std::vector<absl::optional<float>> linear_filters_qualities_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_FULLBAND_ERLE_ESTIMATOR_H_
|
464
webrtc/modules/audio_processing/aec3/matched_filter.cc
Normal file
464
webrtc/modules/audio_processing/aec3/matched_filter.cc
Normal file
@ -0,0 +1,464 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
|
||||
// Defines WEBRTC_ARCH_X86_FAMILY, used below.
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
#include <arm_neon.h>
|
||||
#endif
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
#include <emmintrin.h>
|
||||
#endif
|
||||
#include <algorithm>
|
||||
#include <cstddef>
|
||||
#include <initializer_list>
|
||||
#include <iterator>
|
||||
#include <numeric>
|
||||
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
|
||||
void MatchedFilterCore_NEON(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum) {
|
||||
const int h_size = static_cast<int>(h.size());
|
||||
const int x_size = static_cast<int>(x.size());
|
||||
RTC_DCHECK_EQ(0, h_size % 4);
|
||||
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
|
||||
RTC_DCHECK_GT(x_size, x_start_index);
|
||||
const float* x_p = &x[x_start_index];
|
||||
const float* h_p = &h[0];
|
||||
|
||||
// Initialize values for the accumulation.
|
||||
float32x4_t s_128 = vdupq_n_f32(0);
|
||||
float32x4_t x2_sum_128 = vdupq_n_f32(0);
|
||||
float x2_sum = 0.f;
|
||||
float s = 0;
|
||||
|
||||
// Compute loop chunk sizes until, and after, the wraparound of the circular
|
||||
// buffer for x.
|
||||
const int chunk1 =
|
||||
std::min(h_size, static_cast<int>(x_size - x_start_index));
|
||||
|
||||
// Perform the loop in two chunks.
|
||||
const int chunk2 = h_size - chunk1;
|
||||
for (int limit : {chunk1, chunk2}) {
|
||||
// Perform 128 bit vector operations.
|
||||
const int limit_by_4 = limit >> 2;
|
||||
for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
|
||||
// Load the data into 128 bit vectors.
|
||||
const float32x4_t x_k = vld1q_f32(x_p);
|
||||
const float32x4_t h_k = vld1q_f32(h_p);
|
||||
// Compute and accumulate x * x and h * x.
|
||||
x2_sum_128 = vmlaq_f32(x2_sum_128, x_k, x_k);
|
||||
s_128 = vmlaq_f32(s_128, h_k, x_k);
|
||||
}
|
||||
|
||||
// Perform non-vector operations for any remaining items.
|
||||
for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
|
||||
const float x_k = *x_p;
|
||||
x2_sum += x_k * x_k;
|
||||
s += *h_p * x_k;
|
||||
}
|
||||
|
||||
x_p = &x[0];
|
||||
}
|
||||
|
||||
// Combine the accumulated vector and scalar values.
|
||||
float* v = reinterpret_cast<float*>(&x2_sum_128);
|
||||
x2_sum += v[0] + v[1] + v[2] + v[3];
|
||||
v = reinterpret_cast<float*>(&s_128);
|
||||
s += v[0] + v[1] + v[2] + v[3];
|
||||
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s;
|
||||
const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
|
||||
(*error_sum) += e * e;
|
||||
|
||||
// Update the matched filter estimate in an NLMS manner.
|
||||
if (x2_sum > x2_sum_threshold && !saturation) {
|
||||
RTC_DCHECK_LT(0.f, x2_sum);
|
||||
const float alpha = smoothing * e / x2_sum;
|
||||
const float32x4_t alpha_128 = vmovq_n_f32(alpha);
|
||||
|
||||
// filter = filter + smoothing * (y - filter * x) * x / x * x.
|
||||
float* h_p = &h[0];
|
||||
x_p = &x[x_start_index];
|
||||
|
||||
// Perform the loop in two chunks.
|
||||
for (int limit : {chunk1, chunk2}) {
|
||||
// Perform 128 bit vector operations.
|
||||
const int limit_by_4 = limit >> 2;
|
||||
for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
|
||||
// Load the data into 128 bit vectors.
|
||||
float32x4_t h_k = vld1q_f32(h_p);
|
||||
const float32x4_t x_k = vld1q_f32(x_p);
|
||||
// Compute h = h + alpha * x.
|
||||
h_k = vmlaq_f32(h_k, alpha_128, x_k);
|
||||
|
||||
// Store the result.
|
||||
vst1q_f32(h_p, h_k);
|
||||
}
|
||||
|
||||
// Perform non-vector operations for any remaining items.
|
||||
for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
|
||||
*h_p += alpha * *x_p;
|
||||
}
|
||||
|
||||
x_p = &x[0];
|
||||
}
|
||||
|
||||
*filters_updated = true;
|
||||
}
|
||||
|
||||
x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
|
||||
void MatchedFilterCore_SSE2(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum) {
|
||||
const int h_size = static_cast<int>(h.size());
|
||||
const int x_size = static_cast<int>(x.size());
|
||||
RTC_DCHECK_EQ(0, h_size % 4);
|
||||
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
|
||||
RTC_DCHECK_GT(x_size, x_start_index);
|
||||
const float* x_p = &x[x_start_index];
|
||||
const float* h_p = &h[0];
|
||||
|
||||
// Initialize values for the accumulation.
|
||||
__m128 s_128 = _mm_set1_ps(0);
|
||||
__m128 x2_sum_128 = _mm_set1_ps(0);
|
||||
float x2_sum = 0.f;
|
||||
float s = 0;
|
||||
|
||||
// Compute loop chunk sizes until, and after, the wraparound of the circular
|
||||
// buffer for x.
|
||||
const int chunk1 =
|
||||
std::min(h_size, static_cast<int>(x_size - x_start_index));
|
||||
|
||||
// Perform the loop in two chunks.
|
||||
const int chunk2 = h_size - chunk1;
|
||||
for (int limit : {chunk1, chunk2}) {
|
||||
// Perform 128 bit vector operations.
|
||||
const int limit_by_4 = limit >> 2;
|
||||
for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
|
||||
// Load the data into 128 bit vectors.
|
||||
const __m128 x_k = _mm_loadu_ps(x_p);
|
||||
const __m128 h_k = _mm_loadu_ps(h_p);
|
||||
const __m128 xx = _mm_mul_ps(x_k, x_k);
|
||||
// Compute and accumulate x * x and h * x.
|
||||
x2_sum_128 = _mm_add_ps(x2_sum_128, xx);
|
||||
const __m128 hx = _mm_mul_ps(h_k, x_k);
|
||||
s_128 = _mm_add_ps(s_128, hx);
|
||||
}
|
||||
|
||||
// Perform non-vector operations for any remaining items.
|
||||
for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
|
||||
const float x_k = *x_p;
|
||||
x2_sum += x_k * x_k;
|
||||
s += *h_p * x_k;
|
||||
}
|
||||
|
||||
x_p = &x[0];
|
||||
}
|
||||
|
||||
// Combine the accumulated vector and scalar values.
|
||||
float* v = reinterpret_cast<float*>(&x2_sum_128);
|
||||
x2_sum += v[0] + v[1] + v[2] + v[3];
|
||||
v = reinterpret_cast<float*>(&s_128);
|
||||
s += v[0] + v[1] + v[2] + v[3];
|
||||
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s;
|
||||
const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
|
||||
(*error_sum) += e * e;
|
||||
|
||||
// Update the matched filter estimate in an NLMS manner.
|
||||
if (x2_sum > x2_sum_threshold && !saturation) {
|
||||
RTC_DCHECK_LT(0.f, x2_sum);
|
||||
const float alpha = smoothing * e / x2_sum;
|
||||
const __m128 alpha_128 = _mm_set1_ps(alpha);
|
||||
|
||||
// filter = filter + smoothing * (y - filter * x) * x / x * x.
|
||||
float* h_p = &h[0];
|
||||
x_p = &x[x_start_index];
|
||||
|
||||
// Perform the loop in two chunks.
|
||||
for (int limit : {chunk1, chunk2}) {
|
||||
// Perform 128 bit vector operations.
|
||||
const int limit_by_4 = limit >> 2;
|
||||
for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
|
||||
// Load the data into 128 bit vectors.
|
||||
__m128 h_k = _mm_loadu_ps(h_p);
|
||||
const __m128 x_k = _mm_loadu_ps(x_p);
|
||||
|
||||
// Compute h = h + alpha * x.
|
||||
const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k);
|
||||
h_k = _mm_add_ps(h_k, alpha_x);
|
||||
|
||||
// Store the result.
|
||||
_mm_storeu_ps(h_p, h_k);
|
||||
}
|
||||
|
||||
// Perform non-vector operations for any remaining items.
|
||||
for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
|
||||
*h_p += alpha * *x_p;
|
||||
}
|
||||
|
||||
x_p = &x[0];
|
||||
}
|
||||
|
||||
*filters_updated = true;
|
||||
}
|
||||
|
||||
x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
void MatchedFilterCore(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum) {
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
float x2_sum = 0.f;
|
||||
float s = 0;
|
||||
size_t x_index = x_start_index;
|
||||
for (size_t k = 0; k < h.size(); ++k) {
|
||||
x2_sum += x[x_index] * x[x_index];
|
||||
s += h[k] * x[x_index];
|
||||
x_index = x_index < (x.size() - 1) ? x_index + 1 : 0;
|
||||
}
|
||||
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s;
|
||||
const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
|
||||
(*error_sum) += e * e;
|
||||
|
||||
// Update the matched filter estimate in an NLMS manner.
|
||||
if (x2_sum > x2_sum_threshold && !saturation) {
|
||||
RTC_DCHECK_LT(0.f, x2_sum);
|
||||
const float alpha = smoothing * e / x2_sum;
|
||||
|
||||
// filter = filter + smoothing * (y - filter * x) * x / x * x.
|
||||
size_t x_index = x_start_index;
|
||||
for (size_t k = 0; k < h.size(); ++k) {
|
||||
h[k] += alpha * x[x_index];
|
||||
x_index = x_index < (x.size() - 1) ? x_index + 1 : 0;
|
||||
}
|
||||
*filters_updated = true;
|
||||
}
|
||||
|
||||
x_start_index = x_start_index > 0 ? x_start_index - 1 : x.size() - 1;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper,
|
||||
Aec3Optimization optimization,
|
||||
size_t sub_block_size,
|
||||
size_t window_size_sub_blocks,
|
||||
int num_matched_filters,
|
||||
size_t alignment_shift_sub_blocks,
|
||||
float excitation_limit,
|
||||
float smoothing,
|
||||
float matching_filter_threshold)
|
||||
: data_dumper_(data_dumper),
|
||||
optimization_(optimization),
|
||||
sub_block_size_(sub_block_size),
|
||||
filter_intra_lag_shift_(alignment_shift_sub_blocks * sub_block_size_),
|
||||
filters_(
|
||||
num_matched_filters,
|
||||
std::vector<float>(window_size_sub_blocks * sub_block_size_, 0.f)),
|
||||
lag_estimates_(num_matched_filters),
|
||||
filters_offsets_(num_matched_filters, 0),
|
||||
excitation_limit_(excitation_limit),
|
||||
smoothing_(smoothing),
|
||||
matching_filter_threshold_(matching_filter_threshold) {
|
||||
RTC_DCHECK(data_dumper);
|
||||
RTC_DCHECK_LT(0, window_size_sub_blocks);
|
||||
RTC_DCHECK((kBlockSize % sub_block_size) == 0);
|
||||
RTC_DCHECK((sub_block_size % 4) == 0);
|
||||
}
|
||||
|
||||
MatchedFilter::~MatchedFilter() = default;
|
||||
|
||||
void MatchedFilter::Reset() {
|
||||
for (auto& f : filters_) {
|
||||
std::fill(f.begin(), f.end(), 0.f);
|
||||
}
|
||||
|
||||
for (auto& l : lag_estimates_) {
|
||||
l = MatchedFilter::LagEstimate();
|
||||
}
|
||||
}
|
||||
|
||||
void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const float> capture) {
|
||||
RTC_DCHECK_EQ(sub_block_size_, capture.size());
|
||||
auto& y = capture;
|
||||
|
||||
const float x2_sum_threshold =
|
||||
filters_[0].size() * excitation_limit_ * excitation_limit_;
|
||||
|
||||
// Apply all matched filters.
|
||||
size_t alignment_shift = 0;
|
||||
for (size_t n = 0; n < filters_.size(); ++n) {
|
||||
float error_sum = 0.f;
|
||||
bool filters_updated = false;
|
||||
|
||||
size_t x_start_index =
|
||||
(render_buffer.read + alignment_shift + sub_block_size_ - 1) %
|
||||
render_buffer.buffer.size();
|
||||
|
||||
switch (optimization_) {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
case Aec3Optimization::kSse2:
|
||||
aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold,
|
||||
smoothing_, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum);
|
||||
break;
|
||||
case Aec3Optimization::kAvx2:
|
||||
aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold,
|
||||
smoothing_, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum);
|
||||
break;
|
||||
#endif
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
case Aec3Optimization::kNeon:
|
||||
aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold,
|
||||
smoothing_, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing_,
|
||||
render_buffer.buffer, y, filters_[n],
|
||||
&filters_updated, &error_sum);
|
||||
}
|
||||
|
||||
// Compute anchor for the matched filter error.
|
||||
const float error_sum_anchor =
|
||||
std::inner_product(y.begin(), y.end(), y.begin(), 0.f);
|
||||
|
||||
// Estimate the lag in the matched filter as the distance to the portion in
|
||||
// the filter that contributes the most to the matched filter output. This
|
||||
// is detected as the peak of the matched filter.
|
||||
const size_t lag_estimate = std::distance(
|
||||
filters_[n].begin(),
|
||||
std::max_element(
|
||||
filters_[n].begin(), filters_[n].end(),
|
||||
[](float a, float b) -> bool { return a * a < b * b; }));
|
||||
|
||||
// Update the lag estimates for the matched filter.
|
||||
lag_estimates_[n] = LagEstimate(
|
||||
error_sum_anchor - error_sum,
|
||||
(lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) &&
|
||||
error_sum < matching_filter_threshold_ * error_sum_anchor),
|
||||
lag_estimate + alignment_shift, filters_updated);
|
||||
|
||||
RTC_DCHECK_GE(10, filters_.size());
|
||||
switch (n) {
|
||||
case 0:
|
||||
data_dumper_->DumpRaw("aec3_correlator_0_h", filters_[0]);
|
||||
break;
|
||||
case 1:
|
||||
data_dumper_->DumpRaw("aec3_correlator_1_h", filters_[1]);
|
||||
break;
|
||||
case 2:
|
||||
data_dumper_->DumpRaw("aec3_correlator_2_h", filters_[2]);
|
||||
break;
|
||||
case 3:
|
||||
data_dumper_->DumpRaw("aec3_correlator_3_h", filters_[3]);
|
||||
break;
|
||||
case 4:
|
||||
data_dumper_->DumpRaw("aec3_correlator_4_h", filters_[4]);
|
||||
break;
|
||||
case 5:
|
||||
data_dumper_->DumpRaw("aec3_correlator_5_h", filters_[5]);
|
||||
break;
|
||||
case 6:
|
||||
data_dumper_->DumpRaw("aec3_correlator_6_h", filters_[6]);
|
||||
break;
|
||||
case 7:
|
||||
data_dumper_->DumpRaw("aec3_correlator_7_h", filters_[7]);
|
||||
break;
|
||||
case 8:
|
||||
data_dumper_->DumpRaw("aec3_correlator_8_h", filters_[8]);
|
||||
break;
|
||||
case 9:
|
||||
data_dumper_->DumpRaw("aec3_correlator_9_h", filters_[9]);
|
||||
break;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
}
|
||||
|
||||
alignment_shift += filter_intra_lag_shift_;
|
||||
}
|
||||
}
|
||||
|
||||
void MatchedFilter::LogFilterProperties(int sample_rate_hz,
|
||||
size_t shift,
|
||||
size_t downsampling_factor) const {
|
||||
size_t alignment_shift = 0;
|
||||
constexpr int kFsBy1000 = 16;
|
||||
for (size_t k = 0; k < filters_.size(); ++k) {
|
||||
int start = static_cast<int>(alignment_shift * downsampling_factor);
|
||||
int end = static_cast<int>((alignment_shift + filters_[k].size()) *
|
||||
downsampling_factor);
|
||||
RTC_LOG(LS_VERBOSE) << "Filter " << k << ": start: "
|
||||
<< (start - static_cast<int>(shift)) / kFsBy1000
|
||||
<< " ms, end: "
|
||||
<< (end - static_cast<int>(shift)) / kFsBy1000
|
||||
<< " ms.";
|
||||
alignment_shift += filter_intra_lag_shift_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
149
webrtc/modules/audio_processing/aec3/matched_filter.h
Normal file
149
webrtc/modules/audio_processing/aec3/matched_filter.h
Normal file
@ -0,0 +1,149 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
struct DownsampledRenderBuffer;
|
||||
|
||||
namespace aec3 {
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
|
||||
// Filter core for the matched filter that is optimized for NEON.
|
||||
void MatchedFilterCore_NEON(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
|
||||
#endif
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
|
||||
// Filter core for the matched filter that is optimized for SSE2.
|
||||
void MatchedFilterCore_SSE2(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
|
||||
// Filter core for the matched filter that is optimized for AVX2.
|
||||
void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
|
||||
#endif
|
||||
|
||||
// Filter core for the matched filter.
|
||||
void MatchedFilterCore(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
// Produces recursively updated cross-correlation estimates for several signal
// shifts where the intra-shift spacing is uniform.
class MatchedFilter {
 public:
  // Stores properties for the lag estimate corresponding to a particular signal
  // shift.
  struct LagEstimate {
    LagEstimate() = default;
    LagEstimate(float accuracy, bool reliable, size_t lag, bool updated)
        : accuracy(accuracy), reliable(reliable), lag(lag), updated(updated) {}

    // Strength measure for the estimate; larger means more trustworthy.
    float accuracy = 0.f;
    // True when the estimate fulfils the internal reliability criteria.
    bool reliable = false;
    // Estimated lag, in downsampled samples.
    size_t lag = 0;
    // True when the underlying filter was adapted in the latest Update() call.
    bool updated = false;
  };

  MatchedFilter(ApmDataDumper* data_dumper,
                Aec3Optimization optimization,
                size_t sub_block_size,
                size_t window_size_sub_blocks,
                int num_matched_filters,
                size_t alignment_shift_sub_blocks,
                float excitation_limit,
                float smoothing,
                float matching_filter_threshold);

  MatchedFilter() = delete;
  MatchedFilter(const MatchedFilter&) = delete;
  MatchedFilter& operator=(const MatchedFilter&) = delete;

  ~MatchedFilter();

  // Updates the correlation with the values in the capture buffer.
  void Update(const DownsampledRenderBuffer& render_buffer,
              rtc::ArrayView<const float> capture);

  // Resets the matched filter.
  void Reset();

  // Returns the current lag estimates.
  rtc::ArrayView<const MatchedFilter::LagEstimate> GetLagEstimates() const {
    return lag_estimates_;
  }

  // Returns the maximum filter lag, i.e. the largest delay (in downsampled
  // samples) that any of the shifted filters can represent.
  size_t GetMaxFilterLag() const {
    return filters_.size() * filter_intra_lag_shift_ + filters_[0].size();
  }

  // Log matched filter properties.
  void LogFilterProperties(int sample_rate_hz,
                           size_t shift,
                           size_t downsampling_factor) const;

 private:
  ApmDataDumper* const data_dumper_;
  const Aec3Optimization optimization_;
  const size_t sub_block_size_;
  // Uniform lag spacing between two consecutive matched filters.
  const size_t filter_intra_lag_shift_;
  // One adaptive FIR filter per probed alignment shift.
  std::vector<std::vector<float>> filters_;
  std::vector<LagEstimate> lag_estimates_;
  std::vector<size_t> filters_offsets_;
  // Minimum render excitation required before a filter is adapted.
  const float excitation_limit_;
  const float smoothing_;
  const float matching_filter_threshold_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_H_
|
132
webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
Normal file
132
webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
Normal file
@ -0,0 +1,132 @@
|
||||
/*
|
||||
* Copyright (c) 2020 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
// AVX2/FMA variant of the matched-filter core. For each capture sample y[i] it
// computes the filter output s = h * x over the circular render buffer x (read
// starting at x_start_index), accumulates the squared prediction error into
// *error_sum, and adapts h in an NLMS manner when the render energy exceeds
// x2_sum_threshold and the capture sample is not saturated.
void MatchedFilterCore_AVX2(size_t x_start_index,
                            float x2_sum_threshold,
                            float smoothing,
                            rtc::ArrayView<const float> x,
                            rtc::ArrayView<const float> y,
                            rtc::ArrayView<float> h,
                            bool* filters_updated,
                            float* error_sum) {
  const int h_size = static_cast<int>(h.size());
  const int x_size = static_cast<int>(x.size());
  // The vector loops below consume 8 floats per iteration.
  RTC_DCHECK_EQ(0, h_size % 8);

  // Process for all samples in the sub-block.
  for (size_t i = 0; i < y.size(); ++i) {
    // Apply the matched filter as filter * x, and compute x * x.

    RTC_DCHECK_GT(x_size, x_start_index);
    const float* x_p = &x[x_start_index];
    const float* h_p = &h[0];

    // Initialize values for the accumulation.
    __m256 s_256 = _mm256_set1_ps(0);
    __m256 x2_sum_256 = _mm256_set1_ps(0);
    float x2_sum = 0.f;
    float s = 0;

    // Compute loop chunk sizes until, and after, the wraparound of the circular
    // buffer for x.
    const int chunk1 =
        std::min(h_size, static_cast<int>(x_size - x_start_index));

    // Perform the loop in two chunks.
    const int chunk2 = h_size - chunk1;
    for (int limit : {chunk1, chunk2}) {
      // Perform 256 bit vector operations.
      const int limit_by_8 = limit >> 3;
      for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) {
        // Load the data into 256 bit vectors.
        __m256 x_k = _mm256_loadu_ps(x_p);
        __m256 h_k = _mm256_loadu_ps(h_p);
        // Compute and accumulate x * x and h * x.
        x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256);
        s_256 = _mm256_fmadd_ps(h_k, x_k, s_256);
      }

      // Perform non-vector operations for any remaining items.
      for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) {
        const float x_k = *x_p;
        x2_sum += x_k * x_k;
        s += *h_p * x_k;
      }

      // The second chunk continues from the start of the circular buffer.
      x_p = &x[0];
    }

    // Sum components together.
    __m128 x2_sum_128 = _mm_add_ps(_mm256_extractf128_ps(x2_sum_256, 0),
                                   _mm256_extractf128_ps(x2_sum_256, 1));
    __m128 s_128 = _mm_add_ps(_mm256_extractf128_ps(s_256, 0),
                              _mm256_extractf128_ps(s_256, 1));
    // Combine the accumulated vector and scalar values.
    float* v = reinterpret_cast<float*>(&x2_sum_128);
    x2_sum += v[0] + v[1] + v[2] + v[3];
    v = reinterpret_cast<float*>(&s_128);
    s += v[0] + v[1] + v[2] + v[3];

    // Compute the matched filter error.
    float e = y[i] - s;
    // Treat near-full-scale capture samples as saturated and skip adaptation.
    const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
    (*error_sum) += e * e;

    // Update the matched filter estimate in an NLMS manner.
    if (x2_sum > x2_sum_threshold && !saturation) {
      RTC_DCHECK_LT(0.f, x2_sum);
      const float alpha = smoothing * e / x2_sum;
      const __m256 alpha_256 = _mm256_set1_ps(alpha);

      // filter = filter + smoothing * (y - filter * x) * x / x * x.
      float* h_p = &h[0];
      x_p = &x[x_start_index];

      // Perform the loop in two chunks.
      for (int limit : {chunk1, chunk2}) {
        // Perform 256 bit vector operations.
        const int limit_by_8 = limit >> 3;
        for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) {
          // Load the data into 256 bit vectors.
          __m256 h_k = _mm256_loadu_ps(h_p);
          __m256 x_k = _mm256_loadu_ps(x_p);
          // Compute h = h + alpha * x.
          h_k = _mm256_fmadd_ps(x_k, alpha_256, h_k);

          // Store the result.
          _mm256_storeu_ps(h_p, h_k);
        }

        // Perform non-vector operations for any remaining items.
        for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) {
          *h_p += alpha * *x_p;
        }

        // The second chunk continues from the start of the circular buffer.
        x_p = &x[0];
      }

      *filters_updated = true;
    }

    // Step one sample backwards in the circular render buffer.
    x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
  }
}
|
||||
|
||||
} // namespace aec3
|
||||
} // namespace webrtc
|
@ -0,0 +1,97 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <iterator>
|
||||
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Constructs the aggregator with one histogram bin per possible lag value in
// [0, max_filter_lag].
MatchedFilterLagAggregator::MatchedFilterLagAggregator(
    ApmDataDumper* data_dumper,
    size_t max_filter_lag,
    const EchoCanceller3Config::Delay::DelaySelectionThresholds& thresholds)
    : data_dumper_(data_dumper),
      histogram_(max_filter_lag + 1, 0),
      thresholds_(thresholds) {
  RTC_DCHECK(data_dumper);
  // The initial (coarse) detection threshold must not exceed the converged one.
  RTC_DCHECK_LE(thresholds_.initial, thresholds_.converged);
  histogram_data_.fill(0);
}
|
||||
|
||||
MatchedFilterLagAggregator::~MatchedFilterLagAggregator() = default;
|
||||
|
||||
void MatchedFilterLagAggregator::Reset(bool hard_reset) {
|
||||
std::fill(histogram_.begin(), histogram_.end(), 0);
|
||||
histogram_data_.fill(0);
|
||||
histogram_data_index_ = 0;
|
||||
if (hard_reset) {
|
||||
significant_candidate_found_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
// Combines the per-shift lag estimates into a single delay estimate by voting
// over a sliding window of the last histogram_data_.size() best lags. Returns
// a delay only when one histogram bin has collected enough votes; otherwise
// returns absl::nullopt.
absl::optional<DelayEstimate> MatchedFilterLagAggregator::Aggregate(
    rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates) {
  // Choose the strongest lag estimate as the best one.
  float best_accuracy = 0.f;
  int best_lag_estimate_index = -1;
  for (size_t k = 0; k < lag_estimates.size(); ++k) {
    if (lag_estimates[k].updated && lag_estimates[k].reliable) {
      if (lag_estimates[k].accuracy > best_accuracy) {
        best_accuracy = lag_estimates[k].accuracy;
        best_lag_estimate_index = static_cast<int>(k);
      }
    }
  }

  // TODO(peah): Remove this logging once all development is done.
  data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_best_index",
                        best_lag_estimate_index);
  data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_histogram", histogram_);

  if (best_lag_estimate_index != -1) {
    // Remove the vote of the oldest windowed lag before overwriting its slot.
    RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]);
    RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]);
    --histogram_[histogram_data_[histogram_data_index_]];

    histogram_data_[histogram_data_index_] =
        lag_estimates[best_lag_estimate_index].lag;

    // Add a vote for the newly observed lag.
    RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]);
    RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]);
    ++histogram_[histogram_data_[histogram_data_index_]];

    histogram_data_index_ =
        (histogram_data_index_ + 1) % histogram_data_.size();

    // The candidate delay is the lag with the most votes.
    const int candidate =
        std::distance(histogram_.begin(),
                      std::max_element(histogram_.begin(), histogram_.end()));

    // Once the converged threshold has been passed, only the stricter
    // threshold is accepted from then on (latched via
    // significant_candidate_found_).
    significant_candidate_found_ =
        significant_candidate_found_ ||
        histogram_[candidate] > thresholds_.converged;
    if (histogram_[candidate] > thresholds_.converged ||
        (histogram_[candidate] > thresholds_.initial &&
         !significant_candidate_found_)) {
      DelayEstimate::Quality quality = significant_candidate_found_
                                           ? DelayEstimate::Quality::kRefined
                                           : DelayEstimate::Quality::kCoarse;
      return DelayEstimate(quality, candidate);
    }
  }

  // No sufficiently supported candidate this call.
  return absl::nullopt;
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
// Aggregates lag estimates produced by the MatchedFilter class into a single
// reliable combined lag estimate.
class MatchedFilterLagAggregator {
 public:
  MatchedFilterLagAggregator(
      ApmDataDumper* data_dumper,
      size_t max_filter_lag,
      const EchoCanceller3Config::Delay::DelaySelectionThresholds& thresholds);

  MatchedFilterLagAggregator() = delete;
  MatchedFilterLagAggregator(const MatchedFilterLagAggregator&) = delete;
  MatchedFilterLagAggregator& operator=(const MatchedFilterLagAggregator&) =
      delete;

  ~MatchedFilterLagAggregator();

  // Resets the aggregator. A hard reset also forgets any previously found
  // significant candidate.
  void Reset(bool hard_reset);

  // Aggregates the provided lag estimates; returns a delay estimate when one
  // lag has gathered sufficient support, absl::nullopt otherwise.
  absl::optional<DelayEstimate> Aggregate(
      rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates);

 private:
  ApmDataDumper* const data_dumper_;
  // Vote counts, one bin per possible lag value.
  std::vector<int> histogram_;
  // Circular window of the most recent best lags feeding the histogram.
  std::array<int, 250> histogram_data_;
  int histogram_data_index_ = 0;
  bool significant_candidate_found_ = false;
  const EchoCanceller3Config::Delay::DelaySelectionThresholds thresholds_;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_MATCHED_FILTER_LAG_AGGREGATOR_H_
|
60
webrtc/modules/audio_processing/aec3/moving_average.cc
Normal file
60
webrtc/modules/audio_processing/aec3/moving_average.cc
Normal file
@ -0,0 +1,60 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/moving_average.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
// Constructs an averager over vectors of length num_elem that averages the
// current input together with the mem_len - 1 previous inputs.
MovingAverage::MovingAverage(size_t num_elem, size_t mem_len)
    : num_elem_(num_elem),
      // Only mem_len - 1 past inputs are stored; the newest input is passed
      // directly to Average().
      mem_len_(mem_len - 1),
      scaling_(1.0f / static_cast<float>(mem_len)),
      memory_(num_elem * mem_len_, 0.f),
      mem_index_(0) {
  RTC_DCHECK(num_elem_ > 0);
  RTC_DCHECK(mem_len > 0);
}
|
||||
|
||||
MovingAverage::~MovingAverage() = default;
|
||||
|
||||
// Writes into |output| the arithmetic mean of |input| and the mem_len - 1
// previously supplied inputs, then records |input| in the circular memory.
void MovingAverage::Average(rtc::ArrayView<const float> input,
                            rtc::ArrayView<float> output) {
  RTC_DCHECK(input.size() == num_elem_);
  RTC_DCHECK(output.size() == num_elem_);

  // Start from the newest input and add each stored past input element-wise,
  // oldest-to-newest storage order.
  std::copy(input.begin(), input.end(), output.begin());
  for (size_t block = 0; block < mem_len_; ++block) {
    const float* past = &memory_[block * num_elem_];
    for (size_t j = 0; j < num_elem_; ++j) {
      output[j] += past[j];
    }
  }

  // Normalize by the averaging length (scaling_ == 1 / mem_len).
  for (float& o : output) {
    o *= scaling_;
  }

  // Overwrite the oldest stored input with the current one and advance the
  // circular index.
  if (mem_len_ > 0) {
    std::copy(input.begin(), input.end(),
              memory_.begin() + mem_index_ * num_elem_);
    mem_index_ = (mem_index_ + 1) % mem_len_;
  }
}
|
||||
|
||||
} // namespace aec3
|
||||
} // namespace webrtc
|
45
webrtc/modules/audio_processing/aec3/moving_average.h
Normal file
45
webrtc/modules/audio_processing/aec3/moving_average.h
Normal file
@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
// Streaming element-wise moving average over fixed-length float vectors.
class MovingAverage {
 public:
  // Creates an instance of MovingAverage that accepts inputs of length num_elem
  // and averages over mem_len inputs.
  MovingAverage(size_t num_elem, size_t mem_len);
  ~MovingAverage();

  // Computes the average of input and mem_len-1 previous inputs and stores the
  // result in output.
  void Average(rtc::ArrayView<const float> input, rtc::ArrayView<float> output);

 private:
  // Length of each input vector.
  const size_t num_elem_;
  // Number of stored past inputs (mem_len - 1).
  const size_t mem_len_;
  // Precomputed 1 / mem_len normalization factor.
  const float scaling_;
  // Flat circular storage of mem_len_ past inputs, num_elem_ floats each.
  std::vector<float> memory_;
  // Next write slot in the circular storage.
  size_t mem_index_;
};
|
||||
|
||||
} // namespace aec3
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_MOVING_AVERAGE_H_
|
42
webrtc/modules/audio_processing/aec3/nearend_detector.h
Normal file
42
webrtc/modules/audio_processing/aec3/nearend_detector.h
Normal file
@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
// Class for selecting whether the suppressor is in the nearend or echo state.
class NearendDetector {
 public:
  virtual ~NearendDetector() {}

  // Returns whether the current state is the nearend state.
  virtual bool IsNearendState() const = 0;

  // Updates the state selection based on latest spectral estimates.
  // All spectra are per-channel power spectra with kFftLengthBy2Plus1 bins.
  virtual void Update(
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          nearend_spectrum,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          residual_echo_spectrum,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
          comfort_noise_spectrum,
      bool initial_state) = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_NEAREND_DETECTOR_H_
|
@ -0,0 +1,174 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/refined_filter_update_gain.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
constexpr float kHErrorInitial = 10000.f;
|
||||
constexpr int kPoorExcitationCounterInitial = 1000;
|
||||
|
||||
} // namespace
|
||||
|
||||
int RefinedFilterUpdateGain::instance_count_ = 0;
|
||||
|
||||
// Constructs the gain computer and applies |config| immediately. Subsequent
// SetConfig calls may instead be smoothed over config_change_duration_blocks.
RefinedFilterUpdateGain::RefinedFilterUpdateGain(
    const EchoCanceller3Config::Filter::RefinedConfiguration& config,
    size_t config_change_duration_blocks)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      config_change_duration_blocks_(
          static_cast<int>(config_change_duration_blocks)),
      poor_excitation_counter_(kPoorExcitationCounterInitial) {
  SetConfig(config, true);
  // Start with a large filter uncertainty so early adaptation is fast.
  H_error_.fill(kHErrorInitial);
  RTC_DCHECK_LT(0, config_change_duration_blocks_);
  one_by_config_change_duration_blocks_ = 1.f / config_change_duration_blocks_;
}
|
||||
|
||||
RefinedFilterUpdateGain::~RefinedFilterUpdateGain() {}
|
||||
|
||||
// Reacts to a reported echo path change by resetting the relevant internal
// adaptation state.
void RefinedFilterUpdateGain::HandleEchoPathChange(
    const EchoPathVariability& echo_path_variability) {
  if (echo_path_variability.gain_change) {
    // TODO(bugs.webrtc.org/9526) Handle gain changes.
  }

  // A delay change invalidates the filter estimate; restore the initial
  // (large) uncertainty so adaptation restarts quickly.
  if (echo_path_variability.delay_change !=
      EchoPathVariability::DelayAdjustment::kNone) {
    H_error_.fill(kHErrorInitial);
  }

  // NOTE(review): counters are reset only when there was NO gain change;
  // presumably gain changes are meant to be covered by the TODO above —
  // confirm against upstream intent before altering.
  if (!echo_path_variability.gain_change) {
    poor_excitation_counter_ = kPoorExcitationCounterInitial;
    call_counter_ = 0;
  }
}
|
||||
|
||||
// Computes the per-bin NLMS adaptation gain G for the refined filter, and
// updates the filter uncertainty estimate H_error_ as a side effect.
// |render_power| is the per-bin render power X2, |erl| the echo return loss
// estimate, and |size_partitions| the number of filter partitions.
void RefinedFilterUpdateGain::Compute(
    const std::array<float, kFftLengthBy2Plus1>& render_power,
    const RenderSignalAnalyzer& render_signal_analyzer,
    const SubtractorOutput& subtractor_output,
    rtc::ArrayView<const float> erl,
    size_t size_partitions,
    bool saturated_capture_signal,
    FftData* gain_fft) {
  RTC_DCHECK(gain_fft);
  // Introducing shorter notation to improve readability.
  const FftData& E_refined = subtractor_output.E_refined;
  const auto& E2_refined = subtractor_output.E2_refined;
  const auto& E2_coarse = subtractor_output.E2_coarse;
  FftData* G = gain_fft;
  const auto& X2 = render_power;

  ++call_counter_;

  // Advance any ongoing smooth transition between configs.
  UpdateCurrentConfig();

  if (render_signal_analyzer.PoorSignalExcitation()) {
    poor_excitation_counter_ = 0;
  }

  // Do not update the filter if the render is not sufficiently excited: zero
  // the gain while excitation has been poor recently, the capture is
  // saturated, or during the initial size_partitions calls.
  if (++poor_excitation_counter_ < size_partitions ||
      saturated_capture_signal || call_counter_ <= size_partitions) {
    G->re.fill(0.f);
    G->im.fill(0.f);
  } else {
    // Corresponds to WGN of power -39 dBFS.
    std::array<float, kFftLengthBy2Plus1> mu;
    // Per-bin step size: mu = H_error / (0.5* H_error* X2 + n * E2); bins
    // below the noise gate are not adapted.
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      if (X2[k] >= current_config_.noise_gate) {
        mu[k] = H_error_[k] /
                (0.5f * H_error_[k] * X2[k] + size_partitions * E2_refined[k]);
      } else {
        mu[k] = 0.f;
      }
    }

    // Avoid updating the filter close to narrow bands in the render signals.
    render_signal_analyzer.MaskRegionsAroundNarrowBands(&mu);

    // Shrink the uncertainty where the filter is adapted:
    // H_error = H_error - 0.5 * mu * X2 * H_error.
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      H_error_[k] -= 0.5f * mu[k] * X2[k] * H_error_[k];
    }

    // G = mu * E.
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      G->re[k] = mu[k] * E_refined.re[k];
      G->im[k] = mu[k] * E_refined.im[k];
    }
  }

  // Grow the uncertainty by a leakage term scaled by the ERL; the leakage
  // factor depends on whether the refined filter outperforms the coarse one:
  // H_error = H_error + factor * erl.
  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
    if (E2_coarse[k] >= E2_refined[k]) {
      H_error_[k] += current_config_.leakage_converged * erl[k];
    } else {
      H_error_[k] += current_config_.leakage_diverged * erl[k];
    }

    // Keep the uncertainty within the configured bounds.
    H_error_[k] = std::max(H_error_[k], current_config_.error_floor);
    H_error_[k] = std::min(H_error_[k], current_config_.error_ceil);
  }

  data_dumper_->DumpRaw("aec3_refined_gain_H_error", H_error_);
}
|
||||
|
||||
// Moves current_config_ one step along the linear interpolation from
// old_target_config_ to target_config_ while a config change is in progress;
// snaps to the target on the final step.
void RefinedFilterUpdateGain::UpdateCurrentConfig() {
  RTC_DCHECK_GE(config_change_duration_blocks_, config_change_counter_);
  if (config_change_counter_ > 0) {
    if (--config_change_counter_ > 0) {
      // Linear blend; from_weight is the remaining fraction of the transition.
      auto average = [](float from, float to, float from_weight) {
        return from * from_weight + to * (1.f - from_weight);
      };

      float change_factor =
          config_change_counter_ * one_by_config_change_duration_blocks_;

      current_config_.leakage_converged =
          average(old_target_config_.leakage_converged,
                  target_config_.leakage_converged, change_factor);
      current_config_.leakage_diverged =
          average(old_target_config_.leakage_diverged,
                  target_config_.leakage_diverged, change_factor);
      current_config_.error_floor =
          average(old_target_config_.error_floor, target_config_.error_floor,
                  change_factor);
      current_config_.error_ceil =
          average(old_target_config_.error_ceil, target_config_.error_ceil,
                  change_factor);
      current_config_.noise_gate =
          average(old_target_config_.noise_gate, target_config_.noise_gate,
                  change_factor);
    } else {
      // Transition finished: adopt the target config exactly.
      current_config_ = old_target_config_ = target_config_;
    }
  }
  RTC_DCHECK_LE(0, config_change_counter_);
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,89 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AdaptiveFirFilter;
|
||||
class ApmDataDumper;
|
||||
struct EchoPathVariability;
|
||||
struct FftData;
|
||||
class RenderSignalAnalyzer;
|
||||
struct SubtractorOutput;
|
||||
|
||||
// Provides functionality for computing the adaptive gain for the refined
// filter.
class RefinedFilterUpdateGain {
 public:
  RefinedFilterUpdateGain(
      const EchoCanceller3Config::Filter::RefinedConfiguration& config,
      size_t config_change_duration_blocks);
  ~RefinedFilterUpdateGain();

  RefinedFilterUpdateGain(const RefinedFilterUpdateGain&) = delete;
  RefinedFilterUpdateGain& operator=(const RefinedFilterUpdateGain&) = delete;

  // Takes action in the case of a known echo path change.
  void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);

  // Computes the gain.
  void Compute(const std::array<float, kFftLengthBy2Plus1>& render_power,
               const RenderSignalAnalyzer& render_signal_analyzer,
               const SubtractorOutput& subtractor_output,
               rtc::ArrayView<const float> erl,
               size_t size_partitions,
               bool saturated_capture_signal,
               FftData* gain_fft);

  // Sets a new config. With immediate_effect the config is adopted at once;
  // otherwise it is blended in over config_change_duration_blocks_ calls.
  void SetConfig(
      const EchoCanceller3Config::Filter::RefinedConfiguration& config,
      bool immediate_effect) {
    if (immediate_effect) {
      old_target_config_ = current_config_ = target_config_ = config;
      config_change_counter_ = 0;
    } else {
      old_target_config_ = current_config_;
      target_config_ = config;
      config_change_counter_ = config_change_duration_blocks_;
    }
  }

 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const int config_change_duration_blocks_;
  float one_by_config_change_duration_blocks_;
  // Config currently in effect; interpolated during a transition.
  EchoCanceller3Config::Filter::RefinedConfiguration current_config_;
  EchoCanceller3Config::Filter::RefinedConfiguration target_config_;
  EchoCanceller3Config::Filter::RefinedConfiguration old_target_config_;
  // Per-bin estimate of the filter uncertainty.
  std::array<float, kFftLengthBy2Plus1> H_error_;
  size_t poor_excitation_counter_;
  size_t call_counter_ = 0;
  // Remaining blocks of an ongoing smooth config transition; 0 when idle.
  int config_change_counter_ = 0;

  // Updates the current config towards the target config.
  void UpdateCurrentConfig();
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_REFINED_FILTER_UPDATE_GAIN_H_
|
80
webrtc/modules/audio_processing/aec3/render_buffer.cc
Normal file
80
webrtc/modules/audio_processing/aec3/render_buffer.cc
Normal file
@ -0,0 +1,80 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Wraps the three parallel circular buffers (time-domain blocks, spectra and
// FFTs). The buffers are owned by the caller and must stay in lockstep.
RenderBuffer::RenderBuffer(BlockBuffer* block_buffer,
                           SpectrumBuffer* spectrum_buffer,
                           FftBuffer* fft_buffer)
    : block_buffer_(block_buffer),
      spectrum_buffer_(spectrum_buffer),
      fft_buffer_(fft_buffer) {
  RTC_DCHECK(block_buffer_);
  RTC_DCHECK(spectrum_buffer_);
  RTC_DCHECK(fft_buffer_);
  // All three buffers must have the same length and aligned read/write
  // positions, since they are indexed interchangeably.
  RTC_DCHECK_EQ(block_buffer_->buffer.size(), fft_buffer_->buffer.size());
  RTC_DCHECK_EQ(spectrum_buffer_->buffer.size(), fft_buffer_->buffer.size());
  RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read);
  RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write);
}
|
||||
|
||||
RenderBuffer::~RenderBuffer() = default;
|
||||
|
||||
void RenderBuffer::SpectralSum(
|
||||
size_t num_spectra,
|
||||
std::array<float, kFftLengthBy2Plus1>* X2) const {
|
||||
X2->fill(0.f);
|
||||
int position = spectrum_buffer_->read;
|
||||
for (size_t j = 0; j < num_spectra; ++j) {
|
||||
for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
|
||||
std::transform(X2->begin(), X2->end(), channel_spectrum.begin(),
|
||||
X2->begin(), std::plus<float>());
|
||||
}
|
||||
position = spectrum_buffer_->IncIndex(position);
|
||||
}
|
||||
}
|
||||
|
||||
// Computes two spectral sums in one pass over the circular buffer: *X2_shorter
// sums the num_spectra_shorter most recent spectra, and *X2_longer extends
// that partial sum to num_spectra_longer spectra. Requires
// num_spectra_shorter <= num_spectra_longer.
void RenderBuffer::SpectralSums(
    size_t num_spectra_shorter,
    size_t num_spectra_longer,
    std::array<float, kFftLengthBy2Plus1>* X2_shorter,
    std::array<float, kFftLengthBy2Plus1>* X2_longer) const {
  RTC_DCHECK_LE(num_spectra_shorter, num_spectra_longer);
  X2_shorter->fill(0.f);
  int position = spectrum_buffer_->read;
  size_t j = 0;
  // First accumulate the shorter sum over all channels.
  for (; j < num_spectra_shorter; ++j) {
    for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
      std::transform(X2_shorter->begin(), X2_shorter->end(),
                     channel_spectrum.begin(), X2_shorter->begin(),
                     std::plus<float>());
    }
    position = spectrum_buffer_->IncIndex(position);
  }
  // Seed the longer sum with the shorter one and continue accumulating the
  // remaining spectra into it only.
  std::copy(X2_shorter->begin(), X2_shorter->end(), X2_longer->begin());
  for (; j < num_spectra_longer; ++j) {
    for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
      std::transform(X2_longer->begin(), X2_longer->end(),
                     channel_spectrum.begin(), X2_longer->begin(),
                     std::plus<float>());
    }
    position = spectrum_buffer_->IncIndex(position);
  }
}
|
||||
|
||||
} // namespace webrtc
|
116
webrtc/modules/audio_processing/aec3/render_buffer.h
Normal file
116
webrtc/modules/audio_processing/aec3/render_buffer.h
Normal file
@ -0,0 +1,116 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
#include "modules/audio_processing/aec3/fft_buffer.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Provides a buffer of the render data for the echo remover.
|
||||
// Provides a non-owning view of the render data (time-domain blocks, FFTs and
// power spectra) for the echo remover. All three underlying circular buffers
// are expected to advance in lockstep (see Position()).
class RenderBuffer {
 public:
  // The pointed-to buffers must outlive this object; RenderBuffer does not
  // take ownership.
  RenderBuffer(BlockBuffer* block_buffer,
               SpectrumBuffer* spectrum_buffer,
               FftBuffer* fft_buffer);

  RenderBuffer() = delete;
  RenderBuffer(const RenderBuffer&) = delete;
  RenderBuffer& operator=(const RenderBuffer&) = delete;

  ~RenderBuffer();

  // Get a block. The offset is relative to the current read position.
  const std::vector<std::vector<std::vector<float>>>& Block(
      int buffer_offset_blocks) const {
    int position =
        block_buffer_->OffsetIndex(block_buffer_->read, buffer_offset_blocks);
    return block_buffer_->buffer[position];
  }

  // Get the spectrum from one of the FFTs in the buffer. The offset is
  // relative to the current read position.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Spectrum(
      int buffer_offset_ffts) const {
    int position = spectrum_buffer_->OffsetIndex(spectrum_buffer_->read,
                                                 buffer_offset_ffts);
    return spectrum_buffer_->buffer[position];
  }

  // Returns the circular fft buffer.
  rtc::ArrayView<const std::vector<FftData>> GetFftBuffer() const {
    return fft_buffer_->buffer;
  }

  // Returns the current position in the circular buffer. The spectrum and FFT
  // buffers are required to be aligned, which the DCHECKs assert.
  size_t Position() const {
    RTC_DCHECK_EQ(spectrum_buffer_->read, fft_buffer_->read);
    RTC_DCHECK_EQ(spectrum_buffer_->write, fft_buffer_->write);
    return fft_buffer_->read;
  }

  // Returns the sum of the spectrums for a certain number of FFTs, summed
  // across all channels, starting at the read position.
  void SpectralSum(size_t num_spectra,
                   std::array<float, kFftLengthBy2Plus1>* X2) const;

  // Returns the sums of the spectrums for two numbers of FFTs
  // (num_spectra_shorter <= num_spectra_longer); the shorter sum is a prefix
  // of the longer one so the buffer is only traversed once.
  void SpectralSums(size_t num_spectra_shorter,
                    size_t num_spectra_longer,
                    std::array<float, kFftLengthBy2Plus1>* X2_shorter,
                    std::array<float, kFftLengthBy2Plus1>* X2_longer) const;

  // Gets the recent activity seen in the render signal.
  bool GetRenderActivity() const { return render_activity_; }

  // Specifies the recent activity seen in the render signal.
  void SetRenderActivity(bool activity) { render_activity_ = activity; }

  // Returns the headroom between the write and the read positions in the
  // buffer, in FFTs.
  int Headroom() const {
    // The write and read indices are decreased over time, so a write index
    // smaller than the read index means no wrap-around has occurred.
    int headroom =
        fft_buffer_->write < fft_buffer_->read
            ? fft_buffer_->read - fft_buffer_->write
            : fft_buffer_->size - fft_buffer_->write + fft_buffer_->read;

    RTC_DCHECK_LE(0, headroom);
    RTC_DCHECK_GE(fft_buffer_->size, headroom);

    return headroom;
  }

  // Returns a reference to the spectrum buffer.
  const SpectrumBuffer& GetSpectrumBuffer() const { return *spectrum_buffer_; }

  // Returns a reference to the block buffer.
  const BlockBuffer& GetBlockBuffer() const { return *block_buffer_; }

 private:
  // Non-owning pointers to the underlying circular buffers.
  const BlockBuffer* const block_buffer_;
  const SpectrumBuffer* const spectrum_buffer_;
  const FftBuffer* const fft_buffer_;
  // Tracks whether active render has recently been observed.
  bool render_activity_ = false;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_BUFFER_H_
|
523
webrtc/modules/audio_processing/aec3/render_delay_buffer.cc
Normal file
523
webrtc/modules/audio_processing/aec3/render_delay_buffer.cc
Normal file
@ -0,0 +1,523 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
#include "modules/audio_processing/aec3/alignment_mixer.h"
|
||||
#include "modules/audio_processing/aec3/block_buffer.h"
|
||||
#include "modules/audio_processing/aec3/decimator.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/fft_buffer.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
bool UpdateCaptureCallCounterOnSkippedBlocks() {
|
||||
return !field_trial::IsEnabled(
|
||||
"WebRTC-Aec3RenderBufferCallCounterUpdateKillSwitch");
|
||||
}
|
||||
|
||||
// Implementation of RenderDelayBuffer: buffers incoming render blocks in
// parallel time-domain, spectrum and FFT circular buffers, plus a downsampled
// low-rate buffer used by the delay estimator.
class RenderDelayBufferImpl final : public RenderDelayBuffer {
 public:
  RenderDelayBufferImpl(const EchoCanceller3Config& config,
                        int sample_rate_hz,
                        size_t num_render_channels);
  RenderDelayBufferImpl() = delete;
  ~RenderDelayBufferImpl() override;

  void Reset() override;
  BufferingEvent Insert(
      const std::vector<std::vector<std::vector<float>>>& block) override;
  BufferingEvent PrepareCaptureProcessing() override;
  void HandleSkippedCaptureProcessing() override;
  bool AlignFromDelay(size_t delay) override;
  void AlignFromExternalDelay() override;
  size_t Delay() const override { return ComputeDelay(); }
  size_t MaxDelay() const override {
    // Reserve headroom for the refined filter length.
    return blocks_.buffer.size() - 1 - buffer_headroom_;
  }
  RenderBuffer* GetRenderBuffer() override { return &echo_remover_buffer_; }

  const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const override {
    return low_rate_;
  }

  // Number of unread sub-blocks in the low-rate buffer.
  int BufferLatency() const;
  void SetAudioBufferDelay(int delay_ms) override;
  bool HasReceivedBufferDelay() override;

 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  const Aec3Optimization optimization_;
  const EchoCanceller3Config config_;
  const bool update_capture_call_counter_on_skipped_blocks_;
  // Linear gain applied to inserted render blocks (from render_power_gain_db).
  const float render_linear_amplitude_gain_;
  const rtc::LoggingSeverity delay_log_level_;
  size_t down_sampling_factor_;
  const int sub_block_size_;
  // Parallel circular buffers, kept index-aligned.
  BlockBuffer blocks_;
  SpectrumBuffer spectra_;
  FftBuffer ffts_;
  absl::optional<size_t> delay_;
  RenderBuffer echo_remover_buffer_;
  DownsampledRenderBuffer low_rate_;
  AlignmentMixer render_mixer_;
  Decimator render_decimator_;
  const Aec3Fft fft_;
  std::vector<float> render_ds_;
  const int buffer_headroom_;
  // State for tracking render/capture API call jitter.
  bool last_call_was_render_ = false;
  int num_api_calls_in_a_row_ = 0;
  int max_observed_jitter_ = 1;
  int64_t capture_call_counter_ = 0;
  int64_t render_call_counter_ = 0;
  // Render activity detection state (see Insert / PrepareCaptureProcessing).
  bool render_activity_ = false;
  size_t render_activity_counter_ = 0;
  // Externally reported audio buffer delay, in blocks.
  absl::optional<int> external_audio_buffer_delay_;
  bool external_audio_buffer_delay_verified_after_reset_ = false;
  // State for excess-render detection (see DetectExcessRenderBlocks).
  size_t min_latency_blocks_ = 0;
  size_t excess_render_detection_counter_ = 0;

  int MapDelayToTotalDelay(size_t delay) const;
  int ComputeDelay() const;
  void ApplyTotalDelay(int delay);
  void InsertBlock(const std::vector<std::vector<std::vector<float>>>& block,
                   int previous_write);
  bool DetectActiveRender(rtc::ArrayView<const float> x) const;
  bool DetectExcessRenderBlocks();
  void IncrementWriteIndices();
  void IncrementLowRateReadIndices();
  void IncrementReadIndices();
  bool RenderOverrun();
  bool RenderUnderrun();
};
|
||||
|
||||
// Gives each instance a unique id for the data dumper.
int RenderDelayBufferImpl::instance_count_ = 0;

// Sizes all buffers from the config and sample rate, then resets the
// alignment state. The block, spectrum and FFT buffers are created with
// matching sizes, which the DCHECKs below assert.
RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
                                             int sample_rate_hz,
                                             size_t num_render_channels)
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      optimization_(DetectOptimization()),
      config_(config),
      update_capture_call_counter_on_skipped_blocks_(
          UpdateCaptureCallCounterOnSkippedBlocks()),
      // Convert the configured dB gain to a linear amplitude gain.
      render_linear_amplitude_gain_(
          std::pow(10.0f, config_.render_levels.render_power_gain_db / 20.f)),
      delay_log_level_(config_.delay.log_warning_on_delay_changes
                           ? rtc::LS_WARNING
                           : rtc::LS_VERBOSE),
      down_sampling_factor_(config.delay.down_sampling_factor),
      // Guard against a zero down-sampling factor in the config.
      sub_block_size_(static_cast<int>(down_sampling_factor_ > 0
                                           ? kBlockSize / down_sampling_factor_
                                           : kBlockSize)),
      blocks_(GetRenderDelayBufferSize(down_sampling_factor_,
                                       config.delay.num_filters,
                                       config.filter.refined.length_blocks),
              NumBandsForRate(sample_rate_hz),
              num_render_channels,
              kBlockSize),
      spectra_(blocks_.buffer.size(), num_render_channels),
      ffts_(blocks_.buffer.size(), num_render_channels),
      delay_(config_.delay.default_delay),
      echo_remover_buffer_(&blocks_, &spectra_, &ffts_),
      low_rate_(GetDownSampledBufferSize(down_sampling_factor_,
                                         config.delay.num_filters)),
      render_mixer_(num_render_channels, config.delay.render_alignment_mixing),
      render_decimator_(down_sampling_factor_),
      fft_(),
      render_ds_(sub_block_size_, 0.f),
      buffer_headroom_(config.filter.refined.length_blocks) {
  RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
  RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
  for (size_t i = 0; i < blocks_.buffer.size(); ++i) {
    RTC_DCHECK_EQ(blocks_.buffer[i][0].size(), ffts_.buffer[i].size());
    RTC_DCHECK_EQ(spectra_.buffer[i].size(), ffts_.buffer[i].size());
  }

  Reset();
}
|
||||
|
||||
RenderDelayBufferImpl::~RenderDelayBufferImpl() = default;

// Resets the buffer delays and clears the reported delays.
void RenderDelayBufferImpl::Reset() {
  last_call_was_render_ = false;
  num_api_calls_in_a_row_ = 1;
  min_latency_blocks_ = 0;
  excess_render_detection_counter_ = 0;

  // Initialize the read index to one sub-block before the write index.
  low_rate_.read = low_rate_.OffsetIndex(low_rate_.write, sub_block_size_);

  // Check for any external audio buffer delay and whether it is feasible.
  if (external_audio_buffer_delay_) {
    // Headroom (in blocks) subtracted from the reported delay to avoid
    // overshooting the true delay.
    const int headroom = 2;
    size_t audio_buffer_delay_to_set;
    // Minimum delay is 1 (like the low-rate render buffer).
    if (*external_audio_buffer_delay_ <= headroom) {
      audio_buffer_delay_to_set = 1;
    } else {
      audio_buffer_delay_to_set = *external_audio_buffer_delay_ - headroom;
    }

    // Never exceed what the buffers can hold.
    audio_buffer_delay_to_set = std::min(audio_buffer_delay_to_set, MaxDelay());

    // When an external delay estimate is available, use that delay as the
    // initial render buffer delay.
    ApplyTotalDelay(audio_buffer_delay_to_set);
    delay_ = ComputeDelay();

    external_audio_buffer_delay_verified_after_reset_ = false;
  } else {
    // When no external delay estimate is available, set the render buffer
    // delays to the default delay.
    ApplyTotalDelay(config_.delay.default_delay);

    // Unset the delays which are set by AlignFromDelay.
    delay_ = absl::nullopt;
  }
}
|
||||
|
||||
// Inserts a new block into the render buffers. Returns kRenderOverrun (after
// resetting the buffers) if more render data has been inserted than capture
// data has been consumed; kNone otherwise.
RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
    const std::vector<std::vector<std::vector<float>>>& block) {
  ++render_call_counter_;
  // Track the worst-case run of consecutive render calls (API call jitter).
  if (delay_) {
    if (!last_call_was_render_) {
      last_call_was_render_ = true;
      num_api_calls_in_a_row_ = 1;
    } else {
      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
        max_observed_jitter_ = num_api_calls_in_a_row_;
        RTC_LOG_V(delay_log_level_)
            << "New max number api jitter observed at render block "
            << render_call_counter_ << ": " << num_api_calls_in_a_row_
            << " blocks";
      }
    }
  }

  // Increase the write indices to where the new blocks should be written.
  const int previous_write = blocks_.write;
  IncrementWriteIndices();

  // Allow overrun and do a reset when render overrun occurs due to more render
  // data being inserted than capture data is received.
  BufferingEvent event =
      RenderOverrun() ? BufferingEvent::kRenderOverrun : BufferingEvent::kNone;

  // Detect and update render activity. Activity latches once 20 active
  // lowest-band blocks have been seen (cleared in PrepareCaptureProcessing).
  if (!render_activity_) {
    render_activity_counter_ += DetectActiveRender(block[0][0]) ? 1 : 0;
    render_activity_ = render_activity_counter_ >= 20;
  }

  // Insert the new render block into the specified position.
  InsertBlock(block, previous_write);

  if (event != BufferingEvent::kNone) {
    Reset();
  }

  return event;
}
|
||||
|
||||
void RenderDelayBufferImpl::HandleSkippedCaptureProcessing() {
|
||||
if (update_capture_call_counter_on_skipped_blocks_) {
|
||||
++capture_call_counter_;
|
||||
}
|
||||
}
|
||||
|
||||
// Prepares the render buffers for processing another capture block. Returns
// kRenderOverrun (excess render, buffers reset), kRenderUnderrun (no unread
// render data) or kNone.
RenderDelayBuffer::BufferingEvent
RenderDelayBufferImpl::PrepareCaptureProcessing() {
  RenderDelayBuffer::BufferingEvent event = BufferingEvent::kNone;
  ++capture_call_counter_;

  // Track the worst-case run of consecutive capture calls (API call jitter).
  if (delay_) {
    if (last_call_was_render_) {
      last_call_was_render_ = false;
      num_api_calls_in_a_row_ = 1;
    } else {
      if (++num_api_calls_in_a_row_ > max_observed_jitter_) {
        max_observed_jitter_ = num_api_calls_in_a_row_;
        RTC_LOG_V(delay_log_level_)
            << "New max number api jitter observed at capture block "
            << capture_call_counter_ << ": " << num_api_calls_in_a_row_
            << " blocks";
      }
    }
  }

  if (DetectExcessRenderBlocks()) {
    // Too many render blocks compared to capture blocks. Risk of delay ending
    // up before the filter used by the delay estimator.
    RTC_LOG_V(delay_log_level_)
        << "Excess render blocks detected at block " << capture_call_counter_;
    Reset();
    event = BufferingEvent::kRenderOverrun;
  } else if (RenderUnderrun()) {
    // Don't increment the read indices of the low rate buffer if there is a
    // render underrun.
    RTC_LOG_V(delay_log_level_)
        << "Render buffer underrun detected at block " << capture_call_counter_;
    IncrementReadIndices();
    // Incrementing the buffer index without increasing the low rate buffer
    // index means that the delay is reduced by one.
    if (delay_ && *delay_ > 0)
      delay_ = *delay_ - 1;
    event = BufferingEvent::kRenderUnderrun;
  } else {
    // Increment the read indices in the render buffers to point to the most
    // recent block to use in the capture processing.
    IncrementLowRateReadIndices();
    IncrementReadIndices();
  }

  // Hand the latched render activity to the echo remover buffer, then clear
  // the detection state so a new activity period can be detected.
  echo_remover_buffer_.SetRenderActivity(render_activity_);
  if (render_activity_) {
    render_activity_counter_ = 0;
    render_activity_ = false;
  }

  return event;
}
|
||||
|
||||
// Sets the delay and returns a bool indicating whether the delay was changed.
bool RenderDelayBufferImpl::AlignFromDelay(size_t delay) {
  RTC_DCHECK(!config_.delay.use_external_delay_estimator);
  // On the first estimate after a reset, log how far the internal estimate is
  // from the externally reported audio buffer delay (diagnostics only).
  if (!external_audio_buffer_delay_verified_after_reset_ &&
      external_audio_buffer_delay_ && delay_) {
    int difference = static_cast<int>(delay) - static_cast<int>(*delay_);
    RTC_LOG_V(delay_log_level_)
        << "Mismatch between first estimated delay after reset "
           "and externally reported audio buffer delay: "
        << difference << " blocks";
    external_audio_buffer_delay_verified_after_reset_ = true;
  }
  // No-op when the delay is unchanged.
  if (delay_ && *delay_ == delay) {
    return false;
  }
  delay_ = delay;

  // Compute the total delay and limit the delay to the allowed range.
  int total_delay = MapDelayToTotalDelay(*delay_);
  total_delay =
      std::min(MaxDelay(), static_cast<size_t>(std::max(total_delay, 0)));

  // Apply the delay to the buffers.
  ApplyTotalDelay(total_delay);
  return true;
}
|
||||
|
||||
void RenderDelayBufferImpl::SetAudioBufferDelay(int delay_ms) {
|
||||
if (!external_audio_buffer_delay_) {
|
||||
RTC_LOG_V(delay_log_level_)
|
||||
<< "Receiving a first externally reported audio buffer delay of "
|
||||
<< delay_ms << " ms.";
|
||||
}
|
||||
|
||||
// Convert delay from milliseconds to blocks (rounded down).
|
||||
external_audio_buffer_delay_ = delay_ms / 4;
|
||||
}
|
||||
|
||||
bool RenderDelayBufferImpl::HasReceivedBufferDelay() {
|
||||
return external_audio_buffer_delay_.has_value();
|
||||
}
|
||||
|
||||
// Maps the externally computed delay to the delay used internally.
|
||||
int RenderDelayBufferImpl::MapDelayToTotalDelay(
|
||||
size_t external_delay_blocks) const {
|
||||
const int latency_blocks = BufferLatency();
|
||||
return latency_blocks + static_cast<int>(external_delay_blocks);
|
||||
}
|
||||
|
||||
// Returns the delay (not including call jitter).
|
||||
int RenderDelayBufferImpl::ComputeDelay() const {
|
||||
const int latency_blocks = BufferLatency();
|
||||
int internal_delay = spectra_.read >= spectra_.write
|
||||
? spectra_.read - spectra_.write
|
||||
: spectra_.size + spectra_.read - spectra_.write;
|
||||
|
||||
return internal_delay - latency_blocks;
|
||||
}
|
||||
|
||||
// Set the read indices according to the delay.
void RenderDelayBufferImpl::ApplyTotalDelay(int delay) {
  RTC_LOG_V(delay_log_level_)
      << "Applying total delay of " << delay << " blocks.";
  // The block buffer write index increases over time while the spectrum and
  // FFT write indices decrease (see IncrementWriteIndices), hence the
  // opposite offset signs below.
  blocks_.read = blocks_.OffsetIndex(blocks_.write, -delay);
  spectra_.read = spectra_.OffsetIndex(spectra_.write, delay);
  ffts_.read = ffts_.OffsetIndex(ffts_.write, delay);
}
|
||||
|
||||
// Aligns the buffers from the externally reported delay, adjusted for the
// current render/capture call imbalance. Only valid when the external delay
// estimator is configured.
void RenderDelayBufferImpl::AlignFromExternalDelay() {
  RTC_DCHECK(config_.delay.use_external_delay_estimator);
  if (external_audio_buffer_delay_) {
    // Account for blocks already inserted but not yet consumed.
    const int64_t delay = render_call_counter_ - capture_call_counter_ +
                          *external_audio_buffer_delay_;
    // Subtract the configured headroom, converted from samples to blocks.
    const int64_t delay_with_headroom =
        delay - config_.delay.delay_headroom_samples / kBlockSize;
    ApplyTotalDelay(delay_with_headroom);
  }
}
|
||||
|
||||
// Inserts a block into the render buffers.
|
||||
void RenderDelayBufferImpl::InsertBlock(
|
||||
const std::vector<std::vector<std::vector<float>>>& block,
|
||||
int previous_write) {
|
||||
auto& b = blocks_;
|
||||
auto& lr = low_rate_;
|
||||
auto& ds = render_ds_;
|
||||
auto& f = ffts_;
|
||||
auto& s = spectra_;
|
||||
const size_t num_bands = b.buffer[b.write].size();
|
||||
const size_t num_render_channels = b.buffer[b.write][0].size();
|
||||
RTC_DCHECK_EQ(block.size(), b.buffer[b.write].size());
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
RTC_DCHECK_EQ(block[band].size(), num_render_channels);
|
||||
RTC_DCHECK_EQ(b.buffer[b.write][band].size(), num_render_channels);
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
RTC_DCHECK_EQ(block[band][ch].size(), b.buffer[b.write][band][ch].size());
|
||||
std::copy(block[band][ch].begin(), block[band][ch].end(),
|
||||
b.buffer[b.write][band][ch].begin());
|
||||
}
|
||||
}
|
||||
|
||||
if (render_linear_amplitude_gain_ != 1.f) {
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
for (size_t k = 0; k < 64; ++k) {
|
||||
b.buffer[b.write][band][ch][k] *= render_linear_amplitude_gain_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::array<float, kBlockSize> downmixed_render;
|
||||
render_mixer_.ProduceOutput(b.buffer[b.write][0], downmixed_render);
|
||||
render_decimator_.Decimate(downmixed_render, ds);
|
||||
data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
|
||||
16000 / down_sampling_factor_, 1);
|
||||
std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
|
||||
for (size_t channel = 0; channel < b.buffer[b.write][0].size(); ++channel) {
|
||||
fft_.PaddedFft(b.buffer[b.write][0][channel],
|
||||
b.buffer[previous_write][0][channel],
|
||||
&f.buffer[f.write][channel]);
|
||||
f.buffer[f.write][channel].Spectrum(optimization_,
|
||||
s.buffer[s.write][channel]);
|
||||
}
|
||||
}
|
||||
|
||||
// Classifies the lowest-band render block as active when its energy exceeds
// the configured per-sample power limit scaled by kFftLengthBy2.
bool RenderDelayBufferImpl::DetectActiveRender(
    rtc::ArrayView<const float> x) const {
  const float limit = config_.render_levels.active_render_limit;
  const float energy_threshold = (limit * limit) * kFftLengthBy2;
  const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
  return x_energy > energy_threshold;
}
|
||||
|
||||
// Detects whether more render than capture blocks have accumulated, by
// tracking the minimum buffer latency over a configurable interval. Returns
// true when the minimum latency exceeds the allowed excess.
bool RenderDelayBufferImpl::DetectExcessRenderBlocks() {
  bool excess_render_detected = false;
  const size_t latency_blocks = static_cast<size_t>(BufferLatency());
  // The recently seen minimum latency in blocks. Should be close to 0.
  min_latency_blocks_ = std::min(min_latency_blocks_, latency_blocks);
  // After processing a configurable number of blocks the minimum latency is
  // checked.
  if (++excess_render_detection_counter_ >=
      config_.buffering.excess_render_detection_interval_blocks) {
    // If the minimum latency is not lower than the threshold there have been
    // more render than capture frames.
    excess_render_detected = min_latency_blocks_ >
                             config_.buffering.max_allowed_excess_render_blocks;
    // Reset the counter and let the minimum latency be the current latency.
    min_latency_blocks_ = latency_blocks;
    excess_render_detection_counter_ = 0;
  }

  data_dumper_->DumpRaw("aec3_latency_blocks", latency_blocks);
  data_dumper_->DumpRaw("aec3_min_latency_blocks", min_latency_blocks_);
  data_dumper_->DumpRaw("aec3_excess_render_detected", excess_render_detected);
  return excess_render_detected;
}
|
||||
|
||||
// Computes the latency in the buffer (the number of unread sub-blocks).
|
||||
int RenderDelayBufferImpl::BufferLatency() const {
|
||||
const DownsampledRenderBuffer& l = low_rate_;
|
||||
int latency_samples = (l.buffer.size() + l.read - l.write) % l.buffer.size();
|
||||
int latency_blocks = latency_samples / sub_block_size_;
|
||||
return latency_blocks;
|
||||
}
|
||||
|
||||
// Increments the write indices for the render buffers.
void RenderDelayBufferImpl::IncrementWriteIndices() {
  // The low-rate buffer advances by a whole sub-block of samples; its index
  // and the spectrum/FFT indices decrease over time, while the block buffer
  // index increases (see ApplyTotalDelay).
  low_rate_.UpdateWriteIndex(-sub_block_size_);
  blocks_.IncWriteIndex();
  spectra_.DecWriteIndex();
  ffts_.DecWriteIndex();
}
|
||||
|
||||
// Increments the read indices of the low rate render buffers.
void RenderDelayBufferImpl::IncrementLowRateReadIndices() {
  // Advance by one sub-block of samples; the negative step matches the
  // decreasing-index convention of the low-rate buffer.
  low_rate_.UpdateReadIndex(-sub_block_size_);
}
|
||||
|
||||
// Increments the read indices for the render buffers.
|
||||
void RenderDelayBufferImpl::IncrementReadIndices() {
|
||||
if (blocks_.read != blocks_.write) {
|
||||
blocks_.IncReadIndex();
|
||||
spectra_.DecReadIndex();
|
||||
ffts_.DecReadIndex();
|
||||
}
|
||||
}
|
||||
|
||||
// Checks for a render buffer overrun.
|
||||
bool RenderDelayBufferImpl::RenderOverrun() {
|
||||
return low_rate_.read == low_rate_.write || blocks_.read == blocks_.write;
|
||||
}
|
||||
|
||||
// Checks for a render buffer underrun.
|
||||
bool RenderDelayBufferImpl::RenderUnderrun() {
|
||||
return low_rate_.read == low_rate_.write;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Factory for the concrete implementation; the caller takes ownership of the
// returned object.
RenderDelayBuffer* RenderDelayBuffer::Create(const EchoCanceller3Config& config,
                                             int sample_rate_hz,
                                             size_t num_render_channels) {
  return new RenderDelayBufferImpl(config, sample_rate_hz, num_render_channels);
}
|
||||
|
||||
} // namespace webrtc
|
86
webrtc/modules/audio_processing/aec3/render_delay_buffer.h
Normal file
86
webrtc/modules/audio_processing/aec3/render_delay_buffer.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for buffering the incoming render blocks such that these may be
|
||||
// extracted with a specified delay.
|
||||
// Class for buffering the incoming render blocks such that these may be
// extracted with a specified delay.
class RenderDelayBuffer {
 public:
  // Events that may occur while buffering, reported by Insert and
  // PrepareCaptureProcessing.
  enum class BufferingEvent {
    kNone,
    kRenderUnderrun,
    kRenderOverrun,
    kApiCallSkew
  };

  // Creates the default implementation; the caller takes ownership.
  static RenderDelayBuffer* Create(const EchoCanceller3Config& config,
                                   int sample_rate_hz,
                                   size_t num_render_channels);
  virtual ~RenderDelayBuffer() = default;

  // Resets the buffer alignment.
  virtual void Reset() = 0;

  // Inserts a block into the buffer.
  virtual BufferingEvent Insert(
      const std::vector<std::vector<std::vector<float>>>& block) = 0;

  // Updates the buffers one step based on the specified buffer delay. Returns
  // an enum indicating whether there was a special event that occurred.
  virtual BufferingEvent PrepareCaptureProcessing() = 0;

  // Called on capture blocks where PrepareCaptureProcessing is not called.
  virtual void HandleSkippedCaptureProcessing() = 0;

  // Sets the buffer delay and returns a bool indicating whether the delay
  // changed.
  virtual bool AlignFromDelay(size_t delay) = 0;

  // Sets the buffer delay from the most recently reported external delay.
  virtual void AlignFromExternalDelay() = 0;

  // Gets the buffer delay.
  virtual size_t Delay() const = 0;

  // Gets the maximum buffer delay that can be applied.
  virtual size_t MaxDelay() const = 0;

  // Returns the render buffer for the echo remover.
  virtual RenderBuffer* GetRenderBuffer() = 0;

  // Returns the downsampled render buffer.
  virtual const DownsampledRenderBuffer& GetDownsampledRenderBuffer() const = 0;

  // Returns the maximum non causal offset that can occur in the delay buffer.
  static int DelayEstimatorOffset(const EchoCanceller3Config& config);

  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;

  // Returns whether an external delay estimate has been reported via
  // SetAudioBufferDelay.
  virtual bool HasReceivedBufferDelay() = 0;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_BUFFER_H_
|
196
webrtc/modules/audio_processing/aec3/render_delay_controller.cc
Normal file
196
webrtc/modules/audio_processing/aec3/render_delay_controller.cc
Normal file
@ -0,0 +1,196 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
#include "modules/audio_processing/aec3/render_delay_controller.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <memory>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Implementation of RenderDelayController: wraps an EchoPathDelayEstimator and
// converts its sample-level estimates into buffer delays (in blocks) with
// headroom and hysteresis applied.
class RenderDelayControllerImpl final : public RenderDelayController {
 public:
  RenderDelayControllerImpl(const EchoCanceller3Config& config,
                            int sample_rate_hz,
                            size_t num_capture_channels);

  RenderDelayControllerImpl() = delete;
  RenderDelayControllerImpl(const RenderDelayControllerImpl&) = delete;
  RenderDelayControllerImpl& operator=(const RenderDelayControllerImpl&) =
      delete;

  ~RenderDelayControllerImpl() override;
  void Reset(bool reset_delay_confidence) override;
  void LogRenderCall() override;
  absl::optional<DelayEstimate> GetDelay(
      const DownsampledRenderBuffer& render_buffer,
      size_t render_delay_buffer_delay,
      const std::vector<std::vector<float>>& capture) override;
  bool HasClockdrift() const override;

 private:
  static int instance_count_;
  std::unique_ptr<ApmDataDumper> data_dumper_;
  // Hysteresis applied when increasing the delay, in blocks.
  const int hysteresis_limit_blocks_;
  // Headroom subtracted from the estimated delay, in samples.
  const int delay_headroom_samples_;
  // Most recent delay handed to the caller, in blocks.
  absl::optional<DelayEstimate> delay_;
  EchoPathDelayEstimator delay_estimator_;
  RenderDelayControllerMetrics metrics_;
  // Most recent raw estimate from the estimator, in samples.
  absl::optional<DelayEstimate> delay_samples_;
  size_t capture_call_counter_ = 0;
  int delay_change_counter_ = 0;
  DelayEstimate::Quality last_delay_estimate_quality_;
};
|
||||
|
||||
// Converts a sample-level delay estimate into a buffer delay in blocks,
// subtracting the configured headroom and applying hysteresis so that small
// increases relative to the current delay are ignored.
DelayEstimate ComputeBufferDelay(
    const absl::optional<DelayEstimate>& current_delay,
    int hysteresis_limit_blocks,
    int delay_headroom_samples,
    DelayEstimate estimated_delay) {
  // Subtract delay headroom, clamping at zero.
  const int delay_with_headroom_samples = std::max(
      static_cast<int>(estimated_delay.delay) - delay_headroom_samples, 0);

  // Convert from samples to whole blocks.
  size_t new_delay_blocks = delay_with_headroom_samples >> kBlockSizeLog2;

  // Apply hysteresis: keep the current delay if the new one is only slightly
  // larger.
  if (current_delay) {
    const size_t current_delay_blocks = current_delay->delay;
    const size_t hysteresis_upper_bound =
        current_delay_blocks + hysteresis_limit_blocks;
    if (new_delay_blocks > current_delay_blocks &&
        new_delay_blocks <= hysteresis_upper_bound) {
      new_delay_blocks = current_delay_blocks;
    }
  }

  DelayEstimate result = estimated_delay;
  result.delay = new_delay_blocks;
  return result;
}
|
||||
|
||||
int RenderDelayControllerImpl::instance_count_ = 0;

// Constructs the controller. |sample_rate_hz| must be a valid full-band rate
// (checked below); the delay hysteresis and headroom are taken from |config|.
RenderDelayControllerImpl::RenderDelayControllerImpl(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_capture_channels)
    // Atomically bump the instance counter so each dumper gets a unique id.
    : data_dumper_(
          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
      hysteresis_limit_blocks_(
          static_cast<int>(config.delay.hysteresis_limit_blocks)),
      delay_headroom_samples_(config.delay.delay_headroom_samples),
      delay_estimator_(data_dumper_.get(), config, num_capture_channels),
      last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) {
  RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
  delay_estimator_.LogDelayEstimationProperties(sample_rate_hz, 0);
}
|
||||
|
||||
RenderDelayControllerImpl::~RenderDelayControllerImpl() = default;
|
||||
|
||||
void RenderDelayControllerImpl::Reset(bool reset_delay_confidence) {
|
||||
delay_ = absl::nullopt;
|
||||
delay_samples_ = absl::nullopt;
|
||||
delay_estimator_.Reset(reset_delay_confidence);
|
||||
delay_change_counter_ = 0;
|
||||
if (reset_delay_confidence) {
|
||||
last_delay_estimate_quality_ = DelayEstimate::Quality::kCoarse;
|
||||
}
|
||||
}
|
||||
|
||||
// No-op: this implementation does not track render calls.
void RenderDelayControllerImpl::LogRenderCall() {}
|
||||
|
||||
// Produces the delay to apply to the render delay buffer based on the latest
// capture block, updating metrics and debug dumps as a side effect.
// NOTE(review): |render_delay_buffer_delay| is unused in this implementation.
absl::optional<DelayEstimate> RenderDelayControllerImpl::GetDelay(
    const DownsampledRenderBuffer& render_buffer,
    size_t render_delay_buffer_delay,
    const std::vector<std::vector<float>>& capture) {
  RTC_DCHECK_EQ(kBlockSize, capture[0].size());
  ++capture_call_counter_;

  auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture);

  if (delay_samples) {
    // A fresh estimate arrived; restart the change counter if the delay
    // differs from the stored one.
    if (!delay_samples_ || delay_samples->delay != delay_samples_->delay) {
      delay_change_counter_ = 0;
    }
    if (delay_samples_) {
      // Merge the new estimate into the stored one, tracking how long the
      // delay value has been stable.
      delay_samples_->blocks_since_last_change =
          delay_samples_->delay == delay_samples->delay
              ? delay_samples_->blocks_since_last_change + 1
              : 0;
      delay_samples_->blocks_since_last_update = 0;
      delay_samples_->delay = delay_samples->delay;
      delay_samples_->quality = delay_samples->quality;
    } else {
      delay_samples_ = delay_samples;
    }
  } else {
    // No new estimate; age the stored one.
    if (delay_samples_) {
      ++delay_samples_->blocks_since_last_change;
      ++delay_samples_->blocks_since_last_update;
    }
  }

  // Saturating counter, capped at 2 seconds worth of blocks.
  if (delay_change_counter_ < 2 * kNumBlocksPerSecond) {
    ++delay_change_counter_;
  }

  if (delay_samples_) {
    // Compute the render delay buffer delay. Hysteresis is only applied when
    // both the previous and the current estimates are refined.
    const bool use_hysteresis =
        last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined &&
        delay_samples_->quality == DelayEstimate::Quality::kRefined;
    delay_ = ComputeBufferDelay(delay_,
                                use_hysteresis ? hysteresis_limit_blocks_ : 0,
                                delay_headroom_samples_, *delay_samples_);
    last_delay_estimate_quality_ = delay_samples_->quality;
  }

  metrics_.Update(delay_samples_ ? absl::optional<size_t>(delay_samples_->delay)
                                 : absl::nullopt,
                  delay_ ? delay_->delay : 0, 0, delay_estimator_.Clockdrift());

  data_dumper_->DumpRaw("aec3_render_delay_controller_delay",
                        delay_samples ? delay_samples->delay : 0);
  data_dumper_->DumpRaw("aec3_render_delay_controller_buffer_delay",
                        delay_ ? delay_->delay : 0);

  return delay_;
}
|
||||
|
||||
// Reports whether the delay estimator currently detects clockdrift between
// the render and capture sides.
bool RenderDelayControllerImpl::HasClockdrift() const {
  return delay_estimator_.Clockdrift() != ClockdriftDetector::Level::kNone;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Factory for the file-local RenderDelayControllerImpl. Ownership of the
// returned raw pointer passes to the caller.
RenderDelayController* RenderDelayController::Create(
    const EchoCanceller3Config& config,
    int sample_rate_hz,
    size_t num_capture_channels) {
  return new RenderDelayControllerImpl(config, sample_rate_hz,
                                       num_capture_channels);
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,50 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for aligning the render and capture signal using a RenderDelayBuffer.
class RenderDelayController {
 public:
  // Factory method; ownership of the returned object passes to the caller.
  static RenderDelayController* Create(const EchoCanceller3Config& config,
                                       int sample_rate_hz,
                                       size_t num_capture_channels);
  virtual ~RenderDelayController() = default;

  // Resets the delay controller. If the delay confidence is reset, the reset
  // behavior is as if the call is restarted.
  virtual void Reset(bool reset_delay_confidence) = 0;

  // Logs a render call.
  virtual void LogRenderCall() = 0;

  // Aligns the render buffer content with the capture signal, returning the
  // delay to apply to the render delay buffer (if one is available).
  virtual absl::optional<DelayEstimate> GetDelay(
      const DownsampledRenderBuffer& render_buffer,
      size_t render_delay_buffer_delay,
      const std::vector<std::vector<float>>& capture) = 0;

  // Returns true if clockdrift has been detected.
  virtual bool HasClockdrift() const = 0;
};
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_H_
|
@ -0,0 +1,145 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Histogram bins for the number of reliable delay estimates seen within one
// metrics reporting interval.
enum class DelayReliabilityCategory {
  kNone,
  kPoor,
  kMedium,
  kGood,
  kExcellent,
  kNumCategories
};
// Histogram bins for the number of delay changes seen within one metrics
// reporting interval.
enum class DelayChangesCategory {
  kNone,
  kFew,
  kSeveral,
  kMany,
  kConstant,
  kNumCategories
};

// Upper bound for the reported skew shift count.
constexpr int kMaxSkewShiftCount = 20;
|
||||
|
||||
} // namespace
|
||||
|
||||
RenderDelayControllerMetrics::RenderDelayControllerMetrics() = default;
|
||||
|
||||
// Accumulates per-block delay statistics and periodically reports them as
// UMA histograms: delay histograms every kMetricsReportingIntervalBlocks
// blocks, and the skew shift count on a separate 60 second cadence.
void RenderDelayControllerMetrics::Update(
    absl::optional<size_t> delay_samples,
    size_t buffer_delay_blocks,
    absl::optional<int> skew_shift_blocks,
    ClockdriftDetector::Level clockdrift) {
  ++call_counter_;

  // Gathering only starts after an initial warm-up of 5 seconds of blocks
  // (see the else-branch below).
  if (!initial_update) {
    size_t delay_blocks;
    if (delay_samples) {
      ++reliable_delay_estimate_counter_;
      // The +2 block offset matches the one applied to buffer_delay_blocks
      // when reporting below.
      delay_blocks = (*delay_samples) / kBlockSize + 2;
    } else {
      delay_blocks = 0;
    }

    if (delay_blocks != delay_blocks_) {
      ++delay_change_counter_;
      delay_blocks_ = delay_blocks;
    }

    if (skew_shift_blocks) {
      // NOTE(review): this assignment never increases skew_shift_count_, so
      // the MaxSkewShiftCount histogram below always reports 0. It looks like
      // it was meant to be std::min(kMaxSkewShiftCount, skew_shift_count_ + 1)
      // — confirm against upstream before changing reported metrics.
      skew_shift_count_ = std::min(kMaxSkewShiftCount, skew_shift_count_);
    }
  } else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) {
    initial_update = false;
  }

  if (call_counter_ == kMetricsReportingIntervalBlocks) {
    // Report delays with 2-block granularity, capped at 124.
    int value_to_report = static_cast<int>(delay_blocks_);
    value_to_report = std::min(124, value_to_report >> 1);
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.EchoPathDelay",
                                value_to_report, 0, 124, 125);

    value_to_report = static_cast<int>(buffer_delay_blocks + 2);
    value_to_report = std::min(124, value_to_report >> 1);
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.BufferDelay",
                                value_to_report, 0, 124, 125);

    // Bin the number of reliable delay estimates during the interval.
    DelayReliabilityCategory delay_reliability;
    if (reliable_delay_estimate_counter_ == 0) {
      delay_reliability = DelayReliabilityCategory::kNone;
    } else if (reliable_delay_estimate_counter_ > (call_counter_ >> 1)) {
      delay_reliability = DelayReliabilityCategory::kExcellent;
    } else if (reliable_delay_estimate_counter_ > 100) {
      delay_reliability = DelayReliabilityCategory::kGood;
    } else if (reliable_delay_estimate_counter_ > 10) {
      delay_reliability = DelayReliabilityCategory::kMedium;
    } else {
      delay_reliability = DelayReliabilityCategory::kPoor;
    }
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.EchoCanceller.ReliableDelayEstimates",
        static_cast<int>(delay_reliability),
        static_cast<int>(DelayReliabilityCategory::kNumCategories));

    // Bin the number of delay changes during the interval.
    DelayChangesCategory delay_changes;
    if (delay_change_counter_ == 0) {
      delay_changes = DelayChangesCategory::kNone;
    } else if (delay_change_counter_ > 10) {
      delay_changes = DelayChangesCategory::kConstant;
    } else if (delay_change_counter_ > 5) {
      delay_changes = DelayChangesCategory::kMany;
    } else if (delay_change_counter_ > 2) {
      delay_changes = DelayChangesCategory::kSeveral;
    } else {
      delay_changes = DelayChangesCategory::kFew;
    }
    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.EchoCanceller.DelayChanges",
        static_cast<int>(delay_changes),
        static_cast<int>(DelayChangesCategory::kNumCategories));

    RTC_HISTOGRAM_ENUMERATION(
        "WebRTC.Audio.EchoCanceller.Clockdrift", static_cast<int>(clockdrift),
        static_cast<int>(ClockdriftDetector::Level::kNumCategories));

    metrics_reported_ = true;
    call_counter_ = 0;
    ResetMetrics();
  } else {
    metrics_reported_ = false;
  }

  // The skew shift count is reported on its own 60 second cadence.
  if (!initial_update && ++skew_report_timer_ == 60 * kNumBlocksPerSecond) {
    RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.MaxSkewShiftCount",
                                skew_shift_count_, 0, kMaxSkewShiftCount,
                                kMaxSkewShiftCount + 1);

    skew_shift_count_ = 0;
    skew_report_timer_ = 0;
  }
}
|
||||
|
||||
// Resets the per-interval counters. skew_shift_count_ is not cleared here; it
// is reset separately in Update()'s 60 second reporting branch.
void RenderDelayControllerMetrics::ResetMetrics() {
  delay_change_counter_ = 0;
  reliable_delay_estimate_counter_ = 0;
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,55 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "modules/audio_processing/aec3/clockdrift_detector.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Handles the reporting of metrics for the render delay controller.
class RenderDelayControllerMetrics {
 public:
  RenderDelayControllerMetrics();

  // Updates the metric with new data.
  void Update(absl::optional<size_t> delay_samples,
              size_t buffer_delay_blocks,
              absl::optional<int> skew_shift_blocks,
              ClockdriftDetector::Level clockdrift);

  // Returns true if the metrics have just been reported, otherwise false.
  bool MetricsReported() { return metrics_reported_; }

 private:
  // Resets the per-interval metrics.
  void ResetMetrics();

  // Last observed delay, in blocks.
  size_t delay_blocks_ = 0;
  // Per-interval counters, cleared by ResetMetrics().
  int reliable_delay_estimate_counter_ = 0;
  int delay_change_counter_ = 0;
  int call_counter_ = 0;
  // Timer for the separate 60 second skew-shift report.
  int skew_report_timer_ = 0;
  // Counts blocks during the initial 5 second warm-up in which no metrics
  // are gathered.
  int initial_call_counter_ = 0;
  bool metrics_reported_ = false;
  bool initial_update = true;
  int skew_shift_count_ = 0;

  RTC_DISALLOW_COPY_AND_ASSIGN(RenderDelayControllerMetrics);
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_DELAY_CONTROLLER_METRICS_H_
|
156
webrtc/modules/audio_processing/aec3/render_signal_analyzer.cc
Normal file
156
webrtc/modules/audio_processing/aec3/render_signal_analyzer.cc
Normal file
@ -0,0 +1,156 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
constexpr size_t kCounterThreshold = 5;
|
||||
|
||||
// Identifies local bands with narrow characteristics.
|
||||
void IdentifySmallNarrowBandRegions(
|
||||
const RenderBuffer& render_buffer,
|
||||
const absl::optional<size_t>& delay_partitions,
|
||||
std::array<size_t, kFftLengthBy2 - 1>* narrow_band_counters) {
|
||||
RTC_DCHECK(narrow_band_counters);
|
||||
|
||||
if (!delay_partitions) {
|
||||
narrow_band_counters->fill(0);
|
||||
return;
|
||||
}
|
||||
|
||||
std::array<size_t, kFftLengthBy2 - 1> channel_counters;
|
||||
channel_counters.fill(0);
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
|
||||
render_buffer.Spectrum(*delay_partitions);
|
||||
for (size_t ch = 0; ch < X2.size(); ++ch) {
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
if (X2[ch][k] > 3 * std::max(X2[ch][k - 1], X2[ch][k + 1])) {
|
||||
++channel_counters[k - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
(*narrow_band_counters)[k - 1] =
|
||||
channel_counters[k - 1] > 0 ? (*narrow_band_counters)[k - 1] + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Identifies whether the signal has a single strong narrow-band component.
void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer,
                                       int strong_peak_freeze_duration,
                                       absl::optional<int>* narrow_peak_band,
                                       size_t* narrow_peak_counter) {
  RTC_DCHECK(narrow_peak_band);
  RTC_DCHECK(narrow_peak_counter);
  // Drop a previously detected peak once it has been held for longer than
  // the freeze duration.
  if (*narrow_peak_band &&
      ++(*narrow_peak_counter) >
          static_cast<size_t>(strong_peak_freeze_duration)) {
    *narrow_peak_band = absl::nullopt;
  }

  const std::vector<std::vector<std::vector<float>>>& x_latest =
      render_buffer.Block(0);
  float max_peak_level = 0.f;
  for (size_t channel = 0; channel < x_latest[0].size(); ++channel) {
    rtc::ArrayView<const float, kFftLengthBy2Plus1> X2_latest =
        render_buffer.Spectrum(0)[channel];

    // Identify the spectral peak.
    const int peak_bin =
        static_cast<int>(std::max_element(X2_latest.begin(), X2_latest.end()) -
                         X2_latest.begin());

    // Compute the level around the peak: the maximum power among bins
    // [peak-14, peak-5] and [peak+5, peak+14], clamped to the spectrum.
    float non_peak_power = 0.f;
    for (int k = std::max(0, peak_bin - 14); k < peak_bin - 4; ++k) {
      non_peak_power = std::max(X2_latest[k], non_peak_power);
    }
    for (int k = peak_bin + 5;
         k < std::min(peak_bin + 15, static_cast<int>(kFftLengthBy2Plus1));
         ++k) {
      non_peak_power = std::max(X2_latest[k], non_peak_power);
    }

    // Assess the render signal strength via the largest absolute sample in
    // the two lowest bands of the latest block.
    auto result0 = std::minmax_element(x_latest[0][channel].begin(),
                                       x_latest[0][channel].end());
    float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));

    if (x_latest.size() > 1) {
      const auto result1 = std::minmax_element(x_latest[1][channel].begin(),
                                               x_latest[1][channel].end());
      max_abs =
          std::max(max_abs, static_cast<float>(std::max(
                                fabs(*result1.first), fabs(*result1.second))));
    }

    // Detect whether the spectral peak has a strong narrowband nature: the
    // peak must not be DC, the signal must be strong, and the peak must
    // dominate its surroundings by a factor of 100.
    const float peak_level = X2_latest[peak_bin];
    if (peak_bin > 0 && max_abs > 100 && peak_level > 100 * non_peak_power) {
      // Store the strongest peak across channels.
      if (peak_level > max_peak_level) {
        max_peak_level = peak_level;
        *narrow_peak_band = peak_bin;
        *narrow_peak_counter = 0;
      }
    }
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// A detected strong peak is retained ("frozen") for as many blocks as the
// refined filter is long.
RenderSignalAnalyzer::RenderSignalAnalyzer(const EchoCanceller3Config& config)
    : strong_peak_freeze_duration_(config.filter.refined.length_blocks) {
  narrow_band_counters_.fill(0);
}
RenderSignalAnalyzer::~RenderSignalAnalyzer() = default;
|
||||
|
||||
// Updates the analysis with the most recent render signal: refreshes the
// per-bin narrow-band counters and the strong narrow-band peak detection.
void RenderSignalAnalyzer::Update(
    const RenderBuffer& render_buffer,
    const absl::optional<size_t>& delay_partitions) {
  // Identify bands of narrow nature.
  IdentifySmallNarrowBandRegions(render_buffer, delay_partitions,
                                 &narrow_band_counters_);

  // Identify the presence of a strong narrow band.
  IdentifyStrongNarrowBandComponent(render_buffer, strong_peak_freeze_duration_,
                                    &narrow_peak_band_, &narrow_peak_counter_);
}
|
||||
|
||||
void RenderSignalAnalyzer::MaskRegionsAroundNarrowBands(
|
||||
std::array<float, kFftLengthBy2Plus1>* v) const {
|
||||
RTC_DCHECK(v);
|
||||
|
||||
// Set v to zero around narrow band signal regions.
|
||||
if (narrow_band_counters_[0] > kCounterThreshold) {
|
||||
(*v)[1] = (*v)[0] = 0.f;
|
||||
}
|
||||
for (size_t k = 2; k < kFftLengthBy2 - 1; ++k) {
|
||||
if (narrow_band_counters_[k - 1] > kCounterThreshold) {
|
||||
(*v)[k - 2] = (*v)[k - 1] = (*v)[k] = (*v)[k + 1] = (*v)[k + 2] = 0.f;
|
||||
}
|
||||
}
|
||||
if (narrow_band_counters_[kFftLengthBy2 - 2] > kCounterThreshold) {
|
||||
(*v)[kFftLengthBy2] = (*v)[kFftLengthBy2 - 1] = 0.f;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Provides functionality for analyzing the properties of the render signal.
class RenderSignalAnalyzer {
 public:
  explicit RenderSignalAnalyzer(const EchoCanceller3Config& config);
  ~RenderSignalAnalyzer();

  // Updates the render signal analysis with the most recent render signal.
  void Update(const RenderBuffer& render_buffer,
              const absl::optional<size_t>& delay_partitions);

  // Returns true if the render signal is poorly exciting, i.e. any band has
  // shown narrow-band characteristics for more than 10 consecutive blocks.
  bool PoorSignalExcitation() const {
    RTC_DCHECK_LT(2, narrow_band_counters_.size());
    return std::any_of(narrow_band_counters_.begin(),
                       narrow_band_counters_.end(),
                       [](size_t a) { return a > 10; });
  }

  // Zeros the array around regions with narrow bands signal characteristics.
  void MaskRegionsAroundNarrowBands(
      std::array<float, kFftLengthBy2Plus1>* v) const;

  // Band index of the currently detected strong narrow-band peak, if any.
  absl::optional<int> NarrowPeakBand() const { return narrow_peak_band_; }

 private:
  // Number of blocks for which a detected strong peak is retained.
  const int strong_peak_freeze_duration_;
  // Per-bin counters of consecutive blocks with narrow-band characteristics.
  std::array<size_t, kFftLengthBy2 - 1> narrow_band_counters_;
  absl::optional<int> narrow_peak_band_;
  size_t narrow_peak_counter_;

  RTC_DISALLOW_COPY_AND_ASSIGN(RenderSignalAnalyzer);
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_RENDER_SIGNAL_ANALYZER_H_
|
379
webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
Normal file
379
webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
Normal file
@ -0,0 +1,379 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/residual_echo_estimator.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/reverb_model.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
constexpr float kDefaultTransparentModeGain = 0.f;
|
||||
|
||||
float GetTransparentModeGain() {
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3NoSuppressionInTransparentModeKillSwitch")) {
|
||||
return 0.01f;
|
||||
} else {
|
||||
return kDefaultTransparentModeGain;
|
||||
}
|
||||
}
|
||||
|
||||
float GetEarlyReflectionsDefaultModeGain(
|
||||
const EchoCanceller3Config::EpStrength& config) {
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseLowEarlyReflectionsDefaultGain")) {
|
||||
return 0.1f;
|
||||
}
|
||||
return config.default_gain;
|
||||
}
|
||||
|
||||
float GetLateReflectionsDefaultModeGain(
|
||||
const EchoCanceller3Config::EpStrength& config) {
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseLowLateReflectionsDefaultGain")) {
|
||||
return 0.1f;
|
||||
}
|
||||
return config.default_gain;
|
||||
}
|
||||
|
||||
// Computes the indexes that will be used for computing spectral power over
|
||||
// the blocks surrounding the delay.
|
||||
void GetRenderIndexesToAnalyze(
|
||||
const SpectrumBuffer& spectrum_buffer,
|
||||
const EchoCanceller3Config::EchoModel& echo_model,
|
||||
int filter_delay_blocks,
|
||||
int* idx_start,
|
||||
int* idx_stop) {
|
||||
RTC_DCHECK(idx_start);
|
||||
RTC_DCHECK(idx_stop);
|
||||
size_t window_start;
|
||||
size_t window_end;
|
||||
window_start =
|
||||
std::max(0, filter_delay_blocks -
|
||||
static_cast<int>(echo_model.render_pre_window_size));
|
||||
window_end = filter_delay_blocks +
|
||||
static_cast<int>(echo_model.render_post_window_size);
|
||||
*idx_start = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_start);
|
||||
*idx_stop = spectrum_buffer.OffsetIndex(spectrum_buffer.read, window_end + 1);
|
||||
}
|
||||
|
||||
// Estimates the residual echo power based on the echo return loss enhancement
|
||||
// (ERLE) and the linear power estimate.
|
||||
void LinearEstimate(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> erle,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||
RTC_DCHECK_EQ(S2_linear.size(), erle.size());
|
||||
RTC_DCHECK_EQ(S2_linear.size(), R2.size());
|
||||
|
||||
const size_t num_capture_channels = R2.size();
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
RTC_DCHECK_LT(0.f, erle[ch][k]);
|
||||
R2[ch][k] = S2_linear[ch][k] / erle[ch][k];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Estimates the residual echo power based on an uncertainty estimate of the
|
||||
// echo return loss enhancement (ERLE) and the linear power estimate.
|
||||
void LinearEstimate(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||
float erle_uncertainty,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||
RTC_DCHECK_EQ(S2_linear.size(), R2.size());
|
||||
|
||||
const size_t num_capture_channels = R2.size();
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
R2[ch][k] = S2_linear[ch][k] * erle_uncertainty;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Estimates the residual echo power based on the estimate of the echo path
|
||||
// gain.
|
||||
void NonLinearEstimate(
|
||||
float echo_path_gain,
|
||||
const std::array<float, kFftLengthBy2Plus1>& X2,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||
const size_t num_capture_channels = R2.size();
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
R2[ch][k] = X2[k] * echo_path_gain;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Applies a soft noise gate to the echo generating power.
|
||||
void ApplyNoiseGate(const EchoCanceller3Config::EchoModel& config,
|
||||
rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
if (config.noise_gate_power > X2[k]) {
|
||||
X2[k] = std::max(0.f, X2[k] - config.noise_gate_slope *
|
||||
(config.noise_gate_power - X2[k]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Estimates the echo generating signal power as gated maximal power over a
// time window.
void EchoGeneratingPower(size_t num_render_channels,
                         const SpectrumBuffer& spectrum_buffer,
                         const EchoCanceller3Config::EchoModel& echo_model,
                         int filter_delay_blocks,
                         rtc::ArrayView<float, kFftLengthBy2Plus1> X2) {
  int idx_stop;
  int idx_start;
  GetRenderIndexesToAnalyze(spectrum_buffer, echo_model, filter_delay_blocks,
                            &idx_start, &idx_stop);

  std::fill(X2.begin(), X2.end(), 0.f);
  if (num_render_channels == 1) {
    // Single channel: per-bin maximum over the window.
    for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
      for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
        X2[j] = std::max(X2[j], spectrum_buffer.buffer[k][/*channel=*/0][j]);
      }
    }
  } else {
    // Multiple channels: sum the channel spectra per block, then take the
    // per-bin maximum of the sums over the window.
    for (int k = idx_start; k != idx_stop; k = spectrum_buffer.IncIndex(k)) {
      std::array<float, kFftLengthBy2Plus1> render_power;
      render_power.fill(0.f);
      for (size_t ch = 0; ch < num_render_channels; ++ch) {
        const auto& channel_power = spectrum_buffer.buffer[k][ch];
        for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
          render_power[j] += channel_power[j];
        }
      }
      for (size_t j = 0; j < kFftLengthBy2Plus1; ++j) {
        X2[j] = std::max(X2[j], render_power[j]);
      }
    }
  }
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Constructs the estimator; the reflection gains are resolved once at
// construction from the config and any active field trials.
ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config,
                                             size_t num_render_channels)
    : config_(config),
      num_render_channels_(num_render_channels),
      early_reflections_transparent_mode_gain_(GetTransparentModeGain()),
      late_reflections_transparent_mode_gain_(GetTransparentModeGain()),
      early_reflections_general_gain_(
          GetEarlyReflectionsDefaultModeGain(config_.ep_strength)),
      late_reflections_general_gain_(
          GetLateReflectionsDefaultModeGain(config_.ep_strength)) {
  Reset();
}
|
||||
|
||||
ResidualEchoEstimator::~ResidualEchoEstimator() = default;
|
||||
|
||||
// Produces per-channel residual echo power spectra R2 from the capture
// spectra Y2 and, when usable, the linear echo estimate spectra S2_linear.
void ResidualEchoEstimator::Estimate(
    const AecState& aec_state,
    const RenderBuffer& render_buffer,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
    rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
  RTC_DCHECK_EQ(R2.size(), Y2.size());
  RTC_DCHECK_EQ(R2.size(), S2_linear.size());

  const size_t num_capture_channels = R2.size();

  // Estimate the power of the stationary noise in the render signal.
  UpdateRenderNoisePower(render_buffer);

  // Estimate the residual echo power.
  if (aec_state.UsableLinearEstimate()) {
    // Linear mode: derive R2 from the linear filter output.
    // When there is saturated echo, assume the same spectral content as is
    // present in the microphone signal.
    if (aec_state.SaturatedEcho()) {
      for (size_t ch = 0; ch < num_capture_channels; ++ch) {
        std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
      }
    } else {
      // Prefer the uncertainty-based estimate when one is available.
      absl::optional<float> erle_uncertainty = aec_state.ErleUncertainty();
      if (erle_uncertainty) {
        LinearEstimate(S2_linear, *erle_uncertainty, R2);
      } else {
        LinearEstimate(S2_linear, aec_state.Erle(), R2);
      }
    }

    AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2);
  } else {
    // Nonlinear mode: derive R2 from the render power and an echo path gain.
    const float echo_path_gain =
        GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true);

    // When there is saturated echo, assume the same spectral content as is
    // present in the microphone signal.
    if (aec_state.SaturatedEcho()) {
      for (size_t ch = 0; ch < num_capture_channels; ++ch) {
        std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
      }
    } else {
      // Estimate the echo generating signal power.
      std::array<float, kFftLengthBy2Plus1> X2;
      EchoGeneratingPower(num_render_channels_,
                          render_buffer.GetSpectrumBuffer(), config_.echo_model,
                          aec_state.MinDirectPathFilterDelay(), X2);
      if (!aec_state.UseStationarityProperties()) {
        ApplyNoiseGate(config_.echo_model, X2);
      }

      // Subtract the stationary noise power to avoid stationary noise causing
      // excessive echo suppression.
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        X2[k] -= config_.echo_model.stationary_gate_slope * X2_noise_floor_[k];
        X2[k] = std::max(0.f, X2[k]);
      }

      NonLinearEstimate(echo_path_gain, X2, R2);
    }

    if (config_.echo_model.model_reverb_in_nonlinear_mode &&
        !aec_state.TransparentModeActive()) {
      AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2);
    }
  }

  if (aec_state.UseStationarityProperties()) {
    // Scale the echo according to echo audibility.
    std::array<float, kFftLengthBy2Plus1> residual_scaling;
    aec_state.GetResidualEchoScaling(residual_scaling);
    for (size_t ch = 0; ch < num_capture_channels; ++ch) {
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        R2[ch][k] *= residual_scaling[k];
      }
    }
  }
}
|
||||
|
||||
// Restores the estimator to its initial state.
void ResidualEchoEstimator::Reset() {
  const auto& echo_model = config_.echo_model;
  // Start the render noise-floor tracker from its configured resting point.
  X2_noise_floor_.fill(echo_model.min_noise_floor_power);
  X2_noise_floor_counter_.fill(echo_model.noise_floor_hold);
  // Discard any accumulated reverberation estimate.
  echo_reverb_.Reset();
}
|
||||
|
||||
// Tracks the stationary noise power in the render signal. The per-bin noise
// floor X2_noise_floor_ follows the (channel-summed) render power downwards
// immediately and upwards only after noise_floor_hold blocks, and then only
// multiplicatively (leaky rise by factor 1.1).
void ResidualEchoEstimator::UpdateRenderNoisePower(
    const RenderBuffer& render_buffer) {
  std::array<float, kFftLengthBy2Plus1> render_power_data;
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
      render_buffer.Spectrum(0);
  // For mono render, use channel 0 directly; for multichannel, rebind the
  // view to the sum over channels computed into render_power_data.
  rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
      X2[/*channel=*/0];
  if (num_render_channels_ > 1) {
    render_power_data.fill(0.f);
    for (size_t ch = 0; ch < num_render_channels_; ++ch) {
      const auto& channel_power = X2[ch];
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        render_power_data[k] += channel_power[k];
      }
    }
    render_power = render_power_data;
  }

  // Estimate the stationary noise power in a minimum statistics manner.
  for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
    // Decrease rapidly.
    if (render_power[k] < X2_noise_floor_[k]) {
      X2_noise_floor_[k] = render_power[k];
      X2_noise_floor_counter_[k] = 0;
    } else {
      // Increase in a delayed, leaky manner.
      if (X2_noise_floor_counter_[k] >=
          static_cast<int>(config_.echo_model.noise_floor_hold)) {
        X2_noise_floor_[k] = std::max(X2_noise_floor_[k] * 1.1f,
                                      config_.echo_model.min_noise_floor_power);
      } else {
        ++X2_noise_floor_counter_[k];
      }
    }
  }
}
|
||||
|
||||
// Adds the estimated power of the reverb to the residual echo power.
// The reverb tail is driven by the render power taken at the partition where
// the tail starts: after the linear filter for kLinear, or just after the
// direct-path delay for kNonLinear.
void ResidualEchoEstimator::AddReverb(
    ReverbType reverb_type,
    const AecState& aec_state,
    const RenderBuffer& render_buffer,
    rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
  const size_t num_capture_channels = R2.size();

  // Choose reverb partition based on what type of echo power model is used.
  const size_t first_reverb_partition =
      reverb_type == ReverbType::kLinear
          ? aec_state.FilterLengthBlocks() + 1
          : aec_state.MinDirectPathFilterDelay() + 1;

  // Compute render power for the reverb. For mono render, use channel 0
  // directly; for multichannel, rebind the view to the channel sum.
  std::array<float, kFftLengthBy2Plus1> render_power_data;
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> X2 =
      render_buffer.Spectrum(first_reverb_partition);
  rtc::ArrayView<const float, kFftLengthBy2Plus1> render_power =
      X2[/*channel=*/0];
  if (num_render_channels_ > 1) {
    render_power_data.fill(0.f);
    for (size_t ch = 0; ch < num_render_channels_; ++ch) {
      const auto& channel_power = X2[ch];
      for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
        render_power_data[k] += channel_power[k];
      }
    }
    render_power = render_power_data;
  }

  // Update the reverb estimate. The linear model shapes the reverb with the
  // filter's frequency response; the nonlinear model uses a flat gain.
  if (reverb_type == ReverbType::kLinear) {
    echo_reverb_.UpdateReverb(render_power,
                              aec_state.GetReverbFrequencyResponse(),
                              aec_state.ReverbDecay());
  } else {
    const float echo_path_gain =
        GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false);
    echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
                                           aec_state.ReverbDecay());
  }

  // Add the reverb power.
  rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
      echo_reverb_.reverb();
  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
    for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
      R2[ch][k] += reverb_power[k];
    }
  }
}
|
||||
|
||||
// Chooses the echo path gain to use.
|
||||
float ResidualEchoEstimator::GetEchoPathGain(
|
||||
const AecState& aec_state,
|
||||
bool gain_for_early_reflections) const {
|
||||
float gain_amplitude;
|
||||
if (aec_state.TransparentModeActive()) {
|
||||
gain_amplitude = gain_for_early_reflections
|
||||
? early_reflections_transparent_mode_gain_
|
||||
: late_reflections_transparent_mode_gain_;
|
||||
} else {
|
||||
gain_amplitude = gain_for_early_reflections
|
||||
? early_reflections_general_gain_
|
||||
: late_reflections_general_gain_;
|
||||
}
|
||||
return gain_amplitude * gain_amplitude;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/reverb_model.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Estimates the residual echo power spectrum, i.e. the echo power remaining
// in the capture signal after linear echo cancellation. Non-copyable.
class ResidualEchoEstimator {
 public:
  ResidualEchoEstimator(const EchoCanceller3Config& config,
                        size_t num_render_channels);
  ~ResidualEchoEstimator();

  ResidualEchoEstimator(const ResidualEchoEstimator&) = delete;
  ResidualEchoEstimator& operator=(const ResidualEchoEstimator&) = delete;

  // Computes the residual echo power estimate R2 (one spectrum per capture
  // channel) from the AEC state, the render buffer, the linear echo estimate
  // spectra S2_linear and the capture spectra Y2.
  void Estimate(
      const AecState& aec_state,
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
      rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);

 private:
  // Selects which reverb model update is used in AddReverb().
  enum class ReverbType { kLinear, kNonLinear };

  // Resets the state.
  void Reset();

  // Updates estimate for the power of the stationary noise component in the
  // render signal.
  void UpdateRenderNoisePower(const RenderBuffer& render_buffer);

  // Adds the estimated unmodelled echo power to the residual echo power
  // estimate.
  void AddReverb(ReverbType reverb_type,
                 const AecState& aec_state,
                 const RenderBuffer& render_buffer,
                 rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);

  // Gets the echo path gain to apply.
  float GetEchoPathGain(const AecState& aec_state,
                        bool gain_for_early_reflections) const;

  const EchoCanceller3Config config_;
  const size_t num_render_channels_;
  // Amplitude gains chosen per mode (transparent/general) and reflection
  // region (early/late); squared before use in GetEchoPathGain().
  const float early_reflections_transparent_mode_gain_;
  const float late_reflections_transparent_mode_gain_;
  const float early_reflections_general_gain_;
  const float late_reflections_general_gain_;
  // Minimum-statistics noise floor of the render power and its per-bin hold
  // counter (see UpdateRenderNoisePower()).
  std::array<float, kFftLengthBy2Plus1> X2_noise_floor_;
  std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_;
  ReverbModel echo_reverb_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_RESIDUAL_ECHO_ESTIMATOR_H_
|
409
webrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc
Normal file
409
webrtc/modules/audio_processing/aec3/reverb_decay_estimator.cc
Normal file
@ -0,0 +1,409 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/reverb_decay_estimator.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
#include <numeric>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kEarlyReverbMinSizeBlocks = 3;
|
||||
constexpr int kBlocksPerSection = 6;
|
||||
// Linear regression approach assumes symmetric index around 0.
|
||||
constexpr float kEarlyReverbFirstPointAtLinearRegressors =
|
||||
-0.5f * kBlocksPerSection * kFftLengthBy2 + 0.5f;
|
||||
|
||||
// Averages the values in a block of size kFftLengthBy2;
|
||||
float BlockAverage(rtc::ArrayView<const float> v, size_t block_index) {
|
||||
constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2;
|
||||
const int i = block_index * kFftLengthBy2;
|
||||
RTC_DCHECK_GE(v.size(), i + kFftLengthBy2);
|
||||
const float sum =
|
||||
std::accumulate(v.begin() + i, v.begin() + i + kFftLengthBy2, 0.f);
|
||||
return sum * kOneByFftLengthBy2;
|
||||
}
|
||||
|
||||
// Analyzes the gain in a block.
|
||||
void AnalyzeBlockGain(const std::array<float, kFftLengthBy2>& h2,
|
||||
float floor_gain,
|
||||
float* previous_gain,
|
||||
bool* block_adapting,
|
||||
bool* decaying_gain) {
|
||||
float gain = std::max(BlockAverage(h2, 0), 1e-32f);
|
||||
*block_adapting =
|
||||
*previous_gain > 1.1f * gain || *previous_gain < 0.9f * gain;
|
||||
*decaying_gain = gain > floor_gain;
|
||||
*previous_gain = gain;
|
||||
}
|
||||
|
||||
// Arithmetic sum of $2 \sum_{i=0.5}^{(N-1)/2}i^2$ in closed form:
// N * (N^2 - 1) / 12.
constexpr float SymmetricArithmetricSum(int N) {
  constexpr float kOneTwelfth = 1.f / 12.f;
  return N * (N * N - 1.0f) * kOneTwelfth;
}
|
||||
|
||||
// Returns the peak energy of an impulse response.
|
||||
float BlockEnergyPeak(rtc::ArrayView<const float> h, int peak_block) {
|
||||
RTC_DCHECK_LE((peak_block + 1) * kFftLengthBy2, h.size());
|
||||
RTC_DCHECK_GE(peak_block, 0);
|
||||
float peak_value =
|
||||
*std::max_element(h.begin() + peak_block * kFftLengthBy2,
|
||||
h.begin() + (peak_block + 1) * kFftLengthBy2,
|
||||
[](float a, float b) { return a * a < b * b; });
|
||||
return peak_value * peak_value;
|
||||
}
|
||||
|
||||
// Returns the average energy of an impulse response block.
|
||||
float BlockEnergyAverage(rtc::ArrayView<const float> h, int block_index) {
|
||||
RTC_DCHECK_LE((block_index + 1) * kFftLengthBy2, h.size());
|
||||
RTC_DCHECK_GE(block_index, 0);
|
||||
constexpr float kOneByFftLengthBy2 = 1.f / kFftLengthBy2;
|
||||
const auto sum_of_squares = [](float a, float b) { return a + b * b; };
|
||||
return std::accumulate(h.begin() + block_index * kFftLengthBy2,
|
||||
h.begin() + (block_index + 1) * kFftLengthBy2, 0.f,
|
||||
sum_of_squares) *
|
||||
kOneByFftLengthBy2;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
ReverbDecayEstimator::ReverbDecayEstimator(const EchoCanceller3Config& config)
    : filter_length_blocks_(config.filter.refined.length_blocks),
      filter_length_coefficients_(GetTimeDomainLength(filter_length_blocks_)),
      // A negative configured default_len requests adaptive decay estimation;
      // its magnitude is still used as the initial decay below.
      use_adaptive_echo_decay_(config.ep_strength.default_len < 0.f),
      early_reverb_estimator_(config.filter.refined.length_blocks -
                              kEarlyReverbMinSizeBlocks),
      late_reverb_start_(kEarlyReverbMinSizeBlocks),
      late_reverb_end_(kEarlyReverbMinSizeBlocks),
      previous_gains_(config.filter.refined.length_blocks, 0.f),
      decay_(std::fabs(config.ep_strength.default_len)) {
  // The filter must be longer than the minimum early-reverb region.
  RTC_DCHECK_GT(config.filter.refined.length_blocks,
                static_cast<size_t>(kEarlyReverbMinSizeBlocks));
}

ReverbDecayEstimator::~ReverbDecayEstimator() = default;
|
||||
|
||||
// Feeds one linear-filter snapshot into the decay estimation. Skips stationary
// blocks, resets when estimation is infeasible, and otherwise alternates
// between analyzing one filter block per call and, once the whole filter has
// been analyzed, producing a decay estimate.
void ReverbDecayEstimator::Update(rtc::ArrayView<const float> filter,
                                  const absl::optional<float>& filter_quality,
                                  int filter_delay_blocks,
                                  bool usable_linear_filter,
                                  bool stationary_signal) {
  const int filter_size = static_cast<int>(filter.size());

  // Stationary input carries no decay information.
  if (stationary_signal) {
    return;
  }

  // Estimation is feasible only when the delay leaves room for the
  // early-reverb region, the filter has the expected length, the delay is
  // strictly positive and the linear filter is usable.
  bool estimation_feasible =
      filter_delay_blocks <=
      filter_length_blocks_ - kEarlyReverbMinSizeBlocks - 1;
  estimation_feasible =
      estimation_feasible && filter_size == filter_length_coefficients_;
  estimation_feasible = estimation_feasible && filter_delay_blocks > 0;
  estimation_feasible = estimation_feasible && usable_linear_filter;

  if (!estimation_feasible) {
    ResetDecayEstimation();
    return;
  }

  if (!use_adaptive_echo_decay_) {
    return;
  }

  // Track the largest recent smoothing constant; a value of 0 means no
  // sufficiently good filter has been seen yet.
  const float new_smoothing = filter_quality ? *filter_quality * 0.2f : 0.f;
  smoothing_constant_ = std::max(new_smoothing, smoothing_constant_);
  if (smoothing_constant_ == 0.f) {
    return;
  }

  if (block_to_analyze_ < filter_length_blocks_) {
    // Analyze the filter and accumulate data for reverb estimation.
    AnalyzeFilter(filter);
    ++block_to_analyze_;
  } else {
    // When the filter is fully analyzed, estimate the reverb decay and reset
    // the block_to_analyze_ counter.
    EstimateDecay(filter, filter_delay_blocks);
  }
}
|
||||
|
||||
// Discards all in-progress decay estimation state (the decay_ value itself is
// kept).
void ReverbDecayEstimator::ResetDecayEstimation() {
  // Drop the accumulated regression data.
  early_reverb_estimator_.Reset();
  late_reverb_decay_estimator_.Reset(0);
  late_reverb_start_ = 0;
  late_reverb_end_ = 0;
  // Restart the filter analysis from the first block.
  block_to_analyze_ = 0;
  estimation_region_candidate_size_ = 0;
  estimation_region_identified_ = false;
  // Freeze adaptation until a usable filter is seen again.
  smoothing_constant_ = 0.f;
}
|
||||
|
||||
// Produces a new decay estimate from the fully analyzed filter |filter| whose
// energy peak lies in block |peak_block|, then re-arms the analysis for the
// next pass.
void ReverbDecayEstimator::EstimateDecay(rtc::ArrayView<const float> filter,
                                         int peak_block) {
  auto& h = filter;
  RTC_DCHECK_EQ(0, h.size() % kFftLengthBy2);

  // Reset the block analysis counter.
  block_to_analyze_ =
      std::min(peak_block + kEarlyReverbMinSizeBlocks, filter_length_blocks_);

  // To estimate the reverb decay, the energy of the first filter section must
  // be substantially larger than the last. Also, the first filter section
  // energy must not deviate too much from the max peak.
  const float first_reverb_gain = BlockEnergyAverage(h, block_to_analyze_);
  const size_t h_size_blocks = h.size() >> kFftLengthBy2Log2;
  tail_gain_ = BlockEnergyAverage(h, h_size_blocks - 1);
  float peak_energy = BlockEnergyPeak(h, peak_block);
  const bool sufficient_reverb_decay = first_reverb_gain > 4.f * tail_gain_;
  const bool valid_filter =
      first_reverb_gain > 2.f * tail_gain_ && peak_energy < 100.f;

  // Estimate the size of the regions with early and late reflections.
  const int size_early_reverb = early_reverb_estimator_.Estimate();
  const int size_late_reverb =
      std::max(estimation_region_candidate_size_ - size_early_reverb, 0);

  // Only update the reverb decay estimate if the size of the identified late
  // reverb is sufficiently large.
  if (size_late_reverb >= 5) {
    if (valid_filter && late_reverb_decay_estimator_.EstimateAvailable()) {
      // The regression slope is in log2-energy per coefficient; convert it to
      // a per-block decay factor and clamp it to the allowed range. The new
      // value may not fall below 97% of the current one per update.
      float decay = std::pow(
          2.0f, late_reverb_decay_estimator_.Estimate() * kFftLengthBy2);
      constexpr float kMaxDecay = 0.95f;  // ~1 sec min RT60.
      constexpr float kMinDecay = 0.02f;  // ~15 ms max RT60.
      decay = std::max(.97f * decay_, decay);
      decay = std::min(decay, kMaxDecay);
      decay = std::max(decay, kMinDecay);
      decay_ += smoothing_constant_ * (decay - decay_);
    }

    // Update length of decay. Must have enough data (number of sections) in
    // order to estimate decay rate.
    late_reverb_decay_estimator_.Reset(size_late_reverb * kFftLengthBy2);
    late_reverb_start_ =
        peak_block + kEarlyReverbMinSizeBlocks + size_early_reverb;
    late_reverb_end_ =
        block_to_analyze_ + estimation_region_candidate_size_ - 1;
  } else {
    late_reverb_decay_estimator_.Reset(0);
    late_reverb_start_ = 0;
    late_reverb_end_ = 0;
  }

  // Reset variables for the identification of the region for reverb decay
  // estimation.
  estimation_region_identified_ = !(valid_filter && sufficient_reverb_decay);
  estimation_region_candidate_size_ = 0;

  // Stop estimation of the decay until another good filter is received.
  smoothing_constant_ = 0.f;

  // Reset early reflections detector.
  early_reverb_estimator_.Reset();
}
|
||||
|
||||
// Analyzes one kFftLengthBy2-sized block of |filter| (selected by
// block_to_analyze_) and accumulates regression data for the decay estimate.
void ReverbDecayEstimator::AnalyzeFilter(rtc::ArrayView<const float> filter) {
  auto h = rtc::ArrayView<const float>(
      filter.begin() + block_to_analyze_ * kFftLengthBy2, kFftLengthBy2);

  // Compute squared filter coefficients for the block to analyze.
  std::array<float, kFftLengthBy2> h2;
  std::transform(h.begin(), h.end(), h2.begin(), [](float a) { return a * a; });

  // Map out the region for estimating the reverb decay.
  bool adapting;
  bool above_noise_floor;
  AnalyzeBlockGain(h2, tail_gain_, &previous_gains_[block_to_analyze_],
                   &adapting, &above_noise_floor);

  // Count consecutive number of "good" filter sections, where "good" means:
  // 1) energy is above noise floor.
  // 2) energy of current section has not changed too much from last check.
  estimation_region_identified_ =
      estimation_region_identified_ || adapting || !above_noise_floor;
  if (!estimation_region_identified_) {
    ++estimation_region_candidate_size_;
  }

  // Accumulate data for reverb decay estimation and for the estimation of early
  // reflections. The 1e-10 offset keeps the log argument strictly positive.
  if (block_to_analyze_ <= late_reverb_end_) {
    if (block_to_analyze_ >= late_reverb_start_) {
      for (float h2_k : h2) {
        float h2_log2 = FastApproxLog2f(h2_k + 1e-10);
        late_reverb_decay_estimator_.Accumulate(h2_log2);
        early_reverb_estimator_.Accumulate(h2_log2, smoothing_constant_);
      }
    } else {
      for (float h2_k : h2) {
        float h2_log2 = FastApproxLog2f(h2_k + 1e-10);
        early_reverb_estimator_.Accumulate(h2_log2, smoothing_constant_);
      }
    }
  }
}
|
||||
|
||||
// Writes the estimator's internal state to the debug data dumper.
void ReverbDecayEstimator::Dump(ApmDataDumper* data_dumper) const {
  const int num_decay_blocks = late_reverb_end_ - late_reverb_start_;
  data_dumper->DumpRaw("aec3_reverb_decay", decay_);
  data_dumper->DumpRaw("aec3_reverb_tail_energy", tail_gain_);
  data_dumper->DumpRaw("aec3_reverb_alpha", smoothing_constant_);
  data_dumper->DumpRaw("aec3_num_reverb_decay_blocks", num_decay_blocks);
  data_dumper->DumpRaw("aec3_late_reverb_start", late_reverb_start_);
  data_dumper->DumpRaw("aec3_late_reverb_end", late_reverb_end_);
  early_reverb_estimator_.Dump(data_dumper);
}
|
||||
|
||||
void ReverbDecayEstimator::LateReverbLinearRegressor::Reset(
|
||||
int num_data_points) {
|
||||
RTC_DCHECK_LE(0, num_data_points);
|
||||
RTC_DCHECK_EQ(0, num_data_points % 2);
|
||||
const int N = num_data_points;
|
||||
nz_ = 0.f;
|
||||
// Arithmetic sum of $2 \sum_{i=0.5}^{(N-1)/2}i^2$ calculated directly.
|
||||
nn_ = SymmetricArithmetricSum(N);
|
||||
// The linear regression approach assumes symmetric index around 0.
|
||||
count_ = N > 0 ? -N * 0.5f + 0.5f : 0.f;
|
||||
N_ = N;
|
||||
n_ = 0;
|
||||
}
|
||||
|
||||
// Accumulates one observation |z| at the current (zero-centered) index.
void ReverbDecayEstimator::LateReverbLinearRegressor::Accumulate(float z) {
  nz_ += count_ * z;
  count_ += 1.f;
  n_ += 1;
}
|
||||
|
||||
float ReverbDecayEstimator::LateReverbLinearRegressor::Estimate() {
|
||||
RTC_DCHECK(EstimateAvailable());
|
||||
if (nn_ == 0.f) {
|
||||
RTC_NOTREACHED();
|
||||
return 0.f;
|
||||
}
|
||||
return nz_ / nn_;
|
||||
}
|
||||
|
||||
ReverbDecayEstimator::EarlyReverbLengthEstimator::EarlyReverbLengthEstimator(
    int max_blocks)
    // One overlapping section regressor per block beyond the section length.
    : numerators_smooth_(max_blocks - kBlocksPerSection, 0.f),
      numerators_(numerators_smooth_.size(), 0.f),
      coefficients_counter_(0) {
  RTC_DCHECK_LE(0, max_blocks);
}

ReverbDecayEstimator::EarlyReverbLengthEstimator::
    ~EarlyReverbLengthEstimator() = default;
|
||||
|
||||
// Clears the per-section accumulators and restarts the block/coefficient
// counters (the smoothed numerators are intentionally kept).
void ReverbDecayEstimator::EarlyReverbLengthEstimator::Reset() {
  block_counter_ = 0;
  coefficients_counter_ = 0;
  std::fill(numerators_.begin(), numerators_.end(), 0.f);
}
|
||||
|
||||
// Folds one log2-energy sample |value| into the numerators of all section
// regressors that cover the current coefficient, smoothing a section's result
// once its last coefficient has been seen.
void ReverbDecayEstimator::EarlyReverbLengthEstimator::Accumulate(
    float value,
    float smoothing) {
  // Each section is composed by kBlocksPerSection blocks and each section
  // overlaps with the next one in (kBlocksPerSection - 1) blocks. For example,
  // the first section covers the blocks [0:5], the second covers the blocks
  // [1:6] and so on. As a result, for each value, kBlocksPerSection sections
  // need to be updated.
  int first_section_index = std::max(block_counter_ - kBlocksPerSection + 1, 0);
  int last_section_index =
      std::min(block_counter_, static_cast<int>(numerators_.size() - 1));
  // x_value is the regressor abscissa of this coefficient within the newest
  // section; earlier sections see it shifted by kFftLengthBy2 per block, which
  // is applied incrementally via value_to_inc in the loop below.
  float x_value = static_cast<float>(coefficients_counter_) +
                  kEarlyReverbFirstPointAtLinearRegressors;
  const float value_to_inc = kFftLengthBy2 * value;
  float value_to_add =
      x_value * value + (block_counter_ - last_section_index) * value_to_inc;
  for (int section = last_section_index; section >= first_section_index;
       --section, value_to_add += value_to_inc) {
    numerators_[section] += value_to_add;
  }

  // Check if this update was the last coefficient of the current block. In that
  // case, check if we are at the end of one of the sections and update the
  // numerator of the linear regressor that is computed in such section.
  if (++coefficients_counter_ == kFftLengthBy2) {
    if (block_counter_ >= (kBlocksPerSection - 1)) {
      size_t section = block_counter_ - (kBlocksPerSection - 1);
      RTC_DCHECK_GT(numerators_.size(), section);
      RTC_DCHECK_GT(numerators_smooth_.size(), section);
      numerators_smooth_[section] +=
          smoothing * (numerators_[section] - numerators_smooth_[section]);
      n_sections_ = section + 1;
    }
    ++block_counter_;
    coefficients_counter_ = 0;
  }
}
|
||||
|
||||
// Estimates the size in blocks of the early reverb. The estimation is done by
// comparing the tilt that is estimated in each section. As an optimization
// detail and due to the fact that all the linear regressors that are computed
// shared the same denominator, the comparison of the tilts is done by a
// comparison of the numerator of the linear regressors.
int ReverbDecayEstimator::EarlyReverbLengthEstimator::Estimate() {
  constexpr float N = kBlocksPerSection * kFftLengthBy2;
  constexpr float nn = SymmetricArithmetricSum(N);
  // numerator_11 refers to the quantity that the linear regressor needs in the
  // numerator for getting a decay equal to 1.1 (which is not a decay).
  // log2(1.1) * nn / kFftLengthBy2.
  constexpr float numerator_11 = 0.13750352374993502f * nn / kFftLengthBy2;
  // log2(0.8) * nn / kFftLengthBy2.
  constexpr float numerator_08 = -0.32192809488736229f * nn / kFftLengthBy2;
  constexpr int kNumSectionsToAnalyze = 9;

  // Not enough sections have completed yet to make a decision.
  if (n_sections_ < kNumSectionsToAnalyze) {
    return 0;
  }

  // Estimation of the blocks that correspond to early reverberations. The
  // estimation is done by analyzing the impulse response. The portions of the
  // impulse response whose energy is not decreasing over its coefficients are
  // considered to be part of the early reverberations. Furthermore, the blocks
  // where the energy is decreasing faster than what it does at the end of the
  // impulse response are also considered to be part of the early
  // reverberations. The estimation is limited to the first
  // kNumSectionsToAnalyze sections.

  RTC_DCHECK_LE(n_sections_, numerators_smooth_.size());
  const float min_numerator_tail =
      *std::min_element(numerators_smooth_.begin() + kNumSectionsToAnalyze,
                        numerators_smooth_.begin() + n_sections_);
  // The last section that violates either bound marks the end of the early
  // reverb region.
  int early_reverb_size_minus_1 = 0;
  for (int k = 0; k < kNumSectionsToAnalyze; ++k) {
    if ((numerators_smooth_[k] > numerator_11) ||
        (numerators_smooth_[k] < numerator_08 &&
         numerators_smooth_[k] < 0.9f * min_numerator_tail)) {
      early_reverb_size_minus_1 = k;
    }
  }

  return early_reverb_size_minus_1 == 0 ? 0 : early_reverb_size_minus_1 + 1;
}
|
||||
|
||||
// Writes the smoothed per-section regression numerators to the debug dumper.
void ReverbDecayEstimator::EarlyReverbLengthEstimator::Dump(
    ApmDataDumper* data_dumper) const {
  data_dumper->DumpRaw("aec3_er_acum_numerator", numerators_smooth_);
}
|
||||
|
||||
} // namespace webrtc
|
112
webrtc/modules/audio_processing/aec3/reverb_decay_estimator.h
Normal file
112
webrtc/modules/audio_processing/aec3/reverb_decay_estimator.h
Normal file
@ -0,0 +1,112 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h" // kMaxAdaptiveFilter...
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
struct EchoCanceller3Config;
|
||||
|
||||
// Class for estimating the decay of the late reverb.
class ReverbDecayEstimator {
 public:
  explicit ReverbDecayEstimator(const EchoCanceller3Config& config);
  ~ReverbDecayEstimator();
  // Updates the decay estimate.
  void Update(rtc::ArrayView<const float> filter,
              const absl::optional<float>& filter_quality,
              int filter_delay_blocks,
              bool usable_linear_filter,
              bool stationary_signal);
  // Returns the decay for the exponential model.
  float Decay() const { return decay_; }
  // Dumps debug data.
  void Dump(ApmDataDumper* data_dumper) const;

 private:
  // Estimates the decay once the whole filter has been analyzed.
  void EstimateDecay(rtc::ArrayView<const float> filter, int peak_block);
  // Analyzes one filter block and accumulates regression data.
  void AnalyzeFilter(rtc::ArrayView<const float> filter);

  void ResetDecayEstimation();

  // Class for estimating the decay of the late reverb from the linear filter.
  class LateReverbLinearRegressor {
   public:
    // Resets the estimator to receive a specified number of data points.
    void Reset(int num_data_points);
    // Accumulates estimation data.
    void Accumulate(float z);
    // Estimates the decay.
    float Estimate();
    // Returns whether an estimate is available.
    bool EstimateAvailable() const { return n_ == N_ && N_ != 0; }

    // NOTE(review): this second `public:` exposes the data members below;
    // it looks like it should be `private:` — confirm against upstream.
   public:
    float nz_ = 0.f;     // Numerator accumulator (sum of index * sample).
    float nn_ = 0.f;     // Fixed denominator for the chosen N.
    float count_ = 0.f;  // Current zero-centered regression index.
    int N_ = 0;          // Number of data points expected.
    int n_ = 0;          // Number of data points received.
  };

  // Class for identifying the length of the early reverb from the linear
  // filter. For identifying the early reverberations, the impulse response is
  // divided in sections and the tilt of each section is computed by a linear
  // regressor.
  class EarlyReverbLengthEstimator {
   public:
    explicit EarlyReverbLengthEstimator(int max_blocks);
    ~EarlyReverbLengthEstimator();

    // Resets the estimator.
    void Reset();
    // Accumulates estimation data.
    void Accumulate(float value, float smoothing);
    // Estimates the size in blocks of the early reverb.
    int Estimate();
    // Dumps debug data.
    void Dump(ApmDataDumper* data_dumper) const;

   private:
    std::vector<float> numerators_smooth_;  // Smoothed section numerators.
    std::vector<float> numerators_;         // Raw section numerators.
    int coefficients_counter_;  // Coefficient index within the current block.
    int block_counter_ = 0;     // Blocks processed since Reset().
    int n_sections_ = 0;        // Number of completed sections.
  };

  const int filter_length_blocks_;
  const int filter_length_coefficients_;
  const bool use_adaptive_echo_decay_;
  LateReverbLinearRegressor late_reverb_decay_estimator_;
  EarlyReverbLengthEstimator early_reverb_estimator_;
  // Block range [start, end] over which late-reverb data is accumulated.
  int late_reverb_start_;
  int late_reverb_end_;
  int block_to_analyze_ = 0;
  int estimation_region_candidate_size_ = 0;
  bool estimation_region_identified_ = false;
  std::vector<float> previous_gains_;
  float decay_;
  float tail_gain_ = 0.f;
  float smoothing_constant_ = 0.f;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_DECAY_ESTIMATOR_H_
|
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/reverb_frequency_response.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <numeric>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Computes the ratio of the energies between the direct path and the tail. The
|
||||
// energy is computed in the power spectrum domain discarding the DC
|
||||
// contributions.
|
||||
float AverageDecayWithinFilter(
|
||||
rtc::ArrayView<const float> freq_resp_direct_path,
|
||||
rtc::ArrayView<const float> freq_resp_tail) {
|
||||
// Skipping the DC for the ratio computation
|
||||
constexpr size_t kSkipBins = 1;
|
||||
RTC_CHECK_EQ(freq_resp_direct_path.size(), freq_resp_tail.size());
|
||||
|
||||
float direct_path_energy =
|
||||
std::accumulate(freq_resp_direct_path.begin() + kSkipBins,
|
||||
freq_resp_direct_path.end(), 0.f);
|
||||
|
||||
if (direct_path_energy == 0.f) {
|
||||
return 0.f;
|
||||
}
|
||||
|
||||
float tail_energy = std::accumulate(freq_resp_tail.begin() + kSkipBins,
|
||||
freq_resp_tail.end(), 0.f);
|
||||
return tail_energy / direct_path_energy;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Starts from an all-zero tail response estimate.
ReverbFrequencyResponse::ReverbFrequencyResponse() {
  tail_response_.fill(0.f);
}

ReverbFrequencyResponse::~ReverbFrequencyResponse() = default;
|
||||
|
||||
void ReverbFrequencyResponse::Update(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
frequency_response,
|
||||
int filter_delay_blocks,
|
||||
const absl::optional<float>& linear_filter_quality,
|
||||
bool stationary_block) {
|
||||
if (stationary_block || !linear_filter_quality) {
|
||||
return;
|
||||
}
|
||||
|
||||
Update(frequency_response, filter_delay_blocks, *linear_filter_quality);
|
||||
}
|
||||
|
||||
void ReverbFrequencyResponse::Update(
|
||||
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
frequency_response,
|
||||
int filter_delay_blocks,
|
||||
float linear_filter_quality) {
|
||||
rtc::ArrayView<const float> freq_resp_tail(
|
||||
frequency_response[frequency_response.size() - 1]);
|
||||
|
||||
rtc::ArrayView<const float> freq_resp_direct_path(
|
||||
frequency_response[filter_delay_blocks]);
|
||||
|
||||
float average_decay =
|
||||
AverageDecayWithinFilter(freq_resp_direct_path, freq_resp_tail);
|
||||
|
||||
const float smoothing = 0.2f * linear_filter_quality;
|
||||
average_decay_ += smoothing * (average_decay - average_decay_);
|
||||
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
tail_response_[k] = freq_resp_direct_path[k] * average_decay_;
|
||||
}
|
||||
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
const float avg_neighbour =
|
||||
0.5f * (tail_response_[k - 1] + tail_response_[k + 1]);
|
||||
tail_response_[k] = std::max(tail_response_[k], avg_neighbour);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,53 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Class for updating the frequency response for the reverb.
|
||||
class ReverbFrequencyResponse {
|
||||
public:
|
||||
ReverbFrequencyResponse();
|
||||
~ReverbFrequencyResponse();
|
||||
|
||||
// Updates the frequency response estimate of the reverb.
|
||||
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
frequency_response,
|
||||
int filter_delay_blocks,
|
||||
const absl::optional<float>& linear_filter_quality,
|
||||
bool stationary_block);
|
||||
|
||||
// Returns the estimated frequency response for the reverb.
|
||||
rtc::ArrayView<const float> FrequencyResponse() const {
|
||||
return tail_response_;
|
||||
}
|
||||
|
||||
private:
|
||||
void Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
|
||||
frequency_response,
|
||||
int filter_delay_blocks,
|
||||
float linear_filter_quality);
|
||||
|
||||
float average_decay_ = 0.f;
|
||||
std::array<float, kFftLengthBy2Plus1> tail_response_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_
|
59
webrtc/modules/audio_processing/aec3/reverb_model.cc
Normal file
59
webrtc/modules/audio_processing/aec3/reverb_model.cc
Normal file
@ -0,0 +1,59 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/reverb_model.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
|
||||
#include "api/array_view.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Construction simply clears the reverb power estimate.
ReverbModel::ReverbModel() {
  Reset();
}

ReverbModel::~ReverbModel() = default;
|
||||
|
||||
// Clears the accumulated reverberant power estimate.
void ReverbModel::Reset() {
  reverb_.fill(0.);
}
|
||||
|
||||
// Updates the reverberant power estimate using one common pre-scaling for all
// frequency bins. A non-positive decay leaves the estimate untouched.
void ReverbModel::UpdateReverbNoFreqShaping(
    rtc::ArrayView<const float> power_spectrum,
    float power_spectrum_scaling,
    float reverb_decay) {
  if (reverb_decay > 0) {
    // Add the scaled input power and apply the exponential decay.
    for (size_t bin = 0; bin < power_spectrum.size(); ++bin) {
      reverb_[bin] =
          (reverb_[bin] + power_spectrum[bin] * power_spectrum_scaling) *
          reverb_decay;
    }
  }
}
|
||||
|
||||
// Updates the reverberant power estimate using a per-bin pre-scaling of the
// input power spectrum. A non-positive decay leaves the estimate untouched.
void ReverbModel::UpdateReverb(
    rtc::ArrayView<const float> power_spectrum,
    rtc::ArrayView<const float> power_spectrum_scaling,
    float reverb_decay) {
  if (reverb_decay > 0) {
    // Add the per-bin scaled input power and apply the exponential decay.
    for (size_t bin = 0; bin < power_spectrum.size(); ++bin) {
      reverb_[bin] =
          (reverb_[bin] + power_spectrum[bin] * power_spectrum_scaling[bin]) *
          reverb_decay;
    }
  }
}
|
||||
|
||||
} // namespace webrtc
|
58
webrtc/modules/audio_processing/aec3/reverb_model.h
Normal file
58
webrtc/modules/audio_processing/aec3/reverb_model.h
Normal file
@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
|
||||
|
||||
#include <array>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// The ReverbModel class describes an exponential reverberant model
|
||||
// that can be applied over power spectrums.
|
||||
class ReverbModel {
|
||||
public:
|
||||
ReverbModel();
|
||||
~ReverbModel();
|
||||
|
||||
// Resets the state.
|
||||
void Reset();
|
||||
|
||||
// Returns the reverb.
|
||||
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb() const {
|
||||
return reverb_;
|
||||
}
|
||||
|
||||
// The methods UpdateReverbNoFreqShaping and UpdateReverb update the
|
||||
// estimate of the reverberation contribution to an input/output power
|
||||
// spectrum. Before applying the exponential reverberant model, the input
|
||||
// power spectrum is pre-scaled. Use the method UpdateReverb when a different
|
||||
// scaling should be applied per frequency and UpdateReverb_no_freq_shape if
|
||||
// the same scaling should be used for all the frequencies.
|
||||
void UpdateReverbNoFreqShaping(rtc::ArrayView<const float> power_spectrum,
|
||||
float power_spectrum_scaling,
|
||||
float reverb_decay);
|
||||
|
||||
// Update the reverb based on new data.
|
||||
void UpdateReverb(rtc::ArrayView<const float> power_spectrum,
|
||||
rtc::ArrayView<const float> power_spectrum_scaling,
|
||||
float reverb_decay);
|
||||
|
||||
private:
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> reverb_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_H_
|
@ -0,0 +1,54 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/reverb_model_estimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates one decay estimator and one frequency response estimator per
// capture channel.
ReverbModelEstimator::ReverbModelEstimator(const EchoCanceller3Config& config,
                                           size_t num_capture_channels)
    : reverb_decay_estimators_(num_capture_channels),
      reverb_frequency_responses_(num_capture_channels) {
  for (auto& decay_estimator : reverb_decay_estimators_) {
    decay_estimator = std::make_unique<ReverbDecayEstimator>(config);
  }
}

ReverbModelEstimator::~ReverbModelEstimator() = default;
|
||||
|
||||
// Updates the per-channel reverb decay and frequency response estimates from
// the latest linear filter data.
void ReverbModelEstimator::Update(
    rtc::ArrayView<const std::vector<float>> impulse_responses,
    rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
        frequency_responses,
    rtc::ArrayView<const absl::optional<float>> linear_filter_qualities,
    rtc::ArrayView<const int> filter_delays_blocks,
    const std::vector<bool>& usable_linear_estimates,
    bool stationary_block) {
  const size_t num_capture_channels = reverb_decay_estimators_.size();
  RTC_DCHECK_EQ(num_capture_channels, impulse_responses.size());
  RTC_DCHECK_EQ(num_capture_channels, frequency_responses.size());
  RTC_DCHECK_EQ(num_capture_channels, usable_linear_estimates.size());

  for (size_t ch = 0; ch < num_capture_channels; ++ch) {
    // Update this channel's estimate of the reverb frequency response.
    reverb_frequency_responses_[ch].Update(
        frequency_responses[ch], filter_delays_blocks[ch],
        linear_filter_qualities[ch], stationary_block);

    // Update this channel's estimate of the reverb decay.
    reverb_decay_estimators_[ch]->Update(
        impulse_responses[ch], linear_filter_qualities[ch],
        filter_delays_blocks[ch], usable_linear_estimates[ch],
        stationary_block);
  }
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h" // kFftLengthBy2Plus1
|
||||
#include "modules/audio_processing/aec3/reverb_decay_estimator.h"
|
||||
#include "modules/audio_processing/aec3/reverb_frequency_response.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class ApmDataDumper;
|
||||
|
||||
// Class for estimating the model parameters for the reverberant echo.
|
||||
class ReverbModelEstimator {
|
||||
public:
|
||||
ReverbModelEstimator(const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels);
|
||||
~ReverbModelEstimator();
|
||||
|
||||
// Updates the estimates based on new data.
|
||||
void Update(
|
||||
rtc::ArrayView<const std::vector<float>> impulse_responses,
|
||||
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
||||
frequency_responses,
|
||||
rtc::ArrayView<const absl::optional<float>> linear_filter_qualities,
|
||||
rtc::ArrayView<const int> filter_delays_blocks,
|
||||
const std::vector<bool>& usable_linear_estimates,
|
||||
bool stationary_block);
|
||||
|
||||
// Returns the exponential decay of the reverberant echo.
|
||||
// TODO(peah): Correct to properly support multiple channels.
|
||||
float ReverbDecay() const { return reverb_decay_estimators_[0]->Decay(); }
|
||||
|
||||
// Return the frequency response of the reverberant echo.
|
||||
// TODO(peah): Correct to properly support multiple channels.
|
||||
rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
|
||||
return reverb_frequency_responses_[0].FrequencyResponse();
|
||||
}
|
||||
|
||||
// Dumps debug data.
|
||||
void Dump(ApmDataDumper* data_dumper) const {
|
||||
reverb_decay_estimators_[0]->Dump(data_dumper);
|
||||
}
|
||||
|
||||
private:
|
||||
std::vector<std::unique_ptr<ReverbDecayEstimator>> reverb_decay_estimators_;
|
||||
std::vector<ReverbFrequencyResponse> reverb_frequency_responses_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
|
@ -0,0 +1,406 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/signal_dependent_erle_estimator.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <functional>
|
||||
#include <numeric>
|
||||
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr std::array<size_t, SignalDependentErleEstimator::kSubbands + 1>
|
||||
kBandBoundaries = {1, 8, 16, 24, 32, 48, kFftLengthBy2Plus1};
|
||||
|
||||
std::array<size_t, kFftLengthBy2Plus1> FormSubbandMap() {
|
||||
std::array<size_t, kFftLengthBy2Plus1> map_band_to_subband;
|
||||
size_t subband = 1;
|
||||
for (size_t k = 0; k < map_band_to_subband.size(); ++k) {
|
||||
RTC_DCHECK_LT(subband, kBandBoundaries.size());
|
||||
if (k >= kBandBoundaries[subband]) {
|
||||
subband++;
|
||||
RTC_DCHECK_LT(k, kBandBoundaries[subband]);
|
||||
}
|
||||
map_band_to_subband[k] = subband - 1;
|
||||
}
|
||||
return map_band_to_subband;
|
||||
}
|
||||
|
||||
// Defines the size in blocks of the sections that are used for dividing the
|
||||
// linear filter. The sections are split in a non-linear manner so that lower
|
||||
// sections that typically represent the direct path have a larger resolution
|
||||
// than the higher sections which typically represent more reverberant acoustic
|
||||
// paths.
|
||||
std::vector<size_t> DefineFilterSectionSizes(size_t delay_headroom_blocks,
|
||||
size_t num_blocks,
|
||||
size_t num_sections) {
|
||||
size_t filter_length_blocks = num_blocks - delay_headroom_blocks;
|
||||
std::vector<size_t> section_sizes(num_sections);
|
||||
size_t remaining_blocks = filter_length_blocks;
|
||||
size_t remaining_sections = num_sections;
|
||||
size_t estimator_size = 2;
|
||||
size_t idx = 0;
|
||||
while (remaining_sections > 1 &&
|
||||
remaining_blocks > estimator_size * remaining_sections) {
|
||||
RTC_DCHECK_LT(idx, section_sizes.size());
|
||||
section_sizes[idx] = estimator_size;
|
||||
remaining_blocks -= estimator_size;
|
||||
remaining_sections--;
|
||||
estimator_size *= 2;
|
||||
idx++;
|
||||
}
|
||||
|
||||
size_t last_groups_size = remaining_blocks / remaining_sections;
|
||||
for (; idx < num_sections; idx++) {
|
||||
section_sizes[idx] = last_groups_size;
|
||||
}
|
||||
section_sizes[num_sections - 1] +=
|
||||
remaining_blocks - last_groups_size * remaining_sections;
|
||||
return section_sizes;
|
||||
}
|
||||
|
||||
// Forms the limits in blocks for each filter section. Those sections
|
||||
// are used for analyzing the echo estimates and investigating which
|
||||
// linear filter sections contribute most to the echo estimate energy.
|
||||
std::vector<size_t> SetSectionsBoundaries(size_t delay_headroom_blocks,
|
||||
size_t num_blocks,
|
||||
size_t num_sections) {
|
||||
std::vector<size_t> estimator_boundaries_blocks(num_sections + 1);
|
||||
if (estimator_boundaries_blocks.size() == 2) {
|
||||
estimator_boundaries_blocks[0] = 0;
|
||||
estimator_boundaries_blocks[1] = num_blocks;
|
||||
return estimator_boundaries_blocks;
|
||||
}
|
||||
RTC_DCHECK_GT(estimator_boundaries_blocks.size(), 2);
|
||||
const std::vector<size_t> section_sizes =
|
||||
DefineFilterSectionSizes(delay_headroom_blocks, num_blocks,
|
||||
estimator_boundaries_blocks.size() - 1);
|
||||
|
||||
size_t idx = 0;
|
||||
size_t current_size_block = 0;
|
||||
RTC_DCHECK_EQ(section_sizes.size() + 1, estimator_boundaries_blocks.size());
|
||||
estimator_boundaries_blocks[0] = delay_headroom_blocks;
|
||||
for (size_t k = delay_headroom_blocks; k < num_blocks; ++k) {
|
||||
current_size_block++;
|
||||
if (current_size_block >= section_sizes[idx]) {
|
||||
idx = idx + 1;
|
||||
if (idx == section_sizes.size()) {
|
||||
break;
|
||||
}
|
||||
estimator_boundaries_blocks[idx] = k + 1;
|
||||
current_size_block = 0;
|
||||
}
|
||||
}
|
||||
estimator_boundaries_blocks[section_sizes.size()] = num_blocks;
|
||||
return estimator_boundaries_blocks;
|
||||
}
|
||||
|
||||
std::array<float, SignalDependentErleEstimator::kSubbands>
|
||||
SetMaxErleSubbands(float max_erle_l, float max_erle_h, size_t limit_subband_l) {
|
||||
std::array<float, SignalDependentErleEstimator::kSubbands> max_erle;
|
||||
std::fill(max_erle.begin(), max_erle.begin() + limit_subband_l, max_erle_l);
|
||||
std::fill(max_erle.begin() + limit_subband_l, max_erle.end(), max_erle_h);
|
||||
return max_erle;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Sets up the per-channel state and the static band/section partitioning.
// NOTE(review): the member initializer order below matches the declaration
// order and carries data dependencies (e.g. max_erle_ reads
// band_to_subband_); it must not be reordered.
SignalDependentErleEstimator::SignalDependentErleEstimator(
    const EchoCanceller3Config& config,
    size_t num_capture_channels)
    : min_erle_(config.erle.min),
      num_sections_(config.erle.num_sections),
      num_blocks_(config.filter.refined.length_blocks),
      delay_headroom_blocks_(config.delay.delay_headroom_samples / kBlockSize),
      band_to_subband_(FormSubbandMap()),
      max_erle_(SetMaxErleSubbands(config.erle.max_l,
                                   config.erle.max_h,
                                   band_to_subband_[kFftLengthBy2 / 2])),
      section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_,
                                                       num_blocks_,
                                                       num_sections_)),
      erle_(num_capture_channels),
      S2_section_accum_(
          num_capture_channels,
          std::vector<std::array<float, kFftLengthBy2Plus1>>(num_sections_)),
      erle_estimators_(
          num_capture_channels,
          std::vector<std::array<float, kSubbands>>(num_sections_)),
      erle_ref_(num_capture_channels),
      correction_factors_(
          num_capture_channels,
          std::vector<std::array<float, kSubbands>>(num_sections_)),
      num_updates_(num_capture_channels),
      n_active_sections_(num_capture_channels) {
  RTC_DCHECK_LE(num_sections_, num_blocks_);
  RTC_DCHECK_GE(num_sections_, 1);
  Reset();
}
|
||||
|
||||
SignalDependentErleEstimator::~SignalDependentErleEstimator() = default;
|
||||
|
||||
void SignalDependentErleEstimator::Reset() {
|
||||
for (size_t ch = 0; ch < erle_.size(); ++ch) {
|
||||
erle_[ch].fill(min_erle_);
|
||||
for (auto& erle_estimator : erle_estimators_[ch]) {
|
||||
erle_estimator.fill(min_erle_);
|
||||
}
|
||||
erle_ref_[ch].fill(min_erle_);
|
||||
for (auto& factor : correction_factors_[ch]) {
|
||||
factor.fill(1.0f);
|
||||
}
|
||||
num_updates_[ch].fill(0);
|
||||
n_active_sections_[ch].fill(0);
|
||||
}
|
||||
}
|
||||
|
||||
// Updates the Erle estimate by analyzing the current input signals. It takes
|
||||
// the render buffer and the filter frequency response in order to do an
|
||||
// estimation of the number of sections of the linear filter that are needed
|
||||
// for getting the majority of the energy in the echo estimate. Based on that
|
||||
// number of sections, it updates the erle estimation by introducing a
|
||||
// correction factor to the erle that is given as an input to this method.
|
||||
void SignalDependentErleEstimator::Update(
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
||||
filter_frequency_responses,
|
||||
rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
|
||||
const std::vector<bool>& converged_filters) {
|
||||
RTC_DCHECK_GT(num_sections_, 1);
|
||||
|
||||
// Gets the number of filter sections that are needed for achieving 90 %
|
||||
// of the power spectrum energy of the echo estimate.
|
||||
ComputeNumberOfActiveFilterSections(render_buffer,
|
||||
filter_frequency_responses);
|
||||
|
||||
// Updates the correction factors that is used for correcting the erle and
|
||||
// adapt it to the particular characteristics of the input signal.
|
||||
UpdateCorrectionFactors(X2, Y2, E2, converged_filters);
|
||||
|
||||
// Applies the correction factor to the input erle for getting a more refined
|
||||
// erle estimation for the current input signal.
|
||||
for (size_t ch = 0; ch < erle_.size(); ++ch) {
|
||||
for (size_t k = 0; k < kFftLengthBy2; ++k) {
|
||||
RTC_DCHECK_GT(correction_factors_[ch].size(), n_active_sections_[ch][k]);
|
||||
float correction_factor =
|
||||
correction_factors_[ch][n_active_sections_[ch][k]]
|
||||
[band_to_subband_[k]];
|
||||
erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor,
|
||||
min_erle_, max_erle_[band_to_subband_[k]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SignalDependentErleEstimator::Dump(
|
||||
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
|
||||
for (auto& erle : erle_estimators_[0]) {
|
||||
data_dumper->DumpRaw("aec3_all_erle", erle);
|
||||
}
|
||||
data_dumper->DumpRaw("aec3_ref_erle", erle_ref_[0]);
|
||||
for (auto& factor : correction_factors_[0]) {
|
||||
data_dumper->DumpRaw("aec3_erle_correction_factor", factor);
|
||||
}
|
||||
}
|
||||
|
||||
// Estimates for each band the smallest number of sections in the filter that
|
||||
// together constitute 90% of the estimated echo energy.
|
||||
void SignalDependentErleEstimator::ComputeNumberOfActiveFilterSections(
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
||||
filter_frequency_responses) {
|
||||
RTC_DCHECK_GT(num_sections_, 1);
|
||||
// Computes an approximation of the power spectrum if the filter would have
|
||||
// been limited to a certain number of filter sections.
|
||||
ComputeEchoEstimatePerFilterSection(render_buffer,
|
||||
filter_frequency_responses);
|
||||
// For each band, computes the number of filter sections that are needed for
|
||||
// achieving the 90 % energy in the echo estimate.
|
||||
ComputeActiveFilterSections();
|
||||
}
|
||||
|
||||
// Adapts the per-section, per-subband correction factors from the latest
// render (X2), capture (Y2) and error (E2) power spectra. Only channels with
// converged filters are updated.
void SignalDependentErleEstimator::UpdateCorrectionFactors(
    rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
    rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
    const std::vector<bool>& converged_filters) {
  for (size_t ch = 0; ch < converged_filters.size(); ++ch) {
    if (converged_filters[ch]) {
      constexpr float kX2BandEnergyThreshold = 44015068.0f;
      constexpr float kSmthConstantDecreases = 0.1f;
      constexpr float kSmthConstantIncreases = kSmthConstantDecreases / 2.f;
      // Sums a power spectrum into per-subband energies.
      auto subband_powers = [](rtc::ArrayView<const float> power_spectrum,
                               rtc::ArrayView<float> power_spectrum_subbands) {
        for (size_t subband = 0; subband < kSubbands; ++subband) {
          RTC_DCHECK_LE(kBandBoundaries[subband + 1], power_spectrum.size());
          power_spectrum_subbands[subband] = std::accumulate(
              power_spectrum.begin() + kBandBoundaries[subband],
              power_spectrum.begin() + kBandBoundaries[subband + 1], 0.f);
        }
      };

      std::array<float, kSubbands> X2_subbands, E2_subbands, Y2_subbands;
      subband_powers(X2, X2_subbands);
      subband_powers(E2[ch], E2_subbands);
      subband_powers(Y2[ch], Y2_subbands);
      std::array<size_t, kSubbands> idx_subbands;
      for (size_t subband = 0; subband < kSubbands; ++subband) {
        // When aggregating the number of active filter sections over the
        // bands of a subband, the minimum is taken: if the direct path is the
        // main contributor for any band, it is treated as the main
        // contributor for the whole subband. The aggregate is later used as
        // the identifier of the ERLE estimator to update.
        RTC_DCHECK_LE(kBandBoundaries[subband + 1],
                      n_active_sections_[ch].size());
        idx_subbands[subband] = *std::min_element(
            n_active_sections_[ch].begin() + kBandBoundaries[subband],
            n_active_sections_[ch].begin() + kBandBoundaries[subband + 1]);
      }

      // Form instantaneous ERLE observations (Y2/E2) for subbands with
      // sufficient render energy and nonzero error energy.
      std::array<float, kSubbands> new_erle;
      std::array<bool, kSubbands> is_erle_updated;
      is_erle_updated.fill(false);
      new_erle.fill(0.f);
      for (size_t subband = 0; subband < kSubbands; ++subband) {
        if (X2_subbands[subband] > kX2BandEnergyThreshold &&
            E2_subbands[subband] > 0) {
          new_erle[subband] = Y2_subbands[subband] / E2_subbands[subband];
          RTC_DCHECK_GT(new_erle[subband], 0);
          is_erle_updated[subband] = true;
          ++num_updates_[ch][subband];
        }
      }

      // Smooth the section-specific ERLE estimators towards the observation;
      // increases adapt at half the speed of decreases.
      for (size_t subband = 0; subband < kSubbands; ++subband) {
        const size_t idx = idx_subbands[subband];
        RTC_DCHECK_LT(idx, erle_estimators_[ch].size());
        float alpha = new_erle[subband] > erle_estimators_[ch][idx][subband]
                          ? kSmthConstantIncreases
                          : kSmthConstantDecreases;
        alpha = static_cast<float>(is_erle_updated[subband]) * alpha;
        erle_estimators_[ch][idx][subband] +=
            alpha * (new_erle[subband] - erle_estimators_[ch][idx][subband]);
        erle_estimators_[ch][idx][subband] = rtc::SafeClamp(
            erle_estimators_[ch][idx][subband], min_erle_, max_erle_[subband]);
      }

      // Smooth the reference ERLE (updated on all observations) in the same
      // way.
      for (size_t subband = 0; subband < kSubbands; ++subband) {
        float alpha = new_erle[subband] > erle_ref_[ch][subband]
                          ? kSmthConstantIncreases
                          : kSmthConstantDecreases;
        alpha = static_cast<float>(is_erle_updated[subband]) * alpha;
        erle_ref_[ch][subband] +=
            alpha * (new_erle[subband] - erle_ref_[ch][subband]);
        erle_ref_[ch][subband] = rtc::SafeClamp(erle_ref_[ch][subband],
                                                min_erle_, max_erle_[subband]);
      }

      // Once enough observations have accumulated, adapt the correction
      // factor towards the ratio between the section-specific ERLE and the
      // reference ERLE.
      for (size_t subband = 0; subband < kSubbands; ++subband) {
        constexpr int kNumUpdateThr = 50;
        if (is_erle_updated[subband] &&
            num_updates_[ch][subband] > kNumUpdateThr) {
          const size_t idx = idx_subbands[subband];
          RTC_DCHECK_GT(erle_ref_[ch][subband], 0.f);
          const float new_correction_factor =
              erle_estimators_[ch][idx][subband] / erle_ref_[ch][subband];

          correction_factors_[ch][idx][subband] +=
              0.1f *
              (new_correction_factor - correction_factors_[ch][idx][subband]);
        }
      }
    }
  }
}
|
||||
|
||||
void SignalDependentErleEstimator::ComputeEchoEstimatePerFilterSection(
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
||||
filter_frequency_responses) {
|
||||
const SpectrumBuffer& spectrum_render_buffer =
|
||||
render_buffer.GetSpectrumBuffer();
|
||||
const size_t num_render_channels = spectrum_render_buffer.buffer[0].size();
|
||||
const size_t num_capture_channels = S2_section_accum_.size();
|
||||
const float one_by_num_render_channels = 1.f / num_render_channels;
|
||||
|
||||
RTC_DCHECK_EQ(S2_section_accum_.size(), filter_frequency_responses.size());
|
||||
|
||||
for (size_t capture_ch = 0; capture_ch < num_capture_channels; ++capture_ch) {
|
||||
RTC_DCHECK_EQ(S2_section_accum_[capture_ch].size() + 1,
|
||||
section_boundaries_blocks_.size());
|
||||
size_t idx_render = render_buffer.Position();
|
||||
idx_render = spectrum_render_buffer.OffsetIndex(
|
||||
idx_render, section_boundaries_blocks_[0]);
|
||||
|
||||
for (size_t section = 0; section < num_sections_; ++section) {
|
||||
std::array<float, kFftLengthBy2Plus1> X2_section;
|
||||
std::array<float, kFftLengthBy2Plus1> H2_section;
|
||||
X2_section.fill(0.f);
|
||||
H2_section.fill(0.f);
|
||||
const size_t block_limit =
|
||||
std::min(section_boundaries_blocks_[section + 1],
|
||||
filter_frequency_responses[capture_ch].size());
|
||||
for (size_t block = section_boundaries_blocks_[section];
|
||||
block < block_limit; ++block) {
|
||||
for (size_t render_ch = 0;
|
||||
render_ch < spectrum_render_buffer.buffer[idx_render].size();
|
||||
++render_ch) {
|
||||
for (size_t k = 0; k < X2_section.size(); ++k) {
|
||||
X2_section[k] +=
|
||||
spectrum_render_buffer.buffer[idx_render][render_ch][k] *
|
||||
one_by_num_render_channels;
|
||||
}
|
||||
}
|
||||
std::transform(H2_section.begin(), H2_section.end(),
|
||||
filter_frequency_responses[capture_ch][block].begin(),
|
||||
H2_section.begin(), std::plus<float>());
|
||||
idx_render = spectrum_render_buffer.IncIndex(idx_render);
|
||||
}
|
||||
|
||||
std::transform(X2_section.begin(), X2_section.end(), H2_section.begin(),
|
||||
S2_section_accum_[capture_ch][section].begin(),
|
||||
std::multiplies<float>());
|
||||
}
|
||||
|
||||
for (size_t section = 1; section < num_sections_; ++section) {
|
||||
std::transform(S2_section_accum_[capture_ch][section - 1].begin(),
|
||||
S2_section_accum_[capture_ch][section - 1].end(),
|
||||
S2_section_accum_[capture_ch][section].begin(),
|
||||
S2_section_accum_[capture_ch][section].begin(),
|
||||
std::plus<float>());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SignalDependentErleEstimator::ComputeActiveFilterSections() {
|
||||
for (size_t ch = 0; ch < n_active_sections_.size(); ++ch) {
|
||||
std::fill(n_active_sections_[ch].begin(), n_active_sections_[ch].end(), 0);
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
size_t section = num_sections_;
|
||||
float target = 0.9f * S2_section_accum_[ch][num_sections_ - 1][k];
|
||||
while (section > 0 && S2_section_accum_[ch][section - 1][k] >= target) {
|
||||
n_active_sections_[ch][k] = --section;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} // namespace webrtc
|
@ -0,0 +1,98 @@
|
||||
/*
|
||||
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class estimates the dependency of the Erle on the input signal. By
// looking at the input signal, an estimation of whether the current echo
// estimate is due to the direct path or to a more reverberant one is performed.
// Once that estimation is done, it is possible to refine the average Erle that
// this class receives as an input.
class SignalDependentErleEstimator {
 public:
  SignalDependentErleEstimator(const EchoCanceller3Config& config,
                               size_t num_capture_channels);

  ~SignalDependentErleEstimator();

  // Resets the internal estimation state.
  void Reset();

  // Returns the Erle per frequency subband, one array per capture channel.
  rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
    return erle_;
  }

  // Updates the Erle estimate. The Erle that is passed as an input is required
  // to be an estimation of the average Erle achieved by the linear filter.
  // X2, Y2 and E2 are the render, capture and error power spectra;
  // converged_filters flags, per capture channel, whether the linear filter
  // has converged.
  void Update(
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          filter_frequency_response,
      rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
      const std::vector<bool>& converged_filters);

  // Writes the internal state to the data dumper for debugging.
  void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;

  // Number of frequency subbands the Erle refinement operates on.
  static constexpr size_t kSubbands = 6;

 private:
  // Computes, per capture channel and frequency bin, how many filter sections
  // are needed to account for the bulk of the estimated echo, given the
  // current render signal and the filter frequency responses.
  void ComputeNumberOfActiveFilterSections(
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          filter_frequency_responses);

  // Adapts the per-subband correction factors from the power spectra and the
  // converged-filter flags.
  void UpdateCorrectionFactors(
      rtc::ArrayView<const float, kFftLengthBy2Plus1> X2,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
      rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
      const std::vector<bool>& converged_filters);

  // Accumulates into S2_section_accum_, per filter section, an echo spectrum
  // estimate formed from the render spectra and the filter frequency
  // responses.
  void ComputeEchoEstimatePerFilterSection(
      const RenderBuffer& render_buffer,
      rtc::ArrayView<const std::vector<std::array<float, kFftLengthBy2Plus1>>>
          filter_frequency_responses);

  // Derives n_active_sections_ from the per-section echo estimates in
  // S2_section_accum_.
  void ComputeActiveFilterSections();

  const float min_erle_;
  // Number of sections the linear filter is partitioned into.
  const size_t num_sections_;
  const size_t num_blocks_;
  const size_t delay_headroom_blocks_;
  // Maps each FFT bin to one of the kSubbands subbands.
  const std::array<size_t, kFftLengthBy2Plus1> band_to_subband_;
  // Per-subband upper bound for the refined Erle.
  const std::array<float, kSubbands> max_erle_;
  // Block indices delimiting the filter sections; holds num_sections_ + 1
  // entries.
  const std::vector<size_t> section_boundaries_blocks_;
  // Refined Erle, per capture channel and frequency bin.
  std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
  // Cumulative echo estimate per capture channel, filter section and bin.
  std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
      S2_section_accum_;
  std::vector<std::vector<std::array<float, kSubbands>>> erle_estimators_;
  std::vector<std::array<float, kSubbands>> erle_ref_;
  std::vector<std::vector<std::array<float, kSubbands>>> correction_factors_;
  std::vector<std::array<int, kSubbands>> num_updates_;
  // Per capture channel and bin, the lowest section index whose cumulative
  // echo estimate reaches 90% of the total over all sections.
  std::vector<std::array<size_t, kFftLengthBy2Plus1>> n_active_sections_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_SIGNAL_DEPENDENT_ERLE_ESTIMATOR_H_
|
30
webrtc/modules/audio_processing/aec3/spectrum_buffer.cc
Normal file
30
webrtc/modules/audio_processing/aec3/spectrum_buffer.cc
Normal file
@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Allocates a `size`-slot circular buffer with `num_channels` spectra per
// slot and zero-initializes every spectrum value.
SpectrumBuffer::SpectrumBuffer(size_t size, size_t num_channels)
    : size(static_cast<int>(size)),
      buffer(size,
             std::vector<std::array<float, kFftLengthBy2Plus1>>(num_channels)) {
  // Default-constructed std::array leaves its floats uninitialized, so each
  // spectrum is cleared explicitly.
  for (auto& channel_spectra : buffer) {
    for (auto& spectrum : channel_spectra) {
      spectrum.fill(0.f);
    }
  }
}
|
||||
|
||||
// Defaulted out-of-line; the destructor is declared in the header and
// defined here.
SpectrumBuffer::~SpectrumBuffer() = default;
|
||||
|
||||
} // namespace webrtc
|
62
webrtc/modules/audio_processing/aec3/spectrum_buffer.h
Normal file
62
webrtc/modules/audio_processing/aec3/spectrum_buffer.h
Normal file
@ -0,0 +1,62 @@
|
||||
/*
|
||||
* Copyright (c) 2017 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Struct for bundling a circular buffer of one dimensional vector objects
|
||||
// together with the read and write indices.
|
||||
struct SpectrumBuffer {
|
||||
SpectrumBuffer(size_t size, size_t num_channels);
|
||||
~SpectrumBuffer();
|
||||
|
||||
int IncIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index < size - 1 ? index + 1 : 0;
|
||||
}
|
||||
|
||||
int DecIndex(int index) const {
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
return index > 0 ? index - 1 : size - 1;
|
||||
}
|
||||
|
||||
int OffsetIndex(int index, int offset) const {
|
||||
RTC_DCHECK_GE(size, offset);
|
||||
RTC_DCHECK_EQ(buffer.size(), static_cast<size_t>(size));
|
||||
RTC_DCHECK_GE(size + index + offset, 0);
|
||||
return (size + index + offset) % size;
|
||||
}
|
||||
|
||||
void UpdateWriteIndex(int offset) { write = OffsetIndex(write, offset); }
|
||||
void IncWriteIndex() { write = IncIndex(write); }
|
||||
void DecWriteIndex() { write = DecIndex(write); }
|
||||
void UpdateReadIndex(int offset) { read = OffsetIndex(read, offset); }
|
||||
void IncReadIndex() { read = IncIndex(read); }
|
||||
void DecReadIndex() { read = DecIndex(read); }
|
||||
|
||||
const int size;
|
||||
std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>> buffer;
|
||||
int write = 0;
|
||||
int read = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_SPECTRUM_BUFFER_H_
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user