Bump to WebRTC M120 release
This picks up some upstream API removals: ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it has been removed upstream, but we may want to drop it soon.
This commit is contained in:
@ -21,7 +21,6 @@ group("modules") {
|
||||
"rtp_rtcp",
|
||||
"utility",
|
||||
"video_coding",
|
||||
"video_processing",
|
||||
]
|
||||
|
||||
if (rtc_desktop_capture_supported) {
|
||||
@ -36,10 +35,7 @@ rtc_source_set("module_api_public") {
|
||||
|
||||
rtc_source_set("module_api") {
|
||||
visibility = [ "*" ]
|
||||
sources = [
|
||||
"include/module.h",
|
||||
"include/module_common_types.h",
|
||||
]
|
||||
sources = [ "include/module_common_types.h" ]
|
||||
}
|
||||
|
||||
rtc_source_set("module_fec_api") {
|
||||
@ -47,7 +43,7 @@ rtc_source_set("module_fec_api") {
|
||||
sources = [ "include/module_fec_types.h" ]
|
||||
}
|
||||
|
||||
if (rtc_include_tests) {
|
||||
if (rtc_include_tests && !build_with_chromium) {
|
||||
modules_tests_resources = [
|
||||
"../resources/audio_coding/testfile16kHz.pcm",
|
||||
"../resources/audio_coding/testfile32kHz.pcm",
|
||||
@ -82,12 +78,14 @@ if (rtc_include_tests) {
|
||||
data = modules_tests_resources
|
||||
|
||||
if (is_android) {
|
||||
use_default_launcher = false
|
||||
deps += [
|
||||
# NOTE(brandtr): Including Java classes seems only to be possible from
|
||||
# rtc_test targets. Therefore we include this target here, instead of
|
||||
# in video_coding_modules_tests, where it is actually used.
|
||||
"../sdk/android:libjingle_peerconnection_java",
|
||||
"//testing/android/native_test:native_test_native_code",
|
||||
"//sdk/android:native_test_jni_onload",
|
||||
"//testing/android/native_test:native_test_support",
|
||||
]
|
||||
shard_timeout = 900
|
||||
}
|
||||
@ -147,18 +145,28 @@ if (rtc_include_tests) {
|
||||
"../resources/audio_processing/transient/wpd7.dat",
|
||||
"../resources/deflicker_before_cif_short.yuv",
|
||||
"../resources/far16_stereo.pcm",
|
||||
"../resources/far176_stereo.pcm",
|
||||
"../resources/far192_stereo.pcm",
|
||||
"../resources/far22_stereo.pcm",
|
||||
"../resources/far32_stereo.pcm",
|
||||
"../resources/far44_stereo.pcm",
|
||||
"../resources/far48_stereo.pcm",
|
||||
"../resources/far88_stereo.pcm",
|
||||
"../resources/far8_stereo.pcm",
|
||||
"../resources/far96_stereo.pcm",
|
||||
"../resources/foremanColorEnhanced_cif_short.yuv",
|
||||
"../resources/foreman_cif.yuv",
|
||||
"../resources/foreman_cif_short.yuv",
|
||||
"../resources/near16_stereo.pcm",
|
||||
"../resources/near176_stereo.pcm",
|
||||
"../resources/near192_stereo.pcm",
|
||||
"../resources/near22_stereo.pcm",
|
||||
"../resources/near32_stereo.pcm",
|
||||
"../resources/near44_stereo.pcm",
|
||||
"../resources/near48_stereo.pcm",
|
||||
"../resources/near88_stereo.pcm",
|
||||
"../resources/near8_stereo.pcm",
|
||||
"../resources/near96_stereo.pcm",
|
||||
"../resources/ref03.aecdump",
|
||||
"../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_AST.bin",
|
||||
"../resources/remote_bitrate_estimator/VideoSendersTest_BweTest_IncreasingChoke1_0_TOF.bin",
|
||||
@ -219,9 +227,9 @@ if (rtc_include_tests) {
|
||||
"pacing:pacing_unittests",
|
||||
"remote_bitrate_estimator:remote_bitrate_estimator_unittests",
|
||||
"rtp_rtcp:rtp_rtcp_unittests",
|
||||
"utility:utility_unittests",
|
||||
"video_coding:video_coding_unittests",
|
||||
"video_processing:video_processing_unittests",
|
||||
"video_coding/deprecated:deprecated_unittests",
|
||||
"video_coding/timing:timing_unittests",
|
||||
]
|
||||
|
||||
if (rtc_desktop_capture_supported) {
|
||||
@ -231,6 +239,7 @@ if (rtc_include_tests) {
|
||||
data = modules_unittests_resources
|
||||
|
||||
if (is_android) {
|
||||
use_default_launcher = false
|
||||
deps += [
|
||||
"../sdk/android:libjingle_peerconnection_java",
|
||||
"//testing/android/native_test:native_test_support",
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -11,6 +11,8 @@
|
||||
#ifndef MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
||||
#define MODULES_AUDIO_CODING_CODECS_ISAC_MAIN_SOURCE_FILTER_FUNCTIONS_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
|
||||
void WebRtcIsac_AutoCorr(double* r, const double* x, size_t N, size_t order);
|
||||
|
@ -25,8 +25,8 @@
|
||||
* Post-filtering:
|
||||
* y(z) = x(z) - damper(z) * gain * (x(z) + y(z)) * z ^ (-lag);
|
||||
*
|
||||
* Note that |lag| is a floating number so we perform an interpolation to
|
||||
* obtain the correct |lag|.
|
||||
* Note that `lag` is a floating number so we perform an interpolation to
|
||||
* obtain the correct `lag`.
|
||||
*
|
||||
*/
|
||||
|
||||
@ -86,7 +86,7 @@ typedef enum {
|
||||
* buffer : a buffer where the sum of previous inputs and outputs
|
||||
* are stored.
|
||||
* damper_state : the state of the damping filter. The filter is defined by
|
||||
* |kDampFilter|.
|
||||
* `kDampFilter`.
|
||||
* interpol_coeff : pointer to a set of coefficient which are used to utilize
|
||||
* fractional pitch by interpolation.
|
||||
* gain : pitch-gain to be applied to the current segment of input.
|
||||
@ -140,9 +140,9 @@ static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
|
||||
int j;
|
||||
double sum;
|
||||
double sum2;
|
||||
/* Index of |parameters->buffer| where the output is written to. */
|
||||
/* Index of `parameters->buffer` where the output is written to. */
|
||||
int pos = parameters->index + PITCH_BUFFSIZE;
|
||||
/* Index of |parameters->buffer| where samples are read for fractional-lag
|
||||
/* Index of `parameters->buffer` where samples are read for fractional-lag
|
||||
* computation. */
|
||||
int pos_lag = pos - parameters->lag_offset;
|
||||
|
||||
@ -174,9 +174,9 @@ static void FilterSegment(const double* in_data, PitchFilterParam* parameters,
|
||||
/* Filter for fractional pitch. */
|
||||
sum2 = 0.0;
|
||||
for (m = PITCH_FRACORDER-1; m >= m_tmp; --m) {
|
||||
/* |lag_index + m| is always larger than or equal to zero, see how
|
||||
/* `lag_index + m` is always larger than or equal to zero, see how
|
||||
* m_tmp is computed. This is equivalent to assume samples outside
|
||||
* |out_dg[j]| are zero. */
|
||||
* `out_dg[j]` are zero. */
|
||||
sum2 += out_dg[j][lag_index + m] * parameters->interpol_coeff[m];
|
||||
}
|
||||
/* Add the contribution of differential gain change. */
|
||||
@ -353,7 +353,7 @@ static void FilterFrame(const double* in_data, PitchFiltstr* filter_state,
|
||||
|
||||
if ((mode == kPitchFilterPreGain) || (mode == kPitchFilterPreLa)) {
|
||||
/* Filter the lookahead segment, this is treated as the last sub-frame. So
|
||||
* set |pf_param| to last sub-frame. */
|
||||
* set `pf_param` to last sub-frame. */
|
||||
filter_parameters.sub_frame = PITCH_SUBFRAMES - 1;
|
||||
filter_parameters.num_samples = QLOOKAHEAD;
|
||||
FilterSegment(in_data, &filter_parameters, out_data, out_dg);
|
||||
|
@ -19,15 +19,6 @@ config("apm_debug_dump") {
|
||||
}
|
||||
}
|
||||
|
||||
rtc_library("config") {
|
||||
visibility = [ ":*" ]
|
||||
sources = [
|
||||
"include/config.cc",
|
||||
"include/config.h",
|
||||
]
|
||||
deps = [ "../../rtc_base/system:rtc_export" ]
|
||||
}
|
||||
|
||||
rtc_library("api") {
|
||||
visibility = [ "*" ]
|
||||
sources = [
|
||||
@ -37,20 +28,23 @@ rtc_library("api") {
|
||||
deps = [
|
||||
":audio_frame_view",
|
||||
":audio_processing_statistics",
|
||||
":config",
|
||||
"../../api:array_view",
|
||||
"../../api:scoped_refptr",
|
||||
"../../api/audio:aec3_config",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../api/audio:echo_control",
|
||||
"../../rtc_base:deprecation",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base:macromagic",
|
||||
"../../rtc_base:refcount",
|
||||
"../../rtc_base:stringutils",
|
||||
"../../rtc_base/system:arch",
|
||||
"../../rtc_base/system:file_wrapper",
|
||||
"../../rtc_base/system:rtc_export",
|
||||
"agc:gain_control_interface",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
absl_deps = [
|
||||
"//third_party/abseil-cpp/absl/strings",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("audio_frame_proxies") {
|
||||
@ -119,7 +113,41 @@ rtc_source_set("aec_dump_interface") {
|
||||
deps = [
|
||||
":api",
|
||||
":audio_frame_view",
|
||||
"../../rtc_base:deprecation",
|
||||
]
|
||||
absl_deps = [
|
||||
"//third_party/abseil-cpp/absl/base:core_headers",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("gain_controller2") {
|
||||
configs += [ ":apm_debug_dump" ]
|
||||
sources = [
|
||||
"gain_controller2.cc",
|
||||
"gain_controller2.h",
|
||||
]
|
||||
defines = []
|
||||
deps = [
|
||||
":aec_dump_interface",
|
||||
":api",
|
||||
":apm_logging",
|
||||
":audio_buffer",
|
||||
":audio_frame_view",
|
||||
"../../common_audio",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:logging",
|
||||
"../../rtc_base:stringutils",
|
||||
"../../system_wrappers:field_trial",
|
||||
"agc2:adaptive_digital_gain_controller",
|
||||
"agc2:common",
|
||||
"agc2:cpu_features",
|
||||
"agc2:fixed_digital",
|
||||
"agc2:gain_applier",
|
||||
"agc2:input_volume_controller",
|
||||
"agc2:noise_level_estimator",
|
||||
"agc2:saturation_protector",
|
||||
"agc2:speech_level_estimator",
|
||||
"agc2:vad_wrapper",
|
||||
]
|
||||
}
|
||||
|
||||
@ -130,28 +158,11 @@ rtc_library("audio_processing") {
|
||||
"audio_processing_builder_impl.cc",
|
||||
"audio_processing_impl.cc",
|
||||
"audio_processing_impl.h",
|
||||
"common.h",
|
||||
"echo_control_mobile_impl.cc",
|
||||
"echo_control_mobile_impl.h",
|
||||
"echo_detector/circular_buffer.cc",
|
||||
"echo_detector/circular_buffer.h",
|
||||
"echo_detector/mean_variance_estimator.cc",
|
||||
"echo_detector/mean_variance_estimator.h",
|
||||
"echo_detector/moving_max.cc",
|
||||
"echo_detector/moving_max.h",
|
||||
"echo_detector/normalized_covariance_estimator.cc",
|
||||
"echo_detector/normalized_covariance_estimator.h",
|
||||
"gain_control_impl.cc",
|
||||
"gain_control_impl.h",
|
||||
"gain_controller2.cc",
|
||||
"gain_controller2.h",
|
||||
"level_estimator.cc",
|
||||
"level_estimator.h",
|
||||
"render_queue_item_verifier.h",
|
||||
"residual_echo_detector.cc",
|
||||
"residual_echo_detector.h",
|
||||
"typing_detection.cc",
|
||||
"typing_detection.h",
|
||||
]
|
||||
|
||||
defines = []
|
||||
@ -163,13 +174,13 @@ rtc_library("audio_processing") {
|
||||
":audio_frame_proxies",
|
||||
":audio_frame_view",
|
||||
":audio_processing_statistics",
|
||||
":config",
|
||||
":gain_controller2",
|
||||
":high_pass_filter",
|
||||
":optionally_built_submodule_creators",
|
||||
":rms_level",
|
||||
":voice_detection",
|
||||
"../../api:array_view",
|
||||
"../../api:function_view",
|
||||
"../../api:make_ref_counted",
|
||||
"../../api/audio:aec3_config",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../api/audio:echo_control",
|
||||
@ -177,15 +188,20 @@ rtc_library("audio_processing") {
|
||||
"../../common_audio:common_audio_c",
|
||||
"../../common_audio/third_party/ooura:fft_size_256",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:deprecation",
|
||||
"../../rtc_base:event_tracer",
|
||||
"../../rtc_base:gtest_prod",
|
||||
"../../rtc_base:ignore_wundef",
|
||||
"../../rtc_base:refcount",
|
||||
"../../rtc_base:logging",
|
||||
"../../rtc_base:macromagic",
|
||||
"../../rtc_base:safe_minmax",
|
||||
"../../rtc_base:sanitizer",
|
||||
"../../rtc_base:swap_queue",
|
||||
"../../rtc_base:timeutils",
|
||||
"../../rtc_base/experiments:field_trial_parser",
|
||||
"../../rtc_base/synchronization:mutex",
|
||||
"../../rtc_base/system:rtc_export",
|
||||
"../../system_wrappers",
|
||||
"../../system_wrappers:denormal_disabler",
|
||||
"../../system_wrappers:field_trial",
|
||||
"../../system_wrappers:metrics",
|
||||
"aec3",
|
||||
@ -194,20 +210,21 @@ rtc_library("audio_processing") {
|
||||
"agc",
|
||||
"agc:gain_control_interface",
|
||||
"agc:legacy_agc",
|
||||
"agc2:adaptive_digital",
|
||||
"agc2:fixed_digital",
|
||||
"agc2:gain_applier",
|
||||
"agc2:input_volume_stats_reporter",
|
||||
"capture_levels_adjuster",
|
||||
"ns",
|
||||
"transient:transient_suppressor_api",
|
||||
"vad",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
absl_deps = [
|
||||
"//third_party/abseil-cpp/absl/strings",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
]
|
||||
|
||||
deps += [
|
||||
"../../common_audio",
|
||||
"../../common_audio:fir_filter",
|
||||
"../../common_audio:fir_filter_factory",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../system_wrappers",
|
||||
]
|
||||
|
||||
@ -218,18 +235,30 @@ rtc_library("audio_processing") {
|
||||
}
|
||||
}
|
||||
|
||||
rtc_library("voice_detection") {
|
||||
rtc_library("residual_echo_detector") {
|
||||
poisonous = [ "default_echo_detector" ]
|
||||
configs += [ ":apm_debug_dump" ]
|
||||
sources = [
|
||||
"voice_detection.cc",
|
||||
"voice_detection.h",
|
||||
"echo_detector/circular_buffer.cc",
|
||||
"echo_detector/circular_buffer.h",
|
||||
"echo_detector/mean_variance_estimator.cc",
|
||||
"echo_detector/mean_variance_estimator.h",
|
||||
"echo_detector/moving_max.cc",
|
||||
"echo_detector/moving_max.h",
|
||||
"echo_detector/normalized_covariance_estimator.cc",
|
||||
"echo_detector/normalized_covariance_estimator.h",
|
||||
"residual_echo_detector.cc",
|
||||
"residual_echo_detector.h",
|
||||
]
|
||||
deps = [
|
||||
":api",
|
||||
":audio_buffer",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../common_audio:common_audio_c",
|
||||
":apm_logging",
|
||||
"../../api:array_view",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:logging",
|
||||
"../../system_wrappers:metrics",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
}
|
||||
|
||||
rtc_library("optionally_built_submodule_creators") {
|
||||
@ -289,7 +318,11 @@ rtc_library("apm_logging") {
|
||||
"../../api:array_view",
|
||||
"../../common_audio",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base:stringutils",
|
||||
]
|
||||
absl_deps = [
|
||||
"//third_party/abseil-cpp/absl/strings",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
]
|
||||
defines = []
|
||||
}
|
||||
@ -306,143 +339,165 @@ if (rtc_include_tests) {
|
||||
":audio_processing_statistics",
|
||||
"../../test:test_support",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
|
||||
}
|
||||
|
||||
group("audio_processing_tests") {
|
||||
testonly = true
|
||||
deps = [
|
||||
":audioproc_test_utils",
|
||||
"transient:click_annotate",
|
||||
"transient:transient_suppression_test",
|
||||
]
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
deps += [
|
||||
":audioproc_unittest_proto",
|
||||
"aec_dump:aec_dump_unittests",
|
||||
"test/conversational_speech",
|
||||
"test/py_quality_assessment",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
rtc_library("audio_processing_unittests") {
|
||||
testonly = true
|
||||
|
||||
configs += [ ":apm_debug_dump" ]
|
||||
sources = [
|
||||
"audio_buffer_unittest.cc",
|
||||
"audio_frame_view_unittest.cc",
|
||||
"config_unittest.cc",
|
||||
"echo_control_mobile_unittest.cc",
|
||||
"gain_controller2_unittest.cc",
|
||||
"splitting_filter_unittest.cc",
|
||||
"test/fake_recording_device_unittest.cc",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":analog_mic_simulation",
|
||||
":api",
|
||||
":apm_logging",
|
||||
":audio_buffer",
|
||||
":audio_frame_view",
|
||||
":audio_processing",
|
||||
":audioproc_test_utils",
|
||||
":config",
|
||||
":high_pass_filter",
|
||||
":mocks",
|
||||
":voice_detection",
|
||||
"../../api:array_view",
|
||||
"../../api:scoped_refptr",
|
||||
"../../api/audio:aec3_config",
|
||||
"../../api/audio:aec3_factory",
|
||||
"../../common_audio",
|
||||
"../../common_audio:common_audio_c",
|
||||
"../../rtc_base",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:gtest_prod",
|
||||
"../../rtc_base:ignore_wundef",
|
||||
"../../rtc_base:protobuf_utils",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base:rtc_base_tests_utils",
|
||||
"../../rtc_base:safe_minmax",
|
||||
"../../rtc_base:task_queue_for_test",
|
||||
"../../rtc_base/synchronization:mutex",
|
||||
"../../rtc_base/system:arch",
|
||||
"../../rtc_base/system:file_wrapper",
|
||||
"../../system_wrappers",
|
||||
"../../test:fileutils",
|
||||
"../../test:rtc_expect_death",
|
||||
"../../test:test_support",
|
||||
"../audio_coding:neteq_input_audio_tools",
|
||||
"aec_dump:mock_aec_dump_unittests",
|
||||
"agc:agc_unittests",
|
||||
"agc2:adaptive_digital_unittests",
|
||||
"agc2:biquad_filter_unittests",
|
||||
"agc2:fixed_digital_unittests",
|
||||
"agc2:noise_estimator_unittests",
|
||||
"agc2:rnn_vad_with_level_unittests",
|
||||
"agc2:test_utils",
|
||||
"agc2/rnn_vad:unittests",
|
||||
"test/conversational_speech:unittest",
|
||||
"transient:transient_suppression_unittests",
|
||||
"utility:legacy_delay_estimator_unittest",
|
||||
"utility:pffft_wrapper_unittest",
|
||||
"vad:vad_unittests",
|
||||
"//testing/gtest",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
|
||||
defines = []
|
||||
|
||||
if (rtc_prefer_fixed_point) {
|
||||
defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ]
|
||||
} else {
|
||||
defines += [ "WEBRTC_AUDIOPROC_FLOAT_PROFILE" ]
|
||||
}
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ]
|
||||
deps += [
|
||||
":audioproc_debug_proto",
|
||||
":audioproc_protobuf_utils",
|
||||
if (!build_with_chromium) {
|
||||
group("audio_processing_tests") {
|
||||
testonly = true
|
||||
deps = [
|
||||
":audioproc_test_utils",
|
||||
":audioproc_unittest_proto",
|
||||
":optionally_built_submodule_creators",
|
||||
":rms_level",
|
||||
":runtime_settings_protobuf_utils",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../api/audio:echo_control",
|
||||
"transient:click_annotate",
|
||||
"transient:transient_suppression_test",
|
||||
]
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
deps += [
|
||||
":audioproc_unittest_proto",
|
||||
"aec_dump:aec_dump_unittests",
|
||||
"test/conversational_speech",
|
||||
"test/py_quality_assessment",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
rtc_library("audio_processing_unittests") {
|
||||
testonly = true
|
||||
|
||||
configs += [ ":apm_debug_dump" ]
|
||||
sources = [
|
||||
"audio_buffer_unittest.cc",
|
||||
"audio_frame_view_unittest.cc",
|
||||
"echo_control_mobile_unittest.cc",
|
||||
"gain_controller2_unittest.cc",
|
||||
"splitting_filter_unittest.cc",
|
||||
"test/echo_canceller3_config_json_unittest.cc",
|
||||
"test/fake_recording_device_unittest.cc",
|
||||
]
|
||||
|
||||
deps = [
|
||||
":aec3_config_json",
|
||||
":analog_mic_simulation",
|
||||
":api",
|
||||
":apm_logging",
|
||||
":audio_buffer",
|
||||
":audio_frame_view",
|
||||
":audio_processing",
|
||||
":audioproc_test_utils",
|
||||
":gain_controller2",
|
||||
":high_pass_filter",
|
||||
":mocks",
|
||||
"../../api:array_view",
|
||||
"../../api:make_ref_counted",
|
||||
"../../api:scoped_refptr",
|
||||
"../../api/audio:aec3_config",
|
||||
"../../api/audio:aec3_factory",
|
||||
"../../api/audio:echo_detector_creator",
|
||||
"../../common_audio",
|
||||
"../../common_audio:common_audio_c",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:gtest_prod",
|
||||
"../../rtc_base:ignore_wundef",
|
||||
"../../rtc_base:macromagic",
|
||||
"../../rtc_base:platform_thread",
|
||||
"../../rtc_base:protobuf_utils",
|
||||
"../../rtc_base:random",
|
||||
"../../rtc_base:rtc_base_tests_utils",
|
||||
"../../rtc_base:rtc_task_queue",
|
||||
"aec_dump",
|
||||
"aec_dump:aec_dump_unittests",
|
||||
"../../rtc_base:rtc_event",
|
||||
"../../rtc_base:safe_conversions",
|
||||
"../../rtc_base:safe_minmax",
|
||||
"../../rtc_base:stringutils",
|
||||
"../../rtc_base:swap_queue",
|
||||
"../../rtc_base:task_queue_for_test",
|
||||
"../../rtc_base:threading",
|
||||
"../../rtc_base/synchronization:mutex",
|
||||
"../../rtc_base/system:arch",
|
||||
"../../rtc_base/system:file_wrapper",
|
||||
"../../system_wrappers",
|
||||
"../../system_wrappers:denormal_disabler",
|
||||
"../../test:field_trial",
|
||||
"../../test:fileutils",
|
||||
"../../test:rtc_expect_death",
|
||||
"../../test:test_support",
|
||||
"../audio_coding:neteq_input_audio_tools",
|
||||
"aec_dump:mock_aec_dump_unittests",
|
||||
"agc:agc_unittests",
|
||||
"agc2:adaptive_digital_gain_controller_unittest",
|
||||
"agc2:biquad_filter_unittests",
|
||||
"agc2:fixed_digital_unittests",
|
||||
"agc2:gain_applier_unittest",
|
||||
"agc2:input_volume_controller_unittests",
|
||||
"agc2:input_volume_stats_reporter_unittests",
|
||||
"agc2:noise_estimator_unittests",
|
||||
"agc2:saturation_protector_unittest",
|
||||
"agc2:speech_level_estimator_unittest",
|
||||
"agc2:test_utils",
|
||||
"agc2:vad_wrapper_unittests",
|
||||
"agc2/rnn_vad:unittests",
|
||||
"capture_levels_adjuster",
|
||||
"capture_levels_adjuster:capture_levels_adjuster_unittests",
|
||||
"test/conversational_speech:unittest",
|
||||
"transient:transient_suppression_unittests",
|
||||
"utility:legacy_delay_estimator_unittest",
|
||||
"utility:pffft_wrapper_unittest",
|
||||
"vad:vad_unittests",
|
||||
"//testing/gtest",
|
||||
]
|
||||
absl_deps += [ "//third_party/abseil-cpp/absl/flags:flag" ]
|
||||
sources += [
|
||||
"audio_processing_impl_locking_unittest.cc",
|
||||
"audio_processing_impl_unittest.cc",
|
||||
"audio_processing_unittest.cc",
|
||||
"echo_control_mobile_bit_exact_unittest.cc",
|
||||
"echo_detector/circular_buffer_unittest.cc",
|
||||
"echo_detector/mean_variance_estimator_unittest.cc",
|
||||
"echo_detector/moving_max_unittest.cc",
|
||||
"echo_detector/normalized_covariance_estimator_unittest.cc",
|
||||
"gain_control_unittest.cc",
|
||||
"high_pass_filter_unittest.cc",
|
||||
"level_estimator_unittest.cc",
|
||||
"residual_echo_detector_unittest.cc",
|
||||
"rms_level_unittest.cc",
|
||||
"test/debug_dump_replayer.cc",
|
||||
"test/debug_dump_replayer.h",
|
||||
"test/debug_dump_test.cc",
|
||||
"test/echo_canceller_test_tools.cc",
|
||||
"test/echo_canceller_test_tools.h",
|
||||
"test/echo_canceller_test_tools_unittest.cc",
|
||||
"test/echo_control_mock.h",
|
||||
"test/test_utils.h",
|
||||
"voice_detection_unittest.cc",
|
||||
absl_deps = [
|
||||
"//third_party/abseil-cpp/absl/strings",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
]
|
||||
|
||||
defines = []
|
||||
|
||||
if (rtc_prefer_fixed_point) {
|
||||
defines += [ "WEBRTC_AUDIOPROC_FIXED_PROFILE" ]
|
||||
} else {
|
||||
defines += [ "WEBRTC_AUDIOPROC_FLOAT_PROFILE" ]
|
||||
}
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ]
|
||||
deps += [
|
||||
":audioproc_debug_proto",
|
||||
":audioproc_protobuf_utils",
|
||||
":audioproc_test_utils",
|
||||
":audioproc_unittest_proto",
|
||||
":optionally_built_submodule_creators",
|
||||
":residual_echo_detector",
|
||||
":rms_level",
|
||||
":runtime_settings_protobuf_utils",
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../api/audio:echo_control",
|
||||
"../../rtc_base:rtc_base_tests_utils",
|
||||
"../../rtc_base:rtc_task_queue",
|
||||
"aec_dump",
|
||||
"aec_dump:aec_dump_unittests",
|
||||
]
|
||||
absl_deps += [ "//third_party/abseil-cpp/absl/flags:flag" ]
|
||||
sources += [
|
||||
"audio_processing_impl_locking_unittest.cc",
|
||||
"audio_processing_impl_unittest.cc",
|
||||
"audio_processing_unittest.cc",
|
||||
"echo_control_mobile_bit_exact_unittest.cc",
|
||||
"echo_detector/circular_buffer_unittest.cc",
|
||||
"echo_detector/mean_variance_estimator_unittest.cc",
|
||||
"echo_detector/moving_max_unittest.cc",
|
||||
"echo_detector/normalized_covariance_estimator_unittest.cc",
|
||||
"gain_control_unittest.cc",
|
||||
"high_pass_filter_unittest.cc",
|
||||
"residual_echo_detector_unittest.cc",
|
||||
"rms_level_unittest.cc",
|
||||
"test/debug_dump_replayer.cc",
|
||||
"test/debug_dump_replayer.h",
|
||||
"test/debug_dump_test.cc",
|
||||
"test/echo_canceller_test_tools.cc",
|
||||
"test/echo_canceller_test_tools.h",
|
||||
"test/echo_canceller_test_tools_unittest.cc",
|
||||
"test/echo_control_mock.h",
|
||||
"test/test_utils.h",
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -455,12 +510,18 @@ if (rtc_include_tests) {
|
||||
":audio_processing",
|
||||
":audioproc_test_utils",
|
||||
"../../api:array_view",
|
||||
"../../api/numerics",
|
||||
"../../api/test/metrics:global_metrics_logger_and_exporter",
|
||||
"../../api/test/metrics:metric",
|
||||
"../../rtc_base:platform_thread",
|
||||
"../../rtc_base:protobuf_utils",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base:random",
|
||||
"../../rtc_base:rtc_event",
|
||||
"../../rtc_base:safe_conversions",
|
||||
"../../system_wrappers",
|
||||
"../../test:perf_test",
|
||||
"../../test:test_support",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
|
||||
}
|
||||
|
||||
rtc_library("analog_mic_simulation") {
|
||||
@ -473,14 +534,15 @@ if (rtc_include_tests) {
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../common_audio",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base:logging",
|
||||
"../../rtc_base:safe_conversions",
|
||||
"../../rtc_base:safe_minmax",
|
||||
"agc:gain_map",
|
||||
"agc2:gain_map",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
}
|
||||
|
||||
if (rtc_enable_protobuf) {
|
||||
if (rtc_enable_protobuf && !build_with_chromium) {
|
||||
rtc_library("audioproc_f_impl") {
|
||||
testonly = true
|
||||
configs += [ ":apm_debug_dump" ]
|
||||
@ -498,6 +560,7 @@ if (rtc_include_tests) {
|
||||
]
|
||||
|
||||
deps = [
|
||||
":aec3_config_json",
|
||||
":analog_mic_simulation",
|
||||
":api",
|
||||
":apm_logging",
|
||||
@ -506,15 +569,18 @@ if (rtc_include_tests) {
|
||||
":audioproc_protobuf_utils",
|
||||
":audioproc_test_utils",
|
||||
":runtime_settings_protobuf_utils",
|
||||
"../../api/audio:aec3_config_json",
|
||||
"../../api/audio:aec3_factory",
|
||||
"../../api/audio:echo_detector_creator",
|
||||
"../../common_audio",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:ignore_wundef",
|
||||
"../../rtc_base:logging",
|
||||
"../../rtc_base:protobuf_utils",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base:rtc_json",
|
||||
"../../rtc_base:safe_conversions",
|
||||
"../../rtc_base:stringutils",
|
||||
"../../rtc_base:task_queue_for_test",
|
||||
"../../rtc_base:timeutils",
|
||||
"../../rtc_base/system:file_wrapper",
|
||||
"../../system_wrappers",
|
||||
"../../system_wrappers:field_trial",
|
||||
@ -549,7 +615,6 @@ if (rtc_include_tests) {
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:ignore_wundef",
|
||||
"../../rtc_base:protobuf_utils",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
@ -599,7 +664,7 @@ rtc_library("audioproc_test_utils") {
|
||||
"../../api/audio:audio_frame_api",
|
||||
"../../common_audio",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:rtc_base_approved",
|
||||
"../../rtc_base:random",
|
||||
"../../rtc_base/system:arch",
|
||||
"../../system_wrappers",
|
||||
"../../test:fileutils",
|
||||
@ -607,5 +672,26 @@ rtc_library("audioproc_test_utils") {
|
||||
"../audio_coding:neteq_input_audio_tools",
|
||||
"//testing/gtest",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
absl_deps = [
|
||||
"//third_party/abseil-cpp/absl/strings",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_library("aec3_config_json") {
|
||||
visibility = [ "*" ]
|
||||
testonly = true
|
||||
sources = [
|
||||
"test/echo_canceller3_config_json.cc",
|
||||
"test/echo_canceller3_config_json.h",
|
||||
]
|
||||
deps = [
|
||||
"../../api/audio:aec3_config",
|
||||
"../../rtc_base:checks",
|
||||
"../../rtc_base:logging",
|
||||
"../../rtc_base:rtc_json",
|
||||
"../../rtc_base:stringutils",
|
||||
"../../rtc_base/system:rtc_export",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
|
||||
}
|
||||
|
@ -22,6 +22,7 @@ rtc_library("aec3") {
|
||||
"alignment_mixer.h",
|
||||
"api_call_jitter_metrics.cc",
|
||||
"api_call_jitter_metrics.h",
|
||||
"block.h",
|
||||
"block_buffer.cc",
|
||||
"block_delay_buffer.cc",
|
||||
"block_delay_buffer.h",
|
||||
@ -37,6 +38,8 @@ rtc_library("aec3") {
|
||||
"coarse_filter_update_gain.h",
|
||||
"comfort_noise_generator.cc",
|
||||
"comfort_noise_generator.h",
|
||||
"config_selector.cc",
|
||||
"config_selector.h",
|
||||
"decimator.cc",
|
||||
"decimator.h",
|
||||
"delay_estimate.h",
|
||||
@ -72,6 +75,8 @@ rtc_library("aec3") {
|
||||
"matched_filter_lag_aggregator.h",
|
||||
"moving_average.cc",
|
||||
"moving_average.h",
|
||||
"multi_channel_content_detector.cc",
|
||||
"multi_channel_content_detector.h",
|
||||
"nearend_detector.h",
|
||||
"refined_filter_update_gain.cc",
|
||||
"refined_filter_update_gain.h",
|
||||
@ -140,8 +145,11 @@ rtc_library("aec3") {
|
||||
"../../../api/audio:echo_control",
|
||||
"../../../common_audio:common_audio_c",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:logging",
|
||||
"../../../rtc_base:macromagic",
|
||||
"../../../rtc_base:race_checker",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../../../rtc_base:swap_queue",
|
||||
"../../../rtc_base/experiments:field_trial_parser",
|
||||
"../../../rtc_base/system:arch",
|
||||
"../../../system_wrappers",
|
||||
@ -149,7 +157,10 @@ rtc_library("aec3") {
|
||||
"../../../system_wrappers:metrics",
|
||||
"../utility:cascaded_biquad_filter",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
absl_deps = [
|
||||
"//third_party/abseil-cpp/absl/strings",
|
||||
"//third_party/abseil-cpp/absl/types:optional",
|
||||
]
|
||||
|
||||
if (current_cpu == "x86" || current_cpu == "x64") {
|
||||
deps += [ ":aec3_avx2" ]
|
||||
@ -168,13 +179,13 @@ rtc_source_set("aec3_fft") {
|
||||
"../../../api:array_view",
|
||||
"../../../common_audio/third_party/ooura:fft_size_128",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
|
||||
rtc_source_set("render_buffer") {
|
||||
sources = [
|
||||
"block.h",
|
||||
"block_buffer.h",
|
||||
"fft_buffer.h",
|
||||
"render_buffer.h",
|
||||
@ -185,7 +196,6 @@ rtc_source_set("render_buffer") {
|
||||
":fft_data",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
}
|
||||
@ -201,6 +211,7 @@ rtc_source_set("adaptive_fir_filter") {
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
|
||||
}
|
||||
|
||||
rtc_source_set("adaptive_fir_filter_erl") {
|
||||
@ -217,9 +228,10 @@ rtc_source_set("matched_filter") {
|
||||
deps = [
|
||||
":aec3_common",
|
||||
"../../../api:array_view",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:gtest_prod",
|
||||
"../../../rtc_base/system:arch",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
|
||||
}
|
||||
|
||||
rtc_source_set("vector_math") {
|
||||
@ -302,15 +314,17 @@ if (rtc_include_tests) {
|
||||
"..:apm_logging",
|
||||
"..:audio_buffer",
|
||||
"..:audio_processing",
|
||||
"..:audio_processing_unittests",
|
||||
"..:high_pass_filter",
|
||||
"../../../api:array_view",
|
||||
"../../../api/audio:aec3_config",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:macromagic",
|
||||
"../../../rtc_base:random",
|
||||
"../../../rtc_base:safe_minmax",
|
||||
"../../../rtc_base:stringutils",
|
||||
"../../../rtc_base/system:arch",
|
||||
"../../../system_wrappers",
|
||||
"../../../system_wrappers:metrics",
|
||||
"../../../test:field_trial",
|
||||
"../../../test:test_support",
|
||||
"../utility:cascaded_biquad_filter",
|
||||
@ -334,6 +348,7 @@ if (rtc_include_tests) {
|
||||
"clockdrift_detector_unittest.cc",
|
||||
"coarse_filter_update_gain_unittest.cc",
|
||||
"comfort_noise_generator_unittest.cc",
|
||||
"config_selector_unittest.cc",
|
||||
"decimator_unittest.cc",
|
||||
"echo_canceller3_unittest.cc",
|
||||
"echo_path_delay_estimator_unittest.cc",
|
||||
@ -348,6 +363,7 @@ if (rtc_include_tests) {
|
||||
"matched_filter_lag_aggregator_unittest.cc",
|
||||
"matched_filter_unittest.cc",
|
||||
"moving_average_unittest.cc",
|
||||
"multi_channel_content_detector_unittest.cc",
|
||||
"refined_filter_update_gain_unittest.cc",
|
||||
"render_buffer_unittest.cc",
|
||||
"render_delay_buffer_unittest.cc",
|
||||
@ -363,5 +379,9 @@ if (rtc_include_tests) {
|
||||
"vector_math_unittest.cc",
|
||||
]
|
||||
}
|
||||
|
||||
if (!build_with_chromium) {
|
||||
deps += [ "..:audio_processing_unittests" ]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -68,19 +68,21 @@ void ComputeFrequencyResponse_Neon(
|
||||
RTC_DCHECK_EQ(H.size(), H2->capacity());
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
auto& H2_p = (*H2)[p];
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
for (size_t j = 0; j < kFftLengthBy2; j += 4) {
|
||||
const float32x4_t re = vld1q_f32(&H[p][ch].re[j]);
|
||||
const float32x4_t im = vld1q_f32(&H[p][ch].im[j]);
|
||||
const float32x4_t re = vld1q_f32(&H_p_ch.re[j]);
|
||||
const float32x4_t im = vld1q_f32(&H_p_ch.im[j]);
|
||||
float32x4_t H2_new = vmulq_f32(re, re);
|
||||
H2_new = vmlaq_f32(H2_new, im, im);
|
||||
float32x4_t H2_p_j = vld1q_f32(&(*H2)[p][j]);
|
||||
float32x4_t H2_p_j = vld1q_f32(&H2_p[j]);
|
||||
H2_p_j = vmaxq_f32(H2_p_j, H2_new);
|
||||
vst1q_f32(&(*H2)[p][j], H2_p_j);
|
||||
vst1q_f32(&H2_p[j], H2_p_j);
|
||||
}
|
||||
float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
|
||||
H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
|
||||
(*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
|
||||
float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
|
||||
H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
|
||||
H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -101,20 +103,22 @@ void ComputeFrequencyResponse_Sse2(
|
||||
// constexpr __mmmask8 kMaxMask = static_cast<__mmmask8>(256u);
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
auto& H2_p = (*H2)[p];
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
for (size_t j = 0; j < kFftLengthBy2; j += 4) {
|
||||
const __m128 re = _mm_loadu_ps(&H[p][ch].re[j]);
|
||||
const __m128 re = _mm_loadu_ps(&H_p_ch.re[j]);
|
||||
const __m128 re2 = _mm_mul_ps(re, re);
|
||||
const __m128 im = _mm_loadu_ps(&H[p][ch].im[j]);
|
||||
const __m128 im = _mm_loadu_ps(&H_p_ch.im[j]);
|
||||
const __m128 im2 = _mm_mul_ps(im, im);
|
||||
const __m128 H2_new = _mm_add_ps(re2, im2);
|
||||
__m128 H2_k_j = _mm_loadu_ps(&(*H2)[p][j]);
|
||||
__m128 H2_k_j = _mm_loadu_ps(&H2_p[j]);
|
||||
H2_k_j = _mm_max_ps(H2_k_j, H2_new);
|
||||
_mm_storeu_ps(&(*H2)[p][j], H2_k_j);
|
||||
_mm_storeu_ps(&H2_p[j], H2_k_j);
|
||||
}
|
||||
float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
|
||||
H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
|
||||
(*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
|
||||
float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
|
||||
H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
|
||||
H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
@ -141,7 +142,7 @@ class AdaptiveFirFilter {
|
||||
// Returns the maximum number of partitions for the filter.
|
||||
size_t max_filter_size_partitions() const { return max_size_partitions_; }
|
||||
|
||||
void DumpFilter(const char* name_frequency_domain) {
|
||||
void DumpFilter(absl::string_view name_frequency_domain) {
|
||||
for (size_t p = 0; p < max_size_partitions_; ++p) {
|
||||
data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].re);
|
||||
data_dumper_->DumpRaw(name_frequency_domain, H_[p][0].im);
|
||||
|
@ -8,10 +8,9 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -31,19 +30,21 @@ void ComputeFrequencyResponse_Avx2(
|
||||
RTC_DCHECK_EQ(H.size(), H2->capacity());
|
||||
for (size_t p = 0; p < num_partitions; ++p) {
|
||||
RTC_DCHECK_EQ(kFftLengthBy2Plus1, (*H2)[p].size());
|
||||
auto& H2_p = (*H2)[p];
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const FftData& H_p_ch = H[p][ch];
|
||||
for (size_t j = 0; j < kFftLengthBy2; j += 8) {
|
||||
__m256 re = _mm256_loadu_ps(&H[p][ch].re[j]);
|
||||
__m256 re = _mm256_loadu_ps(&H_p_ch.re[j]);
|
||||
__m256 re2 = _mm256_mul_ps(re, re);
|
||||
__m256 im = _mm256_loadu_ps(&H[p][ch].im[j]);
|
||||
__m256 im = _mm256_loadu_ps(&H_p_ch.im[j]);
|
||||
re2 = _mm256_fmadd_ps(im, im, re2);
|
||||
__m256 H2_k_j = _mm256_loadu_ps(&(*H2)[p][j]);
|
||||
__m256 H2_k_j = _mm256_loadu_ps(&H2_p[j]);
|
||||
H2_k_j = _mm256_max_ps(H2_k_j, re2);
|
||||
_mm256_storeu_ps(&(*H2)[p][j], H2_k_j);
|
||||
_mm256_storeu_ps(&H2_p[j], H2_k_j);
|
||||
}
|
||||
float H2_new = H[p][ch].re[kFftLengthBy2] * H[p][ch].re[kFftLengthBy2] +
|
||||
H[p][ch].im[kFftLengthBy2] * H[p][ch].im[kFftLengthBy2];
|
||||
(*H2)[p][kFftLengthBy2] = std::max((*H2)[p][kFftLengthBy2], H2_new);
|
||||
float H2_new = H_p_ch.re[kFftLengthBy2] * H_p_ch.re[kFftLengthBy2] +
|
||||
H_p_ch.im[kFftLengthBy2] * H_p_ch.im[kFftLengthBy2];
|
||||
H2_p[kFftLengthBy2] = std::max(H2_p[kFftLengthBy2], H2_new);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -8,10 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "modules/audio_processing/aec3/adaptive_fir_filter_erl.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace aec3 {
|
||||
|
@ -29,9 +29,9 @@ Aec3Optimization DetectOptimization() {
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
return Aec3Optimization::kNeon;
|
||||
#endif
|
||||
|
||||
#else
|
||||
return Aec3Optimization::kNone;
|
||||
#endif
|
||||
}
|
||||
|
||||
float FastApproxLog2f(const float in) {
|
||||
|
@ -28,7 +28,7 @@ enum class Aec3Optimization { kNone, kSse2, kAvx2, kNeon };
|
||||
constexpr int kNumBlocksPerSecond = 250;
|
||||
|
||||
constexpr int kMetricsReportingIntervalBlocks = 10 * kNumBlocksPerSecond;
|
||||
constexpr int kMetricsComputationBlocks = 7;
|
||||
constexpr int kMetricsComputationBlocks = 3;
|
||||
constexpr int kMetricsCollectionBlocks =
|
||||
kMetricsReportingIntervalBlocks - kMetricsComputationBlocks;
|
||||
|
||||
@ -85,10 +85,10 @@ constexpr size_t GetRenderDelayBufferSize(size_t down_sampling_factor,
|
||||
Aec3Optimization DetectOptimization();
|
||||
|
||||
// Computes the log2 of the input in a fast and approximate manner.
|
||||
float FastApproxLog2f(const float in);
|
||||
float FastApproxLog2f(float in);
|
||||
|
||||
// Returns dB from a power quantity expressed in log2.
|
||||
float Log2TodB(const float in_log2);
|
||||
float Log2TodB(float in_log2);
|
||||
|
||||
static_assert(1 << kBlockSizeLog2 == kBlockSize,
|
||||
"Proper number of shifts for blocksize");
|
||||
|
@ -101,10 +101,10 @@ void Aec3Fft::ZeroPaddedFft(rtc::ArrayView<const float> x,
|
||||
[](float a, float b) { return a * b; });
|
||||
break;
|
||||
case Window::kSqrtHanning:
|
||||
RTC_NOTREACHED();
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
break;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
Fft(&fft, X);
|
||||
@ -125,7 +125,7 @@ void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x,
|
||||
std::copy(x.begin(), x.end(), fft.begin() + x_old.size());
|
||||
break;
|
||||
case Window::kHanning:
|
||||
RTC_NOTREACHED();
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
break;
|
||||
case Window::kSqrtHanning:
|
||||
std::transform(x_old.begin(), x_old.end(), std::begin(kSqrtHanning128),
|
||||
@ -135,7 +135,7 @@ void Aec3Fft::PaddedFft(rtc::ArrayView<const float> x,
|
||||
fft.begin() + x_old.size(), std::multiplies<float>());
|
||||
break;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
Fft(&fft, X);
|
||||
|
@ -18,7 +18,6 @@
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -30,6 +29,9 @@ class Aec3Fft {
|
||||
|
||||
Aec3Fft();
|
||||
|
||||
Aec3Fft(const Aec3Fft&) = delete;
|
||||
Aec3Fft& operator=(const Aec3Fft&) = delete;
|
||||
|
||||
// Computes the FFT. Note that both the input and output are modified.
|
||||
void Fft(std::array<float, kFftLength>* x, FftData* X) const {
|
||||
RTC_DCHECK(x);
|
||||
@ -66,8 +68,6 @@ class Aec3Fft {
|
||||
|
||||
private:
|
||||
const OouraFft ooura_fft_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(Aec3Fft);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
@ -97,7 +96,7 @@ void ComputeAvgRenderReverb(
|
||||
|
||||
} // namespace
|
||||
|
||||
int AecState::instance_count_ = 0;
|
||||
std::atomic<int> AecState::instance_count_(0);
|
||||
|
||||
void AecState::GetResidualEchoScaling(
|
||||
rtc::ArrayView<float> residual_scaling) const {
|
||||
@ -113,18 +112,9 @@ void AecState::GetResidualEchoScaling(
|
||||
residual_scaling);
|
||||
}
|
||||
|
||||
absl::optional<float> AecState::ErleUncertainty() const {
|
||||
if (SaturatedEcho()) {
|
||||
return 1.f;
|
||||
}
|
||||
|
||||
return absl::nullopt;
|
||||
}
|
||||
|
||||
AecState::AecState(const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
config_(config),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
deactivate_initial_state_reset_at_echo_path_change_(
|
||||
@ -197,8 +187,10 @@ void AecState::Update(
|
||||
|
||||
// Analyze the filter outputs and filters.
|
||||
bool any_filter_converged;
|
||||
bool any_coarse_filter_converged;
|
||||
bool all_filters_diverged;
|
||||
subtractor_output_analyzer_.Update(subtractor_output, &any_filter_converged,
|
||||
&any_coarse_filter_converged,
|
||||
&all_filters_diverged);
|
||||
|
||||
bool any_filter_consistent;
|
||||
@ -212,15 +204,16 @@ void AecState::Update(
|
||||
strong_not_saturated_render_blocks_);
|
||||
}
|
||||
|
||||
const std::vector<std::vector<float>>& aligned_render_block =
|
||||
render_buffer.Block(-delay_state_.MinDirectPathFilterDelay())[0];
|
||||
const Block& aligned_render_block =
|
||||
render_buffer.GetBlock(-delay_state_.MinDirectPathFilterDelay());
|
||||
|
||||
// Update render counters.
|
||||
bool active_render = false;
|
||||
for (size_t ch = 0; ch < aligned_render_block.size(); ++ch) {
|
||||
const float render_energy = std::inner_product(
|
||||
aligned_render_block[ch].begin(), aligned_render_block[ch].end(),
|
||||
aligned_render_block[ch].begin(), 0.f);
|
||||
for (int ch = 0; ch < aligned_render_block.NumChannels(); ++ch) {
|
||||
const float render_energy =
|
||||
std::inner_product(aligned_render_block.begin(/*block=*/0, ch),
|
||||
aligned_render_block.end(/*block=*/0, ch),
|
||||
aligned_render_block.begin(/*block=*/0, ch), 0.f);
|
||||
if (render_energy > (config_.render_levels.active_render_limit *
|
||||
config_.render_levels.active_render_limit) *
|
||||
kFftLengthBy2) {
|
||||
@ -235,8 +228,9 @@ void AecState::Update(
|
||||
std::array<float, kFftLengthBy2Plus1> avg_render_spectrum_with_reverb;
|
||||
|
||||
ComputeAvgRenderReverb(render_buffer.GetSpectrumBuffer(),
|
||||
delay_state_.MinDirectPathFilterDelay(), ReverbDecay(),
|
||||
&avg_render_reverb_, avg_render_spectrum_with_reverb);
|
||||
delay_state_.MinDirectPathFilterDelay(),
|
||||
ReverbDecay(/*mild=*/false), &avg_render_reverb_,
|
||||
avg_render_spectrum_with_reverb);
|
||||
|
||||
if (config_.echo_audibility.use_stationarity_properties) {
|
||||
// Update the echo audibility evaluator.
|
||||
@ -272,10 +266,10 @@ void AecState::Update(
|
||||
|
||||
// Detect whether the transparent mode should be activated.
|
||||
if (transparent_state_) {
|
||||
transparent_state_->Update(delay_state_.MinDirectPathFilterDelay(),
|
||||
any_filter_consistent, any_filter_converged,
|
||||
all_filters_diverged, active_render,
|
||||
SaturatedCapture());
|
||||
transparent_state_->Update(
|
||||
delay_state_.MinDirectPathFilterDelay(), any_filter_consistent,
|
||||
any_filter_converged, any_coarse_filter_converged, all_filters_diverged,
|
||||
active_render, SaturatedCapture());
|
||||
}
|
||||
|
||||
// Analyze the quality of the filter.
|
||||
@ -300,7 +294,9 @@ void AecState::Update(
|
||||
data_dumper_->DumpRaw("aec3_active_render", active_render);
|
||||
data_dumper_->DumpRaw("aec3_erl", Erl());
|
||||
data_dumper_->DumpRaw("aec3_erl_time_domain", ErlTimeDomain());
|
||||
data_dumper_->DumpRaw("aec3_erle", Erle()[0]);
|
||||
data_dumper_->DumpRaw("aec3_erle", Erle(/*onset_compensated=*/false)[0]);
|
||||
data_dumper_->DumpRaw("aec3_erle_onset_compensated",
|
||||
Erle(/*onset_compensated=*/true)[0]);
|
||||
data_dumper_->DumpRaw("aec3_usable_linear_estimate", UsableLinearEstimate());
|
||||
data_dumper_->DumpRaw("aec3_transparent_mode", TransparentModeActive());
|
||||
data_dumper_->DumpRaw("aec3_filter_delay",
|
||||
@ -312,12 +308,19 @@ void AecState::Update(
|
||||
data_dumper_->DumpRaw("aec3_capture_saturation", SaturatedCapture());
|
||||
data_dumper_->DumpRaw("aec3_echo_saturation", SaturatedEcho());
|
||||
data_dumper_->DumpRaw("aec3_any_filter_converged", any_filter_converged);
|
||||
data_dumper_->DumpRaw("aec3_any_coarse_filter_converged",
|
||||
any_coarse_filter_converged);
|
||||
data_dumper_->DumpRaw("aec3_all_filters_diverged", all_filters_diverged);
|
||||
|
||||
data_dumper_->DumpRaw("aec3_external_delay_avaliable",
|
||||
external_delay ? 1 : 0);
|
||||
data_dumper_->DumpRaw("aec3_filter_tail_freq_resp_est",
|
||||
GetReverbFrequencyResponse());
|
||||
data_dumper_->DumpRaw("aec3_subtractor_y2", subtractor_output[0].y2);
|
||||
data_dumper_->DumpRaw("aec3_subtractor_e2_coarse",
|
||||
subtractor_output[0].e2_coarse);
|
||||
data_dumper_->DumpRaw("aec3_subtractor_e2_refined",
|
||||
subtractor_output[0].e2_refined);
|
||||
}
|
||||
|
||||
AecState::InitialState::InitialState(const EchoCanceller3Config& config)
|
||||
@ -442,7 +445,7 @@ void AecState::FilteringQualityAnalyzer::Update(
|
||||
}
|
||||
|
||||
void AecState::SaturationDetector::Update(
|
||||
rtc::ArrayView<const std::vector<float>> x,
|
||||
const Block& x,
|
||||
bool saturated_capture,
|
||||
bool usable_linear_estimate,
|
||||
rtc::ArrayView<const SubtractorOutput> subtractor_output,
|
||||
@ -462,8 +465,9 @@ void AecState::SaturationDetector::Update(
|
||||
}
|
||||
} else {
|
||||
float max_sample = 0.f;
|
||||
for (auto& channel : x) {
|
||||
for (float sample : channel) {
|
||||
for (int ch = 0; ch < x.NumChannels(); ++ch) {
|
||||
rtc::ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
|
||||
for (float sample : x_ch) {
|
||||
max_sample = std::max(max_sample, fabsf(sample));
|
||||
}
|
||||
}
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
@ -70,15 +71,16 @@ class AecState {
|
||||
}
|
||||
|
||||
// Returns the ERLE.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||
return erle_estimator_.Erle();
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
|
||||
bool onset_compensated) const {
|
||||
return erle_estimator_.Erle(onset_compensated);
|
||||
}
|
||||
|
||||
// Returns an offset to apply to the estimation of the residual echo
|
||||
// computation. Returning nullopt means that no offset should be used, while
|
||||
// any other value will be applied as a multiplier to the estimated residual
|
||||
// echo.
|
||||
absl::optional<float> ErleUncertainty() const;
|
||||
// Returns the non-capped ERLE.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
|
||||
const {
|
||||
return erle_estimator_.ErleUnbounded();
|
||||
}
|
||||
|
||||
// Returns the fullband ERLE estimate in log2 units.
|
||||
float FullBandErleLog2() const { return erle_estimator_.FullbandErleLog2(); }
|
||||
@ -115,8 +117,12 @@ class AecState {
|
||||
// Takes appropriate action at an echo path change.
|
||||
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
|
||||
|
||||
// Returns the decay factor for the echo reverberation.
|
||||
float ReverbDecay() const { return reverb_model_estimator_.ReverbDecay(); }
|
||||
// Returns the decay factor for the echo reverberation. The parameter `mild`
|
||||
// indicates which exponential decay to return. The default one or a milder
|
||||
// one that can be used during nearend regions.
|
||||
float ReverbDecay(bool mild) const {
|
||||
return reverb_model_estimator_.ReverbDecay(mild);
|
||||
}
|
||||
|
||||
// Return the frequency response of the reverberant echo.
|
||||
rtc::ArrayView<const float> GetReverbFrequencyResponse() const {
|
||||
@ -149,7 +155,7 @@ class AecState {
|
||||
}
|
||||
|
||||
private:
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const EchoCanceller3Config config_;
|
||||
const size_t num_capture_channels_;
|
||||
@ -267,7 +273,7 @@ class AecState {
|
||||
bool SaturatedEcho() const { return saturated_echo_; }
|
||||
|
||||
// Updates the detection decision based on new data.
|
||||
void Update(rtc::ArrayView<const std::vector<float>> x,
|
||||
void Update(const Block& x,
|
||||
bool saturated_capture,
|
||||
bool usable_linear_estimate,
|
||||
rtc::ArrayView<const SubtractorOutput> subtractor_output,
|
||||
|
@ -63,9 +63,10 @@ AlignmentMixer::AlignmentMixer(size_t num_channels,
|
||||
}
|
||||
}
|
||||
|
||||
void AlignmentMixer::ProduceOutput(rtc::ArrayView<const std::vector<float>> x,
|
||||
void AlignmentMixer::ProduceOutput(const Block& x,
|
||||
rtc::ArrayView<float, kBlockSize> y) {
|
||||
RTC_DCHECK_EQ(x.size(), num_channels_);
|
||||
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
|
||||
|
||||
if (selection_variant_ == MixingVariant::kDownmix) {
|
||||
Downmix(x, y);
|
||||
return;
|
||||
@ -73,18 +74,20 @@ void AlignmentMixer::ProduceOutput(rtc::ArrayView<const std::vector<float>> x,
|
||||
|
||||
int ch = selection_variant_ == MixingVariant::kFixed ? 0 : SelectChannel(x);
|
||||
|
||||
RTC_DCHECK_GE(x.size(), ch);
|
||||
std::copy(x[ch].begin(), x[ch].end(), y.begin());
|
||||
RTC_DCHECK_GT(x.NumChannels(), ch);
|
||||
std::copy(x.begin(/*band=*/0, ch), x.end(/*band=*/0, ch), y.begin());
|
||||
}
|
||||
|
||||
void AlignmentMixer::Downmix(rtc::ArrayView<const std::vector<float>> x,
|
||||
void AlignmentMixer::Downmix(const Block& x,
|
||||
rtc::ArrayView<float, kBlockSize> y) const {
|
||||
RTC_DCHECK_EQ(x.size(), num_channels_);
|
||||
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
|
||||
RTC_DCHECK_GE(num_channels_, 2);
|
||||
std::copy(x[0].begin(), x[0].end(), y.begin());
|
||||
std::memcpy(&y[0], x.View(/*band=*/0, /*channel=*/0).data(),
|
||||
kBlockSize * sizeof(y[0]));
|
||||
for (size_t ch = 1; ch < num_channels_; ++ch) {
|
||||
const auto x_ch = x.View(/*band=*/0, ch);
|
||||
for (size_t i = 0; i < kBlockSize; ++i) {
|
||||
y[i] += x[ch][i];
|
||||
y[i] += x_ch[i];
|
||||
}
|
||||
}
|
||||
|
||||
@ -93,8 +96,8 @@ void AlignmentMixer::Downmix(rtc::ArrayView<const std::vector<float>> x,
|
||||
}
|
||||
}
|
||||
|
||||
int AlignmentMixer::SelectChannel(rtc::ArrayView<const std::vector<float>> x) {
|
||||
RTC_DCHECK_EQ(x.size(), num_channels_);
|
||||
int AlignmentMixer::SelectChannel(const Block& x) {
|
||||
RTC_DCHECK_EQ(x.NumChannels(), num_channels_);
|
||||
RTC_DCHECK_GE(num_channels_, 2);
|
||||
RTC_DCHECK_EQ(cumulative_energies_.size(), num_channels_);
|
||||
|
||||
@ -112,10 +115,10 @@ int AlignmentMixer::SelectChannel(rtc::ArrayView<const std::vector<float>> x) {
|
||||
++block_counter_;
|
||||
|
||||
for (int ch = 0; ch < num_ch_to_analyze; ++ch) {
|
||||
RTC_DCHECK_EQ(x[ch].size(), kBlockSize);
|
||||
float x2_sum = 0.f;
|
||||
rtc::ArrayView<const float, kBlockSize> x_ch = x.View(/*band=*/0, ch);
|
||||
for (size_t i = 0; i < kBlockSize; ++i) {
|
||||
x2_sum += x[ch][i] * x[ch][i];
|
||||
x2_sum += x_ch[i] * x_ch[i];
|
||||
}
|
||||
|
||||
if (ch < 2 && x2_sum > excitation_energy_threshold_) {
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -33,8 +34,7 @@ class AlignmentMixer {
|
||||
float excitation_limit,
|
||||
bool prefer_first_two_channels);
|
||||
|
||||
void ProduceOutput(rtc::ArrayView<const std::vector<float>> x,
|
||||
rtc::ArrayView<float, kBlockSize> y);
|
||||
void ProduceOutput(const Block& x, rtc::ArrayView<float, kBlockSize> y);
|
||||
|
||||
enum class MixingVariant { kDownmix, kAdaptive, kFixed };
|
||||
|
||||
@ -49,9 +49,8 @@ class AlignmentMixer {
|
||||
int selected_channel_ = 0;
|
||||
size_t block_counter_ = 0;
|
||||
|
||||
void Downmix(const rtc::ArrayView<const std::vector<float>> x,
|
||||
rtc::ArrayView<float, kBlockSize> y) const;
|
||||
int SelectChannel(rtc::ArrayView<const std::vector<float>> x);
|
||||
void Downmix(const Block& x, rtc::ArrayView<float, kBlockSize> y) const;
|
||||
int SelectChannel(const Block& x);
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
|
91
webrtc/modules/audio_processing/aec3/block.h
Normal file
91
webrtc/modules/audio_processing/aec3/block.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
|
||||
|
||||
#include <array>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Contains one or more channels of 4 milliseconds of audio data.
|
||||
// The audio is split in one or more frequency bands, each with a sampling
|
||||
// rate of 16 kHz.
|
||||
// Value type that owns the samples of one block in a single flat
// std::vector<float>. Each (band, channel) pair occupies kBlockSize
// consecutive samples; see GetIndex() for the exact ordering.
class Block {
|
||||
public:
|
||||
// Creates a block with `num_bands` bands and `num_channels` channels and
// sets every one of the num_bands * num_channels * kBlockSize samples to
// `default_value`.
Block(int num_bands, int num_channels, float default_value = 0.0f)
|
||||
: num_bands_(num_bands),
|
||||
num_channels_(num_channels),
|
||||
data_(num_bands * num_channels * kBlockSize, default_value) {}
|
||||
|
||||
// Returns the number of bands.
|
||||
int NumBands() const { return num_bands_; }
|
||||
|
||||
// Returns the number of channels.
|
||||
int NumChannels() const { return num_channels_; }
|
||||
|
||||
// Modifies the number of channels and sets all samples to zero.
|
||||
// The number of bands is left unchanged. Because the storage is resized and
// then completely overwritten, no previous sample values survive the call.
void SetNumChannels(int num_channels) {
|
||||
num_channels_ = num_channels;
|
||||
data_.resize(num_bands_ * num_channels_ * kBlockSize);
|
||||
std::fill(data_.begin(), data_.end(), 0.0f);
|
||||
}
|
||||
|
||||
// Iterators for accessing the data.
|
||||
// begin(band, channel) points at the first sample of the requested band and
// channel; end(band, channel) points kBlockSize samples past it. `band` and
// `channel` are not range-checked by these accessors.
auto begin(int band, int channel) {
|
||||
return data_.begin() + GetIndex(band, channel);
|
||||
}
|
||||
|
||||
// Const overload: same position, but the samples cannot be modified through
// the returned iterator.
auto begin(int band, int channel) const {
|
||||
return data_.begin() + GetIndex(band, channel);
|
||||
}
|
||||
|
||||
// One past the last sample of the requested band and channel.
auto end(int band, int channel) { return begin(band, channel) + kBlockSize; }
|
||||
|
||||
// Const overload of end().
auto end(int band, int channel) const {
|
||||
return begin(band, channel) + kBlockSize;
|
||||
}
|
||||
|
||||
// Access data via ArrayView.
|
||||
// The returned view has the compile-time size kBlockSize and refers directly
// to this block's storage (no copy is made). As with begin()/end(), `band`
// and `channel` are not range-checked here.
rtc::ArrayView<float, kBlockSize> View(int band, int channel) {
|
||||
return rtc::ArrayView<float, kBlockSize>(&data_[GetIndex(band, channel)],
|
||||
kBlockSize);
|
||||
}
|
||||
|
||||
// Const overload: read-only view of the same kBlockSize samples.
rtc::ArrayView<const float, kBlockSize> View(int band, int channel) const {
|
||||
return rtc::ArrayView<const float, kBlockSize>(
|
||||
&data_[GetIndex(band, channel)], kBlockSize);
|
||||
}
|
||||
|
||||
// Lets two Blocks swap audio data.
|
||||
// The band count, the channel count and the sample storage are all
// exchanged, so the two blocks may have different dimensions beforehand.
void Swap(Block& b) {
|
||||
std::swap(num_bands_, b.num_bands_);
|
||||
std::swap(num_channels_, b.num_channels_);
|
||||
data_.swap(b.data_);
|
||||
}
|
||||
|
||||
private:
|
||||
// Returns the index of the first sample of the requested |band| and
|
||||
// |channel|.
|
||||
// Ordering in `data_`: all channels of band 0 come first, then all channels
// of band 1, and so on, with kBlockSize contiguous samples per channel.
int GetIndex(int band, int channel) const {
|
||||
return (band * num_channels_ + channel) * kBlockSize;
|
||||
}
|
||||
|
||||
// Number of bands stored in `data_`.
int num_bands_;
|
||||
// Number of channels stored per band in `data_`.
int num_channels_;
|
||||
// Flat sample storage of size num_bands_ * num_channels_ * kBlockSize.
std::vector<float> data_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_BLOCK_H_
|
@ -14,25 +14,9 @@
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
BlockBuffer::BlockBuffer(size_t size,
|
||||
size_t num_bands,
|
||||
size_t num_channels,
|
||||
size_t frame_length)
|
||||
BlockBuffer::BlockBuffer(size_t size, size_t num_bands, size_t num_channels)
|
||||
: size(static_cast<int>(size)),
|
||||
buffer(size,
|
||||
std::vector<std::vector<std::vector<float>>>(
|
||||
num_bands,
|
||||
std::vector<std::vector<float>>(
|
||||
num_channels,
|
||||
std::vector<float>(frame_length, 0.f)))) {
|
||||
for (auto& block : buffer) {
|
||||
for (auto& band : block) {
|
||||
for (auto& channel : band) {
|
||||
std::fill(channel.begin(), channel.end(), 0.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
buffer(size, Block(num_bands, num_channels)) {}
|
||||
|
||||
BlockBuffer::~BlockBuffer() = default;
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -22,10 +23,7 @@ namespace webrtc {
|
||||
// Struct for bundling a circular buffer of two dimensional vector objects
|
||||
// together with the read and write indices.
|
||||
struct BlockBuffer {
|
||||
BlockBuffer(size_t size,
|
||||
size_t num_bands,
|
||||
size_t num_channels,
|
||||
size_t frame_length);
|
||||
BlockBuffer(size_t size, size_t num_bands, size_t num_channels);
|
||||
~BlockBuffer();
|
||||
|
||||
int IncIndex(int index) const {
|
||||
@ -52,7 +50,7 @@ struct BlockBuffer {
|
||||
void DecReadIndex() { read = DecIndex(read); }
|
||||
|
||||
const int size;
|
||||
std::vector<std::vector<std::vector<std::vector<float>>>> buffer;
|
||||
std::vector<Block> buffer;
|
||||
int write = 0;
|
||||
int read = 0;
|
||||
};
|
||||
|
@ -41,17 +41,24 @@ void BlockDelayBuffer::DelaySignal(AudioBuffer* frame) {
|
||||
RTC_DCHECK_EQ(buf_[ch].size(), frame->num_bands());
|
||||
RTC_DCHECK_EQ(buf_[ch].size(), num_bands);
|
||||
rtc::ArrayView<float* const> frame_ch(frame->split_bands(ch), num_bands);
|
||||
const size_t delay = delay_;
|
||||
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
RTC_DCHECK_EQ(delay_, buf_[ch][band].size());
|
||||
i = i_start;
|
||||
|
||||
for (size_t k = 0; k < frame_length_; ++k) {
|
||||
const float tmp = buf_[ch][band][i];
|
||||
buf_[ch][band][i] = frame_ch[band][k];
|
||||
frame_ch[band][k] = tmp;
|
||||
// Offloading these pointers and class variables to local variables allows
|
||||
// the compiler to optimize the below loop when compiling with
|
||||
// '-fno-strict-aliasing'.
|
||||
float* buf_ch_band = buf_[ch][band].data();
|
||||
float* frame_ch_band = frame_ch[band];
|
||||
|
||||
i = i < delay_ - 1 ? i + 1 : 0;
|
||||
for (size_t k = 0, frame_length = frame_length_; k < frame_length; ++k) {
|
||||
const float tmp = buf_ch_band[i];
|
||||
buf_ch_band[i] = frame_ch_band[k];
|
||||
frame_ch_band[k] = tmp;
|
||||
|
||||
i = i < delay - 1 ? i + 1 : 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -34,35 +34,32 @@ BlockFramer::~BlockFramer() = default;
|
||||
// samples for InsertBlockAndExtractSubFrame to produce a frame. In order to
|
||||
// achieve this, the InsertBlockAndExtractSubFrame and InsertBlock methods need
|
||||
// to be called in the correct order.
|
||||
void BlockFramer::InsertBlock(
|
||||
const std::vector<std::vector<std::vector<float>>>& block) {
|
||||
RTC_DCHECK_EQ(num_bands_, block.size());
|
||||
void BlockFramer::InsertBlock(const Block& block) {
|
||||
RTC_DCHECK_EQ(num_bands_, block.NumBands());
|
||||
RTC_DCHECK_EQ(num_channels_, block.NumChannels());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, block[band].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_EQ(kBlockSize, block[band][channel].size());
|
||||
RTC_DCHECK_EQ(0, buffer_[band][channel].size());
|
||||
|
||||
buffer_[band][channel].insert(buffer_[band][channel].begin(),
|
||||
block[band][channel].begin(),
|
||||
block[band][channel].end());
|
||||
block.begin(band, channel),
|
||||
block.end(band, channel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void BlockFramer::InsertBlockAndExtractSubFrame(
|
||||
const std::vector<std::vector<std::vector<float>>>& block,
|
||||
const Block& block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame) {
|
||||
RTC_DCHECK(sub_frame);
|
||||
RTC_DCHECK_EQ(num_bands_, block.size());
|
||||
RTC_DCHECK_EQ(num_bands_, block.NumBands());
|
||||
RTC_DCHECK_EQ(num_channels_, block.NumChannels());
|
||||
RTC_DCHECK_EQ(num_bands_, sub_frame->size());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, block[band].size());
|
||||
RTC_DCHECK_EQ(num_channels_, (*sub_frame)[0].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_LE(kSubFrameLength,
|
||||
buffer_[band][channel].size() + kBlockSize);
|
||||
RTC_DCHECK_EQ(kBlockSize, block[band][channel].size());
|
||||
RTC_DCHECK_GE(kBlockSize, buffer_[band][channel].size());
|
||||
RTC_DCHECK_EQ(kSubFrameLength, (*sub_frame)[band][channel].size());
|
||||
|
||||
@ -71,14 +68,14 @@ void BlockFramer::InsertBlockAndExtractSubFrame(
|
||||
std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
|
||||
(*sub_frame)[band][channel].begin());
|
||||
std::copy(
|
||||
block[band][channel].begin(),
|
||||
block[band][channel].begin() + samples_to_frame,
|
||||
block.begin(band, channel),
|
||||
block.begin(band, channel) + samples_to_frame,
|
||||
(*sub_frame)[band][channel].begin() + buffer_[band][channel].size());
|
||||
buffer_[band][channel].clear();
|
||||
buffer_[band][channel].insert(
|
||||
buffer_[band][channel].begin(),
|
||||
block[band][channel].begin() + samples_to_frame,
|
||||
block[band][channel].end());
|
||||
block.begin(band, channel) + samples_to_frame,
|
||||
block.end(band, channel));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -15,6 +15,7 @@
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -32,10 +33,10 @@ class BlockFramer {
|
||||
BlockFramer& operator=(const BlockFramer&) = delete;
|
||||
|
||||
// Adds a 64 sample block into the data that will form the next output frame.
|
||||
void InsertBlock(const std::vector<std::vector<std::vector<float>>>& block);
|
||||
void InsertBlock(const Block& block);
|
||||
// Adds a 64 sample block and extracts an 80 sample subframe.
|
||||
void InsertBlockAndExtractSubFrame(
|
||||
const std::vector<std::vector<std::vector<float>>>& block,
|
||||
const Block& block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame);
|
||||
|
||||
private:
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
@ -26,7 +27,6 @@
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
@ -49,23 +49,22 @@ class BlockProcessorImpl final : public BlockProcessor {
|
||||
|
||||
~BlockProcessorImpl() override;
|
||||
|
||||
void ProcessCapture(
|
||||
bool echo_path_gain_change,
|
||||
bool capture_signal_saturation,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture_block) override;
|
||||
void ProcessCapture(bool echo_path_gain_change,
|
||||
bool capture_signal_saturation,
|
||||
Block* linear_output,
|
||||
Block* capture_block) override;
|
||||
|
||||
void BufferRender(
|
||||
const std::vector<std::vector<std::vector<float>>>& block) override;
|
||||
void BufferRender(const Block& block) override;
|
||||
|
||||
void UpdateEchoLeakageStatus(bool leakage_detected) override;
|
||||
|
||||
void GetMetrics(EchoControl::Metrics* metrics) const override;
|
||||
|
||||
void SetAudioBufferDelay(int delay_ms) override;
|
||||
void SetCaptureOutputUsage(bool capture_output_used) override;
|
||||
|
||||
private:
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const EchoCanceller3Config config_;
|
||||
bool capture_properly_started_ = false;
|
||||
@ -80,7 +79,7 @@ class BlockProcessorImpl final : public BlockProcessor {
|
||||
absl::optional<DelayEstimate> estimated_delay_;
|
||||
};
|
||||
|
||||
int BlockProcessorImpl::instance_count_ = 0;
|
||||
std::atomic<int> BlockProcessorImpl::instance_count_(0);
|
||||
|
||||
BlockProcessorImpl::BlockProcessorImpl(
|
||||
const EchoCanceller3Config& config,
|
||||
@ -90,8 +89,7 @@ BlockProcessorImpl::BlockProcessorImpl(
|
||||
std::unique_ptr<RenderDelayBuffer> render_buffer,
|
||||
std::unique_ptr<RenderDelayController> delay_controller,
|
||||
std::unique_ptr<EchoRemover> echo_remover)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
config_(config),
|
||||
sample_rate_hz_(sample_rate_hz),
|
||||
render_buffer_(std::move(render_buffer)),
|
||||
@ -103,21 +101,20 @@ BlockProcessorImpl::BlockProcessorImpl(
|
||||
|
||||
BlockProcessorImpl::~BlockProcessorImpl() = default;
|
||||
|
||||
void BlockProcessorImpl::ProcessCapture(
|
||||
bool echo_path_gain_change,
|
||||
bool capture_signal_saturation,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture_block) {
|
||||
void BlockProcessorImpl::ProcessCapture(bool echo_path_gain_change,
|
||||
bool capture_signal_saturation,
|
||||
Block* linear_output,
|
||||
Block* capture_block) {
|
||||
RTC_DCHECK(capture_block);
|
||||
RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->size());
|
||||
RTC_DCHECK_EQ(kBlockSize, (*capture_block)[0][0].size());
|
||||
RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), capture_block->NumBands());
|
||||
|
||||
capture_call_counter_++;
|
||||
|
||||
data_dumper_->DumpRaw("aec3_processblock_call_order",
|
||||
static_cast<int>(BlockProcessorApiCall::kCapture));
|
||||
data_dumper_->DumpWav("aec3_processblock_capture_input", kBlockSize,
|
||||
&(*capture_block)[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_processblock_capture_input",
|
||||
capture_block->View(/*band=*/0, /*channel=*/0), 16000,
|
||||
1);
|
||||
|
||||
if (render_properly_started_) {
|
||||
if (!capture_properly_started_) {
|
||||
@ -158,8 +155,9 @@ void BlockProcessorImpl::ProcessCapture(
|
||||
delay_controller_->Reset(false);
|
||||
}
|
||||
|
||||
data_dumper_->DumpWav("aec3_processblock_capture_input2", kBlockSize,
|
||||
&(*capture_block)[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_processblock_capture_input2",
|
||||
capture_block->View(/*band=*/0, /*channel=*/0), 16000,
|
||||
1);
|
||||
|
||||
bool has_delay_estimator = !config_.delay.use_external_delay_estimator;
|
||||
if (has_delay_estimator) {
|
||||
@ -168,7 +166,7 @@ void BlockProcessorImpl::ProcessCapture(
|
||||
// alignment.
|
||||
estimated_delay_ = delay_controller_->GetDelay(
|
||||
render_buffer_->GetDownsampledRenderBuffer(), render_buffer_->Delay(),
|
||||
(*capture_block)[0]);
|
||||
*capture_block);
|
||||
|
||||
if (estimated_delay_) {
|
||||
bool delay_change =
|
||||
@ -201,16 +199,12 @@ void BlockProcessorImpl::ProcessCapture(
|
||||
metrics_.UpdateCapture(false);
|
||||
}
|
||||
|
||||
void BlockProcessorImpl::BufferRender(
|
||||
const std::vector<std::vector<std::vector<float>>>& block) {
|
||||
RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.size());
|
||||
RTC_DCHECK_EQ(kBlockSize, block[0][0].size());
|
||||
void BlockProcessorImpl::BufferRender(const Block& block) {
|
||||
RTC_DCHECK_EQ(NumBandsForRate(sample_rate_hz_), block.NumBands());
|
||||
data_dumper_->DumpRaw("aec3_processblock_call_order",
|
||||
static_cast<int>(BlockProcessorApiCall::kRender));
|
||||
data_dumper_->DumpWav("aec3_processblock_render_input", kBlockSize,
|
||||
&block[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_processblock_render_input2", kBlockSize,
|
||||
&block[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_processblock_render_input",
|
||||
block.View(/*band=*/0, /*channel=*/0), 16000, 1);
|
||||
|
||||
render_event_ = render_buffer_->Insert(block);
|
||||
|
||||
@ -237,6 +231,10 @@ void BlockProcessorImpl::SetAudioBufferDelay(int delay_ms) {
|
||||
render_buffer_->SetAudioBufferDelay(delay_ms);
|
||||
}
|
||||
|
||||
// Passes `capture_output_used` straight through to the echo remover's
// SetCaptureOutputUsage(); no state is kept in the block processor itself.
void BlockProcessorImpl::SetCaptureOutputUsage(bool capture_output_used) {
|
||||
echo_remover_->SetCaptureOutputUsage(capture_output_used);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
BlockProcessor* BlockProcessor::Create(const EchoCanceller3Config& config,
|
||||
|
@ -18,6 +18,7 @@
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/echo_remover.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller.h"
|
||||
@ -56,19 +57,23 @@ class BlockProcessor {
|
||||
virtual void SetAudioBufferDelay(int delay_ms) = 0;
|
||||
|
||||
// Processes a block of capture data.
|
||||
virtual void ProcessCapture(
|
||||
bool echo_path_gain_change,
|
||||
bool capture_signal_saturation,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture_block) = 0;
|
||||
virtual void ProcessCapture(bool echo_path_gain_change,
|
||||
bool capture_signal_saturation,
|
||||
Block* linear_output,
|
||||
Block* capture_block) = 0;
|
||||
|
||||
// Buffers a block of render data supplied by a FrameBlocker object.
|
||||
virtual void BufferRender(
|
||||
const std::vector<std::vector<std::vector<float>>>& render_block) = 0;
|
||||
virtual void BufferRender(const Block& render_block) = 0;
|
||||
|
||||
// Reports whether echo leakage has been detected in the echo canceller
|
||||
// output.
|
||||
virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
|
||||
|
||||
// Specifies whether the capture output will be used. The purpose of this is
|
||||
// to allow the block processor to deactivate some of the processing when the
|
||||
// resulting output is anyway not used, for instance when the endpoint is
|
||||
// muted.
|
||||
virtual void SetCaptureOutputUsage(bool capture_output_used) = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -11,8 +11,6 @@
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_BLOCK_PROCESSOR_METRICS_H_
|
||||
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Handles the reporting of metrics for the block_processor.
|
||||
@ -20,6 +18,9 @@ class BlockProcessorMetrics {
|
||||
public:
|
||||
BlockProcessorMetrics() = default;
|
||||
|
||||
BlockProcessorMetrics(const BlockProcessorMetrics&) = delete;
|
||||
BlockProcessorMetrics& operator=(const BlockProcessorMetrics&) = delete;
|
||||
|
||||
// Updates the metric with new capture data.
|
||||
void UpdateCapture(bool underrun);
|
||||
|
||||
@ -38,8 +39,6 @@ class BlockProcessorMetrics {
|
||||
int render_buffer_underruns_ = 0;
|
||||
int render_buffer_overruns_ = 0;
|
||||
int buffer_render_calls_ = 0;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(BlockProcessorMetrics);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
71
webrtc/modules/audio_processing/aec3/config_selector.cc
Normal file
71
webrtc/modules/audio_processing/aec3/config_selector.cc
Normal file
@ -0,0 +1,71 @@
|
||||
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/config_selector.h"
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
// Validates that the mono and the multichannel configs have compatible fields.
|
||||
bool CompatibleConfigs(const EchoCanceller3Config& mono_config,
|
||||
const EchoCanceller3Config& multichannel_config) {
|
||||
if (mono_config.delay.fixed_capture_delay_samples !=
|
||||
multichannel_config.delay.fixed_capture_delay_samples) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.filter.export_linear_aec_output !=
|
||||
multichannel_config.filter.export_linear_aec_output) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.filter.high_pass_filter_echo_reference !=
|
||||
multichannel_config.filter.high_pass_filter_echo_reference) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.multi_channel.detect_stereo_content !=
|
||||
multichannel_config.multi_channel.detect_stereo_content) {
|
||||
return false;
|
||||
}
|
||||
if (mono_config.multi_channel.stereo_detection_timeout_threshold_seconds !=
|
||||
multichannel_config.multi_channel
|
||||
.stereo_detection_timeout_threshold_seconds) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Stores copies of the mono config and the optional multichannel config, then
// picks the initially active one.
ConfigSelector::ConfigSelector(
    const EchoCanceller3Config& config,
    const absl::optional<EchoCanceller3Config>& multichannel_config,
    int num_render_input_channels)
    : config_(config), multichannel_config_(multichannel_config) {
  // A supplied multichannel config has to agree with the mono config on the
  // fields that CompatibleConfigs() compares.
  if (multichannel_config_.has_value()) {
    RTC_DCHECK(CompatibleConfigs(config_, *multichannel_config_));
  }

  // With stereo content detection switched off, more than one render input
  // channel is treated as multichannel content from the start.
  const bool stereo_detection_disabled =
      !config_.multi_channel.detect_stereo_content;
  const bool several_render_channels = num_render_input_channels > 1;
  Update(stereo_detection_disabled && several_render_channels);

  // Update() always assigns one of the two stored configs.
  RTC_DCHECK(active_config_);
}
|
||||
|
||||
void ConfigSelector::Update(bool multichannel_content) {
|
||||
if (multichannel_content && multichannel_config_.has_value()) {
|
||||
active_config_ = &(*multichannel_config_);
|
||||
} else {
|
||||
active_config_ = &config_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
41
webrtc/modules/audio_processing/aec3/config_selector.h
Normal file
41
webrtc/modules/audio_processing/aec3/config_selector.h
Normal file
@ -0,0 +1,41 @@
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Holds a mono configuration and an optional multichannel configuration and
// exposes whichever of the two is currently selected.
// NOTE(review): active_config_ points at a member of this object, so a
// copy-constructed ConfigSelector would presumably keep referring to the
// source object's config - confirm that instances are never copied.
|
||||
class ConfigSelector {
|
||||
public:
|
||||
// Copies `config` and `multichannel_config` into the selector and makes the
// initial selection; `num_render_input_channels` feeds into that choice.
ConfigSelector(
|
||||
const EchoCanceller3Config& config,
|
||||
const absl::optional<EchoCanceller3Config>& multichannel_config,
|
||||
int num_render_input_channels);
|
||||
|
||||
// Re-selects the active config. `multichannel_content` tells whether
|
||||
// multichannel content has been detected.
|
||||
void Update(bool multichannel_content);
|
||||
|
||||
// Returns the currently selected config by dereferencing active_config_.
const EchoCanceller3Config& active_config() const { return *active_config_; }
|
||||
|
||||
private:
|
||||
// Copy of the config passed as `config` to the constructor.
const EchoCanceller3Config config_;
|
||||
// Copy of the optional config passed as `multichannel_config`.
const absl::optional<EchoCanceller3Config> multichannel_config_;
|
||||
// Currently selected config; null only until the first selection is made.
const EchoCanceller3Config* active_config_ = nullptr;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_CONFIG_SELECTOR_H_
|
@ -17,7 +17,6 @@
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/utility/cascaded_biquad_filter.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -26,6 +25,9 @@ class Decimator {
|
||||
public:
|
||||
explicit Decimator(size_t down_sampling_factor);
|
||||
|
||||
Decimator(const Decimator&) = delete;
|
||||
Decimator& operator=(const Decimator&) = delete;
|
||||
|
||||
// Downsamples the signal.
|
||||
void Decimate(rtc::ArrayView<const float> in, rtc::ArrayView<float> out);
|
||||
|
||||
@ -33,8 +35,6 @@ class Decimator {
|
||||
const size_t down_sampling_factor_;
|
||||
CascadedBiQuadFilter anti_aliasing_filter_;
|
||||
CascadedBiQuadFilter noise_reduction_filter_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(Decimator);
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
|
@ -11,6 +11,8 @@
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_DELAY_ESTIMATE_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Stores delay_estimates.
|
||||
|
@ -88,7 +88,7 @@ void EchoAudibility::UpdateRenderNoiseEstimator(
|
||||
|
||||
bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) {
|
||||
const int num_render_channels =
|
||||
static_cast<int>(block_buffer.buffer[0][0].size());
|
||||
static_cast<int>(block_buffer.buffer[0].NumChannels());
|
||||
bool too_low = false;
|
||||
const int render_block_write_current = block_buffer.write;
|
||||
if (render_block_write_current == render_block_write_prev_) {
|
||||
@ -98,7 +98,8 @@ bool EchoAudibility::IsRenderTooLow(const BlockBuffer& block_buffer) {
|
||||
idx = block_buffer.IncIndex(idx)) {
|
||||
float max_abs_over_channels = 0.f;
|
||||
for (int ch = 0; ch < num_render_channels; ++ch) {
|
||||
auto block = block_buffer.buffer[idx][0][ch];
|
||||
rtc::ArrayView<const float, kBlockSize> block =
|
||||
block_buffer.buffer[idx].View(/*band=*/0, /*channel=*/ch);
|
||||
auto r = std::minmax_element(block.cbegin(), block.cend());
|
||||
float max_abs_channel =
|
||||
std::max(std::fabs(*r.first), std::fabs(*r.second));
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/aec3/stationarity_estimator.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
|
@ -12,10 +12,10 @@
|
||||
#include <algorithm>
|
||||
#include <utility>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/high_pass_filter.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/experiments/field_trial_parser.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
@ -27,8 +27,8 @@ namespace {
|
||||
enum class EchoCanceller3ApiCall { kCapture, kRender };
|
||||
|
||||
bool DetectSaturation(rtc::ArrayView<const float> y) {
|
||||
for (auto y_k : y) {
|
||||
if (y_k >= 32700.0f || y_k <= -32700.0f) {
|
||||
for (size_t k = 0; k < y.size(); ++k) {
|
||||
if (y[k] >= 32700.0f || y[k] <= -32700.0f) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
@ -38,7 +38,7 @@ bool DetectSaturation(rtc::ArrayView<const float> y) {
|
||||
// Retrieves a value from a field trial if it is available. If no value is
|
||||
// present, the default value is returned. If the retrieved value is beyond the
|
||||
// specified limits, the default value is returned instead.
|
||||
void RetrieveFieldTrialValue(const char* trial_name,
|
||||
void RetrieveFieldTrialValue(absl::string_view trial_name,
|
||||
float min,
|
||||
float max,
|
||||
float* value_to_update) {
|
||||
@ -49,12 +49,16 @@ void RetrieveFieldTrialValue(const char* trial_name,
|
||||
ParseFieldTrial({&field_trial_param}, field_trial_str);
|
||||
float field_trial_value = static_cast<float>(field_trial_param.Get());
|
||||
|
||||
if (field_trial_value >= min && field_trial_value <= max) {
|
||||
if (field_trial_value >= min && field_trial_value <= max &&
|
||||
field_trial_value != *value_to_update) {
|
||||
RTC_LOG(LS_INFO) << "Key " << trial_name
|
||||
<< " changing AEC3 parameter value from "
|
||||
<< *value_to_update << " to " << field_trial_value;
|
||||
*value_to_update = field_trial_value;
|
||||
}
|
||||
}
|
||||
|
||||
void RetrieveFieldTrialValue(const char* trial_name,
|
||||
void RetrieveFieldTrialValue(absl::string_view trial_name,
|
||||
int min,
|
||||
int max,
|
||||
int* value_to_update) {
|
||||
@ -65,7 +69,11 @@ void RetrieveFieldTrialValue(const char* trial_name,
|
||||
ParseFieldTrial({&field_trial_param}, field_trial_str);
|
||||
float field_trial_value = field_trial_param.Get();
|
||||
|
||||
if (field_trial_value >= min && field_trial_value <= max) {
|
||||
if (field_trial_value >= min && field_trial_value <= max &&
|
||||
field_trial_value != *value_to_update) {
|
||||
RTC_LOG(LS_INFO) << "Key " << trial_name
|
||||
<< " changing AEC3 parameter value from "
|
||||
<< *value_to_update << " to " << field_trial_value;
|
||||
*value_to_update = field_trial_value;
|
||||
}
|
||||
}
|
||||
@ -88,18 +96,50 @@ void FillSubFrameView(
|
||||
}
|
||||
|
||||
void FillSubFrameView(
|
||||
bool proper_downmix_needed,
|
||||
std::vector<std::vector<std::vector<float>>>* frame,
|
||||
size_t sub_frame_index,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
|
||||
RTC_DCHECK_GE(1, sub_frame_index);
|
||||
RTC_DCHECK_EQ(frame->size(), sub_frame_view->size());
|
||||
RTC_DCHECK_EQ((*frame)[0].size(), (*sub_frame_view)[0].size());
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
|
||||
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][channel][sub_frame_index * kSubFrameLength],
|
||||
const size_t frame_num_channels = (*frame)[0].size();
|
||||
const size_t sub_frame_num_channels = (*sub_frame_view)[0].size();
|
||||
if (frame_num_channels > sub_frame_num_channels) {
|
||||
RTC_DCHECK_EQ(sub_frame_num_channels, 1u);
|
||||
if (proper_downmix_needed) {
|
||||
// When a proper downmix is needed (which is the case when proper stereo
|
||||
// is present in the echo reference signal but the echo canceller does the
|
||||
// processing in mono) downmix the echo reference by averaging the channel
|
||||
// content (otherwise downmixing is done by selecting channel 0).
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t ch = 1; ch < frame_num_channels; ++ch) {
|
||||
for (size_t k = 0; k < kSubFrameLength; ++k) {
|
||||
(*frame)[band][/*channel=*/0]
|
||||
[sub_frame_index * kSubFrameLength + k] +=
|
||||
(*frame)[band][ch][sub_frame_index * kSubFrameLength + k];
|
||||
}
|
||||
}
|
||||
const float one_by_num_channels = 1.0f / frame_num_channels;
|
||||
for (size_t k = 0; k < kSubFrameLength; ++k) {
|
||||
(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength +
|
||||
k] *= one_by_num_channels;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
(*sub_frame_view)[band][/*channel=*/0] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][/*channel=*/0][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
} else {
|
||||
RTC_DCHECK_EQ(frame_num_channels, sub_frame_num_channels);
|
||||
for (size_t band = 0; band < frame->size(); ++band) {
|
||||
for (size_t channel = 0; channel < (*frame)[band].size(); ++channel) {
|
||||
(*sub_frame_view)[band][channel] = rtc::ArrayView<float>(
|
||||
&(*frame)[band][channel][sub_frame_index * kSubFrameLength],
|
||||
kSubFrameLength);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -107,16 +147,17 @@ void ProcessCaptureFrameContent(
|
||||
AudioBuffer* linear_output,
|
||||
AudioBuffer* capture,
|
||||
bool level_change,
|
||||
bool aec_reference_is_downmixed_stereo,
|
||||
bool saturated_microphone_signal,
|
||||
size_t sub_frame_index,
|
||||
FrameBlocker* capture_blocker,
|
||||
BlockFramer* linear_output_framer,
|
||||
BlockFramer* output_framer,
|
||||
BlockProcessor* block_processor,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output_block,
|
||||
Block* linear_output_block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>*
|
||||
linear_output_sub_frame_view,
|
||||
std::vector<std::vector<std::vector<float>>>* capture_block,
|
||||
Block* capture_block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* capture_sub_frame_view) {
|
||||
FillSubFrameView(capture, sub_frame_index, capture_sub_frame_view);
|
||||
|
||||
@ -130,8 +171,10 @@ void ProcessCaptureFrameContent(
|
||||
|
||||
capture_blocker->InsertSubFrameAndExtractBlock(*capture_sub_frame_view,
|
||||
capture_block);
|
||||
block_processor->ProcessCapture(level_change, saturated_microphone_signal,
|
||||
linear_output_block, capture_block);
|
||||
block_processor->ProcessCapture(
|
||||
/*echo_path_gain_change=*/level_change ||
|
||||
aec_reference_is_downmixed_stereo,
|
||||
saturated_microphone_signal, linear_output_block, capture_block);
|
||||
output_framer->InsertBlockAndExtractSubFrame(*capture_block,
|
||||
capture_sub_frame_view);
|
||||
|
||||
@ -142,22 +185,24 @@ void ProcessCaptureFrameContent(
|
||||
}
|
||||
}
|
||||
|
||||
void ProcessRemainingCaptureFrameContent(
|
||||
bool level_change,
|
||||
bool saturated_microphone_signal,
|
||||
FrameBlocker* capture_blocker,
|
||||
BlockFramer* linear_output_framer,
|
||||
BlockFramer* output_framer,
|
||||
BlockProcessor* block_processor,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output_block,
|
||||
std::vector<std::vector<std::vector<float>>>* block) {
|
||||
void ProcessRemainingCaptureFrameContent(bool level_change,
|
||||
bool aec_reference_is_downmixed_stereo,
|
||||
bool saturated_microphone_signal,
|
||||
FrameBlocker* capture_blocker,
|
||||
BlockFramer* linear_output_framer,
|
||||
BlockFramer* output_framer,
|
||||
BlockProcessor* block_processor,
|
||||
Block* linear_output_block,
|
||||
Block* block) {
|
||||
if (!capture_blocker->IsBlockAvailable()) {
|
||||
return;
|
||||
}
|
||||
|
||||
capture_blocker->ExtractBlock(block);
|
||||
block_processor->ProcessCapture(level_change, saturated_microphone_signal,
|
||||
linear_output_block, block);
|
||||
block_processor->ProcessCapture(
|
||||
/*echo_path_gain_change=*/level_change ||
|
||||
aec_reference_is_downmixed_stereo,
|
||||
saturated_microphone_signal, linear_output_block, block);
|
||||
output_framer->InsertBlock(*block);
|
||||
|
||||
if (linear_output_framer) {
|
||||
@ -167,21 +212,22 @@ void ProcessRemainingCaptureFrameContent(
|
||||
}
|
||||
|
||||
void BufferRenderFrameContent(
|
||||
bool proper_downmix_needed,
|
||||
std::vector<std::vector<std::vector<float>>>* render_frame,
|
||||
size_t sub_frame_index,
|
||||
FrameBlocker* render_blocker,
|
||||
BlockProcessor* block_processor,
|
||||
std::vector<std::vector<std::vector<float>>>* block,
|
||||
Block* block,
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>* sub_frame_view) {
|
||||
FillSubFrameView(render_frame, sub_frame_index, sub_frame_view);
|
||||
FillSubFrameView(proper_downmix_needed, render_frame, sub_frame_index,
|
||||
sub_frame_view);
|
||||
render_blocker->InsertSubFrameAndExtractBlock(*sub_frame_view, block);
|
||||
block_processor->BufferRender(*block);
|
||||
}
|
||||
|
||||
void BufferRemainingRenderFrameContent(
|
||||
FrameBlocker* render_blocker,
|
||||
BlockProcessor* block_processor,
|
||||
std::vector<std::vector<std::vector<float>>>* block) {
|
||||
void BufferRemainingRenderFrameContent(FrameBlocker* render_blocker,
|
||||
BlockProcessor* block_processor,
|
||||
Block* block) {
|
||||
if (!render_blocker->IsBlockAvailable()) {
|
||||
return;
|
||||
}
|
||||
@ -213,6 +259,10 @@ void CopyBufferIntoFrame(const AudioBuffer& buffer,
|
||||
EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
EchoCanceller3Config adjusted_cfg = config;
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3StereoContentDetectionKillSwitch")) {
|
||||
adjusted_cfg.multi_channel.detect_stereo_content = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3AntiHowlingMinimizationKillSwitch")) {
|
||||
adjusted_cfg.suppressor.high_bands_suppression
|
||||
.anti_howling_activation_threshold = 25.f;
|
||||
@ -251,24 +301,39 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
adjusted_cfg.filter.initial_state_seconds = 2.0f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3HighPassFilterEchoReference")) {
|
||||
adjusted_cfg.filter.high_pass_filter_echo_reference = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EchoSaturationDetectionKillSwitch")) {
|
||||
adjusted_cfg.ep_strength.echo_can_saturate = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3UseDot2ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.2f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot3ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.3f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot4ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.4f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot5ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.5f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot6ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.6f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot7ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.7f;
|
||||
} else if (field_trial::IsEnabled("WebRTC-Aec3UseDot8ReverbDefaultLen")) {
|
||||
adjusted_cfg.ep_strength.default_len = 0.8f;
|
||||
const std::string use_nearend_reverb_len_tunings =
|
||||
field_trial::FindFullName("WebRTC-Aec3UseNearendReverbLen");
|
||||
FieldTrialParameter<double> nearend_reverb_default_len(
|
||||
"default_len", adjusted_cfg.ep_strength.default_len);
|
||||
FieldTrialParameter<double> nearend_reverb_nearend_len(
|
||||
"nearend_len", adjusted_cfg.ep_strength.nearend_len);
|
||||
|
||||
ParseFieldTrial({&nearend_reverb_default_len, &nearend_reverb_nearend_len},
|
||||
use_nearend_reverb_len_tunings);
|
||||
float default_len = static_cast<float>(nearend_reverb_default_len.Get());
|
||||
float nearend_len = static_cast<float>(nearend_reverb_nearend_len.Get());
|
||||
if (default_len > -1 && default_len < 1 && nearend_len > -1 &&
|
||||
nearend_len < 1) {
|
||||
adjusted_cfg.ep_strength.default_len =
|
||||
static_cast<float>(nearend_reverb_default_len.Get());
|
||||
adjusted_cfg.ep_strength.nearend_len =
|
||||
static_cast<float>(nearend_reverb_nearend_len.Get());
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
|
||||
adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsDisabled("WebRTC-Aec3ConservativeTailFreqResponse")) {
|
||||
adjusted_cfg.ep_strength.use_conservative_tail_frequency_response = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3ShortHeadroomKillSwitch")) {
|
||||
@ -313,6 +378,14 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) {
|
||||
adjusted_cfg.delay.detect_pre_echo = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsDisabled("WebRTC-Aec3DelayEstimatorDetectPreEcho")) {
|
||||
adjusted_cfg.delay.detect_pre_echo = false;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3SensitiveDominantNearendActivation")) {
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.enr_threshold = 0.5f;
|
||||
} else if (field_trial::IsEnabled(
|
||||
@ -368,6 +441,10 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
adjusted_cfg.suppressor.nearend_tuning.max_dec_factor_lf = .2f;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EnforceConservativeHfSuppression")) {
|
||||
adjusted_cfg.suppressor.conservative_hf_suppression = true;
|
||||
}
|
||||
|
||||
if (field_trial::IsEnabled("WebRTC-Aec3EnforceStationarityProperties")) {
|
||||
adjusted_cfg.echo_audibility.use_stationarity_properties = true;
|
||||
}
|
||||
@ -443,8 +520,6 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
FieldTrialParameter<int> dominant_nearend_detection_trigger_threshold(
|
||||
"dominant_nearend_detection_trigger_threshold",
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold);
|
||||
FieldTrialParameter<double> ep_strength_default_len(
|
||||
"ep_strength_default_len", adjusted_cfg.ep_strength.default_len);
|
||||
|
||||
ParseFieldTrial(
|
||||
{&nearend_tuning_mask_lf_enr_transparent,
|
||||
@ -461,7 +536,7 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
&dominant_nearend_detection_enr_exit_threshold,
|
||||
&dominant_nearend_detection_snr_threshold,
|
||||
&dominant_nearend_detection_hold_duration,
|
||||
&dominant_nearend_detection_trigger_threshold, &ep_strength_default_len},
|
||||
&dominant_nearend_detection_trigger_threshold},
|
||||
suppressor_tuning_override_trial_name);
|
||||
|
||||
adjusted_cfg.suppressor.nearend_tuning.mask_lf.enr_transparent =
|
||||
@ -498,8 +573,6 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
dominant_nearend_detection_hold_duration.Get();
|
||||
adjusted_cfg.suppressor.dominant_nearend_detection.trigger_threshold =
|
||||
dominant_nearend_detection_trigger_threshold.Get();
|
||||
adjusted_cfg.ep_strength.default_len =
|
||||
static_cast<float>(ep_strength_default_len.Get());
|
||||
|
||||
// Field trial-based overrides of individual suppressor parameters.
|
||||
RetrieveFieldTrialValue(
|
||||
@ -561,8 +634,12 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
"WebRTC-Aec3SuppressorAntiHowlingGainOverride", 0.f, 10.f,
|
||||
&adjusted_cfg.suppressor.high_bands_suppression.anti_howling_gain);
|
||||
|
||||
RetrieveFieldTrialValue("WebRTC-Aec3SuppressorEpStrengthDefaultLenOverride",
|
||||
-1.f, 1.f, &adjusted_cfg.ep_strength.default_len);
|
||||
// Field trial-based overrides of individual delay estimator parameters.
|
||||
RetrieveFieldTrialValue("WebRTC-Aec3DelayEstimateSmoothingOverride", 0.f, 1.f,
|
||||
&adjusted_cfg.delay.delay_estimate_smoothing);
|
||||
RetrieveFieldTrialValue(
|
||||
"WebRTC-Aec3DelayEstimateSmoothingDelayFoundOverride", 0.f, 1.f,
|
||||
&adjusted_cfg.delay.delay_estimate_smoothing_delay_found);
|
||||
|
||||
return adjusted_cfg;
|
||||
}
|
||||
@ -570,6 +647,7 @@ EchoCanceller3Config AdjustConfig(const EchoCanceller3Config& config) {
|
||||
class EchoCanceller3::RenderWriter {
|
||||
public:
|
||||
RenderWriter(ApmDataDumper* data_dumper,
|
||||
const EchoCanceller3Config& config,
|
||||
SwapQueue<std::vector<std::vector<std::vector<float>>>,
|
||||
Aec3RenderQueueItemVerifier>* render_transfer_queue,
|
||||
size_t num_bands,
|
||||
@ -586,7 +664,7 @@ class EchoCanceller3::RenderWriter {
|
||||
ApmDataDumper* data_dumper_;
|
||||
const size_t num_bands_;
|
||||
const size_t num_channels_;
|
||||
HighPassFilter high_pass_filter_;
|
||||
std::unique_ptr<HighPassFilter> high_pass_filter_;
|
||||
std::vector<std::vector<std::vector<float>>> render_queue_input_frame_;
|
||||
SwapQueue<std::vector<std::vector<std::vector<float>>>,
|
||||
Aec3RenderQueueItemVerifier>* render_transfer_queue_;
|
||||
@ -594,6 +672,7 @@ class EchoCanceller3::RenderWriter {
|
||||
|
||||
EchoCanceller3::RenderWriter::RenderWriter(
|
||||
ApmDataDumper* data_dumper,
|
||||
const EchoCanceller3Config& config,
|
||||
SwapQueue<std::vector<std::vector<std::vector<float>>>,
|
||||
Aec3RenderQueueItemVerifier>* render_transfer_queue,
|
||||
size_t num_bands,
|
||||
@ -601,7 +680,6 @@ EchoCanceller3::RenderWriter::RenderWriter(
|
||||
: data_dumper_(data_dumper),
|
||||
num_bands_(num_bands),
|
||||
num_channels_(num_channels),
|
||||
high_pass_filter_(16000, num_channels),
|
||||
render_queue_input_frame_(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(
|
||||
@ -609,6 +687,9 @@ EchoCanceller3::RenderWriter::RenderWriter(
|
||||
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
|
||||
render_transfer_queue_(render_transfer_queue) {
|
||||
RTC_DCHECK(data_dumper);
|
||||
if (config.filter.high_pass_filter_echo_reference) {
|
||||
high_pass_filter_ = std::make_unique<HighPassFilter>(16000, num_channels);
|
||||
}
|
||||
}
|
||||
|
||||
EchoCanceller3::RenderWriter::~RenderWriter() = default;
|
||||
@ -627,104 +708,124 @@ void EchoCanceller3::RenderWriter::Insert(const AudioBuffer& input) {
|
||||
|
||||
CopyBufferIntoFrame(input, num_bands_, num_channels_,
|
||||
&render_queue_input_frame_);
|
||||
high_pass_filter_.Process(&render_queue_input_frame_[0]);
|
||||
if (high_pass_filter_) {
|
||||
high_pass_filter_->Process(&render_queue_input_frame_[0]);
|
||||
}
|
||||
|
||||
static_cast<void>(render_transfer_queue_->Insert(&render_queue_input_frame_));
|
||||
}
|
||||
|
||||
int EchoCanceller3::instance_count_ = 0;
|
||||
std::atomic<int> EchoCanceller3::instance_count_(0);
|
||||
|
||||
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels)
|
||||
: EchoCanceller3(AdjustConfig(config),
|
||||
sample_rate_hz,
|
||||
num_render_channels,
|
||||
num_capture_channels,
|
||||
std::unique_ptr<BlockProcessor>(
|
||||
BlockProcessor::Create(AdjustConfig(config),
|
||||
sample_rate_hz,
|
||||
num_render_channels,
|
||||
num_capture_channels))) {}
|
||||
EchoCanceller3::EchoCanceller3(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels,
|
||||
std::unique_ptr<BlockProcessor> block_processor)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
config_(config),
|
||||
EchoCanceller3::EchoCanceller3(
|
||||
const EchoCanceller3Config& config,
|
||||
const absl::optional<EchoCanceller3Config>& multichannel_config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels)
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
config_(AdjustConfig(config)),
|
||||
sample_rate_hz_(sample_rate_hz),
|
||||
num_bands_(NumBandsForRate(sample_rate_hz_)),
|
||||
num_render_channels_(num_render_channels),
|
||||
num_render_input_channels_(num_render_channels),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
config_selector_(AdjustConfig(config),
|
||||
multichannel_config,
|
||||
num_render_input_channels_),
|
||||
multichannel_content_detector_(
|
||||
config_selector_.active_config().multi_channel.detect_stereo_content,
|
||||
num_render_input_channels_,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_threshold,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_timeout_threshold_seconds,
|
||||
config_selector_.active_config()
|
||||
.multi_channel.stereo_detection_hysteresis_seconds),
|
||||
output_framer_(num_bands_, num_capture_channels_),
|
||||
capture_blocker_(num_bands_, num_capture_channels_),
|
||||
render_blocker_(num_bands_, num_render_channels_),
|
||||
render_transfer_queue_(
|
||||
kRenderTransferQueueSizeFrames,
|
||||
std::vector<std::vector<std::vector<float>>>(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(
|
||||
num_render_channels_,
|
||||
num_render_input_channels_,
|
||||
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
|
||||
Aec3RenderQueueItemVerifier(num_bands_,
|
||||
num_render_channels_,
|
||||
num_render_input_channels_,
|
||||
AudioBuffer::kSplitBandSize)),
|
||||
block_processor_(std::move(block_processor)),
|
||||
render_queue_output_frame_(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(
|
||||
num_render_channels_,
|
||||
num_render_input_channels_,
|
||||
std::vector<float>(AudioBuffer::kSplitBandSize, 0.f))),
|
||||
render_block_(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(num_render_channels_,
|
||||
std::vector<float>(kBlockSize, 0.f))),
|
||||
capture_block_(
|
||||
num_bands_,
|
||||
std::vector<std::vector<float>>(num_capture_channels_,
|
||||
std::vector<float>(kBlockSize, 0.f))),
|
||||
render_sub_frame_view_(
|
||||
num_bands_,
|
||||
std::vector<rtc::ArrayView<float>>(num_render_channels_)),
|
||||
render_block_(num_bands_, num_render_input_channels_),
|
||||
capture_block_(num_bands_, num_capture_channels_),
|
||||
capture_sub_frame_view_(
|
||||
num_bands_,
|
||||
std::vector<rtc::ArrayView<float>>(num_capture_channels_)) {
|
||||
RTC_DCHECK(ValidFullBandRate(sample_rate_hz_));
|
||||
|
||||
if (config_.delay.fixed_capture_delay_samples > 0) {
|
||||
if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
|
||||
block_delay_buffer_.reset(new BlockDelayBuffer(
|
||||
num_capture_channels_, num_bands_, AudioBuffer::kSplitBandSize,
|
||||
config_.delay.fixed_capture_delay_samples));
|
||||
}
|
||||
|
||||
render_writer_.reset(new RenderWriter(data_dumper_.get(),
|
||||
&render_transfer_queue_, num_bands_,
|
||||
num_render_channels_));
|
||||
render_writer_.reset(new RenderWriter(
|
||||
data_dumper_.get(), config_selector_.active_config(),
|
||||
&render_transfer_queue_, num_bands_, num_render_input_channels_));
|
||||
|
||||
RTC_DCHECK_EQ(num_bands_, std::max(sample_rate_hz_, 16000) / 16000);
|
||||
RTC_DCHECK_GE(kMaxNumBands, num_bands_);
|
||||
|
||||
if (config_.filter.export_linear_aec_output) {
|
||||
linear_output_framer_.reset(new BlockFramer(1, num_capture_channels_));
|
||||
if (config_selector_.active_config().filter.export_linear_aec_output) {
|
||||
linear_output_framer_.reset(
|
||||
new BlockFramer(/*num_bands=*/1, num_capture_channels_));
|
||||
linear_output_block_ =
|
||||
std::make_unique<std::vector<std::vector<std::vector<float>>>>(
|
||||
1, std::vector<std::vector<float>>(
|
||||
num_capture_channels_, std::vector<float>(kBlockSize, 0.f)));
|
||||
std::make_unique<Block>(/*num_bands=*/1, num_capture_channels_),
|
||||
linear_output_sub_frame_view_ =
|
||||
std::vector<std::vector<rtc::ArrayView<float>>>(
|
||||
1, std::vector<rtc::ArrayView<float>>(num_capture_channels_));
|
||||
}
|
||||
|
||||
Initialize();
|
||||
|
||||
RTC_LOG(LS_INFO) << "AEC3 created with sample rate: " << sample_rate_hz_
|
||||
<< " Hz, num render channels: " << num_render_input_channels_
|
||||
<< ", num capture channels: " << num_capture_channels_;
|
||||
}
|
||||
|
||||
EchoCanceller3::~EchoCanceller3() = default;
|
||||
|
||||
// (Re-)creates the render-side processing objects so that they match the
// current multichannel detection result. Every object below is sized with
// num_render_channels_to_aec_, which is recomputed first.
void EchoCanceller3::Initialize() {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
|
||||
// Use all render input channels only when the detector reports proper
// multichannel content; otherwise a single channel is passed to the AEC.
num_render_channels_to_aec_ =
|
||||
multichannel_content_detector_.IsProperMultiChannelContentDetected()
|
||||
? num_render_input_channels_
|
||||
: 1;
|
||||
|
||||
// Hand the same detection flag to the config selector before
// active_config() is read further down.
config_selector_.Update(
|
||||
multichannel_content_detector_.IsProperMultiChannelContentDetected());
|
||||
|
||||
render_block_.SetNumChannels(num_render_channels_to_aec_);
|
||||
|
||||
// Replace the render frame blocker with one built for the new channel count.
render_blocker_.reset(
|
||||
new FrameBlocker(num_bands_, num_render_channels_to_aec_));
|
||||
|
||||
// Replace the block processor with a newly created instance that uses the
// currently active configuration.
block_processor_.reset(BlockProcessor::Create(
|
||||
config_selector_.active_config(), sample_rate_hz_,
|
||||
num_render_channels_to_aec_, num_capture_channels_));
|
||||
|
||||
// Rebuild the sub-frame view container: num_bands_ entries, each holding
// num_render_channels_to_aec_ array views.
render_sub_frame_view_ = std::vector<std::vector<rtc::ArrayView<float>>>(
|
||||
num_bands_,
|
||||
std::vector<rtc::ArrayView<float>>(num_render_channels_to_aec_));
|
||||
}
|
||||
|
||||
void EchoCanceller3::AnalyzeRender(const AudioBuffer& render) {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&render_race_checker_);
|
||||
|
||||
RTC_DCHECK_EQ(render.num_channels(), num_render_channels_);
|
||||
RTC_DCHECK_EQ(render.num_channels(), num_render_input_channels_);
|
||||
data_dumper_->DumpRaw("aec3_call_order",
|
||||
static_cast<int>(EchoCanceller3ApiCall::kRender));
|
||||
|
||||
@ -764,7 +865,7 @@ void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
|
||||
if (linear_output && !linear_output_framer_) {
|
||||
RTC_LOG(LS_ERROR) << "Trying to retrieve the linear AEC output without "
|
||||
"properly configuring AEC3.";
|
||||
RTC_NOTREACHED();
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
}
|
||||
|
||||
// Report capture call in the metrics and periodically update API call
|
||||
@ -772,7 +873,7 @@ void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
|
||||
api_call_metrics_.ReportCaptureCall();
|
||||
|
||||
// Optionally delay the capture signal.
|
||||
if (config_.delay.fixed_capture_delay_samples > 0) {
|
||||
if (config_selector_.active_config().delay.fixed_capture_delay_samples > 0) {
|
||||
RTC_DCHECK(block_delay_buffer_);
|
||||
block_delay_buffer_->DelaySignal(capture);
|
||||
}
|
||||
@ -784,22 +885,26 @@ void EchoCanceller3::ProcessCapture(AudioBuffer* capture,
|
||||
|
||||
EmptyRenderQueue();
|
||||
|
||||
ProcessCaptureFrameContent(linear_output, capture, level_change,
|
||||
saturated_microphone_signal_, 0, &capture_blocker_,
|
||||
linear_output_framer_.get(), &output_framer_,
|
||||
block_processor_.get(), linear_output_block_.get(),
|
||||
&linear_output_sub_frame_view_, &capture_block_,
|
||||
&capture_sub_frame_view_);
|
||||
ProcessCaptureFrameContent(
|
||||
linear_output, capture, level_change,
|
||||
multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
|
||||
saturated_microphone_signal_, 0, &capture_blocker_,
|
||||
linear_output_framer_.get(), &output_framer_, block_processor_.get(),
|
||||
linear_output_block_.get(), &linear_output_sub_frame_view_,
|
||||
&capture_block_, &capture_sub_frame_view_);
|
||||
|
||||
ProcessCaptureFrameContent(linear_output, capture, level_change,
|
||||
saturated_microphone_signal_, 1, &capture_blocker_,
|
||||
linear_output_framer_.get(), &output_framer_,
|
||||
block_processor_.get(), linear_output_block_.get(),
|
||||
&linear_output_sub_frame_view_, &capture_block_,
|
||||
&capture_sub_frame_view_);
|
||||
ProcessCaptureFrameContent(
|
||||
linear_output, capture, level_change,
|
||||
multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
|
||||
saturated_microphone_signal_, 1, &capture_blocker_,
|
||||
linear_output_framer_.get(), &output_framer_, block_processor_.get(),
|
||||
linear_output_block_.get(), &linear_output_sub_frame_view_,
|
||||
&capture_block_, &capture_sub_frame_view_);
|
||||
|
||||
ProcessRemainingCaptureFrameContent(
|
||||
level_change, saturated_microphone_signal_, &capture_blocker_,
|
||||
level_change,
|
||||
multichannel_content_detector_.IsTemporaryMultiChannelContentDetected(),
|
||||
saturated_microphone_signal_, &capture_blocker_,
|
||||
linear_output_framer_.get(), &output_framer_, block_processor_.get(),
|
||||
linear_output_block_.get(), &capture_block_);
|
||||
|
||||
@ -819,29 +924,37 @@ void EchoCanceller3::SetAudioBufferDelay(int delay_ms) {
|
||||
block_processor_->SetAudioBufferDelay(delay_ms);
|
||||
}
|
||||
|
||||
// Passes the capture-output-usage flag on to the block processor unchanged.
void EchoCanceller3::SetCaptureOutputUsage(bool capture_output_used) {
|
||||
// Check associated with capture_race_checker_, i.e. the capture-side state.
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
block_processor_->SetCaptureOutputUsage(capture_output_used);
|
||||
}
|
||||
|
||||
// Unconditionally reports that processing is active; no state is consulted.
bool EchoCanceller3::ActiveProcessing() const {
|
||||
return true;
|
||||
}
|
||||
|
||||
EchoCanceller3Config EchoCanceller3::CreateDefaultConfig(
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels) {
|
||||
EchoCanceller3Config EchoCanceller3::CreateDefaultMultichannelConfig() {
|
||||
EchoCanceller3Config cfg;
|
||||
if (num_render_channels > 1) {
|
||||
// Use shorter and more rapidly adapting coarse filter to compensate for
|
||||
// the increased number of total filter parameters to adapt.
|
||||
cfg.filter.coarse.length_blocks = 11;
|
||||
cfg.filter.coarse.rate = 0.95f;
|
||||
cfg.filter.coarse_initial.length_blocks = 11;
|
||||
cfg.filter.coarse_initial.rate = 0.95f;
|
||||
// Use shorter and more rapidly adapting coarse filter to compensate for
|
||||
// the increased number of total filter parameters to adapt.
|
||||
cfg.filter.coarse.length_blocks = 11;
|
||||
cfg.filter.coarse.rate = 0.95f;
|
||||
cfg.filter.coarse_initial.length_blocks = 11;
|
||||
cfg.filter.coarse_initial.rate = 0.95f;
|
||||
|
||||
// Use more conservative suppressor behavior for non-nearend speech.
|
||||
cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
|
||||
cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
|
||||
}
|
||||
// Use more conservative suppressor behavior for non-nearend speech.
|
||||
cfg.suppressor.normal_tuning.max_dec_factor_lf = 0.35f;
|
||||
cfg.suppressor.normal_tuning.max_inc_factor = 1.5f;
|
||||
return cfg;
|
||||
}
|
||||
|
||||
// Test-only hook: takes ownership of `block_processor` and installs it in
// place of the current block_processor_. The argument must be non-null
// (DCHECKed below).
void EchoCanceller3::SetBlockProcessorForTesting(
|
||||
std::unique_ptr<BlockProcessor> block_processor) {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
RTC_DCHECK(block_processor);
|
||||
// The previously held processor is released by the move assignment.
block_processor_ = std::move(block_processor);
|
||||
}
|
||||
|
||||
void EchoCanceller3::EmptyRenderQueue() {
|
||||
RTC_DCHECK_RUNS_SERIALIZED(&capture_race_checker_);
|
||||
bool frame_to_buffer =
|
||||
@ -850,16 +963,27 @@ void EchoCanceller3::EmptyRenderQueue() {
|
||||
// Report render call in the metrics.
|
||||
api_call_metrics_.ReportRenderCall();
|
||||
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 0, &render_blocker_,
|
||||
block_processor_.get(), &render_block_,
|
||||
&render_sub_frame_view_);
|
||||
if (multichannel_content_detector_.UpdateDetection(
|
||||
render_queue_output_frame_)) {
|
||||
// Reinitialize the AEC when proper stereo is detected.
|
||||
Initialize();
|
||||
}
|
||||
|
||||
BufferRenderFrameContent(&render_queue_output_frame_, 1, &render_blocker_,
|
||||
block_processor_.get(), &render_block_,
|
||||
&render_sub_frame_view_);
|
||||
// Buffer frame content.
|
||||
BufferRenderFrameContent(
|
||||
/*proper_downmix_needed=*/multichannel_content_detector_
|
||||
.IsTemporaryMultiChannelContentDetected(),
|
||||
&render_queue_output_frame_, 0, render_blocker_.get(),
|
||||
block_processor_.get(), &render_block_, &render_sub_frame_view_);
|
||||
|
||||
BufferRemainingRenderFrameContent(&render_blocker_, block_processor_.get(),
|
||||
&render_block_);
|
||||
BufferRenderFrameContent(
|
||||
/*proper_downmix_needed=*/multichannel_content_detector_
|
||||
.IsTemporaryMultiChannelContentDetected(),
|
||||
&render_queue_output_frame_, 1, render_blocker_.get(),
|
||||
block_processor_.get(), &render_block_, &render_sub_frame_view_);
|
||||
|
||||
BufferRemainingRenderFrameContent(render_blocker_.get(),
|
||||
block_processor_.get(), &render_block_);
|
||||
|
||||
frame_to_buffer =
|
||||
render_transfer_queue_.Remove(&render_queue_output_frame_);
|
||||
|
@ -13,9 +13,11 @@
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
@ -23,7 +25,9 @@
|
||||
#include "modules/audio_processing/aec3/block_delay_buffer.h"
|
||||
#include "modules/audio_processing/aec3/block_framer.h"
|
||||
#include "modules/audio_processing/aec3/block_processor.h"
|
||||
#include "modules/audio_processing/aec3/config_selector.h"
|
||||
#include "modules/audio_processing/aec3/frame_blocker.h"
|
||||
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
@ -84,18 +88,15 @@ class Aec3RenderQueueItemVerifier {
|
||||
// AnalyzeRender call which can be called concurrently with the other methods.
|
||||
class EchoCanceller3 : public EchoControl {
|
||||
public:
|
||||
// Normal c-tor to use.
|
||||
EchoCanceller3(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels);
|
||||
// Testing c-tor that is used only for testing purposes.
|
||||
EchoCanceller3(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels,
|
||||
std::unique_ptr<BlockProcessor> block_processor);
|
||||
EchoCanceller3(
|
||||
const EchoCanceller3Config& config,
|
||||
const absl::optional<EchoCanceller3Config>& multichannel_config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels,
|
||||
size_t num_capture_channels);
|
||||
|
||||
~EchoCanceller3() override;
|
||||
|
||||
EchoCanceller3(const EchoCanceller3&) = delete;
|
||||
EchoCanceller3& operator=(const EchoCanceller3&) = delete;
|
||||
|
||||
@ -118,6 +119,12 @@ class EchoCanceller3 : public EchoControl {
|
||||
// Provides an optional external estimate of the audio buffer delay.
|
||||
void SetAudioBufferDelay(int delay_ms) override;
|
||||
|
||||
// Specifies whether the capture output will be used. The purpose of this is
|
||||
// to allow the echo controller to deactivate some of the processing when the
|
||||
// resulting output is anyway not used, for instance when the endpoint is
|
||||
// muted.
|
||||
void SetCaptureOutputUsage(bool capture_output_used) override;
|
||||
|
||||
bool ActiveProcessing() const override;
|
||||
|
||||
// Signals whether an external detector has detected echo leakage from the
|
||||
@ -129,14 +136,39 @@ class EchoCanceller3 : public EchoControl {
|
||||
block_processor_->UpdateEchoLeakageStatus(leakage_detected);
|
||||
}
|
||||
|
||||
// Produces a default configuration that is suitable for a certain combination
|
||||
// of render and capture channels.
|
||||
static EchoCanceller3Config CreateDefaultConfig(size_t num_render_channels,
|
||||
size_t num_capture_channels);
|
||||
// Produces a default configuration for multichannel.
|
||||
static EchoCanceller3Config CreateDefaultMultichannelConfig();
|
||||
|
||||
private:
|
||||
friend class EchoCanceller3Tester;
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3, DetectionOfProperStereo);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
DetectionOfProperStereoUsingThreshold);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
DetectionOfProperStereoUsingHysteresis);
|
||||
FRIEND_TEST_ALL_PREFIXES(EchoCanceller3,
|
||||
StereoContentDetectionForMonoSignals);
|
||||
|
||||
class RenderWriter;
|
||||
|
||||
// (Re-)Initializes the selected subset of the EchoCanceller3 fields, at
|
||||
// creation as well as during reconfiguration.
|
||||
void Initialize();
|
||||
|
||||
// Only for testing. Replaces the internal block processor.
|
||||
void SetBlockProcessorForTesting(
|
||||
std::unique_ptr<BlockProcessor> block_processor);
|
||||
|
||||
// Only for testing. Returns whether stereo processing is active.
|
||||
bool StereoRenderProcessingActiveForTesting() const {
|
||||
return multichannel_content_detector_.IsProperMultiChannelContentDetected();
|
||||
}
|
||||
|
||||
// Only for testing.
|
||||
const EchoCanceller3Config& GetActiveConfigForTesting() const {
|
||||
return config_selector_.active_config();
|
||||
}
|
||||
|
||||
// Empties the render SwapQueue.
|
||||
void EmptyRenderQueue();
|
||||
|
||||
@ -154,18 +186,22 @@ class EchoCanceller3 : public EchoControl {
|
||||
RTC_GUARDED_BY(render_race_checker_);
|
||||
|
||||
// State that may be accessed by the capture thread.
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const EchoCanceller3Config config_;
|
||||
const int sample_rate_hz_;
|
||||
const int num_bands_;
|
||||
const size_t num_render_channels_;
|
||||
const size_t num_render_input_channels_;
|
||||
size_t num_render_channels_to_aec_;
|
||||
const size_t num_capture_channels_;
|
||||
ConfigSelector config_selector_;
|
||||
MultiChannelContentDetector multichannel_content_detector_;
|
||||
std::unique_ptr<BlockFramer> linear_output_framer_
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
BlockFramer output_framer_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
FrameBlocker capture_blocker_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
FrameBlocker render_blocker_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
std::unique_ptr<FrameBlocker> render_blocker_
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
SwapQueue<std::vector<std::vector<std::vector<float>>>,
|
||||
Aec3RenderQueueItemVerifier>
|
||||
render_transfer_queue_;
|
||||
@ -175,12 +211,10 @@ class EchoCanceller3 : public EchoControl {
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
bool saturated_microphone_signal_ RTC_GUARDED_BY(capture_race_checker_) =
|
||||
false;
|
||||
std::vector<std::vector<std::vector<float>>> render_block_
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
std::unique_ptr<std::vector<std::vector<std::vector<float>>>>
|
||||
linear_output_block_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
std::vector<std::vector<std::vector<float>>> capture_block_
|
||||
Block render_block_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
std::unique_ptr<Block> linear_output_block_
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
Block capture_block_ RTC_GUARDED_BY(capture_race_checker_);
|
||||
std::vector<std::vector<rtc::ArrayView<float>>> render_sub_frame_view_
|
||||
RTC_GUARDED_BY(capture_race_checker_);
|
||||
std::vector<std::vector<rtc::ArrayView<float>>> linear_output_sub_frame_view_
|
||||
|
@ -42,10 +42,12 @@ EchoPathDelayEstimator::EchoPathDelayEstimator(
|
||||
? config.render_levels.poor_excitation_render_limit_ds8
|
||||
: config.render_levels.poor_excitation_render_limit,
|
||||
config.delay.delay_estimate_smoothing,
|
||||
config.delay.delay_candidate_detection_threshold),
|
||||
config.delay.delay_estimate_smoothing_delay_found,
|
||||
config.delay.delay_candidate_detection_threshold,
|
||||
config.delay.detect_pre_echo),
|
||||
matched_filter_lag_aggregator_(data_dumper_,
|
||||
matched_filter_.GetMaxFilterLag(),
|
||||
config.delay.delay_selection_thresholds) {
|
||||
config.delay) {
|
||||
RTC_DCHECK(data_dumper);
|
||||
RTC_DCHECK(down_sampling_factor_ > 0);
|
||||
}
|
||||
@ -58,9 +60,7 @@ void EchoPathDelayEstimator::Reset(bool reset_delay_confidence) {
|
||||
|
||||
absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
|
||||
const DownsampledRenderBuffer& render_buffer,
|
||||
const std::vector<std::vector<float>>& capture) {
|
||||
RTC_DCHECK_EQ(kBlockSize, capture[0].size());
|
||||
|
||||
const Block& capture) {
|
||||
std::array<float, kBlockSize> downsampled_capture_data;
|
||||
rtc::ArrayView<float> downsampled_capture(downsampled_capture_data.data(),
|
||||
sub_block_size_);
|
||||
@ -71,17 +71,19 @@ absl::optional<DelayEstimate> EchoPathDelayEstimator::EstimateDelay(
|
||||
data_dumper_->DumpWav("aec3_capture_decimator_output",
|
||||
downsampled_capture.size(), downsampled_capture.data(),
|
||||
16000 / down_sampling_factor_, 1);
|
||||
matched_filter_.Update(render_buffer, downsampled_capture);
|
||||
matched_filter_.Update(render_buffer, downsampled_capture,
|
||||
matched_filter_lag_aggregator_.ReliableDelayFound());
|
||||
|
||||
absl::optional<DelayEstimate> aggregated_matched_filter_lag =
|
||||
matched_filter_lag_aggregator_.Aggregate(
|
||||
matched_filter_.GetLagEstimates());
|
||||
matched_filter_.GetBestLagEstimate());
|
||||
|
||||
// Run clockdrift detection.
|
||||
if (aggregated_matched_filter_lag &&
|
||||
(*aggregated_matched_filter_lag).quality ==
|
||||
DelayEstimate::Quality::kRefined)
|
||||
clockdrift_detector_.Update((*aggregated_matched_filter_lag).delay);
|
||||
clockdrift_detector_.Update(
|
||||
matched_filter_lag_aggregator_.GetDelayAtHighestPeak());
|
||||
|
||||
// TODO(peah): Move this logging outside of this class once EchoCanceller3
|
||||
// development is done.
|
||||
@ -118,7 +120,7 @@ void EchoPathDelayEstimator::Reset(bool reset_lag_aggregator,
|
||||
if (reset_lag_aggregator) {
|
||||
matched_filter_lag_aggregator_.Reset(reset_delay_confidence);
|
||||
}
|
||||
matched_filter_.Reset();
|
||||
matched_filter_.Reset(/*full_reset=*/reset_lag_aggregator);
|
||||
old_aggregated_lag_ = absl::nullopt;
|
||||
consistent_estimate_counter_ = 0;
|
||||
}
|
||||
|
@ -16,12 +16,12 @@
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/alignment_mixer.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/clockdrift_detector.h"
|
||||
#include "modules/audio_processing/aec3/decimator.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
#include "modules/audio_processing/aec3/matched_filter_lag_aggregator.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -37,6 +37,9 @@ class EchoPathDelayEstimator {
|
||||
size_t num_capture_channels);
|
||||
~EchoPathDelayEstimator();
|
||||
|
||||
EchoPathDelayEstimator(const EchoPathDelayEstimator&) = delete;
|
||||
EchoPathDelayEstimator& operator=(const EchoPathDelayEstimator&) = delete;
|
||||
|
||||
// Resets the estimation. If the delay confidence is reset, the reset behavior
|
||||
// is as if the call is restarted.
|
||||
void Reset(bool reset_delay_confidence);
|
||||
@ -44,7 +47,7 @@ class EchoPathDelayEstimator {
|
||||
// Produce a delay estimate if one is available.
|
||||
absl::optional<DelayEstimate> EstimateDelay(
|
||||
const DownsampledRenderBuffer& render_buffer,
|
||||
const std::vector<std::vector<float>>& capture);
|
||||
const Block& capture);
|
||||
|
||||
// Log delay estimator properties.
|
||||
void LogDelayEstimationProperties(int sample_rate_hz, size_t shift) const {
|
||||
@ -71,8 +74,6 @@ class EchoPathDelayEstimator {
|
||||
|
||||
// Internal reset method with more granularity.
|
||||
void Reset(bool reset_lag_aggregator, bool reset_delay_confidence);
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(EchoPathDelayEstimator);
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
|
@ -14,11 +14,7 @@
|
||||
namespace webrtc {
|
||||
|
||||
struct EchoPathVariability {
|
||||
enum class DelayAdjustment {
|
||||
kNone,
|
||||
kBufferFlush,
|
||||
kNewDetectedDelay
|
||||
};
|
||||
enum class DelayAdjustment { kNone, kBufferFlush, kNewDetectedDelay };
|
||||
|
||||
EchoPathVariability(bool gain_change,
|
||||
DelayAdjustment delay_change,
|
||||
|
@ -14,6 +14,7 @@
|
||||
|
||||
#include <algorithm>
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
|
||||
@ -33,7 +34,6 @@
|
||||
#include "modules/audio_processing/aec3/suppression_filter.h"
|
||||
#include "modules/audio_processing/aec3/suppression_gain.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
|
||||
@ -118,13 +118,12 @@ class EchoRemoverImpl final : public EchoRemover {
|
||||
// Removes the echo from a block of samples from the capture signal. The
|
||||
// supplied render signal is assumed to be pre-aligned with the capture
|
||||
// signal.
|
||||
void ProcessCapture(
|
||||
EchoPathVariability echo_path_variability,
|
||||
bool capture_signal_saturation,
|
||||
const absl::optional<DelayEstimate>& external_delay,
|
||||
RenderBuffer* render_buffer,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture) override;
|
||||
void ProcessCapture(EchoPathVariability echo_path_variability,
|
||||
bool capture_signal_saturation,
|
||||
const absl::optional<DelayEstimate>& external_delay,
|
||||
RenderBuffer* render_buffer,
|
||||
Block* linear_output,
|
||||
Block* capture) override;
|
||||
|
||||
// Updates the status on whether echo leakage is detected in the output of the
|
||||
// echo remover.
|
||||
@ -132,6 +131,10 @@ class EchoRemoverImpl final : public EchoRemover {
|
||||
echo_leakage_detected_ = leakage_detected;
|
||||
}
|
||||
|
||||
void SetCaptureOutputUsage(bool capture_output_used) override {
|
||||
capture_output_used_ = capture_output_used;
|
||||
}
|
||||
|
||||
private:
|
||||
// Selects which of the coarse and refined linear filter outputs that is most
|
||||
// appropriate to pass to the suppressor and forms the linear filter output by
|
||||
@ -139,7 +142,7 @@ class EchoRemoverImpl final : public EchoRemover {
|
||||
void FormLinearFilterOutput(const SubtractorOutput& subtractor_output,
|
||||
rtc::ArrayView<float> output);
|
||||
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
const EchoCanceller3Config config_;
|
||||
const Aec3Fft fft_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
@ -155,6 +158,7 @@ class EchoRemoverImpl final : public EchoRemover {
|
||||
RenderSignalAnalyzer render_signal_analyzer_;
|
||||
ResidualEchoEstimator residual_echo_estimator_;
|
||||
bool echo_leakage_detected_ = false;
|
||||
bool capture_output_used_ = true;
|
||||
AecState aec_state_;
|
||||
EchoRemoverMetrics metrics_;
|
||||
std::vector<std::array<float, kFftLengthBy2>> e_old_;
|
||||
@ -167,6 +171,7 @@ class EchoRemoverImpl final : public EchoRemover {
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> Y2_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> E2_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> R2_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> R2_unbounded_heap_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> S2_linear_heap_;
|
||||
std::vector<FftData> Y_heap_;
|
||||
std::vector<FftData> E_heap_;
|
||||
@ -175,7 +180,7 @@ class EchoRemoverImpl final : public EchoRemover {
|
||||
std::vector<SubtractorOutput> subtractor_output_heap_;
|
||||
};
|
||||
|
||||
int EchoRemoverImpl::instance_count_ = 0;
|
||||
std::atomic<int> EchoRemoverImpl::instance_count_(0);
|
||||
|
||||
EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
@ -183,8 +188,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels)
|
||||
: config_(config),
|
||||
fft_(),
|
||||
data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
optimization_(DetectOptimization()),
|
||||
sample_rate_hz_(sample_rate_hz),
|
||||
num_render_channels_(num_render_channels),
|
||||
@ -213,6 +217,7 @@ EchoRemoverImpl::EchoRemoverImpl(const EchoCanceller3Config& config,
|
||||
Y2_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
E2_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
R2_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
R2_unbounded_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
S2_linear_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
Y_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
E_heap_(NumChannelsOnHeap(num_capture_channels_)),
|
||||
@ -236,20 +241,17 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
bool capture_signal_saturation,
|
||||
const absl::optional<DelayEstimate>& external_delay,
|
||||
RenderBuffer* render_buffer,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture) {
|
||||
Block* linear_output,
|
||||
Block* capture) {
|
||||
++block_counter_;
|
||||
const std::vector<std::vector<std::vector<float>>>& x =
|
||||
render_buffer->Block(0);
|
||||
std::vector<std::vector<std::vector<float>>>* y = capture;
|
||||
const Block& x = render_buffer->GetBlock(0);
|
||||
Block* y = capture;
|
||||
RTC_DCHECK(render_buffer);
|
||||
RTC_DCHECK(y);
|
||||
RTC_DCHECK_EQ(x.size(), NumBandsForRate(sample_rate_hz_));
|
||||
RTC_DCHECK_EQ(y->size(), NumBandsForRate(sample_rate_hz_));
|
||||
RTC_DCHECK_EQ(x[0].size(), num_render_channels_);
|
||||
RTC_DCHECK_EQ((*y)[0].size(), num_capture_channels_);
|
||||
RTC_DCHECK_EQ(x[0][0].size(), kBlockSize);
|
||||
RTC_DCHECK_EQ((*y)[0][0].size(), kBlockSize);
|
||||
RTC_DCHECK_EQ(x.NumBands(), NumBandsForRate(sample_rate_hz_));
|
||||
RTC_DCHECK_EQ(y->NumBands(), NumBandsForRate(sample_rate_hz_));
|
||||
RTC_DCHECK_EQ(x.NumChannels(), num_render_channels_);
|
||||
RTC_DCHECK_EQ(y->NumChannels(), num_capture_channels_);
|
||||
|
||||
// Stack allocated data to use when the number of channels is low.
|
||||
std::array<std::array<float, kFftLengthBy2>, kMaxNumChannelsOnStack> e_stack;
|
||||
@ -259,6 +261,8 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
E2_stack;
|
||||
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
|
||||
R2_stack;
|
||||
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
|
||||
R2_unbounded_stack;
|
||||
std::array<std::array<float, kFftLengthBy2Plus1>, kMaxNumChannelsOnStack>
|
||||
S2_linear_stack;
|
||||
std::array<FftData, kMaxNumChannelsOnStack> Y_stack;
|
||||
@ -275,6 +279,8 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
E2_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2(
|
||||
R2_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded(
|
||||
R2_unbounded_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> S2_linear(
|
||||
S2_linear_stack.data(), num_capture_channels_);
|
||||
rtc::ArrayView<FftData> Y(Y_stack.data(), num_capture_channels_);
|
||||
@ -296,6 +302,8 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
E2_heap_.data(), num_capture_channels_);
|
||||
R2 = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
|
||||
R2_heap_.data(), num_capture_channels_);
|
||||
R2_unbounded = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
|
||||
R2_unbounded_heap_.data(), num_capture_channels_);
|
||||
S2_linear = rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>>(
|
||||
S2_linear_heap_.data(), num_capture_channels_);
|
||||
Y = rtc::ArrayView<FftData>(Y_heap_.data(), num_capture_channels_);
|
||||
@ -308,12 +316,14 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
subtractor_output_heap_.data(), num_capture_channels_);
|
||||
}
|
||||
|
||||
data_dumper_->DumpWav("aec3_echo_remover_capture_input", kBlockSize,
|
||||
&(*y)[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_echo_remover_render_input", kBlockSize,
|
||||
&x[0][0][0], 16000, 1);
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_capture_input", (*y)[0][0]);
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_render_input", x[0][0]);
|
||||
data_dumper_->DumpWav("aec3_echo_remover_capture_input",
|
||||
y->View(/*band=*/0, /*channel=*/0), 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_echo_remover_render_input",
|
||||
x.View(/*band=*/0, /*channel=*/0), 16000, 1);
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_capture_input",
|
||||
y->View(/*band=*/0, /*channel=*/0));
|
||||
data_dumper_->DumpRaw("aec3_echo_remover_render_input",
|
||||
x.View(/*band=*/0, /*channel=*/0));
|
||||
|
||||
aec_state_.UpdateCaptureSaturation(capture_signal_saturation);
|
||||
|
||||
@ -356,13 +366,13 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
}
|
||||
|
||||
// Perform linear echo cancellation.
|
||||
subtractor_.Process(*render_buffer, (*y)[0], render_signal_analyzer_,
|
||||
aec_state_, subtractor_output);
|
||||
subtractor_.Process(*render_buffer, *y, render_signal_analyzer_, aec_state_,
|
||||
subtractor_output);
|
||||
|
||||
// Compute spectra.
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
FormLinearFilterOutput(subtractor_output[ch], e[ch]);
|
||||
WindowedPaddedFft(fft_, (*y)[0][ch], y_old_[ch], &Y[ch]);
|
||||
WindowedPaddedFft(fft_, y->View(/*band=*/0, ch), y_old_[ch], &Y[ch]);
|
||||
WindowedPaddedFft(fft_, e[ch], e_old_[ch], &E[ch]);
|
||||
LinearEchoPower(E[ch], Y[ch], &S2_linear[ch]);
|
||||
Y[ch].Spectrum(optimization_, Y2[ch]);
|
||||
@ -371,11 +381,11 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
|
||||
// Optionally return the linear filter output.
|
||||
if (linear_output) {
|
||||
RTC_DCHECK_GE(1, linear_output->size());
|
||||
RTC_DCHECK_EQ(num_capture_channels_, linear_output[0].size());
|
||||
RTC_DCHECK_GE(1, linear_output->NumBands());
|
||||
RTC_DCHECK_EQ(num_capture_channels_, linear_output->NumChannels());
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
RTC_DCHECK_EQ(kBlockSize, (*linear_output)[0][ch].size());
|
||||
std::copy(e[ch].begin(), e[ch].end(), (*linear_output)[0][ch].begin());
|
||||
std::copy(e[ch].begin(), e[ch].end(),
|
||||
linear_output->begin(/*band=*/0, ch));
|
||||
}
|
||||
}
|
||||
|
||||
@ -387,42 +397,54 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
// Choose the linear output.
|
||||
const auto& Y_fft = aec_state_.UseLinearFilterOutput() ? E : Y;
|
||||
|
||||
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &(*y)[0][0][0], 16000,
|
||||
1);
|
||||
data_dumper_->DumpWav("aec3_output_linear",
|
||||
y->View(/*band=*/0, /*channel=*/0), 16000, 1);
|
||||
data_dumper_->DumpWav("aec3_output_linear2", kBlockSize, &e[0][0], 16000, 1);
|
||||
|
||||
// Estimate the residual echo power.
|
||||
residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
|
||||
R2);
|
||||
|
||||
// Estimate the comfort noise.
|
||||
cng_.Compute(aec_state_.SaturatedCapture(), Y2, comfort_noise,
|
||||
high_band_comfort_noise);
|
||||
|
||||
// Suppressor nearend estimate.
|
||||
if (aec_state_.UsableLinearEstimate()) {
|
||||
// E2 is bound by Y2.
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
|
||||
E2[ch].begin(),
|
||||
[](float a, float b) { return std::min(a, b); });
|
||||
}
|
||||
}
|
||||
const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
|
||||
|
||||
// Suppressor echo estimate.
|
||||
const auto& echo_spectrum =
|
||||
aec_state_.UsableLinearEstimate() ? S2_linear : R2;
|
||||
|
||||
// Compute preferred gains.
|
||||
float high_bands_gain;
|
||||
// Only do the below processing if the output of the audio processing module
|
||||
// is used.
|
||||
std::array<float, kFftLengthBy2Plus1> G;
|
||||
suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2,
|
||||
cng_.NoiseSpectrum(), render_signal_analyzer_,
|
||||
aec_state_, x, &high_bands_gain, &G);
|
||||
if (capture_output_used_) {
|
||||
// Estimate the residual echo power.
|
||||
residual_echo_estimator_.Estimate(aec_state_, *render_buffer, S2_linear, Y2,
|
||||
suppression_gain_.IsDominantNearend(), R2,
|
||||
R2_unbounded);
|
||||
|
||||
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
|
||||
high_bands_gain, Y_fft, y);
|
||||
// Suppressor nearend estimate.
|
||||
if (aec_state_.UsableLinearEstimate()) {
|
||||
// E2 is bound by Y2.
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
std::transform(E2[ch].begin(), E2[ch].end(), Y2[ch].begin(),
|
||||
E2[ch].begin(),
|
||||
[](float a, float b) { return std::min(a, b); });
|
||||
}
|
||||
}
|
||||
const auto& nearend_spectrum = aec_state_.UsableLinearEstimate() ? E2 : Y2;
|
||||
|
||||
// Suppressor echo estimate.
|
||||
const auto& echo_spectrum =
|
||||
aec_state_.UsableLinearEstimate() ? S2_linear : R2;
|
||||
|
||||
// Determine if the suppressor should assume clock drift.
|
||||
const bool clock_drift = config_.echo_removal_control.has_clock_drift ||
|
||||
echo_path_variability.clock_drift;
|
||||
|
||||
// Compute preferred gains.
|
||||
float high_bands_gain;
|
||||
suppression_gain_.GetGain(nearend_spectrum, echo_spectrum, R2, R2_unbounded,
|
||||
cng_.NoiseSpectrum(), render_signal_analyzer_,
|
||||
aec_state_, x, clock_drift, &high_bands_gain, &G);
|
||||
|
||||
suppression_filter_.ApplyGain(comfort_noise, high_band_comfort_noise, G,
|
||||
high_bands_gain, Y_fft, y);
|
||||
|
||||
} else {
|
||||
G.fill(0.f);
|
||||
}
|
||||
|
||||
// Update the metrics.
|
||||
metrics_.Update(aec_state_, cng_.NoiseSpectrum()[0], G);
|
||||
@ -430,13 +452,12 @@ void EchoRemoverImpl::ProcessCapture(
|
||||
// Debug outputs for the purpose of development and analysis.
|
||||
data_dumper_->DumpWav("aec3_echo_estimate", kBlockSize,
|
||||
&subtractor_output[0].s_refined[0], 16000, 1);
|
||||
data_dumper_->DumpRaw("aec3_output", (*y)[0][0]);
|
||||
data_dumper_->DumpRaw("aec3_output", y->View(/*band=*/0, /*channel=*/0));
|
||||
data_dumper_->DumpRaw("aec3_narrow_render",
|
||||
render_signal_analyzer_.NarrowPeakBand() ? 1 : 0);
|
||||
data_dumper_->DumpRaw("aec3_N2", cng_.NoiseSpectrum()[0]);
|
||||
data_dumper_->DumpRaw("aec3_suppressor_gain", G);
|
||||
data_dumper_->DumpWav("aec3_output",
|
||||
rtc::ArrayView<const float>(&(*y)[0][0][0], kBlockSize),
|
||||
data_dumper_->DumpWav("aec3_output", y->View(/*band=*/0, /*channel=*/0),
|
||||
16000, 1);
|
||||
data_dumper_->DumpRaw("aec3_using_subtractor_output[0]",
|
||||
aec_state_.UseLinearFilterOutput() ? 1 : 0);
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "api/audio/echo_control.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
@ -42,12 +43,18 @@ class EchoRemover {
|
||||
bool capture_signal_saturation,
|
||||
const absl::optional<DelayEstimate>& external_delay,
|
||||
RenderBuffer* render_buffer,
|
||||
std::vector<std::vector<std::vector<float>>>* linear_output,
|
||||
std::vector<std::vector<std::vector<float>>>* capture) = 0;
|
||||
Block* linear_output,
|
||||
Block* capture) = 0;
|
||||
|
||||
// Updates the status on whether echo leakage is detected in the output of the
|
||||
// echo remover.
|
||||
virtual void UpdateEchoLeakageStatus(bool leakage_detected) = 0;
|
||||
|
||||
// Specifies whether the capture output will be used. The purpose of this is
|
||||
// to allow the echo remover to deactivate some of the processing when the
|
||||
// resulting output is anyway not used, for instance when the endpoint is
|
||||
// muted.
|
||||
virtual void SetCaptureOutputUsage(bool capture_output_used) = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -23,12 +23,6 @@
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr float kOneByMetricsCollectionBlocks = 1.f / kMetricsCollectionBlocks;
|
||||
|
||||
} // namespace
|
||||
|
||||
EchoRemoverMetrics::DbMetric::DbMetric() : DbMetric(0.f, 0.f, 0.f) {}
|
||||
EchoRemoverMetrics::DbMetric::DbMetric(float sum_value,
|
||||
float floor_value,
|
||||
@ -52,11 +46,8 @@ EchoRemoverMetrics::EchoRemoverMetrics() {
|
||||
}
|
||||
|
||||
void EchoRemoverMetrics::ResetMetrics() {
|
||||
erl_.fill(DbMetric(0.f, 10000.f, 0.000f));
|
||||
erl_time_domain_ = DbMetric(0.f, 10000.f, 0.000f);
|
||||
erle_.fill(DbMetric(0.f, 0.f, 1000.f));
|
||||
erle_time_domain_ = DbMetric(0.f, 0.f, 1000.f);
|
||||
active_render_count_ = 0;
|
||||
saturated_capture_ = false;
|
||||
}
|
||||
|
||||
@ -66,104 +57,24 @@ void EchoRemoverMetrics::Update(
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppressor_gain) {
|
||||
metrics_reported_ = false;
|
||||
if (++block_counter_ <= kMetricsCollectionBlocks) {
|
||||
aec3::UpdateDbMetric(aec_state.Erl(), &erl_);
|
||||
erl_time_domain_.UpdateInstant(aec_state.ErlTimeDomain());
|
||||
aec3::UpdateDbMetric(aec_state.Erle()[0], &erle_);
|
||||
erle_time_domain_.UpdateInstant(aec_state.FullBandErleLog2());
|
||||
active_render_count_ += (aec_state.ActiveRender() ? 1 : 0);
|
||||
saturated_capture_ = saturated_capture_ || aec_state.SaturatedCapture();
|
||||
} else {
|
||||
// Report the metrics over several frames in order to lower the impact of
|
||||
// the logarithms involved on the computational complexity.
|
||||
constexpr int kMetricsCollectionBlocksBy2 = kMetricsCollectionBlocks / 2;
|
||||
switch (block_counter_) {
|
||||
case kMetricsCollectionBlocks + 1:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErleBand0.Average",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f,
|
||||
kOneByMetricsCollectionBlocks,
|
||||
erle_[0].sum_value),
|
||||
0, 19, 20);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErleBand0.Max",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
|
||||
erle_[0].ceil_value),
|
||||
0, 19, 20);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErleBand0.Min",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
|
||||
erle_[0].floor_value),
|
||||
0, 19, 20);
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 2:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErleBand1.Average",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f,
|
||||
kOneByMetricsCollectionBlocks,
|
||||
erle_[1].sum_value),
|
||||
0, 19, 20);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErleBand1.Max",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
|
||||
erle_[1].ceil_value),
|
||||
0, 19, 20);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErleBand1.Min",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 19.f, 0.f, 1.f,
|
||||
erle_[1].floor_value),
|
||||
0, 19, 20);
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 3:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErlBand0.Average",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f,
|
||||
kOneByMetricsCollectionBlocks,
|
||||
erl_[0].sum_value),
|
||||
0, 59, 30);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErlBand0.Max",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
erl_[0].ceil_value),
|
||||
0, 59, 30);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErlBand0.Min",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
erl_[0].floor_value),
|
||||
0, 59, 30);
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 4:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErlBand1.Average",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f,
|
||||
kOneByMetricsCollectionBlocks,
|
||||
erl_[1].sum_value),
|
||||
0, 59, 30);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErlBand1.Max",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
erl_[1].ceil_value),
|
||||
0, 59, 30);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.ErlBand1.Min",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
erl_[1].floor_value),
|
||||
0, 59, 30);
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 5:
|
||||
RTC_HISTOGRAM_BOOLEAN(
|
||||
"WebRTC.Audio.EchoCanceller.UsableLinearEstimate",
|
||||
static_cast<int>(aec_state.UsableLinearEstimate() ? 1 : 0));
|
||||
RTC_HISTOGRAM_BOOLEAN(
|
||||
"WebRTC.Audio.EchoCanceller.ActiveRender",
|
||||
static_cast<int>(
|
||||
active_render_count_ > kMetricsCollectionBlocksBy2 ? 1 : 0));
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.FilterDelay",
|
||||
aec_state.MinDirectPathFilterDelay(), 0, 30,
|
||||
31);
|
||||
RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.EchoCanceller.CaptureSaturation",
|
||||
static_cast<int>(saturated_capture_ ? 1 : 0));
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 6:
|
||||
case kMetricsCollectionBlocks + 2:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erl.Value",
|
||||
aec3::TransformDbMetricForReporting(true, 0.f, 59.f, 30.f, 1.f,
|
||||
@ -180,7 +91,7 @@ void EchoRemoverMetrics::Update(
|
||||
erl_time_domain_.floor_value),
|
||||
0, 59, 30);
|
||||
break;
|
||||
case kMetricsCollectionBlocks + 7:
|
||||
case kMetricsCollectionBlocks + 3:
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR(
|
||||
"WebRTC.Audio.EchoCanceller.Erle.Value",
|
||||
aec3::TransformDbMetricForReporting(false, 0.f, 19.f, 0.f, 1.f,
|
||||
@ -202,7 +113,7 @@ void EchoRemoverMetrics::Update(
|
||||
ResetMetrics();
|
||||
break;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -34,6 +33,9 @@ class EchoRemoverMetrics {
|
||||
|
||||
EchoRemoverMetrics();
|
||||
|
||||
EchoRemoverMetrics(const EchoRemoverMetrics&) = delete;
|
||||
EchoRemoverMetrics& operator=(const EchoRemoverMetrics&) = delete;
|
||||
|
||||
// Updates the metric with new data.
|
||||
void Update(
|
||||
const AecState& aec_state,
|
||||
@ -48,15 +50,10 @@ class EchoRemoverMetrics {
|
||||
void ResetMetrics();
|
||||
|
||||
int block_counter_ = 0;
|
||||
std::array<DbMetric, 2> erl_;
|
||||
DbMetric erl_time_domain_;
|
||||
std::array<DbMetric, 2> erle_;
|
||||
DbMetric erle_time_domain_;
|
||||
int active_render_count_ = 0;
|
||||
bool saturated_capture_ = false;
|
||||
bool metrics_reported_ = false;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(EchoRemoverMetrics);
|
||||
};
|
||||
|
||||
namespace aec3 {
|
||||
|
@ -18,7 +18,6 @@
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -28,6 +27,9 @@ class ErlEstimator {
|
||||
explicit ErlEstimator(size_t startup_phase_length_blocks_);
|
||||
~ErlEstimator();
|
||||
|
||||
ErlEstimator(const ErlEstimator&) = delete;
|
||||
ErlEstimator& operator=(const ErlEstimator&) = delete;
|
||||
|
||||
// Resets the ERL estimation.
|
||||
void Reset();
|
||||
|
||||
@ -49,7 +51,6 @@ class ErlEstimator {
|
||||
float erl_time_domain_;
|
||||
int hold_counter_time_domain_;
|
||||
size_t blocks_since_reset_ = 0;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(ErlEstimator);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -52,8 +52,9 @@ void ErleEstimator::Update(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
subtractor_spectra,
|
||||
const std::vector<bool>& converged_filters) {
|
||||
RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(), capture_spectra.size());
|
||||
RTC_DCHECK_EQ(subband_erle_estimator_.Erle().size(),
|
||||
RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
|
||||
capture_spectra.size());
|
||||
RTC_DCHECK_EQ(subband_erle_estimator_.Erle(/*onset_compensated=*/true).size(),
|
||||
subtractor_spectra.size());
|
||||
const auto& X2_reverb = avg_render_spectrum_with_reverb;
|
||||
const auto& Y2 = capture_spectra;
|
||||
@ -68,7 +69,9 @@ void ErleEstimator::Update(
|
||||
if (signal_dependent_erle_estimator_) {
|
||||
signal_dependent_erle_estimator_->Update(
|
||||
render_buffer, filter_frequency_responses, X2_reverb, Y2, E2,
|
||||
subband_erle_estimator_.Erle(), converged_filters);
|
||||
subband_erle_estimator_.Erle(/*onset_compensated=*/false),
|
||||
subband_erle_estimator_.Erle(/*onset_compensated=*/true),
|
||||
converged_filters);
|
||||
}
|
||||
|
||||
fullband_erle_estimator_.Update(X2_reverb, Y2, E2, converged_filters);
|
||||
|
@ -55,17 +55,30 @@ class ErleEstimator {
|
||||
const std::vector<bool>& converged_filters);
|
||||
|
||||
// Returns the most recent subband ERLE estimates.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
|
||||
bool onset_compensated) const {
|
||||
return signal_dependent_erle_estimator_
|
||||
? signal_dependent_erle_estimator_->Erle()
|
||||
: subband_erle_estimator_.Erle();
|
||||
? signal_dependent_erle_estimator_->Erle(onset_compensated)
|
||||
: subband_erle_estimator_.Erle(onset_compensated);
|
||||
}
|
||||
|
||||
// Returns the non-capped subband ERLE.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
|
||||
const {
|
||||
// Unbounded ERLE is only used with the subband erle estimator where the
|
||||
// ERLE is often capped at low values. When the signal dependent ERLE
|
||||
// estimator is used the capped ERLE is returned.
|
||||
return !signal_dependent_erle_estimator_
|
||||
? subband_erle_estimator_.ErleUnbounded()
|
||||
: signal_dependent_erle_estimator_->Erle(
|
||||
/*onset_compensated=*/false);
|
||||
}
|
||||
|
||||
// Returns the subband ERLE that are estimated during onsets (only used for
|
||||
// testing).
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleOnsets()
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
|
||||
const {
|
||||
return subband_erle_estimator_.ErleOnsets();
|
||||
return subband_erle_estimator_.ErleDuringOnsets();
|
||||
}
|
||||
|
||||
// Returns the fullband ERLE estimate.
|
||||
|
@ -8,11 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
|
@ -19,7 +19,6 @@
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -45,12 +44,11 @@ size_t FindPeakIndex(rtc::ArrayView<const float> filter_time_domain,
|
||||
|
||||
} // namespace
|
||||
|
||||
int FilterAnalyzer::instance_count_ = 0;
|
||||
std::atomic<int> FilterAnalyzer::instance_count_(0);
|
||||
|
||||
FilterAnalyzer::FilterAnalyzer(const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
bounded_erl_(config.ep_strength.bounded_erl),
|
||||
default_gain_(config.ep_strength.default_gain),
|
||||
h_highpass_(num_capture_channels,
|
||||
@ -131,7 +129,7 @@ void FilterAnalyzer::AnalyzeRegion(
|
||||
|
||||
st_ch.consistent_estimate = st_ch.consistent_filter_detector.Detect(
|
||||
h_highpass_[ch], region_,
|
||||
render_buffer.Block(-filter_delays_blocks_[ch])[0], st_ch.peak_index,
|
||||
render_buffer.GetBlock(-filter_delays_blocks_[ch]), st_ch.peak_index,
|
||||
filter_delays_blocks_[ch]);
|
||||
}
|
||||
}
|
||||
@ -170,11 +168,16 @@ void FilterAnalyzer::PreProcessFilters(
|
||||
|
||||
std::fill(h_highpass_[ch].begin() + region_.start_sample_,
|
||||
h_highpass_[ch].begin() + region_.end_sample_ + 1, 0.f);
|
||||
float* h_highpass_ch = h_highpass_[ch].data();
|
||||
const float* filters_time_domain_ch = filters_time_domain[ch].data();
|
||||
const size_t region_end = region_.end_sample_;
|
||||
for (size_t k = std::max(h.size() - 1, region_.start_sample_);
|
||||
k <= region_.end_sample_; ++k) {
|
||||
k <= region_end; ++k) {
|
||||
float tmp = h_highpass_ch[k];
|
||||
for (size_t j = 0; j < h.size(); ++j) {
|
||||
h_highpass_[ch][k] += filters_time_domain[ch][k - j] * h[j];
|
||||
tmp += filters_time_domain_ch[k - j] * h[j];
|
||||
}
|
||||
h_highpass_ch[k] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -219,7 +222,7 @@ void FilterAnalyzer::ConsistentFilterDetector::Reset() {
|
||||
bool FilterAnalyzer::ConsistentFilterDetector::Detect(
|
||||
rtc::ArrayView<const float> filter_to_analyze,
|
||||
const FilterRegion& region,
|
||||
rtc::ArrayView<const std::vector<float>> x_block,
|
||||
const Block& x_block,
|
||||
size_t peak_index,
|
||||
int delay_blocks) {
|
||||
if (region.start_sample_ == 0) {
|
||||
@ -230,19 +233,23 @@ bool FilterAnalyzer::ConsistentFilterDetector::Detect(
|
||||
peak_index > filter_to_analyze.size() - 129 ? 0 : peak_index + 128;
|
||||
}
|
||||
|
||||
float filter_floor_accum = filter_floor_accum_;
|
||||
float filter_secondary_peak = filter_secondary_peak_;
|
||||
for (size_t k = region.start_sample_;
|
||||
k < std::min(region.end_sample_ + 1, filter_floor_low_limit_); ++k) {
|
||||
float abs_h = fabsf(filter_to_analyze[k]);
|
||||
filter_floor_accum_ += abs_h;
|
||||
filter_secondary_peak_ = std::max(filter_secondary_peak_, abs_h);
|
||||
filter_floor_accum += abs_h;
|
||||
filter_secondary_peak = std::max(filter_secondary_peak, abs_h);
|
||||
}
|
||||
|
||||
for (size_t k = std::max(filter_floor_high_limit_, region.start_sample_);
|
||||
k <= region.end_sample_; ++k) {
|
||||
float abs_h = fabsf(filter_to_analyze[k]);
|
||||
filter_floor_accum_ += abs_h;
|
||||
filter_secondary_peak_ = std::max(filter_secondary_peak_, abs_h);
|
||||
filter_floor_accum += abs_h;
|
||||
filter_secondary_peak = std::max(filter_secondary_peak, abs_h);
|
||||
}
|
||||
filter_floor_accum_ = filter_floor_accum;
|
||||
filter_secondary_peak_ = filter_secondary_peak;
|
||||
|
||||
if (region.end_sample_ == filter_to_analyze.size() - 1) {
|
||||
float filter_floor = filter_floor_accum_ /
|
||||
@ -256,7 +263,9 @@ bool FilterAnalyzer::ConsistentFilterDetector::Detect(
|
||||
|
||||
if (significant_peak_) {
|
||||
bool active_render_block = false;
|
||||
for (auto& x_channel : x_block) {
|
||||
for (int ch = 0; ch < x_block.NumChannels(); ++ch) {
|
||||
rtc::ArrayView<const float, kBlockSize> x_channel =
|
||||
x_block.View(/*band=*/0, ch);
|
||||
const float x_energy = std::inner_product(
|
||||
x_channel.begin(), x_channel.end(), x_channel.begin(), 0.f);
|
||||
if (x_energy > active_render_threshold_) {
|
||||
|
@ -14,13 +14,14 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -94,7 +95,7 @@ class FilterAnalyzer {
|
||||
void Reset();
|
||||
bool Detect(rtc::ArrayView<const float> filter_to_analyze,
|
||||
const FilterRegion& region,
|
||||
rtc::ArrayView<const std::vector<float>> x_block,
|
||||
const Block& x_block,
|
||||
size_t peak_index,
|
||||
int delay_blocks);
|
||||
|
||||
@ -129,7 +130,7 @@ class FilterAnalyzer {
|
||||
ConsistentFilterDetector consistent_filter_detector;
|
||||
};
|
||||
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const bool bounded_erl_;
|
||||
const float default_gain_;
|
||||
|
@ -33,26 +33,22 @@ FrameBlocker::~FrameBlocker() = default;
|
||||
|
||||
void FrameBlocker::InsertSubFrameAndExtractBlock(
|
||||
const std::vector<std::vector<rtc::ArrayView<float>>>& sub_frame,
|
||||
std::vector<std::vector<std::vector<float>>>* block) {
|
||||
Block* block) {
|
||||
RTC_DCHECK(block);
|
||||
RTC_DCHECK_EQ(num_bands_, block->size());
|
||||
RTC_DCHECK_EQ(num_bands_, block->NumBands());
|
||||
RTC_DCHECK_EQ(num_bands_, sub_frame.size());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, (*block)[band].size());
|
||||
RTC_DCHECK_EQ(num_channels_, block->NumChannels());
|
||||
RTC_DCHECK_EQ(num_channels_, sub_frame[band].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_GE(kBlockSize - 16, buffer_[band][channel].size());
|
||||
RTC_DCHECK_EQ(kBlockSize, (*block)[band][channel].size());
|
||||
RTC_DCHECK_EQ(kSubFrameLength, sub_frame[band][channel].size());
|
||||
const int samples_to_block = kBlockSize - buffer_[band][channel].size();
|
||||
(*block)[band][channel].clear();
|
||||
(*block)[band][channel].insert((*block)[band][channel].begin(),
|
||||
buffer_[band][channel].begin(),
|
||||
buffer_[band][channel].end());
|
||||
(*block)[band][channel].insert(
|
||||
(*block)[band][channel].begin() + buffer_[band][channel].size(),
|
||||
sub_frame[band][channel].begin(),
|
||||
sub_frame[band][channel].begin() + samples_to_block);
|
||||
std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
|
||||
block->begin(band, channel));
|
||||
std::copy(sub_frame[band][channel].begin(),
|
||||
sub_frame[band][channel].begin() + samples_to_block,
|
||||
block->begin(band, channel) + kBlockSize - samples_to_block);
|
||||
buffer_[band][channel].clear();
|
||||
buffer_[band][channel].insert(
|
||||
buffer_[band][channel].begin(),
|
||||
@ -66,20 +62,16 @@ bool FrameBlocker::IsBlockAvailable() const {
|
||||
return kBlockSize == buffer_[0][0].size();
|
||||
}
|
||||
|
||||
void FrameBlocker::ExtractBlock(
|
||||
std::vector<std::vector<std::vector<float>>>* block) {
|
||||
void FrameBlocker::ExtractBlock(Block* block) {
|
||||
RTC_DCHECK(block);
|
||||
RTC_DCHECK_EQ(num_bands_, block->size());
|
||||
RTC_DCHECK_EQ(num_bands_, block->NumBands());
|
||||
RTC_DCHECK_EQ(num_channels_, block->NumChannels());
|
||||
RTC_DCHECK(IsBlockAvailable());
|
||||
for (size_t band = 0; band < num_bands_; ++band) {
|
||||
RTC_DCHECK_EQ(num_channels_, (*block)[band].size());
|
||||
for (size_t channel = 0; channel < num_channels_; ++channel) {
|
||||
RTC_DCHECK_EQ(kBlockSize, buffer_[band][channel].size());
|
||||
RTC_DCHECK_EQ(kBlockSize, (*block)[band][channel].size());
|
||||
(*block)[band][channel].clear();
|
||||
(*block)[band][channel].insert((*block)[band][channel].begin(),
|
||||
buffer_[band][channel].begin(),
|
||||
buffer_[band][channel].end());
|
||||
std::copy(buffer_[band][channel].begin(), buffer_[band][channel].end(),
|
||||
block->begin(band, channel));
|
||||
buffer_[band][channel].clear();
|
||||
}
|
||||
}
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -33,12 +34,12 @@ class FrameBlocker {
|
||||
// extracts one 64 sample multiband block.
|
||||
void InsertSubFrameAndExtractBlock(
|
||||
const std::vector<std::vector<rtc::ArrayView<float>>>& sub_frame,
|
||||
std::vector<std::vector<std::vector<float>>>* block);
|
||||
Block* block);
|
||||
// Reports whether a multiband block of 64 samples is available for
|
||||
// extraction.
|
||||
bool IsBlockAvailable() const;
|
||||
// Extracts a multiband block of 64 samples.
|
||||
void ExtractBlock(std::vector<std::vector<std::vector<float>>>* block);
|
||||
void ExtractBlock(Block* block);
|
||||
|
||||
private:
|
||||
const size_t num_bands_;
|
||||
|
@ -34,8 +34,8 @@ FullBandErleEstimator::FullBandErleEstimator(
|
||||
const EchoCanceller3Config::Erle& config,
|
||||
size_t num_capture_channels)
|
||||
: min_erle_log2_(FastApproxLog2f(config.min + kEpsilon)),
|
||||
max_erle_lf_log2(FastApproxLog2f(config.max_l + kEpsilon)),
|
||||
hold_counters_time_domain_(num_capture_channels, 0),
|
||||
max_erle_lf_log2_(FastApproxLog2f(config.max_l + kEpsilon)),
|
||||
hold_counters_instantaneous_erle_(num_capture_channels, 0),
|
||||
erle_time_domain_log2_(num_capture_channels, min_erle_log2_),
|
||||
instantaneous_erle_(num_capture_channels, ErleInstantaneous(config)),
|
||||
linear_filters_qualities_(num_capture_channels) {
|
||||
@ -52,8 +52,8 @@ void FullBandErleEstimator::Reset() {
|
||||
UpdateQualityEstimates();
|
||||
std::fill(erle_time_domain_log2_.begin(), erle_time_domain_log2_.end(),
|
||||
min_erle_log2_);
|
||||
std::fill(hold_counters_time_domain_.begin(),
|
||||
hold_counters_time_domain_.end(), 0);
|
||||
std::fill(hold_counters_instantaneous_erle_.begin(),
|
||||
hold_counters_instantaneous_erle_.end(), 0);
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::Update(
|
||||
@ -71,21 +71,17 @@ void FullBandErleEstimator::Update(
|
||||
const float E2_sum =
|
||||
std::accumulate(E2[ch].begin(), E2[ch].end(), 0.0f);
|
||||
if (instantaneous_erle_[ch].Update(Y2_sum, E2_sum)) {
|
||||
hold_counters_time_domain_[ch] = kBlocksToHoldErle;
|
||||
hold_counters_instantaneous_erle_[ch] = kBlocksToHoldErle;
|
||||
erle_time_domain_log2_[ch] +=
|
||||
0.1f * ((instantaneous_erle_[ch].GetInstErleLog2().value()) -
|
||||
erle_time_domain_log2_[ch]);
|
||||
erle_time_domain_log2_[ch] = rtc::SafeClamp(
|
||||
erle_time_domain_log2_[ch], min_erle_log2_, max_erle_lf_log2);
|
||||
0.05f * ((instantaneous_erle_[ch].GetInstErleLog2().value()) -
|
||||
erle_time_domain_log2_[ch]);
|
||||
erle_time_domain_log2_[ch] =
|
||||
std::max(erle_time_domain_log2_[ch], min_erle_log2_);
|
||||
}
|
||||
}
|
||||
}
|
||||
--hold_counters_time_domain_[ch];
|
||||
if (hold_counters_time_domain_[ch] <= 0) {
|
||||
erle_time_domain_log2_[ch] =
|
||||
std::max(min_erle_log2_, erle_time_domain_log2_[ch] - 0.044f);
|
||||
}
|
||||
if (hold_counters_time_domain_[ch] == 0) {
|
||||
--hold_counters_instantaneous_erle_[ch];
|
||||
if (hold_counters_instantaneous_erle_[ch] == 0) {
|
||||
instantaneous_erle_[ch].ResetAccumulators();
|
||||
}
|
||||
}
|
||||
@ -166,17 +162,12 @@ void FullBandErleEstimator::ErleInstantaneous::Dump(
|
||||
|
||||
void FullBandErleEstimator::ErleInstantaneous::UpdateMaxMin() {
|
||||
RTC_DCHECK(erle_log2_);
|
||||
if (erle_log2_.value() > max_erle_log2_) {
|
||||
max_erle_log2_ = erle_log2_.value();
|
||||
} else {
|
||||
max_erle_log2_ -= 0.0004; // Forget factor, approx 1dB every 3 sec.
|
||||
}
|
||||
|
||||
if (erle_log2_.value() < min_erle_log2_) {
|
||||
min_erle_log2_ = erle_log2_.value();
|
||||
} else {
|
||||
min_erle_log2_ += 0.0004; // Forget factor, approx 1dB every 3 sec.
|
||||
}
|
||||
// Adding the forgetting factors for the maximum and minimum and capping the
|
||||
// result to the incoming value.
|
||||
max_erle_log2_ -= 0.0004f; // Forget factor, approx 1dB every 3 sec.
|
||||
max_erle_log2_ = std::max(max_erle_log2_, erle_log2_.value());
|
||||
min_erle_log2_ += 0.0004f; // Forget factor, approx 1dB every 3 sec.
|
||||
min_erle_log2_ = std::min(min_erle_log2_, erle_log2_.value());
|
||||
}
|
||||
|
||||
void FullBandErleEstimator::ErleInstantaneous::UpdateQualityEstimate() {
|
||||
|
@ -67,7 +67,7 @@ class FullBandErleEstimator {
|
||||
// Updates the estimator with a new point, returns true
|
||||
// if the instantaneous ERLE was updated due to having enough
|
||||
// points for performing the estimate.
|
||||
bool Update(const float Y2_sum, const float E2_sum);
|
||||
bool Update(float Y2_sum, float E2_sum);
|
||||
// Resets the instantaneous ERLE estimator to its initial state.
|
||||
void Reset();
|
||||
// Resets the members related with an instantaneous estimate.
|
||||
@ -106,8 +106,8 @@ class FullBandErleEstimator {
|
||||
};
|
||||
|
||||
const float min_erle_log2_;
|
||||
const float max_erle_lf_log2;
|
||||
std::vector<int> hold_counters_time_domain_;
|
||||
const float max_erle_lf_log2_;
|
||||
std::vector<int> hold_counters_instantaneous_erle_;
|
||||
std::vector<float> erle_time_domain_log2_;
|
||||
std::vector<ErleInstantaneous> instantaneous_erle_;
|
||||
std::vector<absl::optional<float>> linear_filters_qualities_;
|
||||
|
@ -24,16 +24,220 @@
|
||||
#include <iterator>
|
||||
#include <numeric>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/experiments/field_trial_parser.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace {
|
||||
|
||||
// Subsample rate used for computing the accumulated error.
|
||||
// The implementation of some core functions depends on this constant being
|
||||
// equal to 4.
|
||||
constexpr int kAccumulatedErrorSubSampleRate = 4;
|
||||
|
||||
void UpdateAccumulatedError(
|
||||
const rtc::ArrayView<const float> instantaneous_accumulated_error,
|
||||
const rtc::ArrayView<float> accumulated_error,
|
||||
float one_over_error_sum_anchor,
|
||||
float smooth_constant_increases) {
|
||||
for (size_t k = 0; k < instantaneous_accumulated_error.size(); ++k) {
|
||||
float error_norm =
|
||||
instantaneous_accumulated_error[k] * one_over_error_sum_anchor;
|
||||
if (error_norm < accumulated_error[k]) {
|
||||
accumulated_error[k] = error_norm;
|
||||
} else {
|
||||
accumulated_error[k] +=
|
||||
smooth_constant_increases * (error_norm - accumulated_error[k]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
size_t ComputePreEchoLag(
|
||||
const webrtc::MatchedFilter::PreEchoConfiguration& pre_echo_configuration,
|
||||
const rtc::ArrayView<const float> accumulated_error,
|
||||
size_t lag,
|
||||
size_t alignment_shift_winner) {
|
||||
RTC_DCHECK_GE(lag, alignment_shift_winner);
|
||||
size_t pre_echo_lag_estimate = lag - alignment_shift_winner;
|
||||
size_t maximum_pre_echo_lag =
|
||||
std::min(pre_echo_lag_estimate / kAccumulatedErrorSubSampleRate,
|
||||
accumulated_error.size());
|
||||
switch (pre_echo_configuration.mode) {
|
||||
case 0:
|
||||
// Mode 0: Pre echo lag is defined as the first coefficient with an error
|
||||
// lower than a threshold with a certain decrease slope.
|
||||
for (size_t k = 1; k < maximum_pre_echo_lag; ++k) {
|
||||
if (accumulated_error[k] <
|
||||
pre_echo_configuration.threshold * accumulated_error[k - 1] &&
|
||||
accumulated_error[k] < pre_echo_configuration.threshold) {
|
||||
pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
// Mode 1: Pre echo lag is defined as the first coefficient with an error
|
||||
// lower than a certain threshold.
|
||||
for (size_t k = 0; k < maximum_pre_echo_lag; ++k) {
|
||||
if (accumulated_error[k] < pre_echo_configuration.threshold) {
|
||||
pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
case 3:
|
||||
// Mode 2,3: Pre echo lag is defined as the closest coefficient to the lag
|
||||
// with an error lower than a certain threshold.
|
||||
for (int k = static_cast<int>(maximum_pre_echo_lag) - 1; k >= 0; --k) {
|
||||
if (accumulated_error[k] > pre_echo_configuration.threshold) {
|
||||
break;
|
||||
}
|
||||
pre_echo_lag_estimate = (k + 1) * kAccumulatedErrorSubSampleRate - 1;
|
||||
}
|
||||
break;
|
||||
default:
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
break;
|
||||
}
|
||||
return pre_echo_lag_estimate + alignment_shift_winner;
|
||||
}
|
||||
|
||||
// Builds the pre-echo configuration from the "WebRTC-Aec3PreEchoConfiguration"
// field trial.
//
// The trial string may carry a "threshold" and a "mode" parameter. A threshold
// is accepted only when it lies strictly between 0 and 1, and a mode only when
// it lies in [0, 3]; a rejected value is logged as an error and replaced by the
// default (0.5 and 3 respectively). The resulting configuration is logged.
webrtc::MatchedFilter::PreEchoConfiguration FetchPreEchoConfiguration() {
  constexpr float kDefaultThreshold = 0.5f;
  constexpr int kDefaultMode = 3;

  webrtc::FieldTrialParameter<double> threshold_field_trial_parameter(
      /*key=*/"threshold", /*default_value=*/kDefaultThreshold);
  webrtc::FieldTrialParameter<int> mode_field_trial_parameter(
      /*key=*/"mode", /*default_value=*/kDefaultMode);
  const std::string trial_string =
      webrtc::field_trial::FindFullName("WebRTC-Aec3PreEchoConfiguration");
  webrtc::ParseFieldTrial(
      {&threshold_field_trial_parameter, &mode_field_trial_parameter},
      trial_string);

  const float threshold_read =
      static_cast<float>(threshold_field_trial_parameter.Get());
  const int mode_read = mode_field_trial_parameter.Get();

  const bool threshold_in_range =
      threshold_read > 0.0f && threshold_read < 1.0f;
  if (!threshold_in_range) {
    RTC_LOG(LS_ERROR)
        << "AEC3: Pre echo configuration: wrong input, threshold = "
        << threshold_read << ".";
  }
  const float threshold =
      threshold_in_range ? threshold_read : kDefaultThreshold;

  const bool mode_in_range = 0 <= mode_read && mode_read <= 3;
  if (!mode_in_range) {
    RTC_LOG(LS_ERROR) << "AEC3: Pre echo configuration: wrong input, mode = "
                      << mode_read << ".";
  }
  const int mode = mode_in_range ? mode_read : kDefaultMode;

  RTC_LOG(LS_INFO) << "AEC3: Pre echo configuration: threshold = " << threshold
                   << ", mode = " << mode << ".";
  return {.threshold = threshold, .mode = mode};
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
|
||||
// Returns the sum of the four lanes of `elements`, computed with two pairwise
// additions.
inline float SumAllElements(float32x4_t elements) {
  // First pairwise add: {e0 + e1, e2 + e3}.
  const float32x2_t pair_sums =
      vpadd_f32(vget_low_f32(elements), vget_high_f32(elements));
  // Second pairwise add leaves the full sum in both lanes.
  const float32x2_t total = vpadd_f32(pair_sums, pair_sums);
  return vget_lane_f32(total, 0);
}
|
||||
|
||||
// NEON matched-filter core that also records the squared error of the partial
// filter output.
//
// For each sample of `y`, the filter `h` is applied to `h.size()` samples of
// the circular buffer `x` starting at `x_start_index`. When that range wraps
// around the end of `x`, it is first linearized into `scratch_memory`. After
// every group of four taps, the squared difference between the partial filter
// output and the current `y` sample is added to the corresponding entry of
// `accumulated_error`, which is zeroed on entry. The squared full-filter error
// is added to `*error_sum`. The filter is then updated in an NLMS manner
// unless the `x` energy is at or below `x2_sum_threshold` or the `y` sample is
// saturated; `*filters_updated` is set to true whenever an update happens.
// `x_start_index` is stepped backwards (with wrap-around) between samples.
void MatchedFilterCoreWithAccumulatedError_NEON(
    size_t x_start_index,
    float x2_sum_threshold,
    float smoothing,
    rtc::ArrayView<const float> x,
    rtc::ArrayView<const float> y,
    rtc::ArrayView<float> h,
    bool* filters_updated,
    float* error_sum,
    rtc::ArrayView<float> accumulated_error,
    rtc::ArrayView<float> scratch_memory) {
  const int filter_len = static_cast<int>(h.size());
  const int render_len = static_cast<int>(x.size());
  RTC_DCHECK_EQ(0, filter_len % 4);
  // Number of 128 bit vectors needed to cover the filter.
  const int num_vectors = filter_len >> 2;

  std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f);

  // Process for all samples in the sub-block.
  for (size_t i = 0; i < y.size(); ++i) {
    RTC_DCHECK_GT(render_len, x_start_index);

    // Number of samples available before the circular buffer wraps around.
    const int contiguous =
        std::min(filter_len, static_cast<int>(render_len - x_start_index));
    const bool wraps = contiguous != filter_len;
    if (wraps) {
      // Linearize the wrapped range so that it can be read sequentially.
      const int remainder = filter_len - contiguous;
      std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin());
      std::copy(x.begin(), x.begin() + remainder,
                scratch_memory.begin() + contiguous);
    }
    const float* const x_window =
        wraps ? scratch_memory.data() : &x[x_start_index];
    const float* const taps = &h[0];
    float* const error_bins = &accumulated_error[0];

    // Apply the matched filter as filter * x, and compute x * x.
    float32x4_t x2_sum_128 = vdupq_n_f32(0);
    float x2_sum = 0.f;
    float s = 0;
    for (int v = 0; v < num_vectors; ++v) {
      const float32x4_t x_v = vld1q_f32(x_window + 4 * v);
      const float32x4_t h_v = vld1q_f32(taps + 4 * v);
      // Accumulate x * x.
      x2_sum_128 = vmlaq_f32(x2_sum_128, x_v, x_v);
      // Accumulate h * x and record the error of the partial output.
      s += SumAllElements(vmulq_f32(h_v, x_v));
      const float partial_error = s - y[i];
      error_bins[v] += partial_error * partial_error;
    }
    x2_sum += SumAllElements(x2_sum_128);

    // Compute the matched filter error.
    const float e = y[i] - s;
    const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
    (*error_sum) += e * e;

    // Update the matched filter estimate in an NLMS manner.
    if (x2_sum > x2_sum_threshold && !saturation) {
      RTC_DCHECK_LT(0.f, x2_sum);
      const float alpha = smoothing * e / x2_sum;
      const float32x4_t alpha_128 = vmovq_n_f32(alpha);

      // filter = filter + smoothing * (y - filter * x) * x / x * x.
      float* const coefficients = &h[0];
      for (int v = 0; v < num_vectors; ++v) {
        float* const h_dst = coefficients + 4 * v;
        const float32x4_t x_v = vld1q_f32(x_window + 4 * v);
        // Compute h = h + alpha * x and store the result.
        vst1q_f32(h_dst, vmlaq_f32(vld1q_f32(h_dst), alpha_128, x_v));
      }
      *filters_updated = true;
    }

    x_start_index = x_start_index > 0 ? x_start_index - 1 : render_len - 1;
  }
}
|
||||
|
||||
void MatchedFilterCore_NEON(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
@ -41,11 +245,20 @@ void MatchedFilterCore_NEON(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum) {
|
||||
float* error_sum,
|
||||
bool compute_accumulated_error,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory) {
|
||||
const int h_size = static_cast<int>(h.size());
|
||||
const int x_size = static_cast<int>(x.size());
|
||||
RTC_DCHECK_EQ(0, h_size % 4);
|
||||
|
||||
if (compute_accumulated_error) {
|
||||
return MatchedFilterCoreWithAccumulatedError_NEON(
|
||||
x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated,
|
||||
error_sum, accumulated_error, scratch_memory);
|
||||
}
|
||||
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
@ -90,10 +303,8 @@ void MatchedFilterCore_NEON(size_t x_start_index,
|
||||
}
|
||||
|
||||
// Combine the accumulated vector and scalar values.
|
||||
float* v = reinterpret_cast<float*>(&x2_sum_128);
|
||||
x2_sum += v[0] + v[1] + v[2] + v[3];
|
||||
v = reinterpret_cast<float*>(&s_128);
|
||||
s += v[0] + v[1] + v[2] + v[3];
|
||||
s += SumAllElements(s_128);
|
||||
x2_sum += SumAllElements(x2_sum_128);
|
||||
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s;
|
||||
@ -144,6 +355,103 @@ void MatchedFilterCore_NEON(size_t x_start_index,
|
||||
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
|
||||
void MatchedFilterCore_AccumulatedError_SSE2(
|
||||
size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory) {
|
||||
const int h_size = static_cast<int>(h.size());
|
||||
const int x_size = static_cast<int>(x.size());
|
||||
RTC_DCHECK_EQ(0, h_size % 8);
|
||||
std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f);
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
RTC_DCHECK_GT(x_size, x_start_index);
|
||||
const int chunk1 =
|
||||
std::min(h_size, static_cast<int>(x_size - x_start_index));
|
||||
if (chunk1 != h_size) {
|
||||
const int chunk2 = h_size - chunk1;
|
||||
std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin());
|
||||
std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1);
|
||||
}
|
||||
const float* x_p =
|
||||
chunk1 != h_size ? scratch_memory.data() : &x[x_start_index];
|
||||
const float* h_p = &h[0];
|
||||
float* a_p = &accumulated_error[0];
|
||||
__m128 s_inst_128;
|
||||
__m128 s_inst_128_4;
|
||||
__m128 x2_sum_128 = _mm_set1_ps(0);
|
||||
__m128 x2_sum_128_4 = _mm_set1_ps(0);
|
||||
__m128 e_128;
|
||||
float* const s_p = reinterpret_cast<float*>(&s_inst_128);
|
||||
float* const s_4_p = reinterpret_cast<float*>(&s_inst_128_4);
|
||||
float* const e_p = reinterpret_cast<float*>(&e_128);
|
||||
float x2_sum = 0.0f;
|
||||
float s_acum = 0;
|
||||
// Perform 128 bit vector operations.
|
||||
const int limit_by_8 = h_size >> 3;
|
||||
for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8, a_p += 2) {
|
||||
// Load the data into 128 bit vectors.
|
||||
const __m128 x_k = _mm_loadu_ps(x_p);
|
||||
const __m128 h_k = _mm_loadu_ps(h_p);
|
||||
const __m128 x_k_4 = _mm_loadu_ps(x_p + 4);
|
||||
const __m128 h_k_4 = _mm_loadu_ps(h_p + 4);
|
||||
const __m128 xx = _mm_mul_ps(x_k, x_k);
|
||||
const __m128 xx_4 = _mm_mul_ps(x_k_4, x_k_4);
|
||||
// Compute and accumulate x * x and h * x.
|
||||
x2_sum_128 = _mm_add_ps(x2_sum_128, xx);
|
||||
x2_sum_128_4 = _mm_add_ps(x2_sum_128_4, xx_4);
|
||||
s_inst_128 = _mm_mul_ps(h_k, x_k);
|
||||
s_inst_128_4 = _mm_mul_ps(h_k_4, x_k_4);
|
||||
s_acum += s_p[0] + s_p[1] + s_p[2] + s_p[3];
|
||||
e_p[0] = s_acum - y[i];
|
||||
s_acum += s_4_p[0] + s_4_p[1] + s_4_p[2] + s_4_p[3];
|
||||
e_p[1] = s_acum - y[i];
|
||||
a_p[0] += e_p[0] * e_p[0];
|
||||
a_p[1] += e_p[1] * e_p[1];
|
||||
}
|
||||
// Combine the accumulated vector and scalar values.
|
||||
x2_sum_128 = _mm_add_ps(x2_sum_128, x2_sum_128_4);
|
||||
float* v = reinterpret_cast<float*>(&x2_sum_128);
|
||||
x2_sum += v[0] + v[1] + v[2] + v[3];
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s_acum;
|
||||
const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
|
||||
(*error_sum) += e * e;
|
||||
// Update the matched filter estimate in an NLMS manner.
|
||||
if (x2_sum > x2_sum_threshold && !saturation) {
|
||||
RTC_DCHECK_LT(0.f, x2_sum);
|
||||
const float alpha = smoothing * e / x2_sum;
|
||||
const __m128 alpha_128 = _mm_set1_ps(alpha);
|
||||
// filter = filter + smoothing * (y - filter * x) * x / x * x.
|
||||
float* h_p = &h[0];
|
||||
const float* x_p =
|
||||
chunk1 != h_size ? scratch_memory.data() : &x[x_start_index];
|
||||
// Perform 128 bit vector operations.
|
||||
const int limit_by_4 = h_size >> 2;
|
||||
for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
|
||||
// Load the data into 128 bit vectors.
|
||||
__m128 h_k = _mm_loadu_ps(h_p);
|
||||
const __m128 x_k = _mm_loadu_ps(x_p);
|
||||
// Compute h = h + alpha * x.
|
||||
const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k);
|
||||
h_k = _mm_add_ps(h_k, alpha_x);
|
||||
// Store the result.
|
||||
_mm_storeu_ps(h_p, h_k);
|
||||
}
|
||||
*filters_updated = true;
|
||||
}
|
||||
x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void MatchedFilterCore_SSE2(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
@ -151,77 +459,83 @@ void MatchedFilterCore_SSE2(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum) {
|
||||
float* error_sum,
|
||||
bool compute_accumulated_error,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory) {
|
||||
if (compute_accumulated_error) {
|
||||
return MatchedFilterCore_AccumulatedError_SSE2(
|
||||
x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated,
|
||||
error_sum, accumulated_error, scratch_memory);
|
||||
}
|
||||
const int h_size = static_cast<int>(h.size());
|
||||
const int x_size = static_cast<int>(x.size());
|
||||
RTC_DCHECK_EQ(0, h_size % 4);
|
||||
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
|
||||
RTC_DCHECK_GT(x_size, x_start_index);
|
||||
const float* x_p = &x[x_start_index];
|
||||
const float* h_p = &h[0];
|
||||
|
||||
// Initialize values for the accumulation.
|
||||
__m128 s_128 = _mm_set1_ps(0);
|
||||
__m128 s_128_4 = _mm_set1_ps(0);
|
||||
__m128 x2_sum_128 = _mm_set1_ps(0);
|
||||
__m128 x2_sum_128_4 = _mm_set1_ps(0);
|
||||
float x2_sum = 0.f;
|
||||
float s = 0;
|
||||
|
||||
// Compute loop chunk sizes until, and after, the wraparound of the circular
|
||||
// buffer for x.
|
||||
const int chunk1 =
|
||||
std::min(h_size, static_cast<int>(x_size - x_start_index));
|
||||
|
||||
// Perform the loop in two chunks.
|
||||
const int chunk2 = h_size - chunk1;
|
||||
for (int limit : {chunk1, chunk2}) {
|
||||
// Perform 128 bit vector operations.
|
||||
const int limit_by_4 = limit >> 2;
|
||||
for (int k = limit_by_4; k > 0; --k, h_p += 4, x_p += 4) {
|
||||
const int limit_by_8 = limit >> 3;
|
||||
for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) {
|
||||
// Load the data into 128 bit vectors.
|
||||
const __m128 x_k = _mm_loadu_ps(x_p);
|
||||
const __m128 h_k = _mm_loadu_ps(h_p);
|
||||
const __m128 x_k_4 = _mm_loadu_ps(x_p + 4);
|
||||
const __m128 h_k_4 = _mm_loadu_ps(h_p + 4);
|
||||
const __m128 xx = _mm_mul_ps(x_k, x_k);
|
||||
const __m128 xx_4 = _mm_mul_ps(x_k_4, x_k_4);
|
||||
// Compute and accumulate x * x and h * x.
|
||||
x2_sum_128 = _mm_add_ps(x2_sum_128, xx);
|
||||
x2_sum_128_4 = _mm_add_ps(x2_sum_128_4, xx_4);
|
||||
const __m128 hx = _mm_mul_ps(h_k, x_k);
|
||||
const __m128 hx_4 = _mm_mul_ps(h_k_4, x_k_4);
|
||||
s_128 = _mm_add_ps(s_128, hx);
|
||||
s_128_4 = _mm_add_ps(s_128_4, hx_4);
|
||||
}
|
||||
|
||||
// Perform non-vector operations for any remaining items.
|
||||
for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
|
||||
for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) {
|
||||
const float x_k = *x_p;
|
||||
x2_sum += x_k * x_k;
|
||||
s += *h_p * x_k;
|
||||
}
|
||||
|
||||
x_p = &x[0];
|
||||
}
|
||||
|
||||
// Combine the accumulated vector and scalar values.
|
||||
x2_sum_128 = _mm_add_ps(x2_sum_128, x2_sum_128_4);
|
||||
float* v = reinterpret_cast<float*>(&x2_sum_128);
|
||||
x2_sum += v[0] + v[1] + v[2] + v[3];
|
||||
s_128 = _mm_add_ps(s_128, s_128_4);
|
||||
v = reinterpret_cast<float*>(&s_128);
|
||||
s += v[0] + v[1] + v[2] + v[3];
|
||||
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s;
|
||||
const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
|
||||
(*error_sum) += e * e;
|
||||
|
||||
// Update the matched filter estimate in an NLMS manner.
|
||||
if (x2_sum > x2_sum_threshold && !saturation) {
|
||||
RTC_DCHECK_LT(0.f, x2_sum);
|
||||
const float alpha = smoothing * e / x2_sum;
|
||||
const __m128 alpha_128 = _mm_set1_ps(alpha);
|
||||
|
||||
// filter = filter + smoothing * (y - filter * x) * x / x * x.
|
||||
float* h_p = &h[0];
|
||||
x_p = &x[x_start_index];
|
||||
|
||||
// Perform the loop in two chunks.
|
||||
for (int limit : {chunk1, chunk2}) {
|
||||
// Perform 128 bit vector operations.
|
||||
@ -234,22 +548,17 @@ void MatchedFilterCore_SSE2(size_t x_start_index,
|
||||
// Compute h = h + alpha * x.
|
||||
const __m128 alpha_x = _mm_mul_ps(alpha_128, x_k);
|
||||
h_k = _mm_add_ps(h_k, alpha_x);
|
||||
|
||||
// Store the result.
|
||||
_mm_storeu_ps(h_p, h_k);
|
||||
}
|
||||
|
||||
// Perform non-vector operations for any remaining items.
|
||||
for (int k = limit - limit_by_4 * 4; k > 0; --k, ++h_p, ++x_p) {
|
||||
*h_p += alpha * *x_p;
|
||||
}
|
||||
|
||||
x_p = &x[0];
|
||||
}
|
||||
|
||||
*filters_updated = true;
|
||||
}
|
||||
|
||||
x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
|
||||
}
|
||||
}
|
||||
@ -262,17 +571,35 @@ void MatchedFilterCore(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum) {
|
||||
float* error_sum,
|
||||
bool compute_accumulated_error,
|
||||
rtc::ArrayView<float> accumulated_error) {
|
||||
if (compute_accumulated_error) {
|
||||
std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f);
|
||||
}
|
||||
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
float x2_sum = 0.f;
|
||||
float s = 0;
|
||||
size_t x_index = x_start_index;
|
||||
for (size_t k = 0; k < h.size(); ++k) {
|
||||
x2_sum += x[x_index] * x[x_index];
|
||||
s += h[k] * x[x_index];
|
||||
x_index = x_index < (x.size() - 1) ? x_index + 1 : 0;
|
||||
if (compute_accumulated_error) {
|
||||
for (size_t k = 0; k < h.size(); ++k) {
|
||||
x2_sum += x[x_index] * x[x_index];
|
||||
s += h[k] * x[x_index];
|
||||
x_index = x_index < (x.size() - 1) ? x_index + 1 : 0;
|
||||
if ((k + 1 & 0b11) == 0) {
|
||||
int idx = k >> 2;
|
||||
accumulated_error[idx] += (y[i] - s) * (y[i] - s);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t k = 0; k < h.size(); ++k) {
|
||||
x2_sum += x[x_index] * x[x_index];
|
||||
s += h[k] * x[x_index];
|
||||
x_index = x_index < (x.size() - 1) ? x_index + 1 : 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute the matched filter error.
|
||||
@ -298,6 +625,41 @@ void MatchedFilterCore(size_t x_start_index,
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the index of the element of `h` with the largest squared value.
// Returns 0 when `h` has fewer than two elements.
//
// The maxima over even and odd indices are tracked separately and merged
// afterwards. A tie between the two is resolved in favour of the even index,
// and the last element only wins when it is strictly larger than the merged
// maximum.
size_t MaxSquarePeakIndex(rtc::ArrayView<const float> h) {
  if (h.size() < 2) {
    return 0;
  }

  const auto squared = [&h](size_t index) { return h[index] * h[index]; };

  float even_peak = squared(0);
  float odd_peak = squared(1);
  size_t even_peak_index = 0;
  size_t odd_peak_index = 1;
  const size_t tail_index = h.size() - 1;

  // Keeping track of even & odd max elements separately typically allows the
  // compiler to produce more efficient code.
  for (size_t even = 2; even < tail_index; even += 2) {
    const size_t odd = even + 1;
    const float even_value = squared(even);
    const float odd_value = squared(odd);
    if (even_value > even_peak) {
      even_peak = even_value;
      even_peak_index = even;
    }
    if (odd_value > odd_peak) {
      odd_peak = odd_value;
      odd_peak_index = odd;
    }
  }

  // Merge the two running maxima; the odd one must be strictly larger to win.
  const bool odd_wins = odd_peak > even_peak;
  const float peak = odd_wins ? odd_peak : even_peak;
  const size_t peak_index = odd_wins ? odd_peak_index : even_peak_index;

  // In case of odd h size, we have not yet checked the last element.
  return squared(tail_index) > peak ? tail_index : peak_index;
}
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper,
|
||||
@ -307,8 +669,10 @@ MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper,
|
||||
int num_matched_filters,
|
||||
size_t alignment_shift_sub_blocks,
|
||||
float excitation_limit,
|
||||
float smoothing,
|
||||
float matching_filter_threshold)
|
||||
float smoothing_fast,
|
||||
float smoothing_slow,
|
||||
float matching_filter_threshold,
|
||||
bool detect_pre_echo)
|
||||
: data_dumper_(data_dumper),
|
||||
optimization_(optimization),
|
||||
sub_block_size_(sub_block_size),
|
||||
@ -316,42 +680,82 @@ MatchedFilter::MatchedFilter(ApmDataDumper* data_dumper,
|
||||
filters_(
|
||||
num_matched_filters,
|
||||
std::vector<float>(window_size_sub_blocks * sub_block_size_, 0.f)),
|
||||
lag_estimates_(num_matched_filters),
|
||||
filters_offsets_(num_matched_filters, 0),
|
||||
excitation_limit_(excitation_limit),
|
||||
smoothing_(smoothing),
|
||||
matching_filter_threshold_(matching_filter_threshold) {
|
||||
smoothing_fast_(smoothing_fast),
|
||||
smoothing_slow_(smoothing_slow),
|
||||
matching_filter_threshold_(matching_filter_threshold),
|
||||
detect_pre_echo_(detect_pre_echo),
|
||||
pre_echo_config_(FetchPreEchoConfiguration()) {
|
||||
RTC_DCHECK(data_dumper);
|
||||
RTC_DCHECK_LT(0, window_size_sub_blocks);
|
||||
RTC_DCHECK((kBlockSize % sub_block_size) == 0);
|
||||
RTC_DCHECK((sub_block_size % 4) == 0);
|
||||
static_assert(kAccumulatedErrorSubSampleRate == 4);
|
||||
if (detect_pre_echo_) {
|
||||
accumulated_error_ = std::vector<std::vector<float>>(
|
||||
num_matched_filters,
|
||||
std::vector<float>(window_size_sub_blocks * sub_block_size_ /
|
||||
kAccumulatedErrorSubSampleRate,
|
||||
1.0f));
|
||||
|
||||
instantaneous_accumulated_error_ =
|
||||
std::vector<float>(window_size_sub_blocks * sub_block_size_ /
|
||||
kAccumulatedErrorSubSampleRate,
|
||||
0.0f);
|
||||
scratch_memory_ =
|
||||
std::vector<float>(window_size_sub_blocks * sub_block_size_);
|
||||
}
|
||||
}
|
||||
|
||||
MatchedFilter::~MatchedFilter() = default;
|
||||
|
||||
void MatchedFilter::Reset() {
|
||||
void MatchedFilter::Reset(bool full_reset) {
|
||||
for (auto& f : filters_) {
|
||||
std::fill(f.begin(), f.end(), 0.f);
|
||||
}
|
||||
|
||||
for (auto& l : lag_estimates_) {
|
||||
l = MatchedFilter::LagEstimate();
|
||||
winner_lag_ = absl::nullopt;
|
||||
reported_lag_estimate_ = absl::nullopt;
|
||||
if (pre_echo_config_.mode != 3 || full_reset) {
|
||||
for (auto& e : accumulated_error_) {
|
||||
std::fill(e.begin(), e.end(), 1.0f);
|
||||
}
|
||||
number_pre_echo_updates_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const float> capture) {
|
||||
rtc::ArrayView<const float> capture,
|
||||
bool use_slow_smoothing) {
|
||||
RTC_DCHECK_EQ(sub_block_size_, capture.size());
|
||||
auto& y = capture;
|
||||
|
||||
const float smoothing =
|
||||
use_slow_smoothing ? smoothing_slow_ : smoothing_fast_;
|
||||
|
||||
const float x2_sum_threshold =
|
||||
filters_[0].size() * excitation_limit_ * excitation_limit_;
|
||||
|
||||
// Compute anchor for the matched filter error.
|
||||
float error_sum_anchor = 0.0f;
|
||||
for (size_t k = 0; k < y.size(); ++k) {
|
||||
error_sum_anchor += y[k] * y[k];
|
||||
}
|
||||
|
||||
// Apply all matched filters.
|
||||
float winner_error_sum = error_sum_anchor;
|
||||
winner_lag_ = absl::nullopt;
|
||||
reported_lag_estimate_ = absl::nullopt;
|
||||
size_t alignment_shift = 0;
|
||||
for (size_t n = 0; n < filters_.size(); ++n) {
|
||||
absl::optional<size_t> previous_lag_estimate;
|
||||
const int num_filters = static_cast<int>(filters_.size());
|
||||
int winner_index = -1;
|
||||
for (int n = 0; n < num_filters; ++n) {
|
||||
float error_sum = 0.f;
|
||||
bool filters_updated = false;
|
||||
const bool compute_pre_echo =
|
||||
detect_pre_echo_ && n == last_detected_best_lag_filter_;
|
||||
|
||||
size_t x_start_index =
|
||||
(render_buffer.read + alignment_shift + sub_block_size_ - 1) %
|
||||
@ -360,87 +764,94 @@ void MatchedFilter::Update(const DownsampledRenderBuffer& render_buffer,
|
||||
switch (optimization_) {
|
||||
#if defined(WEBRTC_ARCH_X86_FAMILY)
|
||||
case Aec3Optimization::kSse2:
|
||||
aec3::MatchedFilterCore_SSE2(x_start_index, x2_sum_threshold,
|
||||
smoothing_, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum);
|
||||
aec3::MatchedFilterCore_SSE2(
|
||||
x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum, compute_pre_echo,
|
||||
instantaneous_accumulated_error_, scratch_memory_);
|
||||
break;
|
||||
case Aec3Optimization::kAvx2:
|
||||
aec3::MatchedFilterCore_AVX2(x_start_index, x2_sum_threshold,
|
||||
smoothing_, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum);
|
||||
aec3::MatchedFilterCore_AVX2(
|
||||
x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum, compute_pre_echo,
|
||||
instantaneous_accumulated_error_, scratch_memory_);
|
||||
break;
|
||||
#endif
|
||||
#if defined(WEBRTC_HAS_NEON)
|
||||
case Aec3Optimization::kNeon:
|
||||
aec3::MatchedFilterCore_NEON(x_start_index, x2_sum_threshold,
|
||||
smoothing_, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum);
|
||||
aec3::MatchedFilterCore_NEON(
|
||||
x_start_index, x2_sum_threshold, smoothing, render_buffer.buffer, y,
|
||||
filters_[n], &filters_updated, &error_sum, compute_pre_echo,
|
||||
instantaneous_accumulated_error_, scratch_memory_);
|
||||
break;
|
||||
#endif
|
||||
default:
|
||||
aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing_,
|
||||
aec3::MatchedFilterCore(x_start_index, x2_sum_threshold, smoothing,
|
||||
render_buffer.buffer, y, filters_[n],
|
||||
&filters_updated, &error_sum);
|
||||
&filters_updated, &error_sum, compute_pre_echo,
|
||||
instantaneous_accumulated_error_);
|
||||
}
|
||||
|
||||
// Compute anchor for the matched filter error.
|
||||
const float error_sum_anchor =
|
||||
std::inner_product(y.begin(), y.end(), y.begin(), 0.f);
|
||||
|
||||
// Estimate the lag in the matched filter as the distance to the portion in
|
||||
// the filter that contributes the most to the matched filter output. This
|
||||
// is detected as the peak of the matched filter.
|
||||
const size_t lag_estimate = std::distance(
|
||||
filters_[n].begin(),
|
||||
std::max_element(
|
||||
filters_[n].begin(), filters_[n].end(),
|
||||
[](float a, float b) -> bool { return a * a < b * b; }));
|
||||
const size_t lag_estimate = aec3::MaxSquarePeakIndex(filters_[n]);
|
||||
const bool reliable =
|
||||
lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) &&
|
||||
error_sum < matching_filter_threshold_ * error_sum_anchor;
|
||||
|
||||
// Update the lag estimates for the matched filter.
|
||||
lag_estimates_[n] = LagEstimate(
|
||||
error_sum_anchor - error_sum,
|
||||
(lag_estimate > 2 && lag_estimate < (filters_[n].size() - 10) &&
|
||||
error_sum < matching_filter_threshold_ * error_sum_anchor),
|
||||
lag_estimate + alignment_shift, filters_updated);
|
||||
|
||||
RTC_DCHECK_GE(10, filters_.size());
|
||||
switch (n) {
|
||||
case 0:
|
||||
data_dumper_->DumpRaw("aec3_correlator_0_h", filters_[0]);
|
||||
break;
|
||||
case 1:
|
||||
data_dumper_->DumpRaw("aec3_correlator_1_h", filters_[1]);
|
||||
break;
|
||||
case 2:
|
||||
data_dumper_->DumpRaw("aec3_correlator_2_h", filters_[2]);
|
||||
break;
|
||||
case 3:
|
||||
data_dumper_->DumpRaw("aec3_correlator_3_h", filters_[3]);
|
||||
break;
|
||||
case 4:
|
||||
data_dumper_->DumpRaw("aec3_correlator_4_h", filters_[4]);
|
||||
break;
|
||||
case 5:
|
||||
data_dumper_->DumpRaw("aec3_correlator_5_h", filters_[5]);
|
||||
break;
|
||||
case 6:
|
||||
data_dumper_->DumpRaw("aec3_correlator_6_h", filters_[6]);
|
||||
break;
|
||||
case 7:
|
||||
data_dumper_->DumpRaw("aec3_correlator_7_h", filters_[7]);
|
||||
break;
|
||||
case 8:
|
||||
data_dumper_->DumpRaw("aec3_correlator_8_h", filters_[8]);
|
||||
break;
|
||||
case 9:
|
||||
data_dumper_->DumpRaw("aec3_correlator_9_h", filters_[9]);
|
||||
break;
|
||||
default:
|
||||
RTC_NOTREACHED();
|
||||
// Find the best estimate
|
||||
const size_t lag = lag_estimate + alignment_shift;
|
||||
if (filters_updated && reliable && error_sum < winner_error_sum) {
|
||||
winner_error_sum = error_sum;
|
||||
winner_index = n;
|
||||
// In case that 2 matched filters return the same winner candidate
|
||||
// (overlap region), the one with the smaller index is chosen in order
|
||||
// to search for pre-echoes.
|
||||
if (previous_lag_estimate && previous_lag_estimate == lag) {
|
||||
winner_lag_ = previous_lag_estimate;
|
||||
winner_index = n - 1;
|
||||
} else {
|
||||
winner_lag_ = lag;
|
||||
}
|
||||
}
|
||||
|
||||
previous_lag_estimate = lag;
|
||||
alignment_shift += filter_intra_lag_shift_;
|
||||
}
|
||||
|
||||
if (winner_index != -1) {
|
||||
RTC_DCHECK(winner_lag_.has_value());
|
||||
reported_lag_estimate_ =
|
||||
LagEstimate(winner_lag_.value(), /*pre_echo_lag=*/winner_lag_.value());
|
||||
if (detect_pre_echo_ && last_detected_best_lag_filter_ == winner_index) {
|
||||
const float energy_threshold =
|
||||
pre_echo_config_.mode == 3 ? 1.0f : 30.0f * 30.0f * y.size();
|
||||
|
||||
if (error_sum_anchor > energy_threshold) {
|
||||
const float smooth_constant_increases =
|
||||
pre_echo_config_.mode != 3 ? 0.01f : 0.015f;
|
||||
|
||||
UpdateAccumulatedError(
|
||||
instantaneous_accumulated_error_, accumulated_error_[winner_index],
|
||||
1.0f / error_sum_anchor, smooth_constant_increases);
|
||||
number_pre_echo_updates_++;
|
||||
}
|
||||
if (pre_echo_config_.mode != 3 || number_pre_echo_updates_ >= 50) {
|
||||
reported_lag_estimate_->pre_echo_lag = ComputePreEchoLag(
|
||||
pre_echo_config_, accumulated_error_[winner_index],
|
||||
winner_lag_.value(),
|
||||
winner_index * filter_intra_lag_shift_ /*alignment_shift_winner*/);
|
||||
} else {
|
||||
reported_lag_estimate_->pre_echo_lag = winner_lag_.value();
|
||||
}
|
||||
}
|
||||
last_detected_best_lag_filter_ = winner_index;
|
||||
}
|
||||
if (ApmDataDumper::IsAvailable()) {
|
||||
Dump();
|
||||
data_dumper_->DumpRaw("error_sum_anchor", error_sum_anchor / y.size());
|
||||
data_dumper_->DumpRaw("number_pre_echo_updates", number_pre_echo_updates_);
|
||||
data_dumper_->DumpRaw("filter_smoothing", smoothing);
|
||||
}
|
||||
}
|
||||
|
||||
void MatchedFilter::LogFilterProperties(int sample_rate_hz,
|
||||
@ -461,4 +872,31 @@ void MatchedFilter::LogFilterProperties(int sample_rate_hz,
|
||||
}
|
||||
}
|
||||
|
||||
void MatchedFilter::Dump() {
|
||||
for (size_t n = 0; n < filters_.size(); ++n) {
|
||||
const size_t lag_estimate = aec3::MaxSquarePeakIndex(filters_[n]);
|
||||
std::string dumper_filter = "aec3_correlator_" + std::to_string(n) + "_h";
|
||||
data_dumper_->DumpRaw(dumper_filter.c_str(), filters_[n]);
|
||||
std::string dumper_lag = "aec3_correlator_lag_" + std::to_string(n);
|
||||
data_dumper_->DumpRaw(dumper_lag.c_str(),
|
||||
lag_estimate + n * filter_intra_lag_shift_);
|
||||
if (detect_pre_echo_) {
|
||||
std::string dumper_error =
|
||||
"aec3_correlator_error_" + std::to_string(n) + "_h";
|
||||
data_dumper_->DumpRaw(dumper_error.c_str(), accumulated_error_[n]);
|
||||
|
||||
size_t pre_echo_lag =
|
||||
ComputePreEchoLag(pre_echo_config_, accumulated_error_[n],
|
||||
lag_estimate + n * filter_intra_lag_shift_,
|
||||
n * filter_intra_lag_shift_);
|
||||
std::string dumper_pre_lag =
|
||||
"aec3_correlator_pre_echo_lag_" + std::to_string(n);
|
||||
data_dumper_->DumpRaw(dumper_pre_lag.c_str(), pre_echo_lag);
|
||||
if (static_cast<int>(n) == last_detected_best_lag_filter_) {
|
||||
data_dumper_->DumpRaw("aec3_pre_echo_delay_winner_inst", pre_echo_lag);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -15,8 +15,10 @@
|
||||
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "rtc_base/gtest_prod_util.h"
|
||||
#include "rtc_base/system/arch.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -36,7 +38,10 @@ void MatchedFilterCore_NEON(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
float* error_sum,
|
||||
bool compute_accumulation_error,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory);
|
||||
|
||||
#endif
|
||||
|
||||
@ -50,7 +55,10 @@ void MatchedFilterCore_SSE2(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
float* error_sum,
|
||||
bool compute_accumulated_error,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory);
|
||||
|
||||
// Filter core for the matched filter that is optimized for AVX2.
|
||||
void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
@ -60,7 +68,10 @@ void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
float* error_sum,
|
||||
bool compute_accumulated_error,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory);
|
||||
|
||||
#endif
|
||||
|
||||
@ -72,7 +83,12 @@ void MatchedFilterCore(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum);
|
||||
float* error_sum,
|
||||
bool compute_accumulation_error,
|
||||
rtc::ArrayView<float> accumulated_error);
|
||||
|
||||
// Find largest peak of squared values in array.
|
||||
size_t MaxSquarePeakIndex(rtc::ArrayView<const float> h);
|
||||
|
||||
} // namespace aec3
|
||||
|
||||
@ -84,13 +100,15 @@ class MatchedFilter {
|
||||
// shift.
|
||||
struct LagEstimate {
|
||||
LagEstimate() = default;
|
||||
LagEstimate(float accuracy, bool reliable, size_t lag, bool updated)
|
||||
: accuracy(accuracy), reliable(reliable), lag(lag), updated(updated) {}
|
||||
|
||||
float accuracy = 0.f;
|
||||
bool reliable = false;
|
||||
LagEstimate(size_t lag, size_t pre_echo_lag)
|
||||
: lag(lag), pre_echo_lag(pre_echo_lag) {}
|
||||
size_t lag = 0;
|
||||
bool updated = false;
|
||||
size_t pre_echo_lag = 0;
|
||||
};
|
||||
|
||||
struct PreEchoConfiguration {
|
||||
const float threshold;
|
||||
const int mode;
|
||||
};
|
||||
|
||||
MatchedFilter(ApmDataDumper* data_dumper,
|
||||
@ -100,8 +118,10 @@ class MatchedFilter {
|
||||
int num_matched_filters,
|
||||
size_t alignment_shift_sub_blocks,
|
||||
float excitation_limit,
|
||||
float smoothing,
|
||||
float matching_filter_threshold);
|
||||
float smoothing_fast,
|
||||
float smoothing_slow,
|
||||
float matching_filter_threshold,
|
||||
bool detect_pre_echo);
|
||||
|
||||
MatchedFilter() = delete;
|
||||
MatchedFilter(const MatchedFilter&) = delete;
|
||||
@ -111,14 +131,15 @@ class MatchedFilter {
|
||||
|
||||
// Updates the correlation with the values in the capture buffer.
|
||||
void Update(const DownsampledRenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const float> capture);
|
||||
rtc::ArrayView<const float> capture,
|
||||
bool use_slow_smoothing);
|
||||
|
||||
// Resets the matched filter.
|
||||
void Reset();
|
||||
void Reset(bool full_reset);
|
||||
|
||||
// Returns the current lag estimates.
|
||||
rtc::ArrayView<const MatchedFilter::LagEstimate> GetLagEstimates() const {
|
||||
return lag_estimates_;
|
||||
absl::optional<const MatchedFilter::LagEstimate> GetBestLagEstimate() const {
|
||||
return reported_lag_estimate_;
|
||||
}
|
||||
|
||||
// Returns the maximum filter lag.
|
||||
@ -132,16 +153,36 @@ class MatchedFilter {
|
||||
size_t downsampling_factor) const;
|
||||
|
||||
private:
|
||||
FRIEND_TEST_ALL_PREFIXES(MatchedFilterFieldTrialTest,
|
||||
PreEchoConfigurationTest);
|
||||
FRIEND_TEST_ALL_PREFIXES(MatchedFilterFieldTrialTest,
|
||||
WrongPreEchoConfigurationTest);
|
||||
|
||||
// Only for testing. Gets the pre echo detection configuration.
|
||||
const PreEchoConfiguration& GetPreEchoConfiguration() const {
|
||||
return pre_echo_config_;
|
||||
}
|
||||
void Dump();
|
||||
|
||||
ApmDataDumper* const data_dumper_;
|
||||
const Aec3Optimization optimization_;
|
||||
const size_t sub_block_size_;
|
||||
const size_t filter_intra_lag_shift_;
|
||||
std::vector<std::vector<float>> filters_;
|
||||
std::vector<LagEstimate> lag_estimates_;
|
||||
std::vector<std::vector<float>> accumulated_error_;
|
||||
std::vector<float> instantaneous_accumulated_error_;
|
||||
std::vector<float> scratch_memory_;
|
||||
absl::optional<MatchedFilter::LagEstimate> reported_lag_estimate_;
|
||||
absl::optional<size_t> winner_lag_;
|
||||
int last_detected_best_lag_filter_ = -1;
|
||||
std::vector<size_t> filters_offsets_;
|
||||
int number_pre_echo_updates_ = 0;
|
||||
const float excitation_limit_;
|
||||
const float smoothing_;
|
||||
const float smoothing_fast_;
|
||||
const float smoothing_slow_;
|
||||
const float matching_filter_threshold_;
|
||||
const bool detect_pre_echo_;
|
||||
const PreEchoConfiguration pre_echo_config_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -8,15 +8,134 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "modules/audio_processing/aec3/matched_filter.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace aec3 {
|
||||
|
||||
// Computes the horizontal sums of two vectors of 8 floats in one go.
// Let ha denote the horizontal sum of a, and hb the horizontal sum of b.
|
||||
// Returns [ha, hb, ha, hb].
|
||||
inline __m128 hsum_ab(__m256 a, __m256 b) {
|
||||
// Pairwise sums within each 128-bit lane:
// [a0+a1, a2+a3, b0+b1, b2+b3 | a4+a5, a6+a7, b4+b5, b6+b7].
__m256 s_256 = _mm256_hadd_ps(a, b);
|
||||
// Element order (low to high) 0, 1, 4, 5, 2, 3, 6, 7: gathers the four
// partial sums of a in the low half and those of b in the high half.
const __m256i mask = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
|
||||
s_256 = _mm256_permutevar8x32_ps(s_256, mask);
|
||||
// Reduce each half from four partial sums to two.
__m128 s = _mm_hadd_ps(_mm256_extractf128_ps(s_256, 0),
|
||||
_mm256_extractf128_ps(s_256, 1));
|
||||
// Final reduction step, leaving [ha, hb, ha, hb].
s = _mm_hadd_ps(s, s);
|
||||
return s;
|
||||
}
|
||||
|
||||
void MatchedFilterCore_AccumulatedError_AVX2(
|
||||
size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
rtc::ArrayView<const float> x,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory) {
|
||||
const int h_size = static_cast<int>(h.size());
|
||||
const int x_size = static_cast<int>(x.size());
|
||||
RTC_DCHECK_EQ(0, h_size % 16);
|
||||
std::fill(accumulated_error.begin(), accumulated_error.end(), 0.0f);
|
||||
|
||||
// Process for all samples in the sub-block.
|
||||
for (size_t i = 0; i < y.size(); ++i) {
|
||||
// Apply the matched filter as filter * x, and compute x * x.
|
||||
RTC_DCHECK_GT(x_size, x_start_index);
|
||||
const int chunk1 =
|
||||
std::min(h_size, static_cast<int>(x_size - x_start_index));
|
||||
if (chunk1 != h_size) {
|
||||
const int chunk2 = h_size - chunk1;
|
||||
std::copy(x.begin() + x_start_index, x.end(), scratch_memory.begin());
|
||||
std::copy(x.begin(), x.begin() + chunk2, scratch_memory.begin() + chunk1);
|
||||
}
|
||||
const float* x_p =
|
||||
chunk1 != h_size ? scratch_memory.data() : &x[x_start_index];
|
||||
const float* h_p = &h[0];
|
||||
float* a_p = &accumulated_error[0];
|
||||
__m256 s_inst_hadd_256;
|
||||
__m256 s_inst_256;
|
||||
__m256 s_inst_256_8;
|
||||
__m256 x2_sum_256 = _mm256_set1_ps(0);
|
||||
__m256 x2_sum_256_8 = _mm256_set1_ps(0);
|
||||
__m128 e_128;
|
||||
float x2_sum = 0.0f;
|
||||
float s_acum = 0;
|
||||
const int limit_by_16 = h_size >> 4;
|
||||
for (int k = limit_by_16; k > 0; --k, h_p += 16, x_p += 16, a_p += 4) {
|
||||
// Load the data into 256 bit vectors.
|
||||
__m256 x_k = _mm256_loadu_ps(x_p);
|
||||
__m256 h_k = _mm256_loadu_ps(h_p);
|
||||
__m256 x_k_8 = _mm256_loadu_ps(x_p + 8);
|
||||
__m256 h_k_8 = _mm256_loadu_ps(h_p + 8);
|
||||
// Compute and accumulate x * x and h * x.
|
||||
x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256);
|
||||
x2_sum_256_8 = _mm256_fmadd_ps(x_k_8, x_k_8, x2_sum_256_8);
|
||||
s_inst_256 = _mm256_mul_ps(h_k, x_k);
|
||||
s_inst_256_8 = _mm256_mul_ps(h_k_8, x_k_8);
|
||||
s_inst_hadd_256 = _mm256_hadd_ps(s_inst_256, s_inst_256_8);
|
||||
s_inst_hadd_256 = _mm256_hadd_ps(s_inst_hadd_256, s_inst_hadd_256);
|
||||
s_acum += s_inst_hadd_256[0];
|
||||
e_128[0] = s_acum - y[i];
|
||||
s_acum += s_inst_hadd_256[4];
|
||||
e_128[1] = s_acum - y[i];
|
||||
s_acum += s_inst_hadd_256[1];
|
||||
e_128[2] = s_acum - y[i];
|
||||
s_acum += s_inst_hadd_256[5];
|
||||
e_128[3] = s_acum - y[i];
|
||||
|
||||
__m128 accumulated_error = _mm_load_ps(a_p);
|
||||
accumulated_error = _mm_fmadd_ps(e_128, e_128, accumulated_error);
|
||||
_mm_storeu_ps(a_p, accumulated_error);
|
||||
}
|
||||
// Sum components together.
|
||||
x2_sum_256 = _mm256_add_ps(x2_sum_256, x2_sum_256_8);
|
||||
__m128 x2_sum_128 = _mm_add_ps(_mm256_extractf128_ps(x2_sum_256, 0),
|
||||
_mm256_extractf128_ps(x2_sum_256, 1));
|
||||
// Combine the accumulated vector and scalar values.
|
||||
float* v = reinterpret_cast<float*>(&x2_sum_128);
|
||||
x2_sum += v[0] + v[1] + v[2] + v[3];
|
||||
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s_acum;
|
||||
const bool saturation = y[i] >= 32000.f || y[i] <= -32000.f;
|
||||
(*error_sum) += e * e;
|
||||
|
||||
// Update the matched filter estimate in an NLMS manner.
|
||||
if (x2_sum > x2_sum_threshold && !saturation) {
|
||||
RTC_DCHECK_LT(0.f, x2_sum);
|
||||
const float alpha = smoothing * e / x2_sum;
|
||||
const __m256 alpha_256 = _mm256_set1_ps(alpha);
|
||||
|
||||
// filter = filter + smoothing * (y - filter * x) * x / x * x.
|
||||
float* h_p = &h[0];
|
||||
const float* x_p =
|
||||
chunk1 != h_size ? scratch_memory.data() : &x[x_start_index];
|
||||
// Perform 256 bit vector operations.
|
||||
const int limit_by_8 = h_size >> 3;
|
||||
for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) {
|
||||
// Load the data into 256 bit vectors.
|
||||
__m256 h_k = _mm256_loadu_ps(h_p);
|
||||
__m256 x_k = _mm256_loadu_ps(x_p);
|
||||
// Compute h = h + alpha * x.
|
||||
h_k = _mm256_fmadd_ps(x_k, alpha_256, h_k);
|
||||
|
||||
// Store the result.
|
||||
_mm256_storeu_ps(h_p, h_k);
|
||||
}
|
||||
*filters_updated = true;
|
||||
}
|
||||
|
||||
x_start_index = x_start_index > 0 ? x_start_index - 1 : x_size - 1;
|
||||
}
|
||||
}
|
||||
|
||||
void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
float x2_sum_threshold,
|
||||
float smoothing,
|
||||
@ -24,7 +143,15 @@ void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
rtc::ArrayView<const float> y,
|
||||
rtc::ArrayView<float> h,
|
||||
bool* filters_updated,
|
||||
float* error_sum) {
|
||||
float* error_sum,
|
||||
bool compute_accumulated_error,
|
||||
rtc::ArrayView<float> accumulated_error,
|
||||
rtc::ArrayView<float> scratch_memory) {
|
||||
if (compute_accumulated_error) {
|
||||
return MatchedFilterCore_AccumulatedError_AVX2(
|
||||
x_start_index, x2_sum_threshold, smoothing, x, y, h, filters_updated,
|
||||
error_sum, accumulated_error, scratch_memory);
|
||||
}
|
||||
const int h_size = static_cast<int>(h.size());
|
||||
const int x_size = static_cast<int>(x.size());
|
||||
RTC_DCHECK_EQ(0, h_size % 8);
|
||||
@ -39,7 +166,9 @@ void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
|
||||
// Initialize values for the accumulation.
|
||||
__m256 s_256 = _mm256_set1_ps(0);
|
||||
__m256 s_256_8 = _mm256_set1_ps(0);
|
||||
__m256 x2_sum_256 = _mm256_set1_ps(0);
|
||||
__m256 x2_sum_256_8 = _mm256_set1_ps(0);
|
||||
float x2_sum = 0.f;
|
||||
float s = 0;
|
||||
|
||||
@ -52,18 +181,22 @@ void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
const int chunk2 = h_size - chunk1;
|
||||
for (int limit : {chunk1, chunk2}) {
|
||||
// Perform 256 bit vector operations.
|
||||
const int limit_by_8 = limit >> 3;
|
||||
for (int k = limit_by_8; k > 0; --k, h_p += 8, x_p += 8) {
|
||||
const int limit_by_16 = limit >> 4;
|
||||
for (int k = limit_by_16; k > 0; --k, h_p += 16, x_p += 16) {
|
||||
// Load the data into 256 bit vectors.
|
||||
__m256 x_k = _mm256_loadu_ps(x_p);
|
||||
__m256 h_k = _mm256_loadu_ps(h_p);
|
||||
__m256 x_k_8 = _mm256_loadu_ps(x_p + 8);
|
||||
__m256 h_k_8 = _mm256_loadu_ps(h_p + 8);
|
||||
// Compute and accumulate x * x and h * x.
|
||||
x2_sum_256 = _mm256_fmadd_ps(x_k, x_k, x2_sum_256);
|
||||
x2_sum_256_8 = _mm256_fmadd_ps(x_k_8, x_k_8, x2_sum_256_8);
|
||||
s_256 = _mm256_fmadd_ps(h_k, x_k, s_256);
|
||||
s_256_8 = _mm256_fmadd_ps(h_k_8, x_k_8, s_256_8);
|
||||
}
|
||||
|
||||
// Perform non-vector operations for any remaining items.
|
||||
for (int k = limit - limit_by_8 * 8; k > 0; --k, ++h_p, ++x_p) {
|
||||
for (int k = limit - limit_by_16 * 16; k > 0; --k, ++h_p, ++x_p) {
|
||||
const float x_k = *x_p;
|
||||
x2_sum += x_k * x_k;
|
||||
s += *h_p * x_k;
|
||||
@ -73,15 +206,11 @@ void MatchedFilterCore_AVX2(size_t x_start_index,
|
||||
}
|
||||
|
||||
// Sum components together.
|
||||
__m128 x2_sum_128 = _mm_add_ps(_mm256_extractf128_ps(x2_sum_256, 0),
|
||||
_mm256_extractf128_ps(x2_sum_256, 1));
|
||||
__m128 s_128 = _mm_add_ps(_mm256_extractf128_ps(s_256, 0),
|
||||
_mm256_extractf128_ps(s_256, 1));
|
||||
// Combine the accumulated vector and scalar values.
|
||||
float* v = reinterpret_cast<float*>(&x2_sum_128);
|
||||
x2_sum += v[0] + v[1] + v[2] + v[3];
|
||||
v = reinterpret_cast<float*>(&s_128);
|
||||
s += v[0] + v[1] + v[2] + v[3];
|
||||
x2_sum_256 = _mm256_add_ps(x2_sum_256, x2_sum_256_8);
|
||||
s_256 = _mm256_add_ps(s_256, s_256_8);
|
||||
__m128 sum = hsum_ab(x2_sum_256, s_256);
|
||||
x2_sum += sum[0];
|
||||
s += sum[1];
|
||||
|
||||
// Compute the matched filter error.
|
||||
float e = y[i] - s;
|
||||
|
@ -14,84 +14,179 @@
|
||||
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
constexpr int kPreEchoHistogramDataNotUpdated = -1;
|
||||
|
||||
int GetDownSamplingBlockSizeLog2(int down_sampling_factor) {
|
||||
int down_sampling_factor_log2 = 0;
|
||||
down_sampling_factor >>= 1;
|
||||
while (down_sampling_factor > 0) {
|
||||
down_sampling_factor_log2++;
|
||||
down_sampling_factor >>= 1;
|
||||
}
|
||||
return static_cast<int>(kBlockSizeLog2) > down_sampling_factor_log2
|
||||
? static_cast<int>(kBlockSizeLog2) - down_sampling_factor_log2
|
||||
: 0;
|
||||
}
|
||||
} // namespace
|
||||
|
||||
MatchedFilterLagAggregator::MatchedFilterLagAggregator(
|
||||
ApmDataDumper* data_dumper,
|
||||
size_t max_filter_lag,
|
||||
const EchoCanceller3Config::Delay::DelaySelectionThresholds& thresholds)
|
||||
const EchoCanceller3Config::Delay& delay_config)
|
||||
: data_dumper_(data_dumper),
|
||||
histogram_(max_filter_lag + 1, 0),
|
||||
thresholds_(thresholds) {
|
||||
thresholds_(delay_config.delay_selection_thresholds),
|
||||
headroom_(static_cast<int>(delay_config.delay_headroom_samples /
|
||||
delay_config.down_sampling_factor)),
|
||||
highest_peak_aggregator_(max_filter_lag) {
|
||||
if (delay_config.detect_pre_echo) {
|
||||
pre_echo_lag_aggregator_ = std::make_unique<PreEchoLagAggregator>(
|
||||
max_filter_lag, delay_config.down_sampling_factor);
|
||||
}
|
||||
RTC_DCHECK(data_dumper);
|
||||
RTC_DCHECK_LE(thresholds_.initial, thresholds_.converged);
|
||||
histogram_data_.fill(0);
|
||||
}
|
||||
|
||||
MatchedFilterLagAggregator::~MatchedFilterLagAggregator() = default;
|
||||
|
||||
void MatchedFilterLagAggregator::Reset(bool hard_reset) {
|
||||
std::fill(histogram_.begin(), histogram_.end(), 0);
|
||||
histogram_data_.fill(0);
|
||||
histogram_data_index_ = 0;
|
||||
highest_peak_aggregator_.Reset();
|
||||
if (pre_echo_lag_aggregator_ != nullptr) {
|
||||
pre_echo_lag_aggregator_->Reset();
|
||||
}
|
||||
if (hard_reset) {
|
||||
significant_candidate_found_ = false;
|
||||
}
|
||||
}
|
||||
|
||||
absl::optional<DelayEstimate> MatchedFilterLagAggregator::Aggregate(
|
||||
rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates) {
|
||||
// Choose the strongest lag estimate as the best one.
|
||||
float best_accuracy = 0.f;
|
||||
int best_lag_estimate_index = -1;
|
||||
for (size_t k = 0; k < lag_estimates.size(); ++k) {
|
||||
if (lag_estimates[k].updated && lag_estimates[k].reliable) {
|
||||
if (lag_estimates[k].accuracy > best_accuracy) {
|
||||
best_accuracy = lag_estimates[k].accuracy;
|
||||
best_lag_estimate_index = static_cast<int>(k);
|
||||
}
|
||||
}
|
||||
const absl::optional<const MatchedFilter::LagEstimate>& lag_estimate) {
|
||||
if (lag_estimate && pre_echo_lag_aggregator_) {
|
||||
pre_echo_lag_aggregator_->Dump(data_dumper_);
|
||||
pre_echo_lag_aggregator_->Aggregate(
|
||||
std::max(0, static_cast<int>(lag_estimate->pre_echo_lag) - headroom_));
|
||||
}
|
||||
|
||||
// TODO(peah): Remove this logging once all development is done.
|
||||
data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_best_index",
|
||||
best_lag_estimate_index);
|
||||
data_dumper_->DumpRaw("aec3_echo_path_delay_estimator_histogram", histogram_);
|
||||
|
||||
if (best_lag_estimate_index != -1) {
|
||||
RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]);
|
||||
RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]);
|
||||
--histogram_[histogram_data_[histogram_data_index_]];
|
||||
|
||||
histogram_data_[histogram_data_index_] =
|
||||
lag_estimates[best_lag_estimate_index].lag;
|
||||
|
||||
RTC_DCHECK_GT(histogram_.size(), histogram_data_[histogram_data_index_]);
|
||||
RTC_DCHECK_LE(0, histogram_data_[histogram_data_index_]);
|
||||
++histogram_[histogram_data_[histogram_data_index_]];
|
||||
|
||||
histogram_data_index_ =
|
||||
(histogram_data_index_ + 1) % histogram_data_.size();
|
||||
|
||||
const int candidate =
|
||||
std::distance(histogram_.begin(),
|
||||
std::max_element(histogram_.begin(), histogram_.end()));
|
||||
|
||||
significant_candidate_found_ =
|
||||
significant_candidate_found_ ||
|
||||
histogram_[candidate] > thresholds_.converged;
|
||||
if (histogram_[candidate] > thresholds_.converged ||
|
||||
(histogram_[candidate] > thresholds_.initial &&
|
||||
if (lag_estimate) {
|
||||
highest_peak_aggregator_.Aggregate(
|
||||
std::max(0, static_cast<int>(lag_estimate->lag) - headroom_));
|
||||
rtc::ArrayView<const int> histogram = highest_peak_aggregator_.histogram();
|
||||
int candidate = highest_peak_aggregator_.candidate();
|
||||
significant_candidate_found_ = significant_candidate_found_ ||
|
||||
histogram[candidate] > thresholds_.converged;
|
||||
if (histogram[candidate] > thresholds_.converged ||
|
||||
(histogram[candidate] > thresholds_.initial &&
|
||||
!significant_candidate_found_)) {
|
||||
DelayEstimate::Quality quality = significant_candidate_found_
|
||||
? DelayEstimate::Quality::kRefined
|
||||
: DelayEstimate::Quality::kCoarse;
|
||||
return DelayEstimate(quality, candidate);
|
||||
int reported_delay = pre_echo_lag_aggregator_ != nullptr
|
||||
? pre_echo_lag_aggregator_->pre_echo_candidate()
|
||||
: candidate;
|
||||
return DelayEstimate(quality, reported_delay);
|
||||
}
|
||||
}
|
||||
|
||||
return absl::nullopt;
|
||||
}
|
||||
|
||||
// Creates an aggregator whose histogram has one zero-initialized bin for each
// lag in [0, max_filter_lag].
MatchedFilterLagAggregator::HighestPeakAggregator::HighestPeakAggregator(
|
||||
size_t max_filter_lag)
|
||||
: histogram_(max_filter_lag + 1, 0) {
|
||||
// Every entry of the lag history initially refers to bin 0.
histogram_data_.fill(0);
|
||||
}
|
||||
|
||||
// Restores the freshly constructed state: rewinds the write position of the
// lag history, points every history entry at bin 0 and zeroes the histogram.
void MatchedFilterLagAggregator::HighestPeakAggregator::Reset() {
  histogram_data_index_ = 0;
  histogram_data_.fill(0);
  std::fill(histogram_.begin(), histogram_.end(), 0);
}
|
||||
|
||||
// Records `lag` in the sliding history buffer and refreshes the candidate.
//
// The oldest history entry is evicted from the histogram, `lag` takes its
// place, and the candidate becomes the index of the first histogram bin that
// holds the maximum count.
void MatchedFilterLagAggregator::HighestPeakAggregator::Aggregate(int lag) {
  // Slot of the circular history buffer that is about to be overwritten.
  int& slot = histogram_data_[histogram_data_index_];

  // Evict the oldest lag from the histogram.
  RTC_DCHECK_GT(histogram_.size(), slot);
  RTC_DCHECK_LE(0, slot);
  --histogram_[slot];

  // Store the new lag and account for it in the histogram.
  slot = lag;
  RTC_DCHECK_GT(histogram_.size(), slot);
  RTC_DCHECK_LE(0, slot);
  ++histogram_[slot];

  // Advance the circular write position.
  histogram_data_index_ = (histogram_data_index_ + 1) % histogram_data_.size();

  const auto peak = std::max_element(histogram_.begin(), histogram_.end());
  candidate_ = std::distance(histogram_.begin(), peak);
}
|
||||
|
||||
// Sets up the pre-echo lag histogram.
//
// `block_size_log2_` is derived from `down_sampling_factor`, and the histogram
// gets ((max_filter_lag + 1) * down_sampling_factor) >> kBlockSizeLog2 bins.
// Penalization of high delays during the initial phase is on unless the
// "WebRTC-Aec3PenalyzeHighDelaysInitialPhase" field trial is disabled.
MatchedFilterLagAggregator::PreEchoLagAggregator::PreEchoLagAggregator(
    size_t max_filter_lag,
    size_t down_sampling_factor)
    : block_size_log2_(GetDownSamplingBlockSizeLog2(down_sampling_factor)),
      penalize_high_delays_initial_phase_(!field_trial::IsDisabled(
          "WebRTC-Aec3PenalyzeHighDelaysInitialPhase")),
      histogram_(
          ((max_filter_lag + 1) * down_sampling_factor) >> kBlockSizeLog2,
          0) {
  // Marks every history entry as "not updated" and clears the remaining state.
  Reset();
}
|
||||
|
||||
// Clears the histogram, marks every history entry as not yet updated, and
// rewinds the history write position and the pre-echo candidate to zero.
// `number_updates_` is not modified here.
void MatchedFilterLagAggregator::PreEchoLagAggregator::Reset() {
  pre_echo_candidate_ = 0;
  histogram_data_index_ = 0;
  histogram_data_.fill(kPreEchoHistogramDataNotUpdated);
  histogram_.assign(histogram_.size(), 0);
}
|
||||
|
||||
void MatchedFilterLagAggregator::PreEchoLagAggregator::Aggregate(
|
||||
int pre_echo_lag) {
|
||||
int pre_echo_block_size = pre_echo_lag >> block_size_log2_;
|
||||
RTC_DCHECK(pre_echo_block_size >= 0 &&
|
||||
pre_echo_block_size < static_cast<int>(histogram_.size()));
|
||||
pre_echo_block_size =
|
||||
rtc::SafeClamp(pre_echo_block_size, 0, histogram_.size() - 1);
|
||||
// Remove the oldest point from the `histogram_`, it ignores the initial
|
||||
// points where no updates have been done to the `histogram_data_` array.
|
||||
if (histogram_data_[histogram_data_index_] !=
|
||||
kPreEchoHistogramDataNotUpdated) {
|
||||
--histogram_[histogram_data_[histogram_data_index_]];
|
||||
}
|
||||
histogram_data_[histogram_data_index_] = pre_echo_block_size;
|
||||
++histogram_[histogram_data_[histogram_data_index_]];
|
||||
histogram_data_index_ = (histogram_data_index_ + 1) % histogram_data_.size();
|
||||
int pre_echo_candidate_block_size = 0;
|
||||
if (penalize_high_delays_initial_phase_ &&
|
||||
number_updates_ < kNumBlocksPerSecond * 2) {
|
||||
number_updates_++;
|
||||
float penalization_per_delay = 1.0f;
|
||||
float max_histogram_value = -1.0f;
|
||||
for (auto it = histogram_.begin();
|
||||
std::distance(it, histogram_.end()) >=
|
||||
static_cast<int>(kMatchedFilterWindowSizeSubBlocks);
|
||||
it = it + kMatchedFilterWindowSizeSubBlocks) {
|
||||
auto it_max_element =
|
||||
std::max_element(it, it + kMatchedFilterWindowSizeSubBlocks);
|
||||
float weighted_max_value =
|
||||
static_cast<float>(*it_max_element) * penalization_per_delay;
|
||||
if (weighted_max_value > max_histogram_value) {
|
||||
max_histogram_value = weighted_max_value;
|
||||
pre_echo_candidate_block_size =
|
||||
std::distance(histogram_.begin(), it_max_element);
|
||||
}
|
||||
penalization_per_delay *= 0.7f;
|
||||
}
|
||||
} else {
|
||||
pre_echo_candidate_block_size =
|
||||
std::distance(histogram_.begin(),
|
||||
std::max_element(histogram_.begin(), histogram_.end()));
|
||||
}
|
||||
pre_echo_candidate_ = (pre_echo_candidate_block_size << block_size_log2_);
|
||||
}
|
||||
|
||||
// Writes the current pre-echo delay candidate to `data_dumper` under the name
// "aec3_pre_echo_delay_candidate". `data_dumper` is dereferenced without a
// null check.
void MatchedFilterLagAggregator::PreEchoLagAggregator::Dump(
    ApmDataDumper* const data_dumper) {
  data_dumper->DumpRaw("aec3_pre_echo_delay_candidate", pre_echo_candidate_);
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -26,10 +26,9 @@ class ApmDataDumper;
|
||||
// reliable combined lag estimate.
|
||||
class MatchedFilterLagAggregator {
|
||||
public:
|
||||
MatchedFilterLagAggregator(
|
||||
ApmDataDumper* data_dumper,
|
||||
size_t max_filter_lag,
|
||||
const EchoCanceller3Config::Delay::DelaySelectionThresholds& thresholds);
|
||||
MatchedFilterLagAggregator(ApmDataDumper* data_dumper,
|
||||
size_t max_filter_lag,
|
||||
const EchoCanceller3Config::Delay& delay_config);
|
||||
|
||||
MatchedFilterLagAggregator() = delete;
|
||||
MatchedFilterLagAggregator(const MatchedFilterLagAggregator&) = delete;
|
||||
@ -43,15 +42,57 @@ class MatchedFilterLagAggregator {
|
||||
|
||||
// Aggregates the provided lag estimates.
|
||||
absl::optional<DelayEstimate> Aggregate(
|
||||
rtc::ArrayView<const MatchedFilter::LagEstimate> lag_estimates);
|
||||
const absl::optional<const MatchedFilter::LagEstimate>& lag_estimate);
|
||||
|
||||
// Returns whether a reliable delay estimate has been found.
|
||||
bool ReliableDelayFound() const { return significant_candidate_found_; }
|
||||
|
||||
// Returns the delay candidate that is computed by looking at the highest peak
|
||||
// on the matched filters.
|
||||
int GetDelayAtHighestPeak() const {
|
||||
return highest_peak_aggregator_.candidate();
|
||||
}
|
||||
|
||||
private:
|
||||
class PreEchoLagAggregator {
|
||||
public:
|
||||
PreEchoLagAggregator(size_t max_filter_lag, size_t down_sampling_factor);
|
||||
void Reset();
|
||||
void Aggregate(int pre_echo_lag);
|
||||
int pre_echo_candidate() const { return pre_echo_candidate_; }
|
||||
void Dump(ApmDataDumper* const data_dumper);
|
||||
|
||||
private:
|
||||
const int block_size_log2_;
|
||||
const bool penalize_high_delays_initial_phase_;
|
||||
std::array<int, 250> histogram_data_;
|
||||
std::vector<int> histogram_;
|
||||
int histogram_data_index_ = 0;
|
||||
int pre_echo_candidate_ = 0;
|
||||
int number_updates_ = 0;
|
||||
};
|
||||
|
||||
class HighestPeakAggregator {
|
||||
public:
|
||||
explicit HighestPeakAggregator(size_t max_filter_lag);
|
||||
void Reset();
|
||||
void Aggregate(int lag);
|
||||
int candidate() const { return candidate_; }
|
||||
rtc::ArrayView<const int> histogram() const { return histogram_; }
|
||||
|
||||
private:
|
||||
std::vector<int> histogram_;
|
||||
std::array<int, 250> histogram_data_;
|
||||
int histogram_data_index_ = 0;
|
||||
int candidate_ = -1;
|
||||
};
|
||||
|
||||
ApmDataDumper* const data_dumper_;
|
||||
std::vector<int> histogram_;
|
||||
std::array<int, 250> histogram_data_;
|
||||
int histogram_data_index_ = 0;
|
||||
bool significant_candidate_found_ = false;
|
||||
const EchoCanceller3Config::Delay::DelaySelectionThresholds thresholds_;
|
||||
const int headroom_;
|
||||
HighestPeakAggregator highest_peak_aggregator_;
|
||||
std::unique_ptr<PreEchoLagAggregator> pre_echo_lag_aggregator_;
|
||||
};
|
||||
} // namespace webrtc
|
||||
|
||||
|
@ -0,0 +1,148 @@
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/multi_channel_content_detector.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
constexpr int kNumFramesPerSecond = 100;
|
||||
|
||||
// Compares the left and right channels in the render `frame` to determine
// whether the signal is a proper stereo signal. To allow for differences
// introduced by hardware drivers, a threshold `detection_threshold` is used for
// the detection.
//
// `frame` is indexed as frame[band][channel][sample]; only channels 0 and 1
// are compared. Returns true as soon as any pair of corresponding samples
// differs by more than `detection_threshold` in absolute value. Returns false
// for an empty frame, or when the first band has fewer than two channels.
bool HasStereoContent(const std::vector<std::vector<std::vector<float>>>& frame,
                      float detection_threshold) {
  // The emptiness check prevents indexing `frame[0]` on a frame with no bands.
  if (frame.empty() || frame[0].size() < 2) {
    return false;
  }

  for (const auto& band : frame) {
    const std::vector<float>& left = band[0];
    const std::vector<float>& right = band[1];
    for (size_t k = 0; k < left.size(); ++k) {
      if (std::fabs(left[k] - right[k]) > detection_threshold) {
        return true;
      }
    }
  }
  return false;
}
|
||||
|
||||
// In order to avoid logging metrics for very short lifetimes that are unlikely
|
||||
// to reflect real calls and that may dilute the "real" data, logging is limited
|
||||
// to lifetimes of at least 5 seconds.
|
||||
constexpr int kMinNumberOfFramesRequiredToLogMetrics = 500;
|
||||
|
||||
// Continuous metrics are logged every 10 seconds.
|
||||
constexpr int kFramesPer10Seconds = 1000;
|
||||
|
||||
} // namespace
|
||||
|
||||
// Nothing to set up explicitly; the constructor body is empty.
MultiChannelContentDetector::MetricsLogger::MetricsLogger() = default;
|
||||
|
||||
// Logs, once per logger lifetime, whether persistent multichannel content was
// ever detected. Nothing is logged when fewer than
// kMinNumberOfFramesRequiredToLogMetrics frames have been observed.
MultiChannelContentDetector::MetricsLogger::~MetricsLogger() {
  const bool lived_long_enough =
      frame_counter_ >= kMinNumberOfFramesRequiredToLogMetrics;
  if (lived_long_enough) {
    RTC_HISTOGRAM_BOOLEAN(
        "WebRTC.Audio.EchoCanceller.PersistentMultichannelContentEverDetected",
        any_multichannel_content_detected_ ? 1 : 0);
  }
}
|
||||
|
||||
void MultiChannelContentDetector::MetricsLogger::Update(
|
||||
bool persistent_multichannel_content_detected) {
|
||||
++frame_counter_;
|
||||
if (persistent_multichannel_content_detected) {
|
||||
any_multichannel_content_detected_ = true;
|
||||
++persistent_multichannel_frame_counter_;
|
||||
}
|
||||
|
||||
if (frame_counter_ < kMinNumberOfFramesRequiredToLogMetrics)
|
||||
return;
|
||||
if (frame_counter_ % kFramesPer10Seconds != 0)
|
||||
return;
|
||||
const bool mostly_multichannel_last_10_seconds =
|
||||
(persistent_multichannel_frame_counter_ >= kFramesPer10Seconds / 2);
|
||||
RTC_HISTOGRAM_BOOLEAN(
|
||||
"WebRTC.Audio.EchoCanceller.ProcessingPersistentMultichannelContent",
|
||||
mostly_multichannel_last_10_seconds ? 1 : 0);
|
||||
|
||||
persistent_multichannel_frame_counter_ = 0;
|
||||
}
|
||||
|
||||
// Configures the detector.
//
// Both time-based settings are converted from seconds to frames using
// kNumFramesPerSecond. A non-positive
// `stereo_detection_timeout_threshold_seconds` leaves the timeout unset. The
// metrics logger is only created when stereo detection is enabled and there
// is more than one render input channel. With detection disabled, the
// persistent multichannel state is fixed to whether there is more than one
// render input channel.
MultiChannelContentDetector::MultiChannelContentDetector(
    bool detect_stereo_content,
    int num_render_input_channels,
    float detection_threshold,
    int stereo_detection_timeout_threshold_seconds,
    float stereo_detection_hysteresis_seconds)
    : detect_stereo_content_(detect_stereo_content),
      detection_threshold_(detection_threshold),
      detection_timeout_threshold_frames_(
          stereo_detection_timeout_threshold_seconds <= 0
              ? absl::nullopt
              : absl::make_optional(stereo_detection_timeout_threshold_seconds *
                                    kNumFramesPerSecond)),
      stereo_detection_hysteresis_frames_(static_cast<int>(
          stereo_detection_hysteresis_seconds * kNumFramesPerSecond)),
      metrics_logger_((detect_stereo_content && num_render_input_channels > 1)
                          ? std::make_unique<MetricsLogger>()
                          : nullptr),
      persistent_multichannel_content_detected_(
          !detect_stereo_content && num_render_input_channels > 1) {}
|
||||
|
||||
bool MultiChannelContentDetector::UpdateDetection(
|
||||
const std::vector<std::vector<std::vector<float>>>& frame) {
|
||||
if (!detect_stereo_content_) {
|
||||
RTC_DCHECK_EQ(frame[0].size() > 1,
|
||||
persistent_multichannel_content_detected_);
|
||||
return false;
|
||||
}
|
||||
|
||||
const bool previous_persistent_multichannel_content_detected =
|
||||
persistent_multichannel_content_detected_;
|
||||
const bool stereo_detected_in_frame =
|
||||
HasStereoContent(frame, detection_threshold_);
|
||||
|
||||
consecutive_frames_with_stereo_ =
|
||||
stereo_detected_in_frame ? consecutive_frames_with_stereo_ + 1 : 0;
|
||||
frames_since_stereo_detected_last_ =
|
||||
stereo_detected_in_frame ? 0 : frames_since_stereo_detected_last_ + 1;
|
||||
|
||||
// Detect persistent multichannel content.
|
||||
if (consecutive_frames_with_stereo_ > stereo_detection_hysteresis_frames_) {
|
||||
persistent_multichannel_content_detected_ = true;
|
||||
}
|
||||
if (detection_timeout_threshold_frames_.has_value() &&
|
||||
frames_since_stereo_detected_last_ >=
|
||||
*detection_timeout_threshold_frames_) {
|
||||
persistent_multichannel_content_detected_ = false;
|
||||
}
|
||||
|
||||
// Detect temporary multichannel content.
|
||||
temporary_multichannel_content_detected_ =
|
||||
persistent_multichannel_content_detected_ ? false
|
||||
: stereo_detected_in_frame;
|
||||
|
||||
if (metrics_logger_)
|
||||
metrics_logger_->Update(persistent_multichannel_content_detected_);
|
||||
|
||||
return previous_persistent_multichannel_content_detected !=
|
||||
persistent_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,95 @@
|
||||
/*
|
||||
* Copyright (c) 2022 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Analyzes audio content to determine whether the contained audio is proper
|
||||
// multichannel, or only upmixed mono. To allow for differences introduced by
|
||||
// hardware drivers, a threshold `detection_threshold` is used for the
|
||||
// detection.
|
||||
// Logs metrics continously and upon destruction.
|
||||
class MultiChannelContentDetector {
|
||||
public:
|
||||
// If |stereo_detection_timeout_threshold_seconds| <= 0, no timeout is
|
||||
// applied: Once multichannel is detected, the detector remains in that state
|
||||
// for its lifetime.
|
||||
MultiChannelContentDetector(bool detect_stereo_content,
|
||||
int num_render_input_channels,
|
||||
float detection_threshold,
|
||||
int stereo_detection_timeout_threshold_seconds,
|
||||
float stereo_detection_hysteresis_seconds);
|
||||
|
||||
// Compares the left and right channels in the render `frame` to determine
|
||||
// whether the signal is a proper multichannel signal. Returns a bool
|
||||
// indicating whether a change in the proper multichannel content was
|
||||
// detected.
|
||||
bool UpdateDetection(
|
||||
const std::vector<std::vector<std::vector<float>>>& frame);
|
||||
|
||||
bool IsProperMultiChannelContentDetected() const {
|
||||
return persistent_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
bool IsTemporaryMultiChannelContentDetected() const {
|
||||
return temporary_multichannel_content_detected_;
|
||||
}
|
||||
|
||||
private:
|
||||
// Tracks and logs metrics for the amount of multichannel content detected.
|
||||
class MetricsLogger {
|
||||
public:
|
||||
MetricsLogger();
|
||||
|
||||
// The destructor logs call summary statistics.
|
||||
~MetricsLogger();
|
||||
|
||||
// Updates and logs metrics.
|
||||
void Update(bool persistent_multichannel_content_detected);
|
||||
|
||||
private:
|
||||
int frame_counter_ = 0;
|
||||
|
||||
// Counts the number of frames of persistent multichannel audio observed
|
||||
// during the current metrics collection interval.
|
||||
int persistent_multichannel_frame_counter_ = 0;
|
||||
|
||||
// Indicates whether persistent multichannel content has ever been detected.
|
||||
bool any_multichannel_content_detected_ = false;
|
||||
};
|
||||
|
||||
const bool detect_stereo_content_;
|
||||
const float detection_threshold_;
|
||||
const absl::optional<int> detection_timeout_threshold_frames_;
|
||||
const int stereo_detection_hysteresis_frames_;
|
||||
|
||||
// Collects and reports metrics on the amount of multichannel content
|
||||
// detected. Only created if |num_render_input_channels| > 1 and
|
||||
// |detect_stereo_content_| is true.
|
||||
const std::unique_ptr<MetricsLogger> metrics_logger_;
|
||||
|
||||
bool persistent_multichannel_content_detected_;
|
||||
bool temporary_multichannel_content_detected_ = false;
|
||||
int64_t frames_since_stereo_detected_last_ = 0;
|
||||
int64_t consecutive_frames_with_stereo_ = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AEC3_MULTI_CHANNEL_CONTENT_DETECTOR_H_
|
@ -20,7 +20,6 @@
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
#include "modules/audio_processing/aec3/subtractor_output.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -31,13 +30,12 @@ constexpr int kPoorExcitationCounterInitial = 1000;
|
||||
|
||||
} // namespace
|
||||
|
||||
int RefinedFilterUpdateGain::instance_count_ = 0;
|
||||
std::atomic<int> RefinedFilterUpdateGain::instance_count_(0);
|
||||
|
||||
RefinedFilterUpdateGain::RefinedFilterUpdateGain(
|
||||
const EchoCanceller3Config::Filter::RefinedConfiguration& config,
|
||||
size_t config_change_duration_blocks)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
config_change_duration_blocks_(
|
||||
static_cast<int>(config_change_duration_blocks)),
|
||||
poor_excitation_counter_(kPoorExcitationCounterInitial) {
|
||||
@ -73,6 +71,7 @@ void RefinedFilterUpdateGain::Compute(
|
||||
rtc::ArrayView<const float> erl,
|
||||
size_t size_partitions,
|
||||
bool saturated_capture_signal,
|
||||
bool disallow_leakage_diverged,
|
||||
FftData* gain_fft) {
|
||||
RTC_DCHECK(gain_fft);
|
||||
// Introducing shorter notation to improve readability.
|
||||
@ -125,7 +124,7 @@ void RefinedFilterUpdateGain::Compute(
|
||||
|
||||
// H_error = H_error + factor * erl.
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
if (E2_coarse[k] >= E2_refined[k]) {
|
||||
if (E2_refined[k] <= E2_coarse[k] || disallow_leakage_diverged) {
|
||||
H_error_[k] += current_config_.leakage_converged * erl[k];
|
||||
} else {
|
||||
H_error_[k] += current_config_.leakage_diverged * erl[k];
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
|
||||
#include "api/array_view.h"
|
||||
@ -51,6 +52,7 @@ class RefinedFilterUpdateGain {
|
||||
rtc::ArrayView<const float> erl,
|
||||
size_t size_partitions,
|
||||
bool saturated_capture_signal,
|
||||
bool disallow_leakage_diverged,
|
||||
FftData* gain_fft);
|
||||
|
||||
// Sets a new config.
|
||||
@ -68,7 +70,7 @@ class RefinedFilterUpdateGain {
|
||||
}
|
||||
|
||||
private:
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const int config_change_duration_blocks_;
|
||||
float one_by_config_change_duration_blocks_;
|
||||
|
@ -42,8 +42,9 @@ void RenderBuffer::SpectralSum(
|
||||
int position = spectrum_buffer_->read;
|
||||
for (size_t j = 0; j < num_spectra; ++j) {
|
||||
for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
|
||||
std::transform(X2->begin(), X2->end(), channel_spectrum.begin(),
|
||||
X2->begin(), std::plus<float>());
|
||||
for (size_t k = 0; k < X2->size(); ++k) {
|
||||
(*X2)[k] += channel_spectrum[k];
|
||||
}
|
||||
}
|
||||
position = spectrum_buffer_->IncIndex(position);
|
||||
}
|
||||
@ -60,18 +61,18 @@ void RenderBuffer::SpectralSums(
|
||||
size_t j = 0;
|
||||
for (; j < num_spectra_shorter; ++j) {
|
||||
for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
|
||||
std::transform(X2_shorter->begin(), X2_shorter->end(),
|
||||
channel_spectrum.begin(), X2_shorter->begin(),
|
||||
std::plus<float>());
|
||||
for (size_t k = 0; k < X2_shorter->size(); ++k) {
|
||||
(*X2_shorter)[k] += channel_spectrum[k];
|
||||
}
|
||||
}
|
||||
position = spectrum_buffer_->IncIndex(position);
|
||||
}
|
||||
std::copy(X2_shorter->begin(), X2_shorter->end(), X2_longer->begin());
|
||||
for (; j < num_spectra_longer; ++j) {
|
||||
for (const auto& channel_spectrum : spectrum_buffer_->buffer[position]) {
|
||||
std::transform(X2_longer->begin(), X2_longer->end(),
|
||||
channel_spectrum.begin(), X2_longer->begin(),
|
||||
std::plus<float>());
|
||||
for (size_t k = 0; k < X2_longer->size(); ++k) {
|
||||
(*X2_longer)[k] += channel_spectrum[k];
|
||||
}
|
||||
}
|
||||
position = spectrum_buffer_->IncIndex(position);
|
||||
}
|
||||
|
@ -40,8 +40,7 @@ class RenderBuffer {
|
||||
~RenderBuffer();
|
||||
|
||||
// Get a block.
|
||||
const std::vector<std::vector<std::vector<float>>>& Block(
|
||||
int buffer_offset_blocks) const {
|
||||
const Block& GetBlock(int buffer_offset_blocks) const {
|
||||
int position =
|
||||
block_buffer_->OffsetIndex(block_buffer_->read, buffer_offset_blocks);
|
||||
return block_buffer_->buffer[position];
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <cmath>
|
||||
#include <memory>
|
||||
#include <numeric>
|
||||
@ -32,7 +33,6 @@
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
@ -40,11 +40,6 @@
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
bool UpdateCaptureCallCounterOnSkippedBlocks() {
|
||||
return !field_trial::IsEnabled(
|
||||
"WebRTC-Aec3RenderBufferCallCounterUpdateKillSwitch");
|
||||
}
|
||||
|
||||
class RenderDelayBufferImpl final : public RenderDelayBuffer {
|
||||
public:
|
||||
RenderDelayBufferImpl(const EchoCanceller3Config& config,
|
||||
@ -54,8 +49,7 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer {
|
||||
~RenderDelayBufferImpl() override;
|
||||
|
||||
void Reset() override;
|
||||
BufferingEvent Insert(
|
||||
const std::vector<std::vector<std::vector<float>>>& block) override;
|
||||
BufferingEvent Insert(const Block& block) override;
|
||||
BufferingEvent PrepareCaptureProcessing() override;
|
||||
void HandleSkippedCaptureProcessing() override;
|
||||
bool AlignFromDelay(size_t delay) override;
|
||||
@ -75,11 +69,10 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer {
|
||||
bool HasReceivedBufferDelay() override;
|
||||
|
||||
private:
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const Aec3Optimization optimization_;
|
||||
const EchoCanceller3Config config_;
|
||||
const bool update_capture_call_counter_on_skipped_blocks_;
|
||||
const float render_linear_amplitude_gain_;
|
||||
const rtc::LoggingSeverity delay_log_level_;
|
||||
size_t down_sampling_factor_;
|
||||
@ -110,8 +103,7 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer {
|
||||
int MapDelayToTotalDelay(size_t delay) const;
|
||||
int ComputeDelay() const;
|
||||
void ApplyTotalDelay(int delay);
|
||||
void InsertBlock(const std::vector<std::vector<std::vector<float>>>& block,
|
||||
int previous_write);
|
||||
void InsertBlock(const Block& block, int previous_write);
|
||||
bool DetectActiveRender(rtc::ArrayView<const float> x) const;
|
||||
bool DetectExcessRenderBlocks();
|
||||
void IncrementWriteIndices();
|
||||
@ -121,17 +113,14 @@ class RenderDelayBufferImpl final : public RenderDelayBuffer {
|
||||
bool RenderUnderrun();
|
||||
};
|
||||
|
||||
int RenderDelayBufferImpl::instance_count_ = 0;
|
||||
std::atomic<int> RenderDelayBufferImpl::instance_count_ = 0;
|
||||
|
||||
RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_render_channels)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
optimization_(DetectOptimization()),
|
||||
config_(config),
|
||||
update_capture_call_counter_on_skipped_blocks_(
|
||||
UpdateCaptureCallCounterOnSkippedBlocks()),
|
||||
render_linear_amplitude_gain_(
|
||||
std::pow(10.0f, config_.render_levels.render_power_gain_db / 20.f)),
|
||||
delay_log_level_(config_.delay.log_warning_on_delay_changes
|
||||
@ -145,8 +134,7 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
|
||||
config.delay.num_filters,
|
||||
config.filter.refined.length_blocks),
|
||||
NumBandsForRate(sample_rate_hz),
|
||||
num_render_channels,
|
||||
kBlockSize),
|
||||
num_render_channels),
|
||||
spectra_(blocks_.buffer.size(), num_render_channels),
|
||||
ffts_(blocks_.buffer.size(), num_render_channels),
|
||||
delay_(config_.delay.default_delay),
|
||||
@ -161,7 +149,7 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(const EchoCanceller3Config& config,
|
||||
RTC_DCHECK_EQ(blocks_.buffer.size(), ffts_.buffer.size());
|
||||
RTC_DCHECK_EQ(spectra_.buffer.size(), ffts_.buffer.size());
|
||||
for (size_t i = 0; i < blocks_.buffer.size(); ++i) {
|
||||
RTC_DCHECK_EQ(blocks_.buffer[i][0].size(), ffts_.buffer[i].size());
|
||||
RTC_DCHECK_EQ(blocks_.buffer[i].NumChannels(), ffts_.buffer[i].size());
|
||||
RTC_DCHECK_EQ(spectra_.buffer[i].size(), ffts_.buffer[i].size());
|
||||
}
|
||||
|
||||
@ -211,7 +199,7 @@ void RenderDelayBufferImpl::Reset() {
|
||||
|
||||
// Inserts a new block into the render buffers.
|
||||
RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
|
||||
const std::vector<std::vector<std::vector<float>>>& block) {
|
||||
const Block& block) {
|
||||
++render_call_counter_;
|
||||
if (delay_) {
|
||||
if (!last_call_was_render_) {
|
||||
@ -239,7 +227,8 @@ RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
|
||||
|
||||
// Detect and update render activity.
|
||||
if (!render_activity_) {
|
||||
render_activity_counter_ += DetectActiveRender(block[0][0]) ? 1 : 0;
|
||||
render_activity_counter_ +=
|
||||
DetectActiveRender(block.View(/*band=*/0, /*channel=*/0)) ? 1 : 0;
|
||||
render_activity_ = render_activity_counter_ >= 20;
|
||||
}
|
||||
|
||||
@ -254,9 +243,7 @@ RenderDelayBuffer::BufferingEvent RenderDelayBufferImpl::Insert(
|
||||
}
|
||||
|
||||
void RenderDelayBufferImpl::HandleSkippedCaptureProcessing() {
|
||||
if (update_capture_call_counter_on_skipped_blocks_) {
|
||||
++capture_call_counter_;
|
||||
}
|
||||
++capture_call_counter_;
|
||||
}
|
||||
|
||||
// Prepares the render buffers for processing another capture block.
|
||||
@ -394,46 +381,45 @@ void RenderDelayBufferImpl::AlignFromExternalDelay() {
|
||||
}
|
||||
|
||||
// Inserts a block into the render buffers.
|
||||
void RenderDelayBufferImpl::InsertBlock(
|
||||
const std::vector<std::vector<std::vector<float>>>& block,
|
||||
int previous_write) {
|
||||
void RenderDelayBufferImpl::InsertBlock(const Block& block,
|
||||
int previous_write) {
|
||||
auto& b = blocks_;
|
||||
auto& lr = low_rate_;
|
||||
auto& ds = render_ds_;
|
||||
auto& f = ffts_;
|
||||
auto& s = spectra_;
|
||||
const size_t num_bands = b.buffer[b.write].size();
|
||||
const size_t num_render_channels = b.buffer[b.write][0].size();
|
||||
RTC_DCHECK_EQ(block.size(), b.buffer[b.write].size());
|
||||
const size_t num_bands = b.buffer[b.write].NumBands();
|
||||
const size_t num_render_channels = b.buffer[b.write].NumChannels();
|
||||
RTC_DCHECK_EQ(block.NumBands(), num_bands);
|
||||
RTC_DCHECK_EQ(block.NumChannels(), num_render_channels);
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
RTC_DCHECK_EQ(block[band].size(), num_render_channels);
|
||||
RTC_DCHECK_EQ(b.buffer[b.write][band].size(), num_render_channels);
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
RTC_DCHECK_EQ(block[band][ch].size(), b.buffer[b.write][band][ch].size());
|
||||
std::copy(block[band][ch].begin(), block[band][ch].end(),
|
||||
b.buffer[b.write][band][ch].begin());
|
||||
std::copy(block.begin(band, ch), block.end(band, ch),
|
||||
b.buffer[b.write].begin(band, ch));
|
||||
}
|
||||
}
|
||||
|
||||
if (render_linear_amplitude_gain_ != 1.f) {
|
||||
for (size_t band = 0; band < num_bands; ++band) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
for (size_t k = 0; k < 64; ++k) {
|
||||
b.buffer[b.write][band][ch][k] *= render_linear_amplitude_gain_;
|
||||
rtc::ArrayView<float, kBlockSize> b_view =
|
||||
b.buffer[b.write].View(band, ch);
|
||||
for (float& sample : b_view) {
|
||||
sample *= render_linear_amplitude_gain_;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::array<float, kBlockSize> downmixed_render;
|
||||
render_mixer_.ProduceOutput(b.buffer[b.write][0], downmixed_render);
|
||||
render_mixer_.ProduceOutput(b.buffer[b.write], downmixed_render);
|
||||
render_decimator_.Decimate(downmixed_render, ds);
|
||||
data_dumper_->DumpWav("aec3_render_decimator_output", ds.size(), ds.data(),
|
||||
16000 / down_sampling_factor_, 1);
|
||||
std::copy(ds.rbegin(), ds.rend(), lr.buffer.begin() + lr.write);
|
||||
for (size_t channel = 0; channel < b.buffer[b.write][0].size(); ++channel) {
|
||||
fft_.PaddedFft(b.buffer[b.write][0][channel],
|
||||
b.buffer[previous_write][0][channel],
|
||||
for (int channel = 0; channel < b.buffer[b.write].NumChannels(); ++channel) {
|
||||
fft_.PaddedFft(b.buffer[b.write].View(/*band=*/0, channel),
|
||||
b.buffer[previous_write].View(/*band=*/0, channel),
|
||||
&f.buffer[f.write][channel]);
|
||||
f.buffer[f.write][channel].Spectrum(optimization_,
|
||||
s.buffer[s.write][channel]);
|
||||
|
@ -16,6 +16,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
|
||||
@ -41,8 +42,7 @@ class RenderDelayBuffer {
|
||||
virtual void Reset() = 0;
|
||||
|
||||
// Inserts a block into the buffer.
|
||||
virtual BufferingEvent Insert(
|
||||
const std::vector<std::vector<std::vector<float>>>& block) = 0;
|
||||
virtual BufferingEvent Insert(const Block& block) = 0;
|
||||
|
||||
// Updates the buffers one step based on the specified buffer delay. Returns
|
||||
// an enum indicating whether there was a special event that occurred.
|
||||
|
@ -12,6 +12,7 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#include <algorithm>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
@ -23,7 +24,6 @@
|
||||
#include "modules/audio_processing/aec3/echo_path_delay_estimator.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_controller_metrics.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -47,14 +47,13 @@ class RenderDelayControllerImpl final : public RenderDelayController {
|
||||
absl::optional<DelayEstimate> GetDelay(
|
||||
const DownsampledRenderBuffer& render_buffer,
|
||||
size_t render_delay_buffer_delay,
|
||||
const std::vector<std::vector<float>>& capture) override;
|
||||
const Block& capture) override;
|
||||
bool HasClockdrift() const override;
|
||||
|
||||
private:
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const int hysteresis_limit_blocks_;
|
||||
const int delay_headroom_samples_;
|
||||
absl::optional<DelayEstimate> delay_;
|
||||
EchoPathDelayEstimator delay_estimator_;
|
||||
RenderDelayControllerMetrics metrics_;
|
||||
@ -67,15 +66,9 @@ class RenderDelayControllerImpl final : public RenderDelayController {
|
||||
DelayEstimate ComputeBufferDelay(
|
||||
const absl::optional<DelayEstimate>& current_delay,
|
||||
int hysteresis_limit_blocks,
|
||||
int delay_headroom_samples,
|
||||
DelayEstimate estimated_delay) {
|
||||
// Subtract delay headroom.
|
||||
const int delay_with_headroom_samples = std::max(
|
||||
static_cast<int>(estimated_delay.delay) - delay_headroom_samples, 0);
|
||||
|
||||
// Compute the buffer delay increase required to achieve the desired latency.
|
||||
size_t new_delay_blocks = delay_with_headroom_samples >> kBlockSizeLog2;
|
||||
|
||||
size_t new_delay_blocks = estimated_delay.delay >> kBlockSizeLog2;
|
||||
// Add hysteresis.
|
||||
if (current_delay) {
|
||||
size_t current_delay_blocks = current_delay->delay;
|
||||
@ -84,23 +77,20 @@ DelayEstimate ComputeBufferDelay(
|
||||
new_delay_blocks = current_delay_blocks;
|
||||
}
|
||||
}
|
||||
|
||||
DelayEstimate new_delay = estimated_delay;
|
||||
new_delay.delay = new_delay_blocks;
|
||||
return new_delay;
|
||||
}
|
||||
|
||||
int RenderDelayControllerImpl::instance_count_ = 0;
|
||||
std::atomic<int> RenderDelayControllerImpl::instance_count_(0);
|
||||
|
||||
RenderDelayControllerImpl::RenderDelayControllerImpl(
|
||||
const EchoCanceller3Config& config,
|
||||
int sample_rate_hz,
|
||||
size_t num_capture_channels)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
hysteresis_limit_blocks_(
|
||||
static_cast<int>(config.delay.hysteresis_limit_blocks)),
|
||||
delay_headroom_samples_(config.delay.delay_headroom_samples),
|
||||
delay_estimator_(data_dumper_.get(), config, num_capture_channels),
|
||||
last_delay_estimate_quality_(DelayEstimate::Quality::kCoarse) {
|
||||
RTC_DCHECK(ValidFullBandRate(sample_rate_hz));
|
||||
@ -124,8 +114,7 @@ void RenderDelayControllerImpl::LogRenderCall() {}
|
||||
absl::optional<DelayEstimate> RenderDelayControllerImpl::GetDelay(
|
||||
const DownsampledRenderBuffer& render_buffer,
|
||||
size_t render_delay_buffer_delay,
|
||||
const std::vector<std::vector<float>>& capture) {
|
||||
RTC_DCHECK_EQ(kBlockSize, capture[0].size());
|
||||
const Block& capture) {
|
||||
++capture_call_counter_;
|
||||
|
||||
auto delay_samples = delay_estimator_.EstimateDelay(render_buffer, capture);
|
||||
@ -161,15 +150,16 @@ absl::optional<DelayEstimate> RenderDelayControllerImpl::GetDelay(
|
||||
const bool use_hysteresis =
|
||||
last_delay_estimate_quality_ == DelayEstimate::Quality::kRefined &&
|
||||
delay_samples_->quality == DelayEstimate::Quality::kRefined;
|
||||
delay_ = ComputeBufferDelay(delay_,
|
||||
use_hysteresis ? hysteresis_limit_blocks_ : 0,
|
||||
delay_headroom_samples_, *delay_samples_);
|
||||
delay_ = ComputeBufferDelay(
|
||||
delay_, use_hysteresis ? hysteresis_limit_blocks_ : 0, *delay_samples_);
|
||||
last_delay_estimate_quality_ = delay_samples_->quality;
|
||||
}
|
||||
|
||||
metrics_.Update(delay_samples_ ? absl::optional<size_t>(delay_samples_->delay)
|
||||
: absl::nullopt,
|
||||
delay_ ? delay_->delay : 0, 0, delay_estimator_.Clockdrift());
|
||||
metrics_.Update(
|
||||
delay_samples_ ? absl::optional<size_t>(delay_samples_->delay)
|
||||
: absl::nullopt,
|
||||
delay_ ? absl::optional<size_t>(delay_->delay) : absl::nullopt,
|
||||
delay_estimator_.Clockdrift());
|
||||
|
||||
data_dumper_->DumpRaw("aec3_render_delay_controller_delay",
|
||||
delay_samples ? delay_samples->delay : 0);
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "absl/types/optional.h"
|
||||
#include "api/array_view.h"
|
||||
#include "api/audio/echo_canceller3_config.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/delay_estimate.h"
|
||||
#include "modules/audio_processing/aec3/downsampled_render_buffer.h"
|
||||
#include "modules/audio_processing/aec3/render_delay_buffer.h"
|
||||
@ -40,7 +41,7 @@ class RenderDelayController {
|
||||
virtual absl::optional<DelayEstimate> GetDelay(
|
||||
const DownsampledRenderBuffer& render_buffer,
|
||||
size_t render_delay_buffer_delay,
|
||||
const std::vector<std::vector<float>>& capture) = 0;
|
||||
const Block& capture) = 0;
|
||||
|
||||
// Returns true if clockdrift has been detected.
|
||||
virtual bool HasClockdrift() const = 0;
|
||||
|
@ -37,16 +37,13 @@ enum class DelayChangesCategory {
|
||||
kNumCategories
|
||||
};
|
||||
|
||||
constexpr int kMaxSkewShiftCount = 20;
|
||||
|
||||
} // namespace
|
||||
|
||||
RenderDelayControllerMetrics::RenderDelayControllerMetrics() = default;
|
||||
|
||||
void RenderDelayControllerMetrics::Update(
|
||||
absl::optional<size_t> delay_samples,
|
||||
size_t buffer_delay_blocks,
|
||||
absl::optional<int> skew_shift_blocks,
|
||||
absl::optional<size_t> buffer_delay_blocks,
|
||||
ClockdriftDetector::Level clockdrift) {
|
||||
++call_counter_;
|
||||
|
||||
@ -54,6 +51,8 @@ void RenderDelayControllerMetrics::Update(
|
||||
size_t delay_blocks;
|
||||
if (delay_samples) {
|
||||
++reliable_delay_estimate_counter_;
|
||||
// Add an offset by 1 (metric is halved before reporting) to reserve 0 for
|
||||
// absent delay.
|
||||
delay_blocks = (*delay_samples) / kBlockSize + 2;
|
||||
} else {
|
||||
delay_blocks = 0;
|
||||
@ -64,21 +63,21 @@ void RenderDelayControllerMetrics::Update(
|
||||
delay_blocks_ = delay_blocks;
|
||||
}
|
||||
|
||||
if (skew_shift_blocks) {
|
||||
skew_shift_count_ = std::min(kMaxSkewShiftCount, skew_shift_count_);
|
||||
}
|
||||
} else if (++initial_call_counter_ == 5 * kNumBlocksPerSecond) {
|
||||
initial_update = false;
|
||||
}
|
||||
|
||||
if (call_counter_ == kMetricsReportingIntervalBlocks) {
|
||||
int value_to_report = static_cast<int>(delay_blocks_);
|
||||
// Divide by 2 to compress metric range.
|
||||
value_to_report = std::min(124, value_to_report >> 1);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.EchoPathDelay",
|
||||
value_to_report, 0, 124, 125);
|
||||
|
||||
value_to_report = static_cast<int>(buffer_delay_blocks + 2);
|
||||
value_to_report = std::min(124, value_to_report >> 1);
|
||||
// Divide by 2 to compress metric range.
|
||||
// Offset by 1 to reserve 0 for absent delay.
|
||||
value_to_report = buffer_delay_blocks ? (*buffer_delay_blocks + 2) >> 1 : 0;
|
||||
value_to_report = std::min(124, value_to_report);
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.BufferDelay",
|
||||
value_to_report, 0, 124, 125);
|
||||
|
||||
@ -120,20 +119,8 @@ void RenderDelayControllerMetrics::Update(
|
||||
"WebRTC.Audio.EchoCanceller.Clockdrift", static_cast<int>(clockdrift),
|
||||
static_cast<int>(ClockdriftDetector::Level::kNumCategories));
|
||||
|
||||
metrics_reported_ = true;
|
||||
call_counter_ = 0;
|
||||
ResetMetrics();
|
||||
} else {
|
||||
metrics_reported_ = false;
|
||||
}
|
||||
|
||||
if (!initial_update && ++skew_report_timer_ == 60 * kNumBlocksPerSecond) {
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.EchoCanceller.MaxSkewShiftCount",
|
||||
skew_shift_count_, 0, kMaxSkewShiftCount,
|
||||
kMaxSkewShiftCount + 1);
|
||||
|
||||
skew_shift_count_ = 0;
|
||||
skew_report_timer_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -15,7 +15,6 @@
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "modules/audio_processing/aec3/clockdrift_detector.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -24,15 +23,15 @@ class RenderDelayControllerMetrics {
|
||||
public:
|
||||
RenderDelayControllerMetrics();
|
||||
|
||||
RenderDelayControllerMetrics(const RenderDelayControllerMetrics&) = delete;
|
||||
RenderDelayControllerMetrics& operator=(const RenderDelayControllerMetrics&) =
|
||||
delete;
|
||||
|
||||
// Updates the metric with new data.
|
||||
void Update(absl::optional<size_t> delay_samples,
|
||||
size_t buffer_delay_blocks,
|
||||
absl::optional<int> skew_shift_blocks,
|
||||
absl::optional<size_t> buffer_delay_blocks,
|
||||
ClockdriftDetector::Level clockdrift);
|
||||
|
||||
// Returns true if the metrics have just been reported, otherwise false.
|
||||
bool MetricsReported() { return metrics_reported_; }
|
||||
|
||||
private:
|
||||
// Resets the metrics.
|
||||
void ResetMetrics();
|
||||
@ -41,13 +40,8 @@ class RenderDelayControllerMetrics {
|
||||
int reliable_delay_estimate_counter_ = 0;
|
||||
int delay_change_counter_ = 0;
|
||||
int call_counter_ = 0;
|
||||
int skew_report_timer_ = 0;
|
||||
int initial_call_counter_ = 0;
|
||||
bool metrics_reported_ = false;
|
||||
bool initial_update = true;
|
||||
int skew_shift_count_ = 0;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(RenderDelayControllerMetrics);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -66,10 +66,9 @@ void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer,
|
||||
*narrow_peak_band = absl::nullopt;
|
||||
}
|
||||
|
||||
const std::vector<std::vector<std::vector<float>>>& x_latest =
|
||||
render_buffer.Block(0);
|
||||
const Block& x_latest = render_buffer.GetBlock(0);
|
||||
float max_peak_level = 0.f;
|
||||
for (size_t channel = 0; channel < x_latest[0].size(); ++channel) {
|
||||
for (int channel = 0; channel < x_latest.NumChannels(); ++channel) {
|
||||
rtc::ArrayView<const float, kFftLengthBy2Plus1> X2_latest =
|
||||
render_buffer.Spectrum(0)[channel];
|
||||
|
||||
@ -90,13 +89,14 @@ void IdentifyStrongNarrowBandComponent(const RenderBuffer& render_buffer,
|
||||
}
|
||||
|
||||
// Assess the render signal strength.
|
||||
auto result0 = std::minmax_element(x_latest[0][channel].begin(),
|
||||
x_latest[0][channel].end());
|
||||
auto result0 = std::minmax_element(x_latest.begin(/*band=*/0, channel),
|
||||
x_latest.end(/*band=*/0, channel));
|
||||
float max_abs = std::max(fabs(*result0.first), fabs(*result0.second));
|
||||
|
||||
if (x_latest.size() > 1) {
|
||||
const auto result1 = std::minmax_element(x_latest[1][channel].begin(),
|
||||
x_latest[1][channel].end());
|
||||
if (x_latest.NumBands() > 1) {
|
||||
const auto result1 =
|
||||
std::minmax_element(x_latest.begin(/*band=*/1, channel),
|
||||
x_latest.end(/*band=*/1, channel));
|
||||
max_abs =
|
||||
std::max(max_abs, static_cast<float>(std::max(
|
||||
fabs(*result1.first), fabs(*result1.second))));
|
||||
|
@ -20,7 +20,6 @@
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/render_buffer.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -30,6 +29,9 @@ class RenderSignalAnalyzer {
|
||||
explicit RenderSignalAnalyzer(const EchoCanceller3Config& config);
|
||||
~RenderSignalAnalyzer();
|
||||
|
||||
RenderSignalAnalyzer(const RenderSignalAnalyzer&) = delete;
|
||||
RenderSignalAnalyzer& operator=(const RenderSignalAnalyzer&) = delete;
|
||||
|
||||
// Updates the render signal analysis with the most recent render signal.
|
||||
void Update(const RenderBuffer& render_buffer,
|
||||
const absl::optional<size_t>& delay_partitions);
|
||||
@ -53,8 +55,6 @@ class RenderSignalAnalyzer {
|
||||
std::array<size_t, kFftLengthBy2 - 1> narrow_band_counters_;
|
||||
absl::optional<int> narrow_peak_band_;
|
||||
size_t narrow_peak_counter_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(RenderSignalAnalyzer);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -23,15 +23,10 @@
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
constexpr float kDefaultTransparentModeGain = 0.f;
|
||||
constexpr float kDefaultTransparentModeGain = 0.01f;
|
||||
|
||||
float GetTransparentModeGain() {
|
||||
if (field_trial::IsEnabled(
|
||||
"WebRTC-Aec3NoSuppressionInTransparentModeKillSwitch")) {
|
||||
return 0.01f;
|
||||
} else {
|
||||
return kDefaultTransparentModeGain;
|
||||
}
|
||||
return kDefaultTransparentModeGain;
|
||||
}
|
||||
|
||||
float GetEarlyReflectionsDefaultModeGain(
|
||||
@ -50,6 +45,13 @@ float GetLateReflectionsDefaultModeGain(
|
||||
return config.default_gain;
|
||||
}
|
||||
|
||||
// Returns true if the `erle_onset_compensation_in_dominant_nearend` flag of the
// passed EpStrength config is set, or if the
// "WebRTC-Aec3UseErleOnsetCompensationInDominantNearend" field trial is
// enabled. The two sources are OR-ed, so the field trial can only switch the
// behavior on; it cannot override a config that already enables it.
bool UseErleOnsetCompensationInDominantNearend(
|
||||
const EchoCanceller3Config::EpStrength& config) {
|
||||
return config.erle_onset_compensation_in_dominant_nearend ||
|
||||
field_trial::IsEnabled(
|
||||
"WebRTC-Aec3UseErleOnsetCompensationInDominantNearend");
|
||||
}
|
||||
|
||||
// Computes the indexes that will be used for computing spectral power over
|
||||
// the blocks surrounding the delay.
|
||||
void GetRenderIndexesToAnalyze(
|
||||
@ -89,22 +91,6 @@ void LinearEstimate(
|
||||
}
|
||||
}
|
||||
|
||||
// Estimates the residual echo power based on an uncertainty estimate of the
|
||||
// echo return loss enhancement (ERLE) and the linear power estimate.
|
||||
void LinearEstimate(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||
float erle_uncertainty,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||
RTC_DCHECK_EQ(S2_linear.size(), R2.size());
|
||||
|
||||
const size_t num_capture_channels = R2.size();
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
R2[ch][k] = S2_linear[ch][k] * erle_uncertainty;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Estimates the residual echo power based on the estimate of the echo path
|
||||
// gain.
|
||||
void NonLinearEstimate(
|
||||
@ -177,7 +163,9 @@ ResidualEchoEstimator::ResidualEchoEstimator(const EchoCanceller3Config& config,
|
||||
early_reflections_general_gain_(
|
||||
GetEarlyReflectionsDefaultModeGain(config_.ep_strength)),
|
||||
late_reflections_general_gain_(
|
||||
GetLateReflectionsDefaultModeGain(config_.ep_strength)) {
|
||||
GetLateReflectionsDefaultModeGain(config_.ep_strength)),
|
||||
erle_onset_compensation_in_dominant_nearend_(
|
||||
UseErleOnsetCompensationInDominantNearend(config_.ep_strength)) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
@ -188,7 +176,9 @@ void ResidualEchoEstimator::Estimate(
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||
bool dominant_nearend,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded) {
|
||||
RTC_DCHECK_EQ(R2.size(), Y2.size());
|
||||
RTC_DCHECK_EQ(R2.size(), S2_linear.size());
|
||||
|
||||
@ -204,17 +194,19 @@ void ResidualEchoEstimator::Estimate(
|
||||
if (aec_state.SaturatedEcho()) {
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
|
||||
std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
|
||||
}
|
||||
} else {
|
||||
absl::optional<float> erle_uncertainty = aec_state.ErleUncertainty();
|
||||
if (erle_uncertainty) {
|
||||
LinearEstimate(S2_linear, *erle_uncertainty, R2);
|
||||
} else {
|
||||
LinearEstimate(S2_linear, aec_state.Erle(), R2);
|
||||
}
|
||||
const bool onset_compensated =
|
||||
erle_onset_compensation_in_dominant_nearend_ || !dominant_nearend;
|
||||
LinearEstimate(S2_linear, aec_state.Erle(onset_compensated), R2);
|
||||
LinearEstimate(S2_linear, aec_state.ErleUnbounded(), R2_unbounded);
|
||||
}
|
||||
|
||||
AddReverb(ReverbType::kLinear, aec_state, render_buffer, R2);
|
||||
UpdateReverb(ReverbType::kLinear, aec_state, render_buffer,
|
||||
dominant_nearend);
|
||||
AddReverb(R2);
|
||||
AddReverb(R2_unbounded);
|
||||
} else {
|
||||
const float echo_path_gain =
|
||||
GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/true);
|
||||
@ -224,6 +216,7 @@ void ResidualEchoEstimator::Estimate(
|
||||
if (aec_state.SaturatedEcho()) {
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
std::copy(Y2[ch].begin(), Y2[ch].end(), R2[ch].begin());
|
||||
std::copy(Y2[ch].begin(), Y2[ch].end(), R2_unbounded[ch].begin());
|
||||
}
|
||||
} else {
|
||||
// Estimate the echo generating signal power.
|
||||
@ -243,11 +236,15 @@ void ResidualEchoEstimator::Estimate(
|
||||
}
|
||||
|
||||
NonLinearEstimate(echo_path_gain, X2, R2);
|
||||
NonLinearEstimate(echo_path_gain, X2, R2_unbounded);
|
||||
}
|
||||
|
||||
if (config_.echo_model.model_reverb_in_nonlinear_mode &&
|
||||
!aec_state.TransparentModeActive()) {
|
||||
AddReverb(ReverbType::kNonLinear, aec_state, render_buffer, R2);
|
||||
UpdateReverb(ReverbType::kNonLinear, aec_state, render_buffer,
|
||||
dominant_nearend);
|
||||
AddReverb(R2);
|
||||
AddReverb(R2_unbounded);
|
||||
}
|
||||
}
|
||||
|
||||
@ -258,6 +255,7 @@ void ResidualEchoEstimator::Estimate(
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
R2[ch][k] *= residual_scaling[k];
|
||||
R2_unbounded[ch][k] *= residual_scaling[k];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -306,14 +304,11 @@ void ResidualEchoEstimator::UpdateRenderNoisePower(
|
||||
}
|
||||
}
|
||||
|
||||
// Adds the estimated power of the reverb to the residual echo power.
|
||||
void ResidualEchoEstimator::AddReverb(
|
||||
ReverbType reverb_type,
|
||||
const AecState& aec_state,
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) {
|
||||
const size_t num_capture_channels = R2.size();
|
||||
|
||||
// Updates the reverb estimation.
|
||||
void ResidualEchoEstimator::UpdateReverb(ReverbType reverb_type,
|
||||
const AecState& aec_state,
|
||||
const RenderBuffer& render_buffer,
|
||||
bool dominant_nearend) {
|
||||
// Choose reverb partition based on what type of echo power model is used.
|
||||
const size_t first_reverb_partition =
|
||||
reverb_type == ReverbType::kLinear
|
||||
@ -338,16 +333,21 @@ void ResidualEchoEstimator::AddReverb(
|
||||
}
|
||||
|
||||
// Update the reverb estimate.
|
||||
float reverb_decay = aec_state.ReverbDecay(/*mild=*/dominant_nearend);
|
||||
if (reverb_type == ReverbType::kLinear) {
|
||||
echo_reverb_.UpdateReverb(render_power,
|
||||
aec_state.GetReverbFrequencyResponse(),
|
||||
aec_state.ReverbDecay());
|
||||
echo_reverb_.UpdateReverb(
|
||||
render_power, aec_state.GetReverbFrequencyResponse(), reverb_decay);
|
||||
} else {
|
||||
const float echo_path_gain =
|
||||
GetEchoPathGain(aec_state, /*gain_for_early_reflections=*/false);
|
||||
echo_reverb_.UpdateReverbNoFreqShaping(render_power, echo_path_gain,
|
||||
aec_state.ReverbDecay());
|
||||
reverb_decay);
|
||||
}
|
||||
}
|
||||
// Adds the estimated power of the reverb to the residual echo power.
|
||||
void ResidualEchoEstimator::AddReverb(
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const {
|
||||
const size_t num_capture_channels = R2.size();
|
||||
|
||||
// Add the reverb power.
|
||||
rtc::ArrayView<const float, kFftLengthBy2Plus1> reverb_power =
|
||||
|
@ -39,7 +39,9 @@ class ResidualEchoEstimator {
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> S2_linear,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);
|
||||
bool dominant_nearend,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2_unbounded);
|
||||
|
||||
private:
|
||||
enum class ReverbType { kLinear, kNonLinear };
|
||||
@ -51,12 +53,16 @@ class ResidualEchoEstimator {
|
||||
// render signal.
|
||||
void UpdateRenderNoisePower(const RenderBuffer& render_buffer);
|
||||
|
||||
// Updates the reverb estimation.
|
||||
void UpdateReverb(ReverbType reverb_type,
|
||||
const AecState& aec_state,
|
||||
const RenderBuffer& render_buffer,
|
||||
bool dominant_nearend);
|
||||
|
||||
// Adds the estimated unmodelled echo power to the residual echo power
|
||||
// estimate.
|
||||
void AddReverb(ReverbType reverb_type,
|
||||
const AecState& aec_state,
|
||||
const RenderBuffer& render_buffer,
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2);
|
||||
void AddReverb(
|
||||
rtc::ArrayView<std::array<float, kFftLengthBy2Plus1>> R2) const;
|
||||
|
||||
// Gets the echo path gain to apply.
|
||||
float GetEchoPathGain(const AecState& aec_state,
|
||||
@ -68,6 +74,7 @@ class ResidualEchoEstimator {
|
||||
const float late_reflections_transparent_mode_gain_;
|
||||
const float early_reflections_general_gain_;
|
||||
const float late_reflections_general_gain_;
|
||||
const bool erle_onset_compensation_in_dominant_nearend_;
|
||||
std::array<float, kFftLengthBy2Plus1> X2_noise_floor_;
|
||||
std::array<int, kFftLengthBy2Plus1> X2_noise_floor_counter_;
|
||||
ReverbModel echo_reverb_;
|
||||
|
@ -93,7 +93,8 @@ ReverbDecayEstimator::ReverbDecayEstimator(const EchoCanceller3Config& config)
|
||||
late_reverb_start_(kEarlyReverbMinSizeBlocks),
|
||||
late_reverb_end_(kEarlyReverbMinSizeBlocks),
|
||||
previous_gains_(config.filter.refined.length_blocks, 0.f),
|
||||
decay_(std::fabs(config.ep_strength.default_len)) {
|
||||
decay_(std::fabs(config.ep_strength.default_len)),
|
||||
mild_decay_(std::fabs(config.ep_strength.nearend_len)) {
|
||||
RTC_DCHECK_GT(config.filter.refined.length_blocks,
|
||||
static_cast<size_t>(kEarlyReverbMinSizeBlocks));
|
||||
}
|
||||
@ -295,7 +296,7 @@ void ReverbDecayEstimator::LateReverbLinearRegressor::Accumulate(float z) {
|
||||
float ReverbDecayEstimator::LateReverbLinearRegressor::Estimate() {
|
||||
RTC_DCHECK(EstimateAvailable());
|
||||
if (nn_ == 0.f) {
|
||||
RTC_NOTREACHED();
|
||||
RTC_DCHECK_NOTREACHED();
|
||||
return 0.f;
|
||||
}
|
||||
return nz_ / nn_;
|
||||
|
@ -34,8 +34,15 @@ class ReverbDecayEstimator {
|
||||
int filter_delay_blocks,
|
||||
bool usable_linear_filter,
|
||||
bool stationary_signal);
|
||||
// Returns the decay for the exponential model.
|
||||
float Decay() const { return decay_; }
|
||||
// Returns the decay for the exponential model. The parameter `mild` indicates
|
||||
// which exponential decay to return, the default one or a milder one.
|
||||
// Note: `mild` only has an effect when `use_adaptive_echo_decay_` is false; in
// that case it selects `mild_decay_` instead of `decay_`.
float Decay(bool mild) const {
|
||||
if (use_adaptive_echo_decay_) {
|
||||
// `mild` is ignored on this path: `decay_` is returned unconditionally.
return decay_;
|
||||
} else {
|
||||
return mild ? mild_decay_ : decay_;
|
||||
}
|
||||
}
|
||||
// Dumps debug data.
|
||||
void Dump(ApmDataDumper* data_dumper) const;
|
||||
|
||||
@ -103,6 +110,7 @@ class ReverbDecayEstimator {
|
||||
bool estimation_region_identified_ = false;
|
||||
std::vector<float> previous_gains_;
|
||||
float decay_;
|
||||
float mild_decay_;
|
||||
float tail_gain_ = 0.f;
|
||||
float smoothing_constant_ = 0.f;
|
||||
};
|
||||
|
@ -49,9 +49,13 @@ float AverageDecayWithinFilter(
|
||||
|
||||
} // namespace
|
||||
|
||||
ReverbFrequencyResponse::ReverbFrequencyResponse() {
|
||||
tail_response_.fill(0.f);
|
||||
ReverbFrequencyResponse::ReverbFrequencyResponse(
|
||||
bool use_conservative_tail_frequency_response)
|
||||
: use_conservative_tail_frequency_response_(
|
||||
use_conservative_tail_frequency_response) {
|
||||
tail_response_.fill(0.0f);
|
||||
}
|
||||
|
||||
ReverbFrequencyResponse::~ReverbFrequencyResponse() = default;
|
||||
|
||||
void ReverbFrequencyResponse::Update(
|
||||
@ -88,6 +92,12 @@ void ReverbFrequencyResponse::Update(
|
||||
tail_response_[k] = freq_resp_direct_path[k] * average_decay_;
|
||||
}
|
||||
|
||||
if (use_conservative_tail_frequency_response_) {
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; ++k) {
|
||||
tail_response_[k] = std::max(freq_resp_tail[k], tail_response_[k]);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
const float avg_neighbour =
|
||||
0.5f * (tail_response_[k - 1] + tail_response_[k + 1]);
|
||||
|
@ -12,7 +12,6 @@
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_FREQUENCY_RESPONSE_H_
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
@ -24,7 +23,8 @@ namespace webrtc {
|
||||
// Class for updating the frequency response for the reverb.
|
||||
class ReverbFrequencyResponse {
|
||||
public:
|
||||
ReverbFrequencyResponse();
|
||||
explicit ReverbFrequencyResponse(
|
||||
bool use_conservative_tail_frequency_response);
|
||||
~ReverbFrequencyResponse();
|
||||
|
||||
// Updates the frequency response estimate of the reverb.
|
||||
@ -45,6 +45,7 @@ class ReverbFrequencyResponse {
|
||||
int filter_delay_blocks,
|
||||
float linear_filter_quality);
|
||||
|
||||
const bool use_conservative_tail_frequency_response_;
|
||||
float average_decay_ = 0.f;
|
||||
std::array<float, kFftLengthBy2Plus1> tail_response_;
|
||||
};
|
||||
|
@ -49,7 +49,6 @@ class ReverbModel {
|
||||
float reverb_decay);
|
||||
|
||||
private:
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> reverb_;
|
||||
};
|
||||
|
||||
|
@ -15,7 +15,10 @@ namespace webrtc {
|
||||
ReverbModelEstimator::ReverbModelEstimator(const EchoCanceller3Config& config,
|
||||
size_t num_capture_channels)
|
||||
: reverb_decay_estimators_(num_capture_channels),
|
||||
reverb_frequency_responses_(num_capture_channels) {
|
||||
reverb_frequency_responses_(
|
||||
num_capture_channels,
|
||||
ReverbFrequencyResponse(
|
||||
config.ep_strength.use_conservative_tail_frequency_response)) {
|
||||
for (size_t ch = 0; ch < reverb_decay_estimators_.size(); ++ch) {
|
||||
reverb_decay_estimators_[ch] =
|
||||
std::make_unique<ReverbDecayEstimator>(config);
|
||||
|
@ -12,6 +12,7 @@
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_REVERB_MODEL_ESTIMATOR_H_
|
||||
|
||||
#include <array>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
@ -42,9 +43,13 @@ class ReverbModelEstimator {
|
||||
const std::vector<bool>& usable_linear_estimates,
|
||||
bool stationary_block);
|
||||
|
||||
// Returns the exponential decay of the reverberant echo.
|
||||
// Returns the exponential decay of the reverberant echo. The parameter `mild`
|
||||
// indicates which exponential decay to return, the default one or a milder
|
||||
// one.
|
||||
// TODO(peah): Correct to properly support multiple channels.
|
||||
float ReverbDecay() const { return reverb_decay_estimators_[0]->Decay(); }
|
||||
// Only the decay estimator at index 0 is consulted, whatever the number of
// estimators held; `mild` is forwarded unchanged to its Decay().
float ReverbDecay(bool mild) const {
|
||||
return reverb_decay_estimators_[0]->Decay(mild);
|
||||
}
|
||||
|
||||
// Return the frequency response of the reverberant echo.
|
||||
// TODO(peah): Correct to properly support multiple channels.
|
||||
|
@ -131,7 +131,9 @@ SignalDependentErleEstimator::SignalDependentErleEstimator(
|
||||
section_boundaries_blocks_(SetSectionsBoundaries(delay_headroom_blocks_,
|
||||
num_blocks_,
|
||||
num_sections_)),
|
||||
use_onset_detection_(config.erle.onset_detection),
|
||||
erle_(num_capture_channels),
|
||||
erle_onset_compensated_(num_capture_channels),
|
||||
S2_section_accum_(
|
||||
num_capture_channels,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>(num_sections_)),
|
||||
@ -154,6 +156,7 @@ SignalDependentErleEstimator::~SignalDependentErleEstimator() = default;
|
||||
void SignalDependentErleEstimator::Reset() {
|
||||
for (size_t ch = 0; ch < erle_.size(); ++ch) {
|
||||
erle_[ch].fill(min_erle_);
|
||||
erle_onset_compensated_[ch].fill(min_erle_);
|
||||
for (auto& erle_estimator : erle_estimators_[ch]) {
|
||||
erle_estimator.fill(min_erle_);
|
||||
}
|
||||
@ -180,6 +183,8 @@ void SignalDependentErleEstimator::Update(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
average_erle_onset_compensated,
|
||||
const std::vector<bool>& converged_filters) {
|
||||
RTC_DCHECK_GT(num_sections_, 1);
|
||||
|
||||
@ -202,6 +207,11 @@ void SignalDependentErleEstimator::Update(
|
||||
[band_to_subband_[k]];
|
||||
erle_[ch][k] = rtc::SafeClamp(average_erle[ch][k] * correction_factor,
|
||||
min_erle_, max_erle_[band_to_subband_[k]]);
|
||||
if (use_onset_detection_) {
|
||||
erle_onset_compensated_[ch][k] = rtc::SafeClamp(
|
||||
average_erle_onset_compensated[ch][k] * correction_factor,
|
||||
min_erle_, max_erle_[band_to_subband_[k]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -37,8 +37,10 @@ class SignalDependentErleEstimator {
|
||||
void Reset();
|
||||
|
||||
// Returns the Erle per frequency subband.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||
return erle_;
|
||||
// Selects which per-channel ERLE estimate to expose. `erle_onset_compensated_`
// is returned only when the caller requests it AND `use_onset_detection_` is
// set; in every other case `erle_` is returned. The condition parses as
// `(onset_compensated && use_onset_detection_) ? ... : ...`.
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
|
||||
bool onset_compensated) const {
|
||||
return onset_compensated && use_onset_detection_ ? erle_onset_compensated_
|
||||
: erle_;
|
||||
}
|
||||
|
||||
// Updates the Erle estimate. The Erle that is passed as an input is required
|
||||
@ -51,6 +53,8 @@ class SignalDependentErleEstimator {
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Y2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> E2,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> average_erle,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
average_erle_onset_compensated,
|
||||
const std::vector<bool>& converged_filters);
|
||||
|
||||
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
|
||||
@ -83,7 +87,9 @@ class SignalDependentErleEstimator {
|
||||
const std::array<size_t, kFftLengthBy2Plus1> band_to_subband_;
|
||||
const std::array<float, kSubbands> max_erle_;
|
||||
const std::vector<size_t> section_boundaries_blocks_;
|
||||
const bool use_onset_detection_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onset_compensated_;
|
||||
std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
||||
S2_section_accum_;
|
||||
std::vector<std::vector<std::array<float, kSubbands>>> erle_estimators_;
|
||||
|
@ -17,7 +17,6 @@
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/spectrum_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -29,8 +28,7 @@ constexpr int kNBlocksInitialPhase = kNumBlocksPerSecond * 2.;
|
||||
} // namespace
|
||||
|
||||
StationarityEstimator::StationarityEstimator()
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))) {
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)) {
|
||||
Reset();
|
||||
}
|
||||
|
||||
@ -153,7 +151,7 @@ void StationarityEstimator::SmoothStationaryPerFreq() {
|
||||
stationarity_flags_ = all_ahead_stationary_smooth;
|
||||
}
|
||||
|
||||
int StationarityEstimator::instance_count_ = 0;
|
||||
std::atomic<int> StationarityEstimator::instance_count_(0);
|
||||
|
||||
StationarityEstimator::NoiseSpectrum::NoiseSpectrum() {
|
||||
Reset();
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include <stddef.h>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
|
||||
#include "api/array_view.h"
|
||||
@ -110,7 +111,7 @@ class StationarityEstimator {
|
||||
size_t block_counter_;
|
||||
};
|
||||
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
NoiseSpectrum noise_;
|
||||
std::array<int, kFftLengthBy2Plus1> hangovers_;
|
||||
|
@ -48,7 +48,9 @@ SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config,
|
||||
use_min_erle_during_onsets_(EnableMinErleDuringOnsets()),
|
||||
accum_spectra_(num_capture_channels),
|
||||
erle_(num_capture_channels),
|
||||
erle_onsets_(num_capture_channels),
|
||||
erle_onset_compensated_(num_capture_channels),
|
||||
erle_unbounded_(num_capture_channels),
|
||||
erle_during_onsets_(num_capture_channels),
|
||||
coming_onset_(num_capture_channels),
|
||||
hold_counters_(num_capture_channels) {
|
||||
Reset();
|
||||
@ -57,11 +59,12 @@ SubbandErleEstimator::SubbandErleEstimator(const EchoCanceller3Config& config,
|
||||
SubbandErleEstimator::~SubbandErleEstimator() = default;
|
||||
|
||||
void SubbandErleEstimator::Reset() {
|
||||
for (auto& erle : erle_) {
|
||||
erle.fill(min_erle_);
|
||||
}
|
||||
for (size_t ch = 0; ch < erle_onsets_.size(); ++ch) {
|
||||
erle_onsets_[ch].fill(min_erle_);
|
||||
const size_t num_capture_channels = erle_.size();
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
erle_[ch].fill(min_erle_);
|
||||
erle_onset_compensated_[ch].fill(min_erle_);
|
||||
erle_unbounded_[ch].fill(min_erle_);
|
||||
erle_during_onsets_[ch].fill(min_erle_);
|
||||
coming_onset_[ch].fill(true);
|
||||
hold_counters_[ch].fill(0);
|
||||
}
|
||||
@ -80,15 +83,25 @@ void SubbandErleEstimator::Update(
|
||||
DecreaseErlePerBandForLowRenderSignals();
|
||||
}
|
||||
|
||||
for (auto& erle : erle_) {
|
||||
const size_t num_capture_channels = erle_.size();
|
||||
for (size_t ch = 0; ch < num_capture_channels; ++ch) {
|
||||
auto& erle = erle_[ch];
|
||||
erle[0] = erle[1];
|
||||
erle[kFftLengthBy2] = erle[kFftLengthBy2 - 1];
|
||||
|
||||
auto& erle_oc = erle_onset_compensated_[ch];
|
||||
erle_oc[0] = erle_oc[1];
|
||||
erle_oc[kFftLengthBy2] = erle_oc[kFftLengthBy2 - 1];
|
||||
|
||||
auto& erle_u = erle_unbounded_[ch];
|
||||
erle_u[0] = erle_u[1];
|
||||
erle_u[kFftLengthBy2] = erle_u[kFftLengthBy2 - 1];
|
||||
}
|
||||
}
|
||||
|
||||
void SubbandErleEstimator::Dump(
|
||||
const std::unique_ptr<ApmDataDumper>& data_dumper) const {
|
||||
data_dumper->DumpRaw("aec3_erle_onset", ErleOnsets()[0]);
|
||||
data_dumper->DumpRaw("aec3_erle_onset", ErleDuringOnsets()[0]);
|
||||
}
|
||||
|
||||
void SubbandErleEstimator::UpdateBands(
|
||||
@ -102,13 +115,16 @@ void SubbandErleEstimator::UpdateBands(
|
||||
continue;
|
||||
}
|
||||
|
||||
if (accum_spectra_.num_points[ch] != kPointsToAccumulate) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::array<float, kFftLengthBy2> new_erle;
|
||||
std::array<bool, kFftLengthBy2> is_erle_updated;
|
||||
is_erle_updated.fill(false);
|
||||
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
if (accum_spectra_.num_points[ch] == kPointsToAccumulate &&
|
||||
accum_spectra_.E2[ch][k] > 0.f) {
|
||||
if (accum_spectra_.E2[ch][k] > 0.f) {
|
||||
new_erle[k] = accum_spectra_.Y2[ch][k] / accum_spectra_.E2[ch][k];
|
||||
is_erle_updated[k] = true;
|
||||
}
|
||||
@ -120,10 +136,11 @@ void SubbandErleEstimator::UpdateBands(
|
||||
if (coming_onset_[ch][k]) {
|
||||
coming_onset_[ch][k] = false;
|
||||
if (!use_min_erle_during_onsets_) {
|
||||
float alpha = new_erle[k] < erle_onsets_[ch][k] ? 0.3f : 0.15f;
|
||||
erle_onsets_[ch][k] = rtc::SafeClamp(
|
||||
erle_onsets_[ch][k] +
|
||||
alpha * (new_erle[k] - erle_onsets_[ch][k]),
|
||||
float alpha =
|
||||
new_erle[k] < erle_during_onsets_[ch][k] ? 0.3f : 0.15f;
|
||||
erle_during_onsets_[ch][k] = rtc::SafeClamp(
|
||||
erle_during_onsets_[ch][k] +
|
||||
alpha * (new_erle[k] - erle_during_onsets_[ch][k]),
|
||||
min_erle_, max_erle_[k]);
|
||||
}
|
||||
}
|
||||
@ -132,15 +149,31 @@ void SubbandErleEstimator::UpdateBands(
|
||||
}
|
||||
}
|
||||
|
||||
auto update_erle_band = [](float& erle, float new_erle,
|
||||
bool low_render_energy, float min_erle,
|
||||
float max_erle) {
|
||||
float alpha = 0.05f;
|
||||
if (new_erle < erle) {
|
||||
alpha = low_render_energy ? 0.f : 0.1f;
|
||||
}
|
||||
erle =
|
||||
rtc::SafeClamp(erle + alpha * (new_erle - erle), min_erle, max_erle);
|
||||
};
|
||||
|
||||
for (size_t k = 1; k < kFftLengthBy2; ++k) {
|
||||
if (is_erle_updated[k]) {
|
||||
float alpha = 0.05f;
|
||||
if (new_erle[k] < erle_[ch][k]) {
|
||||
alpha = accum_spectra_.low_render_energy[ch][k] ? 0.f : 0.1f;
|
||||
const bool low_render_energy = accum_spectra_.low_render_energy[ch][k];
|
||||
update_erle_band(erle_[ch][k], new_erle[k], low_render_energy,
|
||||
min_erle_, max_erle_[k]);
|
||||
if (use_onset_detection_) {
|
||||
update_erle_band(erle_onset_compensated_[ch][k], new_erle[k],
|
||||
low_render_energy, min_erle_, max_erle_[k]);
|
||||
}
|
||||
erle_[ch][k] =
|
||||
rtc::SafeClamp(erle_[ch][k] + alpha * (new_erle[k] - erle_[ch][k]),
|
||||
min_erle_, max_erle_[k]);
|
||||
|
||||
// Virtually unbounded ERLE.
|
||||
constexpr float kUnboundedErleMax = 100000.0f;
|
||||
update_erle_band(erle_unbounded_[ch][k], new_erle[k], low_render_energy,
|
||||
min_erle_, kUnboundedErleMax);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -153,9 +186,11 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() {
|
||||
--hold_counters_[ch][k];
|
||||
if (hold_counters_[ch][k] <=
|
||||
(kBlocksForOnsetDetection - kBlocksToHoldErle)) {
|
||||
if (erle_[ch][k] > erle_onsets_[ch][k]) {
|
||||
erle_[ch][k] = std::max(erle_onsets_[ch][k], 0.97f * erle_[ch][k]);
|
||||
RTC_DCHECK_LE(min_erle_, erle_[ch][k]);
|
||||
if (erle_onset_compensated_[ch][k] > erle_during_onsets_[ch][k]) {
|
||||
erle_onset_compensated_[ch][k] =
|
||||
std::max(erle_during_onsets_[ch][k],
|
||||
0.97f * erle_onset_compensated_[ch][k]);
|
||||
RTC_DCHECK_LE(min_erle_, erle_onset_compensated_[ch][k]);
|
||||
}
|
||||
if (hold_counters_[ch][k] <= 0) {
|
||||
coming_onset_[ch][k] = true;
|
||||
@ -167,7 +202,7 @@ void SubbandErleEstimator::DecreaseErlePerBandForLowRenderSignals() {
|
||||
}
|
||||
|
||||
void SubbandErleEstimator::ResetAccumulatedSpectra() {
|
||||
for (size_t ch = 0; ch < erle_onsets_.size(); ++ch) {
|
||||
for (size_t ch = 0; ch < erle_during_onsets_.size(); ++ch) {
|
||||
accum_spectra_.Y2[ch].fill(0.f);
|
||||
accum_spectra_.E2[ch].fill(0.f);
|
||||
accum_spectra_.num_points[ch] = 0;
|
||||
|
@ -41,14 +41,22 @@ class SubbandErleEstimator {
|
||||
const std::vector<bool>& converged_filters);
|
||||
|
||||
// Returns the ERLE estimate.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle() const {
|
||||
return erle_;
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> Erle(
|
||||
bool onset_compensated) const {
|
||||
return onset_compensated && use_onset_detection_ ? erle_onset_compensated_
|
||||
: erle_;
|
||||
}
|
||||
|
||||
// Returns the non-capped ERLE estimate.
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleUnbounded()
|
||||
const {
|
||||
return erle_unbounded_;
|
||||
}
|
||||
|
||||
// Returns the ERLE estimate at onsets (only used for testing).
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleOnsets()
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> ErleDuringOnsets()
|
||||
const {
|
||||
return erle_onsets_;
|
||||
return erle_during_onsets_;
|
||||
}
|
||||
|
||||
void Dump(const std::unique_ptr<ApmDataDumper>& data_dumper) const;
|
||||
@ -82,8 +90,13 @@ class SubbandErleEstimator {
|
||||
const std::array<float, kFftLengthBy2Plus1> max_erle_;
|
||||
const bool use_min_erle_during_onsets_;
|
||||
AccumulatedSpectra accum_spectra_;
|
||||
// ERLE without special handling of render onsets.
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onsets_;
|
||||
// ERLE lowered during render onsets.
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_onset_compensated_;
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_unbounded_;
|
||||
// Estimation of ERLE during render onsets.
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>> erle_during_onsets_;
|
||||
std::vector<std::array<bool, kFftLengthBy2Plus1>> coming_onset_;
|
||||
std::vector<std::array<int, kFftLengthBy2Plus1>> hold_counters_;
|
||||
};
|
||||
|
@ -19,11 +19,17 @@
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
bool UseCoarseFilterResetHangover() {
|
||||
return !field_trial::IsEnabled(
|
||||
"WebRTC-Aec3CoarseFilterResetHangoverKillSwitch");
|
||||
}
|
||||
|
||||
void PredictionError(const Aec3Fft& fft,
|
||||
const FftData& S,
|
||||
rtc::ArrayView<const float> y,
|
||||
@ -66,12 +72,14 @@ Subtractor::Subtractor(const EchoCanceller3Config& config,
|
||||
optimization_(optimization),
|
||||
config_(config),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
use_coarse_filter_reset_hangover_(UseCoarseFilterResetHangover()),
|
||||
refined_filters_(num_capture_channels_),
|
||||
coarse_filter_(num_capture_channels_),
|
||||
refined_gains_(num_capture_channels_),
|
||||
coarse_gains_(num_capture_channels_),
|
||||
filter_misadjustment_estimators_(num_capture_channels_),
|
||||
poor_coarse_filter_counters_(num_capture_channels_, 0),
|
||||
coarse_filter_reset_hangover_(num_capture_channels_, 0),
|
||||
refined_frequency_responses_(
|
||||
num_capture_channels_,
|
||||
std::vector<std::array<float, kFftLengthBy2Plus1>>(
|
||||
@ -83,7 +91,20 @@ Subtractor::Subtractor(const EchoCanceller3Config& config,
|
||||
std::vector<float>(GetTimeDomainLength(std::max(
|
||||
config_.filter.refined_initial.length_blocks,
|
||||
config_.filter.refined.length_blocks)),
|
||||
0.f)) {
|
||||
0.f)),
|
||||
coarse_impulse_responses_(0) {
|
||||
// Set up the storing of coarse impulse responses if data dumping is
|
||||
// available.
|
||||
if (ApmDataDumper::IsAvailable()) {
|
||||
coarse_impulse_responses_.resize(num_capture_channels_);
|
||||
const size_t filter_size = GetTimeDomainLength(
|
||||
std::max(config_.filter.coarse_initial.length_blocks,
|
||||
config_.filter.coarse.length_blocks));
|
||||
for (std::vector<float>& impulse_response : coarse_impulse_responses_) {
|
||||
impulse_response.resize(filter_size, 0.f);
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
refined_filters_[ch] = std::make_unique<AdaptiveFirFilter>(
|
||||
config_.filter.refined.length_blocks,
|
||||
@ -155,11 +176,11 @@ void Subtractor::ExitInitialState() {
|
||||
}
|
||||
|
||||
void Subtractor::Process(const RenderBuffer& render_buffer,
|
||||
const std::vector<std::vector<float>>& capture,
|
||||
const Block& capture,
|
||||
const RenderSignalAnalyzer& render_signal_analyzer,
|
||||
const AecState& aec_state,
|
||||
rtc::ArrayView<SubtractorOutput> outputs) {
|
||||
RTC_DCHECK_EQ(num_capture_channels_, capture.size());
|
||||
RTC_DCHECK_EQ(num_capture_channels_, capture.NumChannels());
|
||||
|
||||
// Compute the render powers.
|
||||
const bool same_filter_sizes = refined_filters_[0]->SizePartitions() ==
|
||||
@ -183,9 +204,8 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
|
||||
|
||||
// Process all capture channels
|
||||
for (size_t ch = 0; ch < num_capture_channels_; ++ch) {
|
||||
RTC_DCHECK_EQ(kBlockSize, capture[ch].size());
|
||||
SubtractorOutput& output = outputs[ch];
|
||||
rtc::ArrayView<const float> y = capture[ch];
|
||||
rtc::ArrayView<const float> y = capture.View(/*band=*/0, ch);
|
||||
FftData& E_refined = output.E_refined;
|
||||
FftData E_coarse;
|
||||
std::array<float, kBlockSize>& e_refined = output.e_refined;
|
||||
@ -228,11 +248,19 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
|
||||
|
||||
// Update the refined filter.
|
||||
if (!refined_filters_adjusted) {
|
||||
// Do not allow the performance of the coarse filter to affect the
|
||||
// adaptation speed of the refined filter just after the coarse filter has
|
||||
// been reset.
|
||||
const bool disallow_leakage_diverged =
|
||||
coarse_filter_reset_hangover_[ch] > 0 &&
|
||||
use_coarse_filter_reset_hangover_;
|
||||
|
||||
std::array<float, kFftLengthBy2Plus1> erl;
|
||||
ComputeErl(optimization_, refined_frequency_responses_[ch], erl);
|
||||
refined_gains_[ch]->Compute(X2_refined, render_signal_analyzer, output,
|
||||
erl, refined_filters_[ch]->SizePartitions(),
|
||||
aec_state.SaturatedCapture(), &G);
|
||||
aec_state.SaturatedCapture(),
|
||||
disallow_leakage_diverged, &G);
|
||||
} else {
|
||||
G.re.fill(0.f);
|
||||
G.im.fill(0.f);
|
||||
@ -256,6 +284,8 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
|
||||
coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_coarse,
|
||||
coarse_filter_[ch]->SizePartitions(),
|
||||
aec_state.SaturatedCapture(), &G);
|
||||
coarse_filter_reset_hangover_[ch] =
|
||||
std::max(coarse_filter_reset_hangover_[ch] - 1, 0);
|
||||
} else {
|
||||
poor_coarse_filter_counters_[ch] = 0;
|
||||
coarse_filter_[ch]->SetFilter(refined_filters_[ch]->SizePartitions(),
|
||||
@ -263,9 +293,18 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
|
||||
coarse_gains_[ch]->Compute(X2_coarse, render_signal_analyzer, E_refined,
|
||||
coarse_filter_[ch]->SizePartitions(),
|
||||
aec_state.SaturatedCapture(), &G);
|
||||
coarse_filter_reset_hangover_[ch] =
|
||||
config_.filter.coarse_reset_hangover_blocks;
|
||||
}
|
||||
|
||||
if (ApmDataDumper::IsAvailable()) {
|
||||
RTC_DCHECK_LT(ch, coarse_impulse_responses_.size());
|
||||
coarse_filter_[ch]->Adapt(render_buffer, G,
|
||||
&coarse_impulse_responses_[ch]);
|
||||
} else {
|
||||
coarse_filter_[ch]->Adapt(render_buffer, G);
|
||||
}
|
||||
|
||||
coarse_filter_[ch]->Adapt(render_buffer, G);
|
||||
if (ch == 0) {
|
||||
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.re);
|
||||
data_dumper_->DumpRaw("aec3_subtractor_G_coarse", G.im);
|
||||
|
@ -23,6 +23,7 @@
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
#include "modules/audio_processing/aec3/aec_state.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/coarse_filter_update_gain.h"
|
||||
#include "modules/audio_processing/aec3/echo_path_variability.h"
|
||||
#include "modules/audio_processing/aec3/refined_filter_update_gain.h"
|
||||
@ -48,7 +49,7 @@ class Subtractor {
|
||||
|
||||
// Performs the echo subtraction.
|
||||
void Process(const RenderBuffer& render_buffer,
|
||||
const std::vector<std::vector<float>>& capture,
|
||||
const Block& capture,
|
||||
const RenderSignalAnalyzer& render_signal_analyzer,
|
||||
const AecState& aec_state,
|
||||
rtc::ArrayView<SubtractorOutput> outputs);
|
||||
@ -78,6 +79,15 @@ class Subtractor {
|
||||
refined_impulse_responses_[0].data(),
|
||||
GetTimeDomainLength(
|
||||
refined_filters_[0]->max_filter_size_partitions())));
|
||||
if (ApmDataDumper::IsAvailable()) {
|
||||
RTC_DCHECK_GT(coarse_impulse_responses_.size(), 0);
|
||||
data_dumper_->DumpRaw(
|
||||
"aec3_subtractor_h_coarse",
|
||||
rtc::ArrayView<const float>(
|
||||
coarse_impulse_responses_[0].data(),
|
||||
GetTimeDomainLength(
|
||||
coarse_filter_[0]->max_filter_size_partitions())));
|
||||
}
|
||||
|
||||
refined_filters_[0]->DumpFilter("aec3_subtractor_H_refined");
|
||||
coarse_filter_[0]->DumpFilter("aec3_subtractor_H_coarse");
|
||||
@ -120,6 +130,7 @@ class Subtractor {
|
||||
const Aec3Optimization optimization_;
|
||||
const EchoCanceller3Config config_;
|
||||
const size_t num_capture_channels_;
|
||||
const bool use_coarse_filter_reset_hangover_;
|
||||
|
||||
std::vector<std::unique_ptr<AdaptiveFirFilter>> refined_filters_;
|
||||
std::vector<std::unique_ptr<AdaptiveFirFilter>> coarse_filter_;
|
||||
@ -127,9 +138,11 @@ class Subtractor {
|
||||
std::vector<std::unique_ptr<CoarseFilterUpdateGain>> coarse_gains_;
|
||||
std::vector<FilterMisadjustmentEstimator> filter_misadjustment_estimators_;
|
||||
std::vector<size_t> poor_coarse_filter_counters_;
|
||||
std::vector<int> coarse_filter_reset_hangover_;
|
||||
std::vector<std::vector<std::array<float, kFftLengthBy2Plus1>>>
|
||||
refined_frequency_responses_;
|
||||
std::vector<std::vector<float>> refined_impulse_responses_;
|
||||
std::vector<std::vector<float>> coarse_impulse_responses_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -22,12 +22,14 @@ SubtractorOutputAnalyzer::SubtractorOutputAnalyzer(size_t num_capture_channels)
|
||||
void SubtractorOutputAnalyzer::Update(
|
||||
rtc::ArrayView<const SubtractorOutput> subtractor_output,
|
||||
bool* any_filter_converged,
|
||||
bool* any_coarse_filter_converged,
|
||||
bool* all_filters_diverged) {
|
||||
RTC_DCHECK(any_filter_converged);
|
||||
RTC_DCHECK(all_filters_diverged);
|
||||
RTC_DCHECK_EQ(subtractor_output.size(), filters_converged_.size());
|
||||
|
||||
*any_filter_converged = false;
|
||||
*any_coarse_filter_converged = false;
|
||||
*all_filters_diverged = true;
|
||||
|
||||
for (size_t ch = 0; ch < subtractor_output.size(); ++ch) {
|
||||
@ -36,16 +38,21 @@ void SubtractorOutputAnalyzer::Update(
|
||||
const float e2_coarse = subtractor_output[ch].e2_coarse;
|
||||
|
||||
constexpr float kConvergenceThreshold = 50 * 50 * kBlockSize;
|
||||
constexpr float kConvergenceThresholdLowLevel = 20 * 20 * kBlockSize;
|
||||
bool refined_filter_converged =
|
||||
e2_refined < 0.5f * y2 && y2 > kConvergenceThreshold;
|
||||
bool coarse_filter_converged =
|
||||
bool coarse_filter_converged_strict =
|
||||
e2_coarse < 0.05f * y2 && y2 > kConvergenceThreshold;
|
||||
bool coarse_filter_converged_relaxed =
|
||||
e2_coarse < 0.2f * y2 && y2 > kConvergenceThresholdLowLevel;
|
||||
float min_e2 = std::min(e2_refined, e2_coarse);
|
||||
bool filter_diverged = min_e2 > 1.5f * y2 && y2 > 30.f * 30.f * kBlockSize;
|
||||
filters_converged_[ch] =
|
||||
refined_filter_converged || coarse_filter_converged;
|
||||
refined_filter_converged || coarse_filter_converged_strict;
|
||||
|
||||
*any_filter_converged = *any_filter_converged || filters_converged_[ch];
|
||||
*any_coarse_filter_converged =
|
||||
*any_coarse_filter_converged || coarse_filter_converged_relaxed;
|
||||
*all_filters_diverged = *all_filters_diverged && filter_diverged;
|
||||
}
|
||||
}
|
||||
|
@ -26,6 +26,7 @@ class SubtractorOutputAnalyzer {
|
||||
// Analyses the subtractor output.
|
||||
void Update(rtc::ArrayView<const SubtractorOutput> subtractor_output,
|
||||
bool* any_filter_converged,
|
||||
bool* any_coarse_filter_converged,
|
||||
bool* all_filters_diverged);
|
||||
|
||||
const std::vector<bool>& ConvergedFilters() const {
|
||||
|
@ -86,9 +86,9 @@ void SuppressionFilter::ApplyGain(
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppression_gain,
|
||||
float high_bands_gain,
|
||||
rtc::ArrayView<const FftData> E_lowest_band,
|
||||
std::vector<std::vector<std::vector<float>>>* e) {
|
||||
Block* e) {
|
||||
RTC_DCHECK(e);
|
||||
RTC_DCHECK_EQ(e->size(), NumBandsForRate(sample_rate_hz_));
|
||||
RTC_DCHECK_EQ(e->NumBands(), NumBandsForRate(sample_rate_hz_));
|
||||
|
||||
// Comfort noise gain is sqrt(1-g^2), where g is the suppression gain.
|
||||
std::array<float, kFftLengthBy2Plus1> noise_gain;
|
||||
@ -108,12 +108,12 @@ void SuppressionFilter::ApplyGain(
|
||||
|
||||
for (size_t i = 0; i < kFftLengthBy2Plus1; ++i) {
|
||||
// Apply suppression gains.
|
||||
E.re[i] *= suppression_gain[i];
|
||||
E.im[i] *= suppression_gain[i];
|
||||
float E_real = E.re[i] * suppression_gain[i];
|
||||
float E_imag = E.im[i] * suppression_gain[i];
|
||||
|
||||
// Scale and add the comfort noise.
|
||||
E.re[i] += noise_gain[i] * comfort_noise[ch].re[i];
|
||||
E.im[i] += noise_gain[i] * comfort_noise[ch].im[i];
|
||||
E.re[i] = E_real + noise_gain[i] * comfort_noise[ch].re[i];
|
||||
E.im[i] = E_imag + noise_gain[i] * comfort_noise[ch].im[i];
|
||||
}
|
||||
|
||||
// Synthesis filterbank.
|
||||
@ -121,36 +121,37 @@ void SuppressionFilter::ApplyGain(
|
||||
constexpr float kIfftNormalization = 2.f / kFftLength;
|
||||
fft_.Ifft(E, &e_extended);
|
||||
|
||||
auto& e0 = (*e)[0][ch];
|
||||
auto& e0_old = e_output_old_[0][ch];
|
||||
auto e0 = e->View(/*band=*/0, ch);
|
||||
float* e0_old = e_output_old_[0][ch].data();
|
||||
|
||||
// Window and add the first half of e_extended with the second half of
|
||||
// e_extended from the previous block.
|
||||
for (size_t i = 0; i < kFftLengthBy2; ++i) {
|
||||
e0[i] = e0_old[i] * kSqrtHanning[kFftLengthBy2 + i];
|
||||
e0[i] += e_extended[i] * kSqrtHanning[i];
|
||||
e0[i] *= kIfftNormalization;
|
||||
float e0_i = e0_old[i] * kSqrtHanning[kFftLengthBy2 + i];
|
||||
e0_i += e_extended[i] * kSqrtHanning[i];
|
||||
e0[i] = e0_i * kIfftNormalization;
|
||||
}
|
||||
|
||||
// The second half of e_extended is stored for the succeeding frame.
|
||||
std::copy(e_extended.begin() + kFftLengthBy2,
|
||||
e_extended.begin() + kFftLength, std::begin(e0_old));
|
||||
e_extended.begin() + kFftLength,
|
||||
std::begin(e_output_old_[0][ch]));
|
||||
|
||||
// Apply suppression gain to upper bands.
|
||||
for (size_t b = 1; b < e->size(); ++b) {
|
||||
auto& e_band = (*e)[b][ch];
|
||||
for (int b = 1; b < e->NumBands(); ++b) {
|
||||
auto e_band = e->View(b, ch);
|
||||
for (size_t i = 0; i < kFftLengthBy2; ++i) {
|
||||
e_band[i] *= high_bands_gain;
|
||||
}
|
||||
}
|
||||
|
||||
// Add comfort noise to band 1.
|
||||
if (e->size() > 1) {
|
||||
if (e->NumBands() > 1) {
|
||||
E.Assign(comfort_noise_high_band[ch]);
|
||||
std::array<float, kFftLength> time_domain_high_band_noise;
|
||||
fft_.Ifft(E, &time_domain_high_band_noise);
|
||||
|
||||
auto& e1 = (*e)[1][ch];
|
||||
auto e1 = e->View(/*band=*/1, ch);
|
||||
const float gain = high_bands_noise_scaling * kIfftNormalization;
|
||||
for (size_t i = 0; i < kFftLengthBy2; ++i) {
|
||||
e1[i] += time_domain_high_band_noise[i] * gain;
|
||||
@ -158,17 +159,17 @@ void SuppressionFilter::ApplyGain(
|
||||
}
|
||||
|
||||
// Delay upper bands to match the delay of the filter bank.
|
||||
for (size_t b = 1; b < e->size(); ++b) {
|
||||
auto& e_band = (*e)[b][ch];
|
||||
auto& e_band_old = e_output_old_[b][ch];
|
||||
for (int b = 1; b < e->NumBands(); ++b) {
|
||||
auto e_band = e->View(b, ch);
|
||||
float* e_band_old = e_output_old_[b][ch].data();
|
||||
for (size_t i = 0; i < kFftLengthBy2; ++i) {
|
||||
std::swap(e_band[i], e_band_old[i]);
|
||||
}
|
||||
}
|
||||
|
||||
// Clamp output of all bands.
|
||||
for (size_t b = 0; b < e->size(); ++b) {
|
||||
auto& e_band = (*e)[b][ch];
|
||||
for (int b = 0; b < e->NumBands(); ++b) {
|
||||
auto e_band = e->View(b, ch);
|
||||
for (size_t i = 0; i < kFftLengthBy2; ++i) {
|
||||
e_band[i] = rtc::SafeClamp(e_band[i], -32768.f, 32767.f);
|
||||
}
|
||||
|
@ -16,8 +16,8 @@
|
||||
|
||||
#include "modules/audio_processing/aec3/aec3_common.h"
|
||||
#include "modules/audio_processing/aec3/aec3_fft.h"
|
||||
#include "modules/audio_processing/aec3/block.h"
|
||||
#include "modules/audio_processing/aec3/fft_data.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -27,12 +27,16 @@ class SuppressionFilter {
|
||||
int sample_rate_hz,
|
||||
size_t num_capture_channels_);
|
||||
~SuppressionFilter();
|
||||
|
||||
SuppressionFilter(const SuppressionFilter&) = delete;
|
||||
SuppressionFilter& operator=(const SuppressionFilter&) = delete;
|
||||
|
||||
void ApplyGain(rtc::ArrayView<const FftData> comfort_noise,
|
||||
rtc::ArrayView<const FftData> comfort_noise_high_bands,
|
||||
const std::array<float, kFftLengthBy2Plus1>& suppression_gain,
|
||||
float high_bands_gain,
|
||||
rtc::ArrayView<const FftData> E_lowest_band,
|
||||
std::vector<std::vector<std::vector<float>>>* e);
|
||||
Block* e);
|
||||
|
||||
private:
|
||||
const Aec3Optimization optimization_;
|
||||
@ -40,7 +44,6 @@ class SuppressionFilter {
|
||||
const size_t num_capture_channels_;
|
||||
const Aec3Fft fft_;
|
||||
std::vector<std::vector<std::array<float, kFftLengthBy2>>> e_output_old_;
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionFilter);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -21,45 +21,46 @@
|
||||
#include "modules/audio_processing/aec3/subband_nearend_detector.h"
|
||||
#include "modules/audio_processing/aec3/vector_math.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
void PostprocessGains(std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
// TODO(gustaf): Investigate if this can be relaxed to achieve higher
|
||||
// transparency above 2 kHz.
|
||||
|
||||
void LimitLowFrequencyGains(std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
// Limit the low frequency gains to avoid the impact of the high-pass filter
|
||||
// on the lower-frequency gain influencing the overall achieved gain.
|
||||
(*gain)[0] = (*gain)[1] = std::min((*gain)[1], (*gain)[2]);
|
||||
}
|
||||
|
||||
// Limit the high frequency gains to avoid the impact of the anti-aliasing
|
||||
// filter on the upper-frequency gains influencing the overall achieved
|
||||
// gain. TODO(peah): Update this when new anti-aliasing filters are
|
||||
// implemented.
|
||||
constexpr size_t kAntiAliasingImpactLimit = (64 * 2000) / 8000;
|
||||
const float min_upper_gain = (*gain)[kAntiAliasingImpactLimit];
|
||||
void LimitHighFrequencyGains(bool conservative_hf_suppression,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
// Limit the high frequency gains to avoid echo leakage due to an imperfect
|
||||
// filter.
|
||||
constexpr size_t kFirstBandToLimit = (64 * 2000) / 8000;
|
||||
const float min_upper_gain = (*gain)[kFirstBandToLimit];
|
||||
std::for_each(
|
||||
gain->begin() + kAntiAliasingImpactLimit, gain->end() - 1,
|
||||
gain->begin() + kFirstBandToLimit + 1, gain->end(),
|
||||
[min_upper_gain](float& a) { a = std::min(a, min_upper_gain); });
|
||||
(*gain)[kFftLengthBy2] = (*gain)[kFftLengthBy2Minus1];
|
||||
|
||||
// Limits the gain in the frequencies for which the adaptive filter has not
|
||||
// converged.
|
||||
// TODO(peah): Make adaptive to take the actual filter error into account.
|
||||
constexpr size_t kUpperAccurateBandPlus1 = 29;
|
||||
if (conservative_hf_suppression) {
|
||||
// Limits the gain in the frequencies for which the adaptive filter has not
|
||||
// converged.
|
||||
// TODO(peah): Make adaptive to take the actual filter error into account.
|
||||
constexpr size_t kUpperAccurateBandPlus1 = 29;
|
||||
|
||||
constexpr float oneByBandsInSum =
|
||||
1 / static_cast<float>(kUpperAccurateBandPlus1 - 20);
|
||||
const float hf_gain_bound =
|
||||
std::accumulate(gain->begin() + 20,
|
||||
gain->begin() + kUpperAccurateBandPlus1, 0.f) *
|
||||
oneByBandsInSum;
|
||||
constexpr float oneByBandsInSum =
|
||||
1 / static_cast<float>(kUpperAccurateBandPlus1 - 20);
|
||||
const float hf_gain_bound =
|
||||
std::accumulate(gain->begin() + 20,
|
||||
gain->begin() + kUpperAccurateBandPlus1, 0.f) *
|
||||
oneByBandsInSum;
|
||||
|
||||
std::for_each(gain->begin() + kUpperAccurateBandPlus1, gain->end(),
|
||||
[hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); });
|
||||
std::for_each(
|
||||
gain->begin() + kUpperAccurateBandPlus1, gain->end(),
|
||||
[hf_gain_bound](float& a) { a = std::min(a, hf_gain_bound); });
|
||||
}
|
||||
}
|
||||
|
||||
// Scales the echo according to assessed audibility at the other end.
|
||||
@ -100,7 +101,7 @@ void WeightEchoForAudibility(const EchoCanceller3Config& config,
|
||||
|
||||
} // namespace
|
||||
|
||||
int SuppressionGain::instance_count_ = 0;
|
||||
std::atomic<int> SuppressionGain::instance_count_(0);
|
||||
|
||||
float SuppressionGain::UpperBandsGain(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
|
||||
@ -108,13 +109,13 @@ float SuppressionGain::UpperBandsGain(
|
||||
comfort_noise_spectrum,
|
||||
const absl::optional<int>& narrow_peak_band,
|
||||
bool saturated_echo,
|
||||
const std::vector<std::vector<std::vector<float>>>& render,
|
||||
const Block& render,
|
||||
const std::array<float, kFftLengthBy2Plus1>& low_band_gain) const {
|
||||
RTC_DCHECK_LT(0, render.size());
|
||||
if (render.size() == 1) {
|
||||
RTC_DCHECK_LT(0, render.NumBands());
|
||||
if (render.NumBands() == 1) {
|
||||
return 1.f;
|
||||
}
|
||||
const size_t num_render_channels = render[0].size();
|
||||
const int num_render_channels = render.NumChannels();
|
||||
|
||||
if (narrow_peak_band &&
|
||||
(*narrow_peak_band > static_cast<int>(kFftLengthBy2Plus1 - 10))) {
|
||||
@ -133,16 +134,17 @@ float SuppressionGain::UpperBandsGain(
|
||||
// Compute the upper and lower band energies.
|
||||
const auto sum_of_squares = [](float a, float b) { return a + b * b; };
|
||||
float low_band_energy = 0.f;
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
const float channel_energy = std::accumulate(
|
||||
render[0][0].begin(), render[0][0].end(), 0.f, sum_of_squares);
|
||||
for (int ch = 0; ch < num_render_channels; ++ch) {
|
||||
const float channel_energy =
|
||||
std::accumulate(render.begin(/*band=*/0, ch),
|
||||
render.end(/*band=*/0, ch), 0.0f, sum_of_squares);
|
||||
low_band_energy = std::max(low_band_energy, channel_energy);
|
||||
}
|
||||
float high_band_energy = 0.f;
|
||||
for (size_t k = 1; k < render.size(); ++k) {
|
||||
for (size_t ch = 0; ch < num_render_channels; ++ch) {
|
||||
for (int k = 1; k < render.NumBands(); ++k) {
|
||||
for (int ch = 0; ch < num_render_channels; ++ch) {
|
||||
const float energy = std::accumulate(
|
||||
render[k][ch].begin(), render[k][ch].end(), 0.f, sum_of_squares);
|
||||
render.begin(k, ch), render.end(k, ch), 0.f, sum_of_squares);
|
||||
high_band_energy = std::max(high_band_energy, energy);
|
||||
}
|
||||
}
|
||||
@ -229,16 +231,20 @@ void SuppressionGain::GetMinGain(
|
||||
min_gain[k] = std::min(min_gain[k], 1.f);
|
||||
}
|
||||
|
||||
const bool is_nearend_state = dominant_nearend_detector_->IsNearendState();
|
||||
for (size_t k = 0; k < 6; ++k) {
|
||||
const auto& dec = is_nearend_state ? nearend_params_.max_dec_factor_lf
|
||||
: normal_params_.max_dec_factor_lf;
|
||||
if (!initial_state_ ||
|
||||
config_.suppressor.lf_smoothing_during_initial_phase) {
|
||||
const float& dec = dominant_nearend_detector_->IsNearendState()
|
||||
? nearend_params_.max_dec_factor_lf
|
||||
: normal_params_.max_dec_factor_lf;
|
||||
|
||||
// Make sure the gains of the low frequencies do not decrease too
|
||||
// quickly after strong nearend.
|
||||
if (last_nearend[k] > last_echo[k]) {
|
||||
min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec);
|
||||
min_gain[k] = std::min(min_gain[k], 1.f);
|
||||
for (int k = 0; k <= config_.suppressor.last_lf_smoothing_band; ++k) {
|
||||
// Make sure the gains of the low frequencies do not decrease too
|
||||
// quickly after strong nearend.
|
||||
if (last_nearend[k] > last_echo[k] ||
|
||||
k <= config_.suppressor.last_permanent_lf_smoothing_band) {
|
||||
min_gain[k] = std::max(min_gain[k], last_gain_[k] * dec);
|
||||
min_gain[k] = std::min(min_gain[k], 1.f);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -265,6 +271,7 @@ void SuppressionGain::LowerBandGain(
|
||||
suppressor_input,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise,
|
||||
bool clock_drift,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain) {
|
||||
gain->fill(1.f);
|
||||
const bool saturated_echo = aec_state.SaturatedEcho();
|
||||
@ -298,8 +305,14 @@ void SuppressionGain::LowerBandGain(
|
||||
last_echo_[ch].begin());
|
||||
}
|
||||
|
||||
// Limit high-frequency gains.
|
||||
PostprocessGains(gain);
|
||||
LimitLowFrequencyGains(gain);
|
||||
// Use conservative high-frequency gains during clock-drift or when not in
|
||||
// dominant nearend.
|
||||
if (!dominant_nearend_detector_->IsNearendState() || clock_drift ||
|
||||
config_.suppressor.conservative_hf_suppression) {
|
||||
LimitHighFrequencyGains(config_.suppressor.conservative_hf_suppression,
|
||||
gain);
|
||||
}
|
||||
|
||||
// Store computed gains.
|
||||
std::copy(gain->begin(), gain->end(), last_gain_.begin());
|
||||
@ -312,8 +325,7 @@ SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
|
||||
Aec3Optimization optimization,
|
||||
int sample_rate_hz,
|
||||
size_t num_capture_channels)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
|
||||
: data_dumper_(new ApmDataDumper(instance_count_.fetch_add(1) + 1)),
|
||||
optimization_(optimization),
|
||||
config_(config),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
@ -325,8 +337,14 @@ SuppressionGain::SuppressionGain(const EchoCanceller3Config& config,
|
||||
num_capture_channels_,
|
||||
aec3::MovingAverage(kFftLengthBy2Plus1,
|
||||
config.suppressor.nearend_average_blocks)),
|
||||
nearend_params_(config_.suppressor.nearend_tuning),
|
||||
normal_params_(config_.suppressor.normal_tuning) {
|
||||
nearend_params_(config_.suppressor.last_lf_band,
|
||||
config_.suppressor.first_hf_band,
|
||||
config_.suppressor.nearend_tuning),
|
||||
normal_params_(config_.suppressor.last_lf_band,
|
||||
config_.suppressor.first_hf_band,
|
||||
config_.suppressor.normal_tuning),
|
||||
use_unbounded_echo_spectrum_(config.suppressor.dominant_nearend_detection
|
||||
.use_unbounded_echo_spectrum) {
|
||||
RTC_DCHECK_LT(0, state_change_duration_blocks_);
|
||||
last_gain_.fill(1.f);
|
||||
if (config_.suppressor.use_subband_nearend_detection) {
|
||||
@ -347,24 +365,33 @@ void SuppressionGain::GetGain(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
residual_echo_spectrum,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
residual_echo_spectrum_unbounded,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
comfort_noise_spectrum,
|
||||
const RenderSignalAnalyzer& render_signal_analyzer,
|
||||
const AecState& aec_state,
|
||||
const std::vector<std::vector<std::vector<float>>>& render,
|
||||
const Block& render,
|
||||
bool clock_drift,
|
||||
float* high_bands_gain,
|
||||
std::array<float, kFftLengthBy2Plus1>* low_band_gain) {
|
||||
RTC_DCHECK(high_bands_gain);
|
||||
RTC_DCHECK(low_band_gain);
|
||||
|
||||
// Choose residual echo spectrum for dominant nearend detection.
|
||||
const auto echo = use_unbounded_echo_spectrum_
|
||||
? residual_echo_spectrum_unbounded
|
||||
: residual_echo_spectrum;
|
||||
|
||||
// Update the nearend state selection.
|
||||
dominant_nearend_detector_->Update(nearend_spectrum, residual_echo_spectrum,
|
||||
dominant_nearend_detector_->Update(nearend_spectrum, echo,
|
||||
comfort_noise_spectrum, initial_state_);
|
||||
|
||||
// Compute gain for the lower band.
|
||||
bool low_noise_render = low_render_detector_.Detect(render);
|
||||
LowerBandGain(low_noise_render, aec_state, nearend_spectrum,
|
||||
residual_echo_spectrum, comfort_noise_spectrum, low_band_gain);
|
||||
residual_echo_spectrum, comfort_noise_spectrum, clock_drift,
|
||||
low_band_gain);
|
||||
|
||||
// Compute the gain for the upper bands.
|
||||
const absl::optional<int> narrow_peak_band =
|
||||
@ -373,6 +400,9 @@ void SuppressionGain::GetGain(
|
||||
*high_bands_gain =
|
||||
UpperBandsGain(echo_spectrum, comfort_noise_spectrum, narrow_peak_band,
|
||||
aec_state.SaturatedEcho(), render, *low_band_gain);
|
||||
|
||||
data_dumper_->DumpRaw("aec3_dominant_nearend",
|
||||
dominant_nearend_detector_->IsNearendState());
|
||||
}
|
||||
|
||||
void SuppressionGain::SetInitialState(bool state) {
|
||||
@ -386,20 +416,17 @@ void SuppressionGain::SetInitialState(bool state) {
|
||||
|
||||
// Detects when the render signal can be considered to have low power and
|
||||
// consist of stationary noise.
|
||||
bool SuppressionGain::LowNoiseRenderDetector::Detect(
|
||||
const std::vector<std::vector<std::vector<float>>>& render) {
|
||||
bool SuppressionGain::LowNoiseRenderDetector::Detect(const Block& render) {
|
||||
float x2_sum = 0.f;
|
||||
float x2_max = 0.f;
|
||||
for (const auto& x_ch : render[0]) {
|
||||
for (const auto& x_k : x_ch) {
|
||||
for (int ch = 0; ch < render.NumChannels(); ++ch) {
|
||||
for (float x_k : render.View(/*band=*/0, ch)) {
|
||||
const float x2 = x_k * x_k;
|
||||
x2_sum += x2;
|
||||
x2_max = std::max(x2_max, x2);
|
||||
}
|
||||
}
|
||||
const size_t num_render_channels = render[0].size();
|
||||
x2_sum = x2_sum / num_render_channels;
|
||||
;
|
||||
x2_sum = x2_sum / render.NumChannels();
|
||||
|
||||
constexpr float kThreshold = 50.f * 50.f * 64.f;
|
||||
const bool low_noise_render =
|
||||
@ -409,23 +436,23 @@ bool SuppressionGain::LowNoiseRenderDetector::Detect(
|
||||
}
|
||||
|
||||
SuppressionGain::GainParameters::GainParameters(
|
||||
int last_lf_band,
|
||||
int first_hf_band,
|
||||
const EchoCanceller3Config::Suppressor::Tuning& tuning)
|
||||
: max_inc_factor(tuning.max_inc_factor),
|
||||
max_dec_factor_lf(tuning.max_dec_factor_lf) {
|
||||
// Compute per-band masking thresholds.
|
||||
constexpr size_t kLastLfBand = 5;
|
||||
constexpr size_t kFirstHfBand = 8;
|
||||
RTC_DCHECK_LT(kLastLfBand, kFirstHfBand);
|
||||
RTC_DCHECK_LT(last_lf_band, first_hf_band);
|
||||
auto& lf = tuning.mask_lf;
|
||||
auto& hf = tuning.mask_hf;
|
||||
RTC_DCHECK_LT(lf.enr_transparent, lf.enr_suppress);
|
||||
RTC_DCHECK_LT(hf.enr_transparent, hf.enr_suppress);
|
||||
for (size_t k = 0; k < kFftLengthBy2Plus1; k++) {
|
||||
for (int k = 0; k < static_cast<int>(kFftLengthBy2Plus1); k++) {
|
||||
float a;
|
||||
if (k <= kLastLfBand) {
|
||||
if (k <= last_lf_band) {
|
||||
a = 0.f;
|
||||
} else if (k < kFirstHfBand) {
|
||||
a = (k - kLastLfBand) / static_cast<float>(kFirstHfBand - kLastLfBand);
|
||||
} else if (k < first_hf_band) {
|
||||
a = (k - last_lf_band) / static_cast<float>(first_hf_band - last_lf_band);
|
||||
} else {
|
||||
a = 1.f;
|
||||
}
|
||||
|
@ -12,6 +12,7 @@
|
||||
#define MODULES_AUDIO_PROCESSING_AEC3_SUPPRESSION_GAIN_H_
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
@ -25,7 +26,6 @@
|
||||
#include "modules/audio_processing/aec3/nearend_detector.h"
|
||||
#include "modules/audio_processing/aec3/render_signal_analyzer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/constructor_magic.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -36,20 +36,31 @@ class SuppressionGain {
|
||||
int sample_rate_hz,
|
||||
size_t num_capture_channels);
|
||||
~SuppressionGain();
|
||||
|
||||
SuppressionGain(const SuppressionGain&) = delete;
|
||||
SuppressionGain& operator=(const SuppressionGain&) = delete;
|
||||
|
||||
void GetGain(
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
nearend_spectrum,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> echo_spectrum,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
residual_echo_spectrum,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
residual_echo_spectrum_unbounded,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>>
|
||||
comfort_noise_spectrum,
|
||||
const RenderSignalAnalyzer& render_signal_analyzer,
|
||||
const AecState& aec_state,
|
||||
const std::vector<std::vector<std::vector<float>>>& render,
|
||||
const Block& render,
|
||||
bool clock_drift,
|
||||
float* high_bands_gain,
|
||||
std::array<float, kFftLengthBy2Plus1>* low_band_gain);
|
||||
|
||||
bool IsDominantNearend() {
|
||||
return dominant_nearend_detector_->IsNearendState();
|
||||
}
|
||||
|
||||
// Toggles the usage of the initial state.
|
||||
void SetInitialState(bool state);
|
||||
|
||||
@ -61,7 +72,7 @@ class SuppressionGain {
|
||||
comfort_noise_spectrum,
|
||||
const absl::optional<int>& narrow_peak_band,
|
||||
bool saturated_echo,
|
||||
const std::vector<std::vector<std::vector<float>>>& render,
|
||||
const Block& render,
|
||||
const std::array<float, kFftLengthBy2Plus1>& low_band_gain) const;
|
||||
|
||||
void GainToNoAudibleEcho(const std::array<float, kFftLengthBy2Plus1>& nearend,
|
||||
@ -76,6 +87,7 @@ class SuppressionGain {
|
||||
suppressor_input,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> residual_echo,
|
||||
rtc::ArrayView<const std::array<float, kFftLengthBy2Plus1>> comfort_noise,
|
||||
bool clock_drift,
|
||||
std::array<float, kFftLengthBy2Plus1>* gain);
|
||||
|
||||
void GetMinGain(rtc::ArrayView<const float> weighted_residual_echo,
|
||||
@ -89,7 +101,7 @@ class SuppressionGain {
|
||||
|
||||
class LowNoiseRenderDetector {
|
||||
public:
|
||||
bool Detect(const std::vector<std::vector<std::vector<float>>>& render);
|
||||
bool Detect(const Block& render);
|
||||
|
||||
private:
|
||||
float average_power_ = 32768.f * 32768.f;
|
||||
@ -97,6 +109,8 @@ class SuppressionGain {
|
||||
|
||||
struct GainParameters {
|
||||
explicit GainParameters(
|
||||
int last_lf_band,
|
||||
int first_hf_band,
|
||||
const EchoCanceller3Config::Suppressor::Tuning& tuning);
|
||||
const float max_inc_factor;
|
||||
const float max_dec_factor_lf;
|
||||
@ -105,7 +119,7 @@ class SuppressionGain {
|
||||
std::array<float, kFftLengthBy2Plus1> emr_transparent_;
|
||||
};
|
||||
|
||||
static int instance_count_;
|
||||
static std::atomic<int> instance_count_;
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
const Aec3Optimization optimization_;
|
||||
const EchoCanceller3Config config_;
|
||||
@ -120,9 +134,10 @@ class SuppressionGain {
|
||||
std::vector<aec3::MovingAverage> nearend_smoothers_;
|
||||
const GainParameters nearend_params_;
|
||||
const GainParameters normal_params_;
|
||||
// Determines if the dominant nearend detector uses the unbounded residual
|
||||
// echo spectrum.
|
||||
const bool use_unbounded_echo_spectrum_;
|
||||
std::unique_ptr<NearendDetector> dominant_nearend_detector_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(SuppressionGain);
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -11,6 +11,7 @@
|
||||
#include "modules/audio_processing/aec3/transparent_mode.h"
|
||||
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
|
||||
namespace webrtc {
|
||||
@ -23,8 +24,8 @@ bool DeactivateTransparentMode() {
|
||||
return field_trial::IsEnabled("WebRTC-Aec3TransparentModeKillSwitch");
|
||||
}
|
||||
|
||||
bool DeactivateTransparentModeHmm() {
|
||||
return field_trial::IsEnabled("WebRTC-Aec3TransparentModeHmmKillSwitch");
|
||||
bool ActivateTransparentModeHmm() {
|
||||
return field_trial::IsEnabled("WebRTC-Aec3TransparentModeHmm");
|
||||
}
|
||||
|
||||
} // namespace
|
||||
@ -46,6 +47,7 @@ class TransparentModeImpl : public TransparentMode {
|
||||
void Update(int filter_delay_blocks,
|
||||
bool any_filter_consistent,
|
||||
bool any_filter_converged,
|
||||
bool any_coarse_filter_converged,
|
||||
bool all_filters_diverged,
|
||||
bool active_render,
|
||||
bool saturated_capture) override {
|
||||
@ -56,9 +58,9 @@ class TransparentModeImpl : public TransparentMode {
|
||||
// there is no echo present in the microphone signal.
|
||||
|
||||
// The constants have been obtained by observing active_render and
|
||||
// any_filter_converged under varying call scenarios. They have further been
|
||||
// hand tuned to prefer normal state during uncertain regions (to avoid echo
|
||||
// leaks).
|
||||
// any_coarse_filter_converged under varying call scenarios. They
|
||||
// have further been hand tuned to prefer normal state during uncertain
|
||||
// regions (to avoid echo leaks).
|
||||
|
||||
// The model is only updated during active render.
|
||||
if (!active_render)
|
||||
@ -69,8 +71,8 @@ class TransparentModeImpl : public TransparentMode {
|
||||
|
||||
// Probability of observing converged filters in states "normal" and
|
||||
// "transparent" during active render.
|
||||
constexpr float kConvergedNormal = 0.03f;
|
||||
constexpr float kConvergedTransparent = 0.005f;
|
||||
constexpr float kConvergedNormal = 0.01f;
|
||||
constexpr float kConvergedTransparent = 0.001f;
|
||||
|
||||
// Probability of transitioning to transparent state from normal state and
|
||||
// transparent state respectively.
|
||||
@ -92,7 +94,7 @@ class TransparentModeImpl : public TransparentMode {
|
||||
const float prob_transition_normal = 1.f - prob_transition_transparent;
|
||||
|
||||
// Observed output.
|
||||
const int out = any_filter_converged;
|
||||
const int out = static_cast<int>(any_coarse_filter_converged);
|
||||
|
||||
// Joint probabilities of the observed output and respective states.
|
||||
const float prob_joint_normal = prob_transition_normal * kB[0][out];
|
||||
@ -142,6 +144,7 @@ class LegacyTransparentModeImpl : public TransparentMode {
|
||||
void Update(int filter_delay_blocks,
|
||||
bool any_filter_consistent,
|
||||
bool any_filter_converged,
|
||||
bool any_coarse_filter_converged,
|
||||
bool all_filters_diverged,
|
||||
bool active_render,
|
||||
bool saturated_capture) override {
|
||||
@ -226,12 +229,15 @@ class LegacyTransparentModeImpl : public TransparentMode {
|
||||
std::unique_ptr<TransparentMode> TransparentMode::Create(
|
||||
const EchoCanceller3Config& config) {
|
||||
if (config.ep_strength.bounded_erl || DeactivateTransparentMode()) {
|
||||
RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Disabled";
|
||||
return nullptr;
|
||||
}
|
||||
if (DeactivateTransparentModeHmm()) {
|
||||
return std::make_unique<LegacyTransparentModeImpl>(config);
|
||||
if (ActivateTransparentModeHmm()) {
|
||||
RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: HMM";
|
||||
return std::make_unique<TransparentModeImpl>();
|
||||
}
|
||||
return std::make_unique<TransparentModeImpl>();
|
||||
RTC_LOG(LS_INFO) << "AEC3 Transparent Mode: Legacy";
|
||||
return std::make_unique<LegacyTransparentModeImpl>(config);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -37,6 +37,7 @@ class TransparentMode {
|
||||
virtual void Update(int filter_delay_blocks,
|
||||
bool any_filter_consistent,
|
||||
bool any_filter_converged,
|
||||
bool any_coarse_filter_converged,
|
||||
bool all_filters_diverged,
|
||||
bool active_render,
|
||||
bool saturated_capture) = 0;
|
||||
|
@ -8,12 +8,11 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "modules/audio_processing/aec3/vector_math.h"
|
||||
|
||||
#include <immintrin.h>
|
||||
#include <math.h>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "modules/audio_processing/aec3/vector_math.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
@ -14,10 +14,10 @@ rtc_source_set("aec_dump") {
|
||||
|
||||
deps = [
|
||||
"..:aec_dump_interface",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base/system:file_wrapper",
|
||||
"../../../rtc_base/system:rtc_export",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
|
||||
}
|
||||
|
||||
if (rtc_include_tests) {
|
||||
@ -46,7 +46,6 @@ if (rtc_include_tests) {
|
||||
"..:api",
|
||||
"..:audioproc_test_utils",
|
||||
"../",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"//testing/gtest",
|
||||
]
|
||||
}
|
||||
@ -59,8 +58,6 @@ if (rtc_enable_protobuf) {
|
||||
"aec_dump_impl.h",
|
||||
"capture_stream_info.cc",
|
||||
"capture_stream_info.h",
|
||||
"write_to_file_task.cc",
|
||||
"write_to_file_task.h",
|
||||
]
|
||||
|
||||
deps = [
|
||||
@ -70,12 +67,16 @@ if (rtc_enable_protobuf) {
|
||||
"../../../api/task_queue",
|
||||
"../../../rtc_base:checks",
|
||||
"../../../rtc_base:ignore_wundef",
|
||||
"../../../rtc_base:logging",
|
||||
"../../../rtc_base:macromagic",
|
||||
"../../../rtc_base:protobuf_utils",
|
||||
"../../../rtc_base:rtc_base_approved",
|
||||
"../../../rtc_base:race_checker",
|
||||
"../../../rtc_base:rtc_event",
|
||||
"../../../rtc_base:rtc_task_queue",
|
||||
"../../../rtc_base/system:file_wrapper",
|
||||
"../../../system_wrappers",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
|
||||
|
||||
deps += [ "../:audioproc_debug_proto" ]
|
||||
}
|
||||
@ -107,4 +108,5 @@ rtc_library("null_aec_dump_factory") {
|
||||
":aec_dump",
|
||||
"..:aec_dump_interface",
|
||||
]
|
||||
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
|
||||
}
|
||||
|
@ -12,8 +12,8 @@
|
||||
#define MODULES_AUDIO_PROCESSING_AEC_DUMP_AEC_DUMP_FACTORY_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
|
||||
#include "absl/strings/string_view.h"
|
||||
#include "modules/audio_processing/include/aec_dump.h"
|
||||
#include "rtc_base/system/file_wrapper.h"
|
||||
#include "rtc_base/system/rtc_export.h"
|
||||
@ -26,16 +26,16 @@ namespace webrtc {
|
||||
|
||||
class RTC_EXPORT AecDumpFactory {
|
||||
public:
|
||||
// The |worker_queue| may not be null and must outlive the created
|
||||
// AecDump instance. |max_log_size_bytes == -1| means the log size
|
||||
// will be unlimited. |handle| may not be null. The AecDump takes
|
||||
// responsibility for |handle| and closes it in the destructor. A
|
||||
// The `worker_queue` may not be null and must outlive the created
|
||||
// AecDump instance. `max_log_size_bytes == -1` means the log size
|
||||
// will be unlimited. `handle` may not be null. The AecDump takes
|
||||
// responsibility for `handle` and closes it in the destructor. A
|
||||
// non-null return value indicates that the file has been
|
||||
// sucessfully opened.
|
||||
static std::unique_ptr<AecDump> Create(webrtc::FileWrapper file,
|
||||
int64_t max_log_size_bytes,
|
||||
rtc::TaskQueue* worker_queue);
|
||||
static std::unique_ptr<AecDump> Create(std::string file_name,
|
||||
static std::unique_ptr<AecDump> Create(absl::string_view file_name,
|
||||
int64_t max_log_size_bytes,
|
||||
rtc::TaskQueue* worker_queue);
|
||||
static std::unique_ptr<AecDump> Create(FILE* handle,
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user