Bump to WebRTC M120 release

Some APIs were removed upstream: ExperimentalAgc and ExperimentalNs are gone. We continue to carry iSAC even though it has been dropped upstream, but we may want to drop it soon as well.
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
--- a/webrtc/modules/audio_processing/gain_controller2.cc
+++ b/webrtc/modules/audio_processing/gain_controller2.cc
@ -10,129 +10,274 @@

 #include "modules/audio_processing/gain_controller2.h"

+#include <memory>
+#include <utility>
+
 #include "common_audio/include/audio_util.h"
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/cpu_features.h"
 #include "modules/audio_processing/audio_buffer.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
-#include "rtc_base/atomic_ops.h"
 #include "rtc_base/checks.h"
+#include "rtc_base/logging.h"
 #include "rtc_base/strings/string_builder.h"
+#include "system_wrappers/include/field_trial.h"

 namespace webrtc {
+namespace {

-int GainController2::instance_count_ = 0;
+using Agc2Config = AudioProcessing::Config::GainController2;
+using InputVolumeControllerConfig = InputVolumeController::Config;

-GainController2::GainController2()
-    : data_dumper_(
-          new ApmDataDumper(rtc::AtomicOps::Increment(&instance_count_))),
-      gain_applier_(/*hard_clip_samples=*/false,
-                    /*initial_gain_factor=*/0.f),
-      limiter_(static_cast<size_t>(48000), data_dumper_.get(), "Agc2") {
-  if (config_.adaptive_digital.enabled) {
-    adaptive_agc_.reset(new AdaptiveAgc(data_dumper_.get()));
+constexpr int kLogLimiterStatsPeriodMs = 30'000;
+constexpr int kFrameLengthMs = 10;  // Presumably the audio duration covered by one Process() call - confirm.
+constexpr int kLogLimiterStatsPeriodNumFrames =
+    kLogLimiterStatsPeriodMs / kFrameLengthMs;  // 30000 / 10 = 3000 Process() calls between two limiter stats logs.
+
+// Detects the available CPU features and applies any kill-switches. Each kill-switch is a field trial; when it is enabled the matching SIMD flag is forced to false.
+AvailableCpuFeatures GetAllowedCpuFeatures() {
+  AvailableCpuFeatures features = GetAvailableCpuFeatures();
+  if (field_trial::IsEnabled("WebRTC-Agc2SimdSse2KillSwitch")) {
+    features.sse2 = false;  // Kill-switch enabled: clear the SSE2 flag.
+  }
+  if (field_trial::IsEnabled("WebRTC-Agc2SimdAvx2KillSwitch")) {
+    features.avx2 = false;  // Kill-switch enabled: clear the AVX2 flag.
+  }
+  if (field_trial::IsEnabled("WebRTC-Agc2SimdNeonKillSwitch")) {
+    features.neon = false;  // Kill-switch enabled: clear the NEON flag.
+  }
+  return features;  // Flags without an enabled kill-switch keep the value returned by GetAvailableCpuFeatures().
+}
+
+// Peak and RMS audio levels in dBFS, as produced by ComputeAudioLevels().
+struct AudioLevels {
+  float peak_dbfs;  // Maximum absolute sample value, converted to dBFS.
+  float rms_dbfs;  // Root mean square of the samples, converted to dBFS.
+};
+
+// Speech level info, filled in Process() from the speech level estimator.
+struct SpeechLevel {
+  bool is_confident;  // Copied from SpeechLevelEstimator::is_confident().
+  float rms_dbfs;  // Copied from SpeechLevelEstimator::level_dbfs().
+};
+
+// Computes the peak and RMS levels (in dBFS) for the first channel in `frame` and dumps both via `data_dumper`.
+AudioLevels ComputeAudioLevels(AudioFrameView<float> frame,
+                               ApmDataDumper& data_dumper) {
+  float peak = 0.0f;  // Running maximum of |x|.
+  float rms = 0.0f;  // Running sum of squares; the mean and the root are taken below.
+  for (const auto& x : frame.channel(0)) {
+    peak = std::max(std::fabs(x), peak);  // NOTE(review): <algorithm> and <cmath> are not among the includes visible in this file - presumably pulled in transitively.
+    rms += x * x;
+  }
+  AudioLevels levels{
+      FloatS16ToDbfs(peak),
+      FloatS16ToDbfs(std::sqrt(rms / frame.samples_per_channel()))};  // NOTE(review): evaluates 0/0 if the frame has no samples - confirm frames are never empty.
+  data_dumper.DumpRaw("agc2_input_rms_dbfs", levels.rms_dbfs);
+  data_dumper.DumpRaw("agc2_input_peak_dbfs", levels.peak_dbfs);
+  return levels;
+}
+
+}  // namespace
+
+std::atomic<int> GainController2::instance_count_(0);  // Incremented by every constructor call; the incremented value is passed to `data_dumper_`.
+
+GainController2::GainController2(  // Creates only the sub-modules that `config` enables; the fixed gain applier and the limiter always exist.
+    const Agc2Config& config,
+    const InputVolumeControllerConfig& input_volume_controller_config,
+    int sample_rate_hz,
+    int num_channels,
+    bool use_internal_vad)
+    : cpu_features_(GetAllowedCpuFeatures()),
+      data_dumper_(instance_count_.fetch_add(1) + 1),  // fetch_add() returns the previous count, hence the +1.
+      fixed_gain_applier_(
+          /*hard_clip_samples=*/false,
+          /*initial_gain_factor=*/DbToRatio(config.fixed_digital.gain_db)),
+      limiter_(sample_rate_hz, &data_dumper_, /*histogram_name_prefix=*/"Agc2"),
+      calls_since_last_limiter_log_(0) {
+  RTC_DCHECK(Validate(config));
+  data_dumper_.InitiateNewSetOfRecordings();
+
+  if (config.input_volume_controller.enabled ||
+      config.adaptive_digital.enabled) {
+    // Create the dependencies needed when either controller is enabled.
+    speech_level_estimator_ = std::make_unique<SpeechLevelEstimator>(
+        &data_dumper_, config.adaptive_digital, kAdjacentSpeechFramesThreshold);
+    if (use_internal_vad)  // Otherwise `vad_` stays null and Process() relies on the caller-provided speech probability.
+      vad_ = std::make_unique<VoiceActivityDetectorWrapper>(
+          kVadResetPeriodMs, cpu_features_, sample_rate_hz);
+  }
+
+  if (config.input_volume_controller.enabled) {
+    // Create controller.
+    input_volume_controller_ = std::make_unique<InputVolumeController>(
+        num_channels, input_volume_controller_config);
+    // TODO(bugs.webrtc.org/7494): Call `Initialize` in ctor and remove method.
+    input_volume_controller_->Initialize();
+  }
+
+  if (config.adaptive_digital.enabled) {
+    // Create the dependencies whose outputs feed the adaptive digital controller in Process().
+    noise_level_estimator_ = CreateNoiseFloorEstimator(&data_dumper_);
+    saturation_protector_ = CreateSaturationProtector(
+        kSaturationProtectorInitialHeadroomDb, kAdjacentSpeechFramesThreshold,
+        &data_dumper_);
+    // Create controller.
+    adaptive_digital_controller_ =
+        std::make_unique<AdaptiveDigitalGainController>(
+            &data_dumper_, config.adaptive_digital,
+            kAdjacentSpeechFramesThreshold);
  }
 }

 GainController2::~GainController2() = default;

-void GainController2::Initialize(int sample_rate_hz) {
-  RTC_DCHECK(sample_rate_hz == AudioProcessing::kSampleRate8kHz ||
-             sample_rate_hz == AudioProcessing::kSampleRate16kHz ||
-             sample_rate_hz == AudioProcessing::kSampleRate32kHz ||
-             sample_rate_hz == AudioProcessing::kSampleRate48kHz);
-  limiter_.SetSampleRate(sample_rate_hz);
-  data_dumper_->InitiateNewSetOfRecordings();
-  data_dumper_->DumpRaw("sample_rate_hz", sample_rate_hz);
-}
-
-void GainController2::Process(AudioBuffer* audio) {
-  AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
-                                    audio->num_frames());
-  // Apply fixed gain first, then the adaptive one.
-  gain_applier_.ApplyGain(float_frame);
-  if (adaptive_agc_) {
-    adaptive_agc_->Process(float_frame, limiter_.LastAudioLevel());
+// TODO(webrtc:7494): Pass the flag also to the other components.
+void GainController2::SetCaptureOutputUsed(bool capture_output_used) {
+  if (input_volume_controller_) {  // Null unless config.input_volume_controller.enabled was set at construction; then this call is a no-op.
+    input_volume_controller_->HandleCaptureOutputUsedChange(
+        capture_output_used);
  }
-  limiter_.Process(float_frame);
 }

-void GainController2::NotifyAnalogLevel(int level) {
-  if (analog_level_ != level && adaptive_agc_) {
-    adaptive_agc_->Reset();
-  }
-  analog_level_ = level;
-}
-
-void GainController2::ApplyConfig(
-    const AudioProcessing::Config::GainController2& config) {
-  RTC_DCHECK(Validate(config))
-      << " the invalid config was " << ToString(config);
-
-  config_ = config;
-  if (config.fixed_digital.gain_db != config_.fixed_digital.gain_db) {
+void GainController2::SetFixedGainDb(float gain_db) {
+  const float gain_factor = DbToRatio(gain_db);  // The comparison below is done on the converted factor, not on the dB value.
+  if (fixed_gain_applier_.GetGainFactor() != gain_factor) {  // Exact float comparison: any change of the factor resets the limiter.
    // Reset the limiter to quickly react on abrupt level changes caused by
    // large changes of the fixed gain.
    limiter_.Reset();
  }
-  gain_applier_.SetGainFactor(DbToRatio(config_.fixed_digital.gain_db));
-  if (config_.adaptive_digital.enabled) {
-    adaptive_agc_.reset(new AdaptiveAgc(data_dumper_.get(), config_));
-  } else {
-    adaptive_agc_.reset();
+  fixed_gain_applier_.SetGainFactor(gain_factor);  // Always applied, whether or not the factor changed.
+}
+
+void GainController2::Analyze(int applied_input_volume,
+                              const AudioBuffer& audio_buffer) {
+  recommended_input_volume_ = absl::nullopt;  // Drop any earlier recommendation; only Process() sets a new one.
+
+  RTC_DCHECK_GE(applied_input_volume, 0);  // Together with the next check: expected range is 0..255.
+  RTC_DCHECK_LE(applied_input_volume, 255);
+
+  if (input_volume_controller_) {  // Nothing else happens when the input volume controller was not created.
+    input_volume_controller_->AnalyzeInputAudio(applied_input_volume,
+                                                audio_buffer);
+  }
+}
+
+void GainController2::Process(absl::optional<float> speech_probability,
+                              bool input_volume_changed,
+                              AudioBuffer* audio) {
+  recommended_input_volume_ = absl::nullopt;  // Set again below only if the input volume controller produces a recommendation.
+
+  data_dumper_.DumpRaw("agc2_applied_input_volume_changed",
+                       input_volume_changed);
+  if (input_volume_changed) {
+    // Handle input volume changes.
+    if (speech_level_estimator_)
+      speech_level_estimator_->Reset();
+    if (saturation_protector_)
+      saturation_protector_->Reset();
+  }
+
  AudioFrameView<float> float_frame(audio->channels(), audio->num_channels(),
                                    audio->num_frames());
+  // Compute speech probability.
+  if (vad_) {
+    // When the VAD component runs, `speech_probability` should not be specified
+    // because APM should not run the same VAD twice (as an APM sub-module and
+    // internally in AGC2).
+    RTC_DCHECK(!speech_probability.has_value());
+    speech_probability = vad_->Analyze(float_frame);  // Overwrites the argument with the internally computed value.
+  }
+  if (speech_probability.has_value()) {
+    RTC_DCHECK_GE(*speech_probability, 0.0f);  // Together with the next check: expected range is 0..1.
+    RTC_DCHECK_LE(*speech_probability, 1.0f);
+  }
+  // The speech probability may not be defined at this step (e.g., when the
+  // fixed digital controller alone is enabled).
+  if (speech_probability.has_value())
+    data_dumper_.DumpRaw("agc2_speech_probability", *speech_probability);
+
+  // Compute audio, noise and speech levels.
+  AudioLevels audio_levels = ComputeAudioLevels(float_frame, data_dumper_);  // Measured before any gain is applied to the frame.
+  absl::optional<float> noise_rms_dbfs;
+  if (noise_level_estimator_) {
+    // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+    // computation in `noise_level_estimator_`.
+    noise_rms_dbfs = noise_level_estimator_->Analyze(float_frame);
+  }
+  absl::optional<SpeechLevel> speech_level;
+  if (speech_level_estimator_) {
+    RTC_DCHECK(speech_probability.has_value());  // NOTE(review): the dereference in the next statement has no runtime guard, unlike the input volume step below - confirm callers always pass a probability when the internal VAD is off.
+    speech_level_estimator_->Update(
+        audio_levels.rms_dbfs, audio_levels.peak_dbfs, *speech_probability);
+    speech_level =
+        SpeechLevel{.is_confident = speech_level_estimator_->is_confident(),
+                    .rms_dbfs = speech_level_estimator_->level_dbfs()};
+  }
+
+  // Update the recommended input volume.
+  if (input_volume_controller_) {
+    RTC_DCHECK(speech_level.has_value());
+    RTC_DCHECK(speech_probability.has_value());
+    if (speech_probability.has_value()) {  // Runtime guard in addition to the check above.
+      recommended_input_volume_ =
+          input_volume_controller_->RecommendInputVolume(
+              *speech_probability,
+              speech_level->is_confident  // The speech level is forwarded only when the estimator is confident.
+                  ? absl::optional<float>(speech_level->rms_dbfs)
+                  : absl::nullopt);
+    }
+  }
+
+  if (adaptive_digital_controller_) {
+    RTC_DCHECK(saturation_protector_);
+    RTC_DCHECK(speech_probability.has_value());
+    RTC_DCHECK(speech_level.has_value());
+    saturation_protector_->Analyze(*speech_probability, audio_levels.peak_dbfs,
+                                   speech_level->rms_dbfs);
+    float headroom_db = saturation_protector_->HeadroomDb();
+    data_dumper_.DumpRaw("agc2_headroom_db", headroom_db);
+    float limiter_envelope_dbfs = FloatS16ToDbfs(limiter_.LastAudioLevel());  // Read before `limiter_.Process()` runs for the current frame.
+    data_dumper_.DumpRaw("agc2_limiter_envelope_dbfs", limiter_envelope_dbfs);
+    RTC_DCHECK(noise_rms_dbfs.has_value());
+    adaptive_digital_controller_->Process(
+        /*info=*/{.speech_probability = *speech_probability,
+                  .speech_level_dbfs = speech_level->rms_dbfs,
+                  .speech_level_reliable = speech_level->is_confident,
+                  .noise_rms_dbfs = *noise_rms_dbfs,
+                  .headroom_db = headroom_db,
+                  .limiter_envelope_dbfs = limiter_envelope_dbfs},
+        float_frame);
+  }
+
+  // TODO(bugs.webrtc.org/7494): Pass `audio_levels` to remove duplicated
+  // computation in `limiter_`.
+  fixed_gain_applier_.ApplyGain(float_frame);  // Runs after the adaptive digital controller (when present) has processed the frame.
+
+  limiter_.Process(float_frame);  // Last processing step, applied to the gain-adjusted frame.
+
+  // Periodically log limiter stats.
+  if (++calls_since_last_limiter_log_ == kLogLimiterStatsPeriodNumFrames) {  // True on every kLogLimiterStatsPeriodNumFrames-th call; the counter restarts below.
+    calls_since_last_limiter_log_ = 0;
+    InterpolatedGainCurve::Stats stats = limiter_.GetGainCurveStats();
+    RTC_LOG(LS_INFO) << "[AGC2] limiter stats"
+                     << " | identity: " << stats.look_ups_identity_region
+                     << " | knee: " << stats.look_ups_knee_region
+                     << " | limiter: " << stats.look_ups_limiter_region
+                     << " | saturation: " << stats.look_ups_saturation_region;
  }
 }

 bool GainController2::Validate(
    const AudioProcessing::Config::GainController2& config) {
-  return config.fixed_digital.gain_db >= 0.f &&
-         config.fixed_digital.gain_db < 50.f &&
-         config.adaptive_digital.extra_saturation_margin_db >= 0.f &&
-         config.adaptive_digital.extra_saturation_margin_db <= 100.f;
-}
-
-std::string GainController2::ToString(
-    const AudioProcessing::Config::GainController2& config) {
-  rtc::StringBuilder ss;
-  std::string adaptive_digital_level_estimator;
-  using LevelEstimatorType =
-      AudioProcessing::Config::GainController2::LevelEstimator;
-  switch (config.adaptive_digital.level_estimator) {
-    case LevelEstimatorType::kRms:
-      adaptive_digital_level_estimator = "RMS";
-      break;
-    case LevelEstimatorType::kPeak:
-      adaptive_digital_level_estimator = "peak";
-      break;
-  }
-  // clang-format off
-  // clang formatting doesn't respect custom nested style.
-  ss << "{"
-        "enabled: " << (config.enabled ? "true" : "false") << ", "
-        "fixed_digital: {gain_db: " << config.fixed_digital.gain_db << "}, "
-        "adaptive_digital: {"
-          "enabled: "
-            << (config.adaptive_digital.enabled ? "true" : "false") << ", "
-          "level_estimator: {"
-            "type: " << adaptive_digital_level_estimator << ", "
-            "adjacent_speech_frames_threshold: "
-              << config.adaptive_digital
-                  .level_estimator_adjacent_speech_frames_threshold << ", "
-            "initial_saturation_margin_db: "
-              << config.adaptive_digital.initial_saturation_margin_db << ", "
-            "extra_saturation_margin_db: "
-              << config.adaptive_digital.extra_saturation_margin_db << "}, "
-          "gain_applier: {"
-            "adjacent_speech_frames_threshold: "
-              << config.adaptive_digital
-                  .gain_applier_adjacent_speech_frames_threshold << ", "
-            "max_gain_change_db_per_second: "
-              << config.adaptive_digital.max_gain_change_db_per_second << ", "
-            "max_output_noise_level_dbfs: "
-              << config.adaptive_digital.max_output_noise_level_dbfs << "}"
-        "}"
-        "}";
-  // clang-format on
-  return ss.Release();
+  const auto& fixed = config.fixed_digital;
+  const auto& adaptive = config.adaptive_digital;
+  return fixed.gain_db >= 0.0f && fixed.gain_db < 50.0f &&  // Fixed gain must lie in [0, 50) dB.
+         adaptive.headroom_db >= 0.0f && adaptive.max_gain_db > 0.0f &&  // Non-negative headroom, strictly positive max gain.
+         adaptive.initial_gain_db >= 0.0f &&
+         adaptive.max_gain_change_db_per_second > 0.0f &&
+         adaptive.max_output_noise_level_dbfs <= 0.0f;  // The adaptive checks run regardless of adaptive_digital.enabled.
 }

 }  // namespace webrtc