Bump to WebRTC M120 release

Some APIs were deprecated and have now been removed -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it has been removed upstream, but we may want to drop it soon.
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
--- a/webrtc/modules/audio_processing/agc2/saturation_protector.cc
+++ b/webrtc/modules/audio_processing/agc2/saturation_protector.cc
@@ -10,84 +10,59 @@

 #include "modules/audio_processing/agc2/saturation_protector.h"

+#include <memory>
+
+#include "modules/audio_processing/agc2/agc2_common.h"
+#include "modules/audio_processing/agc2/saturation_protector_buffer.h"
 #include "modules/audio_processing/logging/apm_data_dumper.h"
+#include "rtc_base/checks.h"
 #include "rtc_base/numerics/safe_minmax.h"

 namespace webrtc {
 namespace {

-constexpr float kMinLevelDbfs = -90.f;
+constexpr int kPeakEnveloperSuperFrameLengthMs = 400;
+constexpr float kMinMarginDb = 12.0f;
+constexpr float kMaxMarginDb = 25.0f;
+constexpr float kAttack = 0.9988493699365052f;
+constexpr float kDecay = 0.9997697679981565f;

-// Min/max margins are based on speech crest-factor.
-constexpr float kMinMarginDb = 12.f;
-constexpr float kMaxMarginDb = 25.f;
-
-using saturation_protector_impl::RingBuffer;
-
-}  // namespace
-
-bool RingBuffer::operator==(const RingBuffer& b) const {
-  RTC_DCHECK_LE(size_, buffer_.size());
-  RTC_DCHECK_LE(b.size_, b.buffer_.size());
-  if (size_ != b.size_) {
-    return false;
+// Saturation protector state. Defined outside of `SaturationProtectorImpl` so
+// that a whole state can be copied to check-point it and copied back to restore.
+struct SaturationProtectorState {
+  bool operator==(const SaturationProtectorState& s) const {  // Member-wise.
+    return headroom_db == s.headroom_db &&
+           peak_delay_buffer == s.peak_delay_buffer &&
+           max_peaks_dbfs == s.max_peaks_dbfs &&
+           time_since_push_ms == s.time_since_push_ms;
   }
-  for (int i = 0, i0 = FrontIndex(), i1 = b.FrontIndex(); i < size_;
-       ++i, ++i0, ++i1) {
-    if (buffer_[i0 % buffer_.size()] != b.buffer_[i1 % b.buffer_.size()]) {
-      return false;
-    }
+  inline bool operator!=(const SaturationProtectorState& s) const {
+    return !(*this == s);
   }
-  return true;
-}

-void RingBuffer::Reset() {
-  next_ = 0;
-  size_ = 0;
-}
+  float headroom_db;  // Headroom estimate; kept in [kMinMarginDb, kMaxMarginDb].
+  SaturationProtectorBuffer peak_delay_buffer;  // Its front is the delayed peak.
+  float max_peaks_dbfs;  // Running max of the observed frame peaks.
+  int time_since_push_ms;  // Time since the last ring buffer push operation.
+};

-void RingBuffer::PushBack(float v) {
-  RTC_DCHECK_GE(next_, 0);
-  RTC_DCHECK_GE(size_, 0);
-  RTC_DCHECK_LT(next_, buffer_.size());
-  RTC_DCHECK_LE(size_, buffer_.size());
-  buffer_[next_++] = v;
-  if (rtc::SafeEq(next_, buffer_.size())) {
-    next_ = 0;
-  }
-  if (rtc::SafeLt(size_, buffer_.size())) {
-    size_++;
-  }
-}
-
-absl::optional<float> RingBuffer::Front() const {
-  if (size_ == 0) {
-    return absl::nullopt;
-  }
-  RTC_DCHECK_LT(FrontIndex(), buffer_.size());
-  return buffer_[FrontIndex()];
-}
-
-bool SaturationProtectorState::operator==(
-    const SaturationProtectorState& b) const {
-  return margin_db == b.margin_db && peak_delay_buffer == b.peak_delay_buffer &&
-         max_peaks_dbfs == b.max_peaks_dbfs &&
-         time_since_push_ms == b.time_since_push_ms;
-}
-
-void ResetSaturationProtectorState(float initial_margin_db,
+// Resets every field of `state`; the headroom restarts at `initial_headroom_db`.
+void ResetSaturationProtectorState(float initial_headroom_db,
                                    SaturationProtectorState& state) {
-  state.margin_db = initial_margin_db;
+  state.headroom_db = initial_headroom_db;
   state.peak_delay_buffer.Reset();
   state.max_peaks_dbfs = kMinLevelDbfs;  // Starting value for the running max.
   state.time_since_push_ms = 0;
 }

-void UpdateSaturationProtectorState(float speech_peak_dbfs,
+// Updates `state` by analyzing the estimated speech level `speech_level_dbfs`
+// and the peak level `peak_dbfs` for an observed frame. `state` must not be
+// modified without calling this function.
+void UpdateSaturationProtectorState(float peak_dbfs,
                                    float speech_level_dbfs,
                                    SaturationProtectorState& state) {
  // Get the max peak over `kPeakEnveloperSuperFrameLengthMs` ms.
-  state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, speech_peak_dbfs);
+  state.max_peaks_dbfs = std::max(state.max_peaks_dbfs, peak_dbfs);
  state.time_since_push_ms += kFrameDurationMs;
  if (rtc::SafeGt(state.time_since_push_ms, kPeakEnveloperSuperFrameLengthMs)) {
    // Push `max_peaks_dbfs` back into the ring buffer.
@@ -97,25 +72,112 @@ void UpdateSaturationProtectorState(float speech_peak_dbfs,
    state.time_since_push_ms = 0;
  }

-  // Update margin by comparing the estimated speech level and the delayed max
-  // speech peak power.
-  // TODO(alessiob): Check with aleloi@ why we use a delay and how to tune it.
+  // Update the headroom by comparing the estimated speech level and the delayed
+  // max speech peak.
  const float delayed_peak_dbfs =
      state.peak_delay_buffer.Front().value_or(state.max_peaks_dbfs);
  const float difference_db = delayed_peak_dbfs - speech_level_dbfs;
-  if (difference_db > state.margin_db) {
+  if (difference_db > state.headroom_db) {
    // Attack.
-    state.margin_db =
-        state.margin_db * kSaturationProtectorAttackConstant +
-        difference_db * (1.f - kSaturationProtectorAttackConstant);
+    state.headroom_db =
+        state.headroom_db * kAttack + difference_db * (1.0f - kAttack);
  } else {
    // Decay.
-    state.margin_db = state.margin_db * kSaturationProtectorDecayConstant +
-                      difference_db * (1.f - kSaturationProtectorDecayConstant);
+    state.headroom_db =
+        state.headroom_db * kDecay + difference_db * (1.0f - kDecay);
  }

-  state.margin_db =
-      rtc::SafeClamp<float>(state.margin_db, kMinMarginDb, kMaxMarginDb);
+  state.headroom_db =
+      rtc::SafeClamp<float>(state.headroom_db, kMinMarginDb, kMaxMarginDb);
+}
+
+// Saturation protector which recommends a headroom based on the recent peaks.
+class SaturationProtectorImpl : public SaturationProtector {
+ public:
+  explicit SaturationProtectorImpl(float initial_headroom_db,
+                                   int adjacent_speech_frames_threshold,
+                                   ApmDataDumper* apm_data_dumper)
+      : apm_data_dumper_(apm_data_dumper),
+        initial_headroom_db_(initial_headroom_db),
+        adjacent_speech_frames_threshold_(adjacent_speech_frames_threshold) {
+    Reset();  // Initializes every non-const member.
+  }
+  SaturationProtectorImpl(const SaturationProtectorImpl&) = delete;
+  SaturationProtectorImpl& operator=(const SaturationProtectorImpl&) = delete;
+  ~SaturationProtectorImpl() = default;
+
+  float HeadroomDb() override { return headroom_db_; }  // Last committed value.
+
+  void Analyze(float speech_probability,
+               float peak_dbfs,
+               float speech_level_dbfs) override {
+    if (speech_probability < kVadConfidenceThreshold) {
+      // Not a speech frame.
+      if (adjacent_speech_frames_threshold_ > 1) {  // Else nothing to confirm.
+        // When two or more adjacent speech frames are required in order to
+        // update the state, we need to decide whether to discard or confirm the
+        // updates based on the speech sequence length.
+        if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+          // First non-speech frame after a long enough sequence of speech
+          // frames. Update the reliable state.
+          reliable_state_ = preliminary_state_;
+        } else if (num_adjacent_speech_frames_ > 0) {
+          // First non-speech frame after a too short sequence of speech frames.
+          // Reset to the last reliable state.
+          preliminary_state_ = reliable_state_;
+        }
+      }
+      num_adjacent_speech_frames_ = 0;
+    } else {
+      // Speech frame observed.
+      num_adjacent_speech_frames_++;
+
+      // Update the preliminary state; it is confirmed or discarded later.
+      UpdateSaturationProtectorState(peak_dbfs, speech_level_dbfs,
+                                     preliminary_state_);
+
+      if (num_adjacent_speech_frames_ >= adjacent_speech_frames_threshold_) {
+        // `preliminary_state_` is now reliable. Update the headroom.
+        headroom_db_ = preliminary_state_.headroom_db;
+      }
+    }
+    DumpDebugData();  // Runs for speech and non-speech frames alike.
+  }
+
+  void Reset() override {
+    num_adjacent_speech_frames_ = 0;
+    headroom_db_ = initial_headroom_db_;
+    ResetSaturationProtectorState(initial_headroom_db_, preliminary_state_);
+    ResetSaturationProtectorState(initial_headroom_db_, reliable_state_);
+  }
+
+ private:
+  void DumpDebugData() {  // NOTE(review): dereferences `apm_data_dumper_` unchecked; presumably it must be non-null - confirm.
+    apm_data_dumper_->DumpRaw(
+        "agc2_saturation_protector_preliminary_max_peak_dbfs",
+        preliminary_state_.max_peaks_dbfs);
+    apm_data_dumper_->DumpRaw(
+        "agc2_saturation_protector_reliable_max_peak_dbfs",
+        reliable_state_.max_peaks_dbfs);
+  }
+
+  ApmDataDumper* const apm_data_dumper_;
+  const float initial_headroom_db_;
+  const int adjacent_speech_frames_threshold_;
+  int num_adjacent_speech_frames_;  // Length of the current speech frame run.
+  float headroom_db_;  // Value returned by HeadroomDb().
+  SaturationProtectorState preliminary_state_;  // Updated on each speech frame.
+  SaturationProtectorState reliable_state_;  // Last check-pointed state.
+};
+
+}  // namespace
+
+std::unique_ptr<SaturationProtector> CreateSaturationProtector(  // Factory.
+    float initial_headroom_db,
+    int adjacent_speech_frames_threshold,
+    ApmDataDumper* apm_data_dumper) {
+  return std::make_unique<SaturationProtectorImpl>(  // Forwards all arguments.
+      initial_headroom_db, adjacent_speech_frames_threshold, apm_data_dumper);
 }

 }  // namespace webrtc