Bump to WebRTC M120 release

This includes some API removals: ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it has been removed upstream, but we may want to drop it soon.
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
--- a/webrtc/api/audio/audio_frame.cc
+++ b/webrtc/api/audio/audio_frame.cc
@ -11,8 +11,6 @@
 #include "api/audio/audio_frame.h"

 #include <string.h>
-#include <algorithm>
-#include <utility>

 #include "rtc_base/checks.h"
 #include "rtc_base/time_utils.h"
@ -24,35 +22,13 @@ AudioFrame::AudioFrame() {
  static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
 }

-void swap(AudioFrame& a, AudioFrame& b) {
-  using std::swap;
-  swap(a.timestamp_, b.timestamp_);
-  swap(a.elapsed_time_ms_, b.elapsed_time_ms_);
-  swap(a.ntp_time_ms_, b.ntp_time_ms_);
-  swap(a.samples_per_channel_, b.samples_per_channel_);
-  swap(a.sample_rate_hz_, b.sample_rate_hz_);
-  swap(a.num_channels_, b.num_channels_);
-  swap(a.channel_layout_, b.channel_layout_);
-  swap(a.speech_type_, b.speech_type_);
-  swap(a.vad_activity_, b.vad_activity_);
-  swap(a.profile_timestamp_ms_, b.profile_timestamp_ms_);
-  swap(a.packet_infos_, b.packet_infos_);
-  const size_t length_a = a.samples_per_channel_ * a.num_channels_;
-  const size_t length_b = b.samples_per_channel_ * b.num_channels_;
-  RTC_DCHECK_LE(length_a, AudioFrame::kMaxDataSizeSamples);
-  RTC_DCHECK_LE(length_b, AudioFrame::kMaxDataSizeSamples);
-  std::swap_ranges(a.data_, a.data_ + std::max(length_a, length_b), b.data_);
-  swap(a.muted_, b.muted_);
-  swap(a.absolute_capture_timestamp_ms_, b.absolute_capture_timestamp_ms_);
-}
-
 void AudioFrame::Reset() {
  ResetWithoutMuting();
  muted_ = true;
 }

 void AudioFrame::ResetWithoutMuting() {
-  // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
+  // TODO(wu): Zero is a valid value for `timestamp_`. We should initialize
  // to an invalid value, or add a new member to indicate invalidity.
  timestamp_ = 0;
  elapsed_time_ms_ = -1;
--- a/webrtc/api/audio/audio_frame.h
+++ b/webrtc/api/audio/audio_frame.h
@ -14,11 +14,8 @@
 #include <stddef.h>
 #include <stdint.h>

-#include <utility>
-
 #include "api/audio/channel_layout.h"
 #include "api/rtp_packet_infos.h"
-#include "rtc_base/constructor_magic.h"

 namespace webrtc {

@ -60,7 +57,8 @@ class AudioFrame {

  AudioFrame();

-  friend void swap(AudioFrame& a, AudioFrame& b);
+  AudioFrame(const AudioFrame&) = delete;
+  AudioFrame& operator=(const AudioFrame&) = delete;

  // Resets all members to their default state.
  void Reset();
@ -139,7 +137,7 @@ class AudioFrame {
  int64_t profile_timestamp_ms_ = 0;

  // Information about packets used to assemble this audio frame. This is needed
-  // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's
+  // by `SourceTracker` when the frame is delivered to the RTCRtpReceiver's
  // MediaStreamTrack, in order to implement getContributingSources(). See:
  // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
  //
@ -149,7 +147,7 @@ class AudioFrame {
  //   sync buffer is the small sample-holding buffer located after the audio
  //   decoder and before where samples are assembled into output frames.
  //
-  // |RtpPacketInfos| may also be empty if the audio samples did not come from
+  // `RtpPacketInfos` may also be empty if the audio samples did not come from
  // RTP packets. E.g. if the audio were locally generated by packet loss
  // concealment, comfort noise generation, etc.
  RtpPacketInfos packet_infos_;
@ -165,11 +163,9 @@ class AudioFrame {

  // Absolute capture timestamp when this audio frame was originally captured.
  // This is only valid for audio frames captured on this machine. The absolute
-  // capture timestamp of a received frame is found in |packet_infos_|.
+  // capture timestamp of a received frame is found in `packet_infos_`.
  // This timestamp MUST be based on the same clock as rtc::TimeMillis().
  absl::optional<int64_t> absolute_capture_timestamp_ms_;
-
-  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
 };

 }  // namespace webrtc
--- a/webrtc/api/audio/channel_layout.cc
+++ b/webrtc/api/audio/channel_layout.cc
@ -275,7 +275,7 @@ const char* ChannelLayoutToString(ChannelLayout layout) {
    case CHANNEL_LAYOUT_BITSTREAM:
      return "BITSTREAM";
  }
-  RTC_NOTREACHED() << "Invalid channel layout provided: " << layout;
+  RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
  return "";
 }

--- a/webrtc/api/audio/echo_canceller3_config.cc
+++ b/webrtc/api/audio/echo_canceller3_config.cc
@ -153,6 +153,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {

  res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
  res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
+  res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);

  res = res & Limit(&c->erle.min, 1.f, 100000.f);
  res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
@ -165,6 +166,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {

  res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
  res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
+  res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);

  res =
      res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
@ -228,6 +230,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
  res =
      res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);

+  res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
+  res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
+  res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
+  res = res &
+        Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);
+
  res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
                    0.f, 1000000.f);
  res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
--- a/webrtc/api/audio/echo_canceller3_config.h
+++ b/webrtc/api/audio/echo_canceller3_config.h
@ -43,6 +43,7 @@ struct RTC_EXPORT EchoCanceller3Config {
    size_t hysteresis_limit_blocks = 1;
    size_t fixed_capture_delay_samples = 0;
    float delay_estimate_smoothing = 0.7f;
+    float delay_estimate_smoothing_delay_found = 0.7f;
    float delay_candidate_detection_threshold = 0.2f;
    struct DelaySelectionThresholds {
      int initial;
@ -58,6 +59,7 @@ struct RTC_EXPORT EchoCanceller3Config {
    };
    AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
    AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
+    bool detect_pre_echo = true;
  } delay;

  struct Filter {
@ -86,9 +88,11 @@ struct RTC_EXPORT EchoCanceller3Config {

    size_t config_change_duration_blocks = 250;
    float initial_state_seconds = 2.5f;
+    int coarse_reset_hangover_blocks = 25;
    bool conservative_initial_phase = false;
    bool enable_coarse_filter_output_usage = true;
    bool use_linear_filter = true;
+    bool high_pass_filter_echo_reference = false;
    bool export_linear_aec_output = false;
  } filter;

@ -105,8 +109,11 @@ struct RTC_EXPORT EchoCanceller3Config {
  struct EpStrength {
    float default_gain = 1.f;
    float default_len = 0.83f;
+    float nearend_len = 0.83f;
    bool echo_can_saturate = true;
    bool bounded_erl = false;
+    bool erle_onset_compensation_in_dominant_nearend = false;
+    bool use_conservative_tail_frequency_response = true;
  } ep_strength;

  struct EchoAudibility {
@ -190,6 +197,12 @@ struct RTC_EXPORT EchoCanceller3Config {
                                   2.0f,
                                   0.25f);

+    bool lf_smoothing_during_initial_phase = true;
+    int last_permanent_lf_smoothing_band = 0;
+    int last_lf_smoothing_band = 5;
+    int last_lf_band = 5;
+    int first_hf_band = 8;
+
    struct DominantNearendDetection {
      float enr_threshold = .25f;
      float enr_exit_threshold = 10.f;
@ -197,6 +210,7 @@ struct RTC_EXPORT EchoCanceller3Config {
      int hold_duration = 50;
      int trigger_threshold = 12;
      bool use_during_initial_phase = true;
+      bool use_unbounded_echo_spectrum = true;
    } dominant_nearend_detection;

    struct SubbandNearendDetection {
@ -221,7 +235,15 @@ struct RTC_EXPORT EchoCanceller3Config {
    } high_bands_suppression;

    float floor_first_increase = 0.00001f;
+    bool conservative_hf_suppression = false;
  } suppressor;
+
+  struct MultiChannel {
+    bool detect_stereo_content = true;
+    float stereo_detection_threshold = 0.0f;
+    int stereo_detection_timeout_threshold_seconds = 300;
+    float stereo_detection_hysteresis_seconds = 2.0f;
+  } multi_channel;
 };
 }  // namespace webrtc

--- a/webrtc/api/audio/echo_control.h
+++ b/webrtc/api/audio/echo_control.h
@ -48,6 +48,13 @@ class EchoControl {
  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;

+  // Specifies whether the capture output will be used. The purpose of this is
+  // to allow the echo controller to deactivate some of the processing when the
+  // resulting output is anyway not used, for instance when the endpoint is
+  // muted.
+  // TODO(b/177830919): Make pure virtual.
+  virtual void SetCaptureOutputUsage(bool capture_output_used) {}
+
  // Returns wheter the signal is altered.
  virtual bool ActiveProcessing() const = 0;