Bump to WebRTC M131 release

Ongoing fixes and improvements; the transient suppressor is gone. Also
drop iSAC, since it no longer seems useful and is just build-system
deadweight now.

Upstream references:

  Version: 131.0.6778.200
  WebRTC: 79aff54b0fa9238ce3518dd9eaf9610cd6f22e82
  Chromium: 2a19506ad24af755f2a215a4c61f775393e0db42
Author: Arun Raghavan
Date:   2024-12-24 19:32:07 -05:00
Parent: 8bdb53d91c
Commit: b5c48b97f6

263 changed files with 4628 additions and 20416 deletions
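For downstream callers, the visible API change in this commit is that the
raw-pointer overloads of QuadToStereo() and DownmixChannels() give way to
InterleavedView-based ones. A minimal caller-side sketch, assuming
InterleavedView's (data, samples_per_channel, num_channels) constructor from
api/audio/audio_view.h; the helper function itself is hypothetical, not part
of this commit:

#include <cstddef>
#include <cstdint>

#include "api/audio/audio_view.h"
#include "audio/utility/audio_frame_operations.h"

// Hypothetical helper: downmix an interleaved quad buffer to stereo with the
// view-based overload. The view carries the channel count and samples per
// channel alongside the pointer, replacing the old five-argument call.
void DownmixQuadToStereo(const int16_t* in, int16_t* out,
                         size_t samples_per_channel) {
  // Old: AudioFrameOperations::DownmixChannels(in, 4, samples_per_channel,
  //                                            2, out);
  webrtc::InterleavedView<const int16_t> src(in, samples_per_channel, 4);
  webrtc::InterleavedView<int16_t> dst(out, samples_per_channel, 2);
  webrtc::AudioFrameOperations::DownmixChannels(src, dst);
}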

webrtc/audio/utility/BUILD.gn

@@ -23,14 +23,14 @@ rtc_library("audio_frame_operations") {
   ]
   deps = [
     "../../api:array_view",
     "../../api/audio:audio_frame_api",
     "../../common_audio",
     "../../rtc_base:checks",
     "../../rtc_base:logging",
     "../../rtc_base:safe_conversions",
-    "../../system_wrappers:field_trial",
+    "//third_party/abseil-cpp/absl/base:core_headers",
   ]
-  absl_deps = [ "//third_party/abseil-cpp/absl/base:core_headers" ]
 }

 if (rtc_include_tests) {
@@ -48,7 +48,6 @@ if (rtc_include_tests) {
       "../../rtc_base:logging",
       "../../rtc_base:macromagic",
       "../../rtc_base:stringutils",
-      "../../test:field_trial",
       "../../test:test_support",
       "//testing/gtest",
     ]

webrtc/audio/utility/audio_frame_operations.cc

@@ -29,72 +29,17 @@ const float kMuteFadeInc = 1.0f / kMuteFadeFrames;
 }  // namespace

-void AudioFrameOperations::Add(const AudioFrame& frame_to_add,
-                               AudioFrame* result_frame) {
-  // Sanity check.
-  RTC_DCHECK(result_frame);
-  RTC_DCHECK_GT(result_frame->num_channels_, 0);
-  RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_);
-
-  bool no_previous_data = result_frame->muted();
-  if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) {
-    // Special case we have no data to start with.
-    RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0);
-    result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_;
-    no_previous_data = true;
-  }
-
-  if (result_frame->vad_activity_ == AudioFrame::kVadActive ||
-      frame_to_add.vad_activity_ == AudioFrame::kVadActive) {
-    result_frame->vad_activity_ = AudioFrame::kVadActive;
-  } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown ||
-             frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) {
-    result_frame->vad_activity_ = AudioFrame::kVadUnknown;
-  }
-
-  if (result_frame->speech_type_ != frame_to_add.speech_type_)
-    result_frame->speech_type_ = AudioFrame::kUndefined;
-
-  if (!frame_to_add.muted()) {
-    const int16_t* in_data = frame_to_add.data();
-    int16_t* out_data = result_frame->mutable_data();
-    size_t length =
-        frame_to_add.samples_per_channel_ * frame_to_add.num_channels_;
-    if (no_previous_data) {
-      std::copy(in_data, in_data + length, out_data);
-    } else {
-      for (size_t i = 0; i < length; i++) {
-        const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) +
-                                   static_cast<int32_t>(in_data[i]);
-        out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard);
-      }
-    }
-  }
-}
-
-int AudioFrameOperations::MonoToStereo(AudioFrame* frame) {
-  if (frame->num_channels_ != 1) {
-    return -1;
-  }
-
-  UpmixChannels(2, frame);
-  return 0;
-}
-
-int AudioFrameOperations::StereoToMono(AudioFrame* frame) {
-  if (frame->num_channels_ != 2) {
-    return -1;
-  }
-
-  DownmixChannels(1, frame);
-  return frame->num_channels_ == 1 ? 0 : -1;
-}
-
-void AudioFrameOperations::QuadToStereo(const int16_t* src_audio,
-                                        size_t samples_per_channel,
-                                        int16_t* dst_audio) {
-  for (size_t i = 0; i < samples_per_channel; i++) {
-    dst_audio[i * 2] =
+void AudioFrameOperations::QuadToStereo(
+    InterleavedView<const int16_t> src_audio,
+    InterleavedView<int16_t> dst_audio) {
+  RTC_DCHECK_EQ(NumChannels(src_audio), 4);
+  RTC_DCHECK_EQ(NumChannels(dst_audio), 2);
+  RTC_DCHECK_EQ(SamplesPerChannel(src_audio), SamplesPerChannel(dst_audio));
+  for (size_t i = 0; i < SamplesPerChannel(src_audio); ++i) {
+    auto dst_frame = i * 2;
+    dst_audio[dst_frame] =
         (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1;
-    dst_audio[i * 2 + 1] =
+    dst_audio[dst_frame + 1] =
         (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >>
         1;
   }
@@ -109,30 +54,34 @@ int AudioFrameOperations::QuadToStereo(AudioFrame* frame) {
                 AudioFrame::kMaxDataSizeSamples);

   if (!frame->muted()) {
-    QuadToStereo(frame->data(), frame->samples_per_channel_,
-                 frame->mutable_data());
+    // Note that `src` and `dst` will map in to the same buffer, but the call
+    // to `mutable_data()` changes the layout of `frame`, so `src` and `dst`
+    // will have different dimensions (important to call `data_view()` first).
+    auto src = frame->data_view();
+    auto dst = frame->mutable_data(frame->samples_per_channel_, 2);
+    QuadToStereo(src, dst);
+  } else {
+    frame->num_channels_ = 2;
   }
-  frame->num_channels_ = 2;
   return 0;
 }

-void AudioFrameOperations::DownmixChannels(const int16_t* src_audio,
-                                           size_t src_channels,
-                                           size_t samples_per_channel,
-                                           size_t dst_channels,
-                                           int16_t* dst_audio) {
-  if (src_channels > 1 && dst_channels == 1) {
-    DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels,
-                             dst_audio);
-    return;
-  } else if (src_channels == 4 && dst_channels == 2) {
-    QuadToStereo(src_audio, samples_per_channel, dst_audio);
-    return;
-  }
-
-  RTC_DCHECK_NOTREACHED() << "src_channels: " << src_channels
-                          << ", dst_channels: " << dst_channels;
+void AudioFrameOperations::DownmixChannels(
+    InterleavedView<const int16_t> src_audio,
+    InterleavedView<int16_t> dst_audio) {
+  RTC_DCHECK_EQ(SamplesPerChannel(src_audio), SamplesPerChannel(dst_audio));
+  if (NumChannels(src_audio) > 1 && IsMono(dst_audio)) {
+    // TODO(tommi): change DownmixInterleavedToMono to support InterleavedView
+    // and MonoView.
+    DownmixInterleavedToMono(&src_audio.data()[0], SamplesPerChannel(src_audio),
+                             NumChannels(src_audio), &dst_audio.data()[0]);
+  } else if (NumChannels(src_audio) == 4 && NumChannels(dst_audio) == 2) {
+    QuadToStereo(src_audio, dst_audio);
+  } else {
+    RTC_DCHECK_NOTREACHED() << "src_channels: " << NumChannels(src_audio)
+                            << ", dst_channels: " << NumChannels(dst_audio);
+  }
 }

 void AudioFrameOperations::DownmixChannels(size_t dst_channels,
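The comment in the new QuadToStereo(AudioFrame*) hides an ordering subtlety
worth spelling out: mutable_data(samples_per_channel, num_channels) re-labels
the frame as stereo, so the read-only view has to be captured first, while it
still describes four channels. A sketch of the same pattern, with a
hypothetical helper name:

#include "api/audio/audio_frame.h"
#include "audio/utility/audio_frame_operations.h"

// Hypothetical helper illustrating the data_view()/mutable_data() ordering.
void QuadFrameToStereoInPlace(webrtc::AudioFrame* frame) {
  // Taken first: still a 4-channel view over the frame's buffer.
  auto src = frame->data_view();
  // Re-labels the frame as stereo; `src` and `dst` alias the same buffer
  // but carry different dimensions.
  auto dst = frame->mutable_data(frame->samples_per_channel_, 2);
  webrtc::AudioFrameOperations::QuadToStereo(src, dst);
}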
@@ -169,14 +118,16 @@ void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels,
   if (!frame->muted()) {
     // Up-mixing done in place. Going backwards through the frame ensure nothing
     // is irrevocably overwritten.
-    int16_t* frame_data = frame->mutable_data();
-    for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) {
+    auto frame_data = frame->mutable_data(frame->samples_per_channel_,
+                                          target_number_of_channels);
+    for (int i = frame->samples_per_channel_ - 1; i >= 0; --i) {
       for (size_t j = 0; j < target_number_of_channels; ++j) {
         frame_data[target_number_of_channels * i + j] = frame_data[i];
       }
     }
+  } else {
+    frame->num_channels_ = target_number_of_channels;
   }
-  frame->num_channels_ = target_number_of_channels;
 }

 void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) {
@@ -250,35 +201,6 @@ void AudioFrameOperations::Mute(AudioFrame* frame) {
   Mute(frame, true, true);
 }

-void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) {
-  RTC_DCHECK(frame);
-  RTC_DCHECK_GT(frame->num_channels_, 0);
-  if (frame->num_channels_ < 1 || frame->muted()) {
-    return;
-  }
-
-  int16_t* frame_data = frame->mutable_data();
-  for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_;
-       i++) {
-    frame_data[i] = frame_data[i] >> 1;
-  }
-}
-
-int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) {
-  if (frame->num_channels_ != 2) {
-    return -1;
-  } else if (frame->muted()) {
-    return 0;
-  }
-
-  int16_t* frame_data = frame->mutable_data();
-  for (size_t i = 0; i < frame->samples_per_channel_; i++) {
-    frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]);
-    frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]);
-  }
-  return 0;
-}
-
 int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) {
   if (frame->muted()) {
     return 0;

webrtc/audio/utility/audio_frame_operations.h

@@ -15,6 +15,7 @@
 #include <stdint.h>

 #include "absl/base/attributes.h"
+#include "api/array_view.h"
 #include "api/audio/audio_frame.h"

 namespace webrtc {
@@ -24,33 +25,11 @@ namespace webrtc {
 // than a class.
 class AudioFrameOperations {
  public:
-  // Add samples in `frame_to_add` with samples in `result_frame`
-  // putting the results in `results_frame`. The fields
-  // `vad_activity_` and `speech_type_` of the result frame are
-  // updated. If `result_frame` is empty (`samples_per_channel_`==0),
-  // the samples in `frame_to_add` are added to it. The number of
-  // channels and number of samples per channel must match except when
-  // `result_frame` is empty.
-  static void Add(const AudioFrame& frame_to_add, AudioFrame* result_frame);
-
-  // `frame.num_channels_` will be updated. This version checks for sufficient
-  // buffer size and that `num_channels_` is mono. Use UpmixChannels
-  // instead. TODO(bugs.webrtc.org/8649): remove.
-  ABSL_DEPRECATED("bugs.webrtc.org/8649")
-  static int MonoToStereo(AudioFrame* frame);
-
-  // `frame.num_channels_` will be updated. This version checks that
-  // `num_channels_` is stereo. Use DownmixChannels
-  // instead. TODO(bugs.webrtc.org/8649): remove.
-  ABSL_DEPRECATED("bugs.webrtc.org/8649")
-  static int StereoToMono(AudioFrame* frame);
-
   // Downmixes 4 channels `src_audio` to stereo `dst_audio`. This is an in-place
   // operation, meaning `src_audio` and `dst_audio` may point to the same
   // buffer.
-  static void QuadToStereo(const int16_t* src_audio,
-                           size_t samples_per_channel,
-                           int16_t* dst_audio);
+  static void QuadToStereo(InterleavedView<const int16_t> src_audio,
+                           InterleavedView<int16_t> dst_audio);

   // `frame.num_channels_` will be updated. This version checks that
   // `num_channels_` is 4 channels.
@@ -60,11 +39,8 @@ class AudioFrameOperations {
   // This is an in-place operation, meaning `src_audio` and `dst_audio`
   // may point to the same buffer. Supported channel combinations are
   // Stereo to Mono, Quad to Mono, and Quad to Stereo.
-  static void DownmixChannels(const int16_t* src_audio,
-                              size_t src_channels,
-                              size_t samples_per_channel,
-                              size_t dst_channels,
-                              int16_t* dst_audio);
+  static void DownmixChannels(InterleavedView<const int16_t> src_audio,
+                              InterleavedView<int16_t> dst_audio);

   // `frame.num_channels_` will be updated. This version checks that
   // `num_channels_` and `dst_channels` are valid and performs relevant downmix.
@@ -94,11 +70,6 @@ class AudioFrameOperations {
   // Zero out contents of frame.
   static void Mute(AudioFrame* frame);

-  // Halve samples in `frame`.
-  static void ApplyHalfGain(AudioFrame* frame);
-
-  static int Scale(float left, float right, AudioFrame* frame);
-
   static int ScaleWithSat(float scale, AudioFrame* frame);
 };
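The helpers removed in this commit all have in-tree replacements that the
deleted doc comments already pointed at: MonoToStereo() and StereoToMono()
were deprecated in favor of UpmixChannels() and DownmixChannels(), and
ScaleWithSat() survives for gain changes. A rough migration sketch; mapping
ApplyHalfGain() to ScaleWithSat(0.5f, ...) is a suggestion, not something
this commit prescribes:

#include "api/audio/audio_frame.h"
#include "audio/utility/audio_frame_operations.h"

void MigrateRemovedCalls(webrtc::AudioFrame* frame) {
  // Was: AudioFrameOperations::MonoToStereo(frame);
  webrtc::AudioFrameOperations::UpmixChannels(2, frame);
  // Was: AudioFrameOperations::StereoToMono(frame);
  webrtc::AudioFrameOperations::DownmixChannels(1, frame);
  // Was: AudioFrameOperations::ApplyHalfGain(frame). ScaleWithSat()
  // multiplies and saturates rather than shifting, but halving cannot
  // overflow, so the result matches up to rounding.
  webrtc::AudioFrameOperations::ScaleWithSat(0.5f, frame);
}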