Bump to WebRTC M131 release

Ongoing fixes and improvements; the transient suppressor is gone. Also dropping isac, because it doesn't seem to be useful and is just build-system deadweight now. Upstream references: Version 131.0.6778.200; WebRTC 79aff54b0fa9238ce3518dd9eaf9610cd6f22e82; Chromium 2a19506ad24af755f2a215a4c61f775393e0db42.
2024-12-24 19:32:07 -05:00
parent 8bdb53d91c
commit b5c48b97f6
263 changed files with 4628 additions and 20416 deletions
--- a/webrtc/api/audio/audio_frame.h
+++ b/webrtc/api/audio/audio_frame.h
@@ -14,11 +14,34 @@
 #include <stddef.h>
 #include <stdint.h>

+#include <array>
+#include <optional>
+
+#include "api/array_view.h"
+#include "api/audio/audio_view.h"
 #include "api/audio/channel_layout.h"
 #include "api/rtp_packet_infos.h"
+#include "rtc_base/checks.h"

 namespace webrtc {

+// Default WebRTC audio buffer length in milliseconds.
+constexpr size_t kDefaultAudioBufferLengthMs = 10u;

+// Default number of audio buffers per second for the default length (100).
+constexpr size_t kDefaultAudioBuffersPerSec =
+    1000u / kDefaultAudioBufferLengthMs;
+
+// Returns the number of samples a buffer needs to hold for ~10ms of a single
+// audio channel at `sample_rate` (in Hz). The integer division truncates.
+// See also `AudioProcessing::GetFrameSize()`.
+inline size_t SampleRateToDefaultChannelSize(size_t sample_rate) {
+  // Basic sanity check. 192kHz is the highest supported input sample rate.
+  RTC_DCHECK_LE(sample_rate, 192000);
+  return sample_rate / kDefaultAudioBuffersPerSec;
+}
+/////////////////////////////////////////////////////////////////////
+
 /* This class holds up to 120 ms of super-wideband (32 kHz) stereo audio. It
 * allows for adding and subtracting frames while keeping track of the resulting
 * states.
@@ -57,6 +80,15 @@ class AudioFrame {

  AudioFrame();

+  // Construct an audio frame with frame length properties and channel
+  // information. `samples_per_channel()` will be initialized to a 10ms buffer
+  // size and if `layout` is not specified (default value of
+  // CHANNEL_LAYOUT_UNSUPPORTED is set), then the channel layout is derived
+  // (guessed) from `num_channels`.
+  AudioFrame(int sample_rate_hz,
+             size_t num_channels,
+             ChannelLayout layout = CHANNEL_LAYOUT_UNSUPPORTED);
+
  AudioFrame(const AudioFrame&) = delete;
  AudioFrame& operator=(const AudioFrame&) = delete;

@@ -68,6 +100,7 @@ class AudioFrame {
  // ResetWithoutMuting() to skip this wasteful zeroing.
  void ResetWithoutMuting();

+  // TODO: b/335805780 - Accept InterleavedView.
  void UpdateFrame(uint32_t timestamp,
                   const int16_t* data,
                   size_t samples_per_channel,
@@ -90,20 +123,40 @@ class AudioFrame {
  int64_t ElapsedProfileTimeMs() const;

  // data() returns a zeroed static buffer if the frame is muted.
-  // mutable_frame() always returns a non-static buffer; the first call to
-  // mutable_frame() zeros the non-static buffer and marks the frame unmuted.
+  // TODO: b/335805780 - Return InterleavedView.
  const int16_t* data() const;
+
+  // Returns a read-only view of all the valid samples held by the AudioFrame.
+  // For a muted AudioFrame, the samples will all be 0.
+  InterleavedView<const int16_t> data_view() const;
+
+  // mutable_data() always returns a non-static buffer; the first call to
+  // mutable_data() zeros the buffer and marks the frame as unmuted.
+  // TODO: b/335805780 - Return an InterleavedView.
  int16_t* mutable_data();

+  // Grants write access to the audio buffer. The size of the returned writable
+  // view is determined by the `samples_per_channel` and `num_channels`
+  // dimensions which the function checks for correctness and stores in the
+  // internal member variables; `samples_per_channel()` and `num_channels()`
+  // respectively.
+  // If the state is currently muted, the returned view will be zeroed out.
+  InterleavedView<int16_t> mutable_data(size_t samples_per_channel,
+                                        size_t num_channels);
+
  // Prefer to mute frames using AudioFrameOperations::Mute.
  void Mute();
  // Frame is muted by default.
  bool muted() const;

-  size_t max_16bit_samples() const { return kMaxDataSizeSamples; }
+  size_t max_16bit_samples() const { return data_.size(); }
  size_t samples_per_channel() const { return samples_per_channel_; }
  size_t num_channels() const { return num_channels_; }
+
  ChannelLayout channel_layout() const { return channel_layout_; }
+  // Sets the `channel_layout` property as well as `num_channels`.
+  void SetLayoutAndNumChannels(ChannelLayout layout, size_t num_channels);
+
  int sample_rate_hz() const { return sample_rate_hz_; }

  void set_absolute_capture_timestamp_ms(
@@ -111,10 +164,14 @@ class AudioFrame {
    absolute_capture_timestamp_ms_ = absolute_capture_time_stamp_ms;
  }

-  absl::optional<int64_t> absolute_capture_timestamp_ms() const {
+  std::optional<int64_t> absolute_capture_timestamp_ms() const {
    return absolute_capture_timestamp_ms_;
  }

+  // Sets the sample_rate_hz and samples_per_channel properties based on a
+  // given sample rate and calculates a default 10ms samples_per_channel value.
+  void SetSampleRateAndChannelSize(int sample_rate);
+
  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_ = 0;
  // Time since the first frame in milliseconds.
@@ -126,14 +183,13 @@ class AudioFrame {
  size_t samples_per_channel_ = 0;
  int sample_rate_hz_ = 0;
  size_t num_channels_ = 0;
-  ChannelLayout channel_layout_ = CHANNEL_LAYOUT_NONE;
  SpeechType speech_type_ = kUndefined;
  VADActivity vad_activity_ = kVadUnknown;
  // Monotonically increasing timestamp intended for profiling of audio frames.
  // Typically used for measuring elapsed time between two different points in
  // the audio path. No lock is used to save resources and we are thread safe
  // by design.
-  // TODO(nisse@webrtc.org): consider using absl::optional.
+  // TODO(nisse@webrtc.org): consider using std::optional.
  int64_t profile_timestamp_ms_ = 0;

  // Information about packets used to assemble this audio frame. This is needed
@@ -154,18 +210,19 @@ class AudioFrame {

 private:
  // A permanently zeroed out buffer to represent muted frames. This is a
-  // header-only class, so the only way to avoid creating a separate empty
+  // header-only class, so the only way to avoid creating a separate zeroed
  // buffer per translation unit is to wrap a static in an inline function.
-  static const int16_t* empty_data();
+  static rtc::ArrayView<const int16_t> zeroed_data();

-  int16_t data_[kMaxDataSizeSamples];
+  std::array<int16_t, kMaxDataSizeSamples> data_;
  bool muted_ = true;
+  ChannelLayout channel_layout_ = CHANNEL_LAYOUT_NONE;

  // Absolute capture timestamp when this audio frame was originally captured.
  // This is only valid for audio frames captured on this machine. The absolute
  // capture timestamp of a received frame is found in `packet_infos_`.
  // This timestamp MUST be based on the same clock as rtc::TimeMillis().
-  absl::optional<int64_t> absolute_capture_timestamp_ms_;
+  std::optional<int64_t> absolute_capture_timestamp_ms_;
 };

 }  // namespace webrtc