Bump to WebRTC M120 release

This includes some API deprecations: ExperimentalAgc and ExperimentalNs are gone. We continue to carry iSAC even though it has been removed upstream, but we may want to drop it soon as well.
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
--- a/webrtc/modules/audio_processing/vad/gmm.h
+++ b/webrtc/modules/audio_processing/vad/gmm.h
@ -20,13 +20,13 @@ namespace webrtc {
 // Where a 'mixture' is a Gaussian density.

 struct GmmParameters {
-  // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n]));
+  // weight[n] = log(w[n]) - `dimension`/2 * log(2*pi) - 1/2 * log(det(cov[n]));
  // where cov[n] is the covariance matrix of mixture n;
  const double* weight;
-  // pointer to the first element of a |num_mixtures|x|dimension| matrix
+  // pointer to the first element of a `num_mixtures`x`dimension` matrix
  // where kth row is the mean of the kth mixture.
  const double* mean;
-  // pointer to the first element of a |num_mixtures|x|dimension|x|dimension|
+  // pointer to the first element of a `num_mixtures`x`dimension`x`dimension`
  // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
  // matrix of the kth mixture.
  const double* covar_inverse;
@ -36,8 +36,8 @@ struct GmmParameters {
  int num_mixtures;
 };

-// Evaluate the given GMM, according to |gmm_parameters|, at the given point
-// |x|. If the dimensionality of the given GMM is larger that the maximum
+// Evaluate the given GMM, according to `gmm_parameters`, at the given point
+// `x`. If the dimensionality of the given GMM is larger than the maximum
 // acceptable dimension by the following function -1 is returned.
 double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);

--- a/webrtc/modules/audio_processing/vad/pitch_based_vad.h
+++ b/webrtc/modules/audio_processing/vad/pitch_based_vad.h
@ -34,7 +34,7 @@ class PitchBasedVad {
  //   p_combined: an array which contains the combined activity probabilities
  //               computed prior to the call of this function. The method,
  //               then, computes the voicing probabilities and combine them
-  //               with the given values. The result are returned in |p|.
+  //               with the given values. Results are returned in `p_combined`.
  int VoicingProbability(const AudioFeatures& features, double* p_combined);

 private:
--- a/webrtc/modules/audio_processing/vad/pitch_internal.h
+++ b/webrtc/modules/audio_processing/vad/pitch_internal.h
@ -14,7 +14,7 @@
 namespace webrtc {

 // TODO(turajs): Write a description of this function. Also be consistent with
-// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
+// usage of `sampling_rate_hz` vs `kSamplingFreqHz`.
 void GetSubframesPitchParameters(int sampling_rate_hz,
                                 double* gains,
                                 double* lags,
--- a/webrtc/modules/audio_processing/vad/standalone_vad.h
+++ b/webrtc/modules/audio_processing/vad/standalone_vad.h
@ -26,12 +26,12 @@ class StandaloneVad {

  // Outputs
  //   p: a buffer where probabilities are written to.
-  //   length_p: number of elements of |p|.
+  //   length_p: number of elements of `p`.
  //
  // return value:
  //    -1: if no audio is stored or VAD returns error.
  //     0: in success.
-  // In case of error the content of |activity| is unchanged.
+  // In case of error the content of `activity` is unchanged.
  //
  // Note that due to a high false-positive (VAD decision is active while the
  // processed audio is just background noise) rate, stand-alone VAD is used as
--- a/webrtc/modules/audio_processing/vad/vad_audio_proc.cc
+++ b/webrtc/modules/audio_processing/vad/vad_audio_proc.cc
@ -132,7 +132,7 @@ void VadAudioProc::SubframeCorrelation(double* corr,
                      kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
 }

-// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
+// Compute `kNum10msSubframes` sets of LPC coefficients, one per 10 ms input.
 // The analysis window is 15 ms long and it is centered on the first half of
 // each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
 // first half of each 10 ms subframe.
@ -169,7 +169,7 @@ static float QuadraticInterpolation(float prev_val,
  return fractional_index;
 }

-// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope
+// 1 / A(z), where A(z) is defined by `lpc`, is a model of the spectral envelope
 // of the input signal. The local maximum of the spectral envelope corresponds
 // with the local minimum of A(z). It saves complexity, as we save one
 // inversion. Furthermore, we find the first local maximum of magnitude squared,
--- a/webrtc/modules/audio_processing/vad/vad_audio_proc.h
+++ b/webrtc/modules/audio_processing/vad/vad_audio_proc.h
@ -35,7 +35,7 @@ class VadAudioProc {
                      size_t length,
                      AudioFeatures* audio_features);

-  static const size_t kDftSize = 512;
+  static constexpr size_t kDftSize = 512;

 private:
  void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
@ -51,28 +51,22 @@ class VadAudioProc {
  // For every 30 ms we compute 3 spectral peak there for 3 LPC analysis.
  // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
  // we need 5 ms of past signal to create the input of LPC analysis.
-  enum : size_t {
-    kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
-  };
+  static constexpr size_t kNumPastSignalSamples = size_t{kSampleRateHz / 200};

  // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
  // all the code recognize it as "no-error."
-  enum : int { kNoError = 0 };
+  static constexpr int kNoError = 0;

-  enum : size_t { kNum10msSubframes = 3 };
-  enum : size_t {
-    kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
-  };
-  enum : size_t {
-    // Samples in 30 ms @ given sampling rate.
-    kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
-  };
-  enum : size_t {
-    kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
-  };
-  enum : size_t { kIpLength = kDftSize >> 1 };
-  enum : size_t { kWLength = kDftSize >> 1 };
-  enum : size_t { kLpcOrder = 16 };
+  static constexpr size_t kNum10msSubframes = 3;
+  static constexpr size_t kNumSubframeSamples = size_t{kSampleRateHz / 100};
+  // Samples in 30 ms @ given sampling rate.
+  static constexpr size_t kNumSamplesToProcess =
+      kNum10msSubframes * kNumSubframeSamples;
+  static constexpr size_t kBufferLength =
+      kNumPastSignalSamples + kNumSamplesToProcess;
+  static constexpr size_t kIpLength = kDftSize >> 1;
+  static constexpr size_t kWLength = kDftSize >> 1;
+  static constexpr size_t kLpcOrder = 16;

  size_t ip_[kIpLength];
  float w_fft_[kWLength];
--- a/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
+++ b/webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h
@ -11,6 +11,8 @@
 #ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
 #define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_

+#include <stddef.h>
+
 namespace webrtc {

 // These values should match MATLAB counterparts for unit-tests to pass.
--- a/webrtc/modules/audio_processing/vad/vad_circular_buffer.h
+++ b/webrtc/modules/audio_processing/vad/vad_circular_buffer.h
@ -38,8 +38,8 @@ class VadCircularBuffer {
  // The mean value of the elements in the buffer. The return value is zero if
  // buffer is empty, i.e. no value is inserted.
  double Mean();
-  // Remove transients. If the values exceed |val_threshold| for a period
-  // shorter then or equal to |width_threshold|, then that period is considered
+  // Remove transients. If the values exceed `val_threshold` for a period
+  // shorter than or equal to `width_threshold`, then that period is considered
  // transient and set to zero.
  int RemoveTransient(int width_threshold, double val_threshold);

@ -49,7 +49,7 @@ class VadCircularBuffer {
  // insertion. |index = 1| is the one before the most recent insertion, and
  // so on.
  int Get(int index, double* value) const;
-  // Set a given position to |value|. |index| is interpreted as above.
+  // Set a given position to `value`. `index` is interpreted as above.
  int Set(int index, double value);
  // Return the number of valid elements in the buffer.
  int BufferLevel();
--- a/webrtc/modules/audio_processing/vad/voice_activity_detector.cc
+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.cc
@ -32,12 +32,13 @@ VoiceActivityDetector::VoiceActivityDetector()
 VoiceActivityDetector::~VoiceActivityDetector() = default;

 // Because ISAC has a different chunk length, it updates
-// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
+// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
 // Otherwise it clears them.
 void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
                                         size_t length,
                                         int sample_rate_hz) {
  RTC_DCHECK_EQ(length, sample_rate_hz / 100);
+  // TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
  // Resample to the required rate.
  const int16_t* resampled_ptr = audio;
  if (sample_rate_hz != kSampleRateHz) {
@ -49,7 +50,7 @@ void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
  }
  RTC_DCHECK_EQ(length, kLength10Ms);

-  // Each chunk needs to be passed into |standalone_vad_|, because internally it
+  // Each chunk needs to be passed into `standalone_vad_`, because internally it
  // buffers the audio and processes it all at once when GetActivity() is
  // called.
  RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
--- a/webrtc/modules/audio_processing/vad/voice_activity_detector.h
+++ b/webrtc/modules/audio_processing/vad/voice_activity_detector.h
@ -33,6 +33,8 @@ class VoiceActivityDetector {
  ~VoiceActivityDetector();

  // Processes each audio chunk and estimates the voice probability.
+  // TODO(bugs.webrtc.org/7494): Switch to rtc::ArrayView and remove
+  // `sample_rate_hz`.
  void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);

  // Returns a vector of voice probabilities for each chunk. It can be empty for