Bump to WebRTC M120 release

Some API deprecation -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it's gone upstream, but maybe we'll want to drop that soon.
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
--- a/webrtc/modules/audio_processing/transient/transient_suppressor.h
+++ b/webrtc/modules/audio_processing/transient/transient_suppressor.h
@ -11,9 +11,7 @@
 #ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
 #define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

-#include <stddef.h>
-#include <stdint.h>
-#include <memory>
+#include <cstddef>

 namespace webrtc {

@ -21,38 +19,55 @@ namespace webrtc {
 // restoration algorithm that attenuates unexpected spikes in the spectrum.
 class TransientSuppressor {
 public:
+  // Type of VAD used by the caller to compute the `voice_probability` argument
+  // `Suppress()`.
+  enum class VadMode {
+    // By default, `TransientSuppressor` assumes that `voice_probability` is
+    // computed by `AgcManagerDirect`.
+    kDefault = 0,
+    // Use this mode when `TransientSuppressor` must assume that
+    // `voice_probability` is computed by the RNN VAD.
+    kRnnVad,
+    // Use this mode to let `TransientSuppressor::Suppressor()` ignore
+    // `voice_probability` and behave as if voice information is unavailable
+    // (regardless of the passed value).
+    kNoVad,
+  };
+
  virtual ~TransientSuppressor() {}

-  virtual int Initialize(int sample_rate_hz,
-                         int detector_rate_hz,
-                         int num_channels) = 0;
+  virtual void Initialize(int sample_rate_hz,
+                          int detector_rate_hz,
+                          int num_channels) = 0;

-  // Processes a |data| chunk, and returns it with keystrokes suppressed from
+  // Processes a `data` chunk, and returns it with keystrokes suppressed from
  // it. The float format is assumed to be int16 ranged. If there are more than
-  // one channel, the chunks are concatenated one after the other in |data|.
-  // |data_length| must be equal to |data_length_|.
-  // |num_channels| must be equal to |num_channels_|.
-  // A sub-band, ideally the higher, can be used as |detection_data|. If it is
-  // NULL, |data| is used for the detection too. The |detection_data| is always
+  // one channel, the chunks are concatenated one after the other in `data`.
+  // `data_length` must be equal to `data_length_`.
+  // `num_channels` must be equal to `num_channels_`.
+  // A sub-band, ideally the higher, can be used as `detection_data`. If it is
+  // NULL, `data` is used for the detection too. The `detection_data` is always
  // assumed mono.
  // If a reference signal (e.g. keyboard microphone) is available, it can be
-  // passed in as |reference_data|. It is assumed mono and must have the same
-  // length as |data|. NULL is accepted if unavailable.
+  // passed in as `reference_data`. It is assumed mono and must have the same
+  // length as `data`. NULL is accepted if unavailable.
  // This suppressor performs better if voice information is available.
-  // |voice_probability| is the probability of voice being present in this chunk
-  // of audio. If voice information is not available, |voice_probability| must
+  // `voice_probability` is the probability of voice being present in this chunk
+  // of audio. If voice information is not available, `voice_probability` must
  // always be set to 1.
-  // |key_pressed| determines if a key was pressed on this audio chunk.
-  // Returns 0 on success and -1 otherwise.
-  virtual int Suppress(float* data,
-                       size_t data_length,
-                       int num_channels,
-                       const float* detection_data,
-                       size_t detection_length,
-                       const float* reference_data,
-                       size_t reference_length,
-                       float voice_probability,
-                       bool key_pressed) = 0;
+  // `key_pressed` determines if a key was pressed on this audio chunk.
+  // Returns a delayed version of `voice_probability` according to the
+  // algorithmic delay introduced by this method. In this way, the modified
+  // `data` and the returned voice probability will be temporally aligned.
+  virtual float Suppress(float* data,
+                         size_t data_length,
+                         int num_channels,
+                         const float* detection_data,
+                         size_t detection_length,
+                         const float* reference_data,
+                         size_t reference_length,
+                         float voice_probability,
+                         bool key_pressed) = 0;
 };

 }  // namespace webrtc