Bump to WebRTC M120 release
Some API deprecation -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it's gone upstream, but maybe we'll want to drop that soon.
This commit is contained in:
@ -11,9 +11,7 @@
|
||||
#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <memory>
|
||||
#include <cstddef>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -21,38 +19,55 @@ namespace webrtc {
|
||||
// restoration algorithm that attenuates unexpected spikes in the spectrum.
|
||||
class TransientSuppressor {
|
||||
public:
|
||||
// Type of VAD used by the caller to compute the `voice_probability` argument
|
||||
// of `Suppress()`.
|
||||
enum class VadMode {
|
||||
// By default, `TransientSuppressor` assumes that `voice_probability` is
|
||||
// computed by `AgcManagerDirect`.
|
||||
kDefault = 0,
|
||||
// Use this mode when `TransientSuppressor` must assume that
|
||||
// `voice_probability` is computed by the RNN VAD.
|
||||
kRnnVad,
|
||||
// Use this mode to let `TransientSuppressor::Suppress()` ignore
|
||||
// `voice_probability` and behave as if voice information is unavailable
|
||||
// (regardless of the passed value).
|
||||
kNoVad,
|
||||
};
|
||||
|
||||
virtual ~TransientSuppressor() {}
|
||||
|
||||
virtual int Initialize(int sample_rate_hz,
|
||||
int detector_rate_hz,
|
||||
int num_channels) = 0;
|
||||
virtual void Initialize(int sample_rate_hz,
|
||||
int detector_rate_hz,
|
||||
int num_channels) = 0;
|
||||
|
||||
// Processes a |data| chunk, and returns it with keystrokes suppressed from
|
||||
// Processes a `data` chunk, and returns it with keystrokes suppressed from
|
||||
// it. The float format is assumed to be int16 ranged. If there is more than
|
||||
// one channel, the chunks are concatenated one after the other in |data|.
|
||||
// |data_length| must be equal to |data_length_|.
|
||||
// |num_channels| must be equal to |num_channels_|.
|
||||
// A sub-band, ideally the higher, can be used as |detection_data|. If it is
|
||||
// NULL, |data| is used for the detection too. The |detection_data| is always
|
||||
// one channel, the chunks are concatenated one after the other in `data`.
|
||||
// `data_length` must be equal to `data_length_`.
|
||||
// `num_channels` must be equal to `num_channels_`.
|
||||
// A sub-band, ideally the higher, can be used as `detection_data`. If it is
|
||||
// NULL, `data` is used for the detection too. The `detection_data` is always
|
||||
// assumed mono.
|
||||
// If a reference signal (e.g. keyboard microphone) is available, it can be
|
||||
// passed in as |reference_data|. It is assumed mono and must have the same
|
||||
// length as |data|. NULL is accepted if unavailable.
|
||||
// passed in as `reference_data`. It is assumed mono and must have the same
|
||||
// length as `data`. NULL is accepted if unavailable.
|
||||
// This suppressor performs better if voice information is available.
|
||||
// |voice_probability| is the probability of voice being present in this chunk
|
||||
// of audio. If voice information is not available, |voice_probability| must
|
||||
// `voice_probability` is the probability of voice being present in this chunk
|
||||
// of audio. If voice information is not available, `voice_probability` must
|
||||
// always be set to 1.
|
||||
// |key_pressed| determines if a key was pressed on this audio chunk.
|
||||
// Returns 0 on success and -1 otherwise.
|
||||
virtual int Suppress(float* data,
|
||||
size_t data_length,
|
||||
int num_channels,
|
||||
const float* detection_data,
|
||||
size_t detection_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length,
|
||||
float voice_probability,
|
||||
bool key_pressed) = 0;
|
||||
// `key_pressed` determines if a key was pressed on this audio chunk.
|
||||
// Returns a delayed version of `voice_probability` according to the
|
||||
// algorithmic delay introduced by this method. In this way, the modified
|
||||
// `data` and the returned voice probability will be temporally aligned.
|
||||
virtual float Suppress(float* data,
|
||||
size_t data_length,
|
||||
int num_channels,
|
||||
const float* detection_data,
|
||||
size_t detection_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length,
|
||||
float voice_probability,
|
||||
bool key_pressed) = 0;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
Reference in New Issue
Block a user