Bump to WebRTC M120 release

Some API deprecation -- ExperimentalAgc and ExperimentalNs are gone.
We're continuing to carry iSAC even though it's gone upstream, but maybe
we'll want to drop that soon.
This commit is contained in:
Arun Raghavan
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions

View File

@ -20,13 +20,13 @@ namespace webrtc {
// Where a 'mixture' is a Gaussian density.
struct GmmParameters {
// weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n]));
// weight[n] = log(w[n]) - `dimension`/2 * log(2*pi) - 1/2 * log(det(cov[n]));
// where cov[n] is the covariance matrix of mixture n;
const double* weight;
// pointer to the first element of a |num_mixtures|x|dimension| matrix
// pointer to the first element of a `num_mixtures`x`dimension` matrix
// where kth row is the mean of the kth mixture.
const double* mean;
// pointer to the first element of a |num_mixtures|x|dimension|x|dimension|
// pointer to the first element of a `num_mixtures`x`dimension`x`dimension`
// 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
// matrix of the kth mixture.
const double* covar_inverse;
@ -36,8 +36,8 @@ struct GmmParameters {
int num_mixtures;
};
// Evaluate the given GMM, according to |gmm_parameters|, at the given point
// |x|. If the dimensionality of the given GMM is larger that the maximum
// Evaluate the given GMM, according to `gmm_parameters`, at the given point
// `x`. If the dimensionality of the given GMM is larger than the maximum
// acceptable dimension by the following function -1 is returned.
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);

View File

@ -34,7 +34,7 @@ class PitchBasedVad {
// p_combined: an array which contains the combined activity probabilities
// computed prior to the call of this function. The method,
// then, computes the voicing probabilities and combines them
// with the given values. The result are returned in |p|.
// with the given values. The results are returned in `p`.
int VoicingProbability(const AudioFeatures& features, double* p_combined);
private:

View File

@ -14,7 +14,7 @@
namespace webrtc {
// TODO(turajs): Write a description of this function. Also be consistent with
// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
// usage of `sampling_rate_hz` vs `kSamplingFreqHz`.
void GetSubframesPitchParameters(int sampling_rate_hz,
double* gains,
double* lags,

View File

@ -26,12 +26,12 @@ class StandaloneVad {
// Outputs
// p: a buffer where probabilities are written to.
// length_p: number of elements of |p|.
// length_p: number of elements of `p`.
//
// return value:
// -1: if no audio is stored or VAD returns error.
// 0: in success.
// In case of error the content of |activity| is unchanged.
// In case of error the content of `activity` is unchanged.
//
// Note that due to a high false-positive (VAD decision is active while the
// processed audio is just background noise) rate, stand-alone VAD is used as

View File

@ -132,7 +132,7 @@ void VadAudioProc::SubframeCorrelation(double* corr,
kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
}
// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
// Compute `kNum10msSubframes` sets of LPC coefficients, one per 10 ms input.
// The analysis window is 15 ms long and it is centered on the first half of
// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
// first half of each 10 ms subframe.
@ -169,7 +169,7 @@ static float QuadraticInterpolation(float prev_val,
return fractional_index;
}
// 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope
// 1 / A(z), where A(z) is defined by `lpc` is a model of the spectral envelope
// of the input signal. The local maximum of the spectral envelope corresponds
// with the local minimum of A(z). It saves complexity, as we save one
// inversion. Furthermore, we find the first local maximum of magnitude squared,

View File

@ -35,7 +35,7 @@ class VadAudioProc {
size_t length,
AudioFeatures* audio_features);
static const size_t kDftSize = 512;
static constexpr size_t kDftSize = 512;
private:
void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
@ -51,28 +51,22 @@ class VadAudioProc {
// For every 30 ms we compute 3 spectral peaks, therefore 3 LPC analyses.
// LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
// we need 5 ms of past signal to create the input of LPC analysis.
enum : size_t {
kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
};
static constexpr size_t kNumPastSignalSamples = size_t{kSampleRateHz / 200};
// TODO(turajs): maybe defining this at a higher level (maybe enum) so that
// all the code recognize it as "no-error."
enum : int { kNoError = 0 };
static constexpr int kNoError = 0;
enum : size_t { kNum10msSubframes = 3 };
enum : size_t {
kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
};
enum : size_t {
// Samples in 30 ms @ given sampling rate.
kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
};
enum : size_t {
kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
};
enum : size_t { kIpLength = kDftSize >> 1 };
enum : size_t { kWLength = kDftSize >> 1 };
enum : size_t { kLpcOrder = 16 };
static constexpr size_t kNum10msSubframes = 3;
static constexpr size_t kNumSubframeSamples = size_t{kSampleRateHz / 100};
// Samples in 30 ms @ given sampling rate.
static constexpr size_t kNumSamplesToProcess =
kNum10msSubframes * kNumSubframeSamples;
static constexpr size_t kBufferLength =
kNumPastSignalSamples + kNumSamplesToProcess;
static constexpr size_t kIpLength = kDftSize >> 1;
static constexpr size_t kWLength = kDftSize >> 1;
static constexpr size_t kLpcOrder = 16;
size_t ip_[kIpLength];
float w_fft_[kWLength];

View File

@ -11,6 +11,8 @@
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
#include <stddef.h>
namespace webrtc {
// These values should match MATLAB counterparts for unit-tests to pass.

View File

@ -38,8 +38,8 @@ class VadCircularBuffer {
// The mean value of the elements in the buffer. The return value is zero if
// buffer is empty, i.e. no value is inserted.
double Mean();
// Remove transients. If the values exceed |val_threshold| for a period
// shorter then or equal to |width_threshold|, then that period is considered
// Remove transients. If the values exceed `val_threshold` for a period
// shorter than or equal to `width_threshold`, then that period is considered
// transient and set to zero.
int RemoveTransient(int width_threshold, double val_threshold);
@ -49,7 +49,7 @@ class VadCircularBuffer {
// insertion. |index = 1| is the one before the most recent insertion, and
// so on.
int Get(int index, double* value) const;
// Set a given position to |value|. |index| is interpreted as above.
// Set a given position to `value`. `index` is interpreted as above.
int Set(int index, double value);
// Return the number of valid elements in the buffer.
int BufferLevel();

View File

@ -32,12 +32,13 @@ VoiceActivityDetector::VoiceActivityDetector()
VoiceActivityDetector::~VoiceActivityDetector() = default;
// Because ISAC has a different chunk length, it updates
// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
// Otherwise it clears them.
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
size_t length,
int sample_rate_hz) {
RTC_DCHECK_EQ(length, sample_rate_hz / 100);
// TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
// Resample to the required rate.
const int16_t* resampled_ptr = audio;
if (sample_rate_hz != kSampleRateHz) {
@ -49,7 +50,7 @@ void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
}
RTC_DCHECK_EQ(length, kLength10Ms);
// Each chunk needs to be passed into |standalone_vad_|, because internally it
// Each chunk needs to be passed into `standalone_vad_`, because internally it
// buffers the audio and processes it all at once when GetActivity() is
// called.
RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);

View File

@ -33,6 +33,8 @@ class VoiceActivityDetector {
~VoiceActivityDetector();
// Processes each audio chunk and estimates the voice probability.
// TODO(bugs.webrtc.org/7494): Switch to rtc::ArrayView and remove
// `sample_rate_hz`.
void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
// Returns a vector of voice probabilities for each chunk. It can be empty for