Bump to WebRTC M120 release
Some API deprecations -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it has been removed upstream, but we may want to drop it soon.
This commit is contained in:
@ -20,13 +20,13 @@ namespace webrtc {
|
||||
// Where a 'mixture' is a Gaussian density.
|
||||
|
||||
struct GmmParameters {
|
||||
// weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n]));
|
||||
// weight[n] = log(w[n]) - `dimension`/2 * log(2*pi) - 1/2 * log(det(cov[n]));
|
||||
// where cov[n] is the covariance matrix of mixture n;
|
||||
const double* weight;
|
||||
// pointer to the first element of a |num_mixtures|x|dimension| matrix
|
||||
// pointer to the first element of a `num_mixtures`x`dimension` matrix
|
||||
// where kth row is the mean of the kth mixture.
|
||||
const double* mean;
|
||||
// pointer to the first element of a |num_mixtures|x|dimension|x|dimension|
|
||||
// pointer to the first element of a `num_mixtures`x`dimension`x`dimension`
|
||||
// 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
|
||||
// matrix of the kth mixture.
|
||||
const double* covar_inverse;
|
||||
@ -36,8 +36,8 @@ struct GmmParameters {
|
||||
int num_mixtures;
|
||||
};
|
||||
|
||||
// Evaluate the given GMM, according to |gmm_parameters|, at the given point
|
||||
// |x|. If the dimensionality of the given GMM is larger than the maximum
|
||||
// Evaluate the given GMM, according to `gmm_parameters`, at the given point
|
||||
// `x`. If the dimensionality of the given GMM is larger than the maximum
|
||||
// acceptable dimension by the following function, -1 is returned.
|
||||
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
|
||||
|
||||
|
@ -34,7 +34,7 @@ class PitchBasedVad {
|
||||
// p_combined: an array which contains the combined activity probabilities
|
||||
// computed prior to the call of this function. The method,
|
||||
// then, computes the voicing probabilities and combines them
|
||||
// with the given values. The results are returned in |p|.
|
||||
// with the given values. The results are returned in `p`.
|
||||
int VoicingProbability(const AudioFeatures& features, double* p_combined);
|
||||
|
||||
private:
|
||||
|
@ -14,7 +14,7 @@
|
||||
namespace webrtc {
|
||||
|
||||
// TODO(turajs): Write a description of this function. Also be consistent with
|
||||
// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
|
||||
// usage of `sampling_rate_hz` vs `kSamplingFreqHz`.
|
||||
void GetSubframesPitchParameters(int sampling_rate_hz,
|
||||
double* gains,
|
||||
double* lags,
|
||||
|
@ -26,12 +26,12 @@ class StandaloneVad {
|
||||
|
||||
// Outputs
|
||||
// p: a buffer where probabilities are written to.
|
||||
// length_p: number of elements of |p|.
|
||||
// length_p: number of elements of `p`.
|
||||
//
|
||||
// return value:
|
||||
// -1: if no audio is stored or VAD returns error.
|
||||
// 0: on success.
|
||||
// In case of error the content of |activity| is unchanged.
|
||||
// In case of error the content of `activity` is unchanged.
|
||||
//
|
||||
// Note that due to a high false-positive (VAD decision is active while the
|
||||
// processed audio is just background noise) rate, stand-alone VAD is used as
|
||||
|
@ -132,7 +132,7 @@ void VadAudioProc::SubframeCorrelation(double* corr,
|
||||
kNumSubframeSamples + kNumPastSignalSamples, kLpcOrder);
|
||||
}
|
||||
|
||||
// Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input.
|
||||
// Compute `kNum10msSubframes` sets of LPC coefficients, one per 10 ms input.
|
||||
// The analysis window is 15 ms long and it is centered on the first half of
|
||||
// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
|
||||
// first half of each 10 ms subframe.
|
||||
@ -169,7 +169,7 @@ static float QuadraticInterpolation(float prev_val,
|
||||
return fractional_index;
|
||||
}
|
||||
|
||||
// 1 / A(z), where A(z) is defined by |lpc|, is a model of the spectral envelope
|
||||
// 1 / A(z), where A(z) is defined by `lpc`, is a model of the spectral envelope
|
||||
// of the input signal. The local maximum of the spectral envelope corresponds
|
||||
// with the local minimum of A(z). It saves complexity, as we save one
|
||||
// inversion. Furthermore, we find the first local maximum of magnitude squared,
|
||||
|
@ -35,7 +35,7 @@ class VadAudioProc {
|
||||
size_t length,
|
||||
AudioFeatures* audio_features);
|
||||
|
||||
static const size_t kDftSize = 512;
|
||||
static constexpr size_t kDftSize = 512;
|
||||
|
||||
private:
|
||||
void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length);
|
||||
@ -51,28 +51,22 @@ class VadAudioProc {
|
||||
// For every 30 ms we compute 3 spectral peaks, therefore 3 LPC analyses.
|
||||
// LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
|
||||
// we need 5 ms of past signal to create the input of LPC analysis.
|
||||
enum : size_t {
|
||||
kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
|
||||
};
|
||||
static constexpr size_t kNumPastSignalSamples = size_t{kSampleRateHz / 200};
|
||||
|
||||
// TODO(turajs): maybe define this at a higher level (maybe as an enum) so that
|
||||
// all the code recognizes it as "no-error."
|
||||
enum : int { kNoError = 0 };
|
||||
static constexpr int kNoError = 0;
|
||||
|
||||
enum : size_t { kNum10msSubframes = 3 };
|
||||
enum : size_t {
|
||||
kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
|
||||
};
|
||||
enum : size_t {
|
||||
// Samples in 30 ms @ given sampling rate.
|
||||
kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
|
||||
};
|
||||
enum : size_t {
|
||||
kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
|
||||
};
|
||||
enum : size_t { kIpLength = kDftSize >> 1 };
|
||||
enum : size_t { kWLength = kDftSize >> 1 };
|
||||
enum : size_t { kLpcOrder = 16 };
|
||||
static constexpr size_t kNum10msSubframes = 3;
|
||||
static constexpr size_t kNumSubframeSamples = size_t{kSampleRateHz / 100};
|
||||
// Samples in 30 ms @ given sampling rate.
|
||||
static constexpr size_t kNumSamplesToProcess =
|
||||
kNum10msSubframes * kNumSubframeSamples;
|
||||
static constexpr size_t kBufferLength =
|
||||
kNumPastSignalSamples + kNumSamplesToProcess;
|
||||
static constexpr size_t kIpLength = kDftSize >> 1;
|
||||
static constexpr size_t kWLength = kDftSize >> 1;
|
||||
static constexpr size_t kLpcOrder = 16;
|
||||
|
||||
size_t ip_[kIpLength];
|
||||
float w_fft_[kWLength];
|
||||
|
@ -11,6 +11,8 @@
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// These values should match MATLAB counterparts for unit-tests to pass.
|
||||
|
@ -38,8 +38,8 @@ class VadCircularBuffer {
|
||||
// The mean value of the elements in the buffer. The return value is zero if
|
||||
// buffer is empty, i.e. no value is inserted.
|
||||
double Mean();
|
||||
// Remove transients. If the values exceed |val_threshold| for a period
|
||||
// shorter than or equal to |width_threshold|, then that period is considered
|
||||
// Remove transients. If the values exceed `val_threshold` for a period
|
||||
// shorter than or equal to `width_threshold`, then that period is considered
|
||||
// transient and set to zero.
|
||||
int RemoveTransient(int width_threshold, double val_threshold);
|
||||
|
||||
@ -49,7 +49,7 @@ class VadCircularBuffer {
|
||||
// insertion. |index = 1| is the one before the most recent insertion, and
|
||||
// so on.
|
||||
int Get(int index, double* value) const;
|
||||
// Set a given position to |value|. |index| is interpreted as above.
|
||||
// Set a given position to `value`. `index` is interpreted as above.
|
||||
int Set(int index, double value);
|
||||
// Return the number of valid elements in the buffer.
|
||||
int BufferLevel();
|
||||
|
@ -32,12 +32,13 @@ VoiceActivityDetector::VoiceActivityDetector()
|
||||
VoiceActivityDetector::~VoiceActivityDetector() = default;
|
||||
|
||||
// Because ISAC has a different chunk length, it updates
|
||||
// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
|
||||
// `chunkwise_voice_probabilities_` and `chunkwise_rms_` when there is new data.
|
||||
// Otherwise it clears them.
|
||||
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
|
||||
size_t length,
|
||||
int sample_rate_hz) {
|
||||
RTC_DCHECK_EQ(length, sample_rate_hz / 100);
|
||||
// TODO(bugs.webrtc.org/7494): Remove resampling and force 16 kHz audio.
|
||||
// Resample to the required rate.
|
||||
const int16_t* resampled_ptr = audio;
|
||||
if (sample_rate_hz != kSampleRateHz) {
|
||||
@ -49,7 +50,7 @@ void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
|
||||
}
|
||||
RTC_DCHECK_EQ(length, kLength10Ms);
|
||||
|
||||
// Each chunk needs to be passed into |standalone_vad_|, because internally it
|
||||
// Each chunk needs to be passed into `standalone_vad_`, because internally it
|
||||
// buffers the audio and processes it all at once when GetActivity() is
|
||||
// called.
|
||||
RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0);
|
||||
|
@ -33,6 +33,8 @@ class VoiceActivityDetector {
|
||||
~VoiceActivityDetector();
|
||||
|
||||
// Processes each audio chunk and estimates the voice probability.
|
||||
// TODO(bugs.webrtc.org/7494): Switch to rtc::ArrayView and remove
|
||||
// `sample_rate_hz`.
|
||||
void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
|
||||
|
||||
// Returns a vector of voice probabilities for each chunk. It can be empty for
|
||||
|
Reference in New Issue
Block a user