Bump to WebRTC M120 release

Some API deprecations: ExperimentalAgc and ExperimentalNs are gone.
We're continuing to carry iSAC even though it's gone upstream, but we
may want to drop it soon.
Arun Raghavan
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
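For code that used the removed ExperimentalAgc / ExperimentalNs structs, the surviving configuration surface is webrtc::AudioProcessing::Config applied via ApplyConfig(). Below is a minimal sketch against the M120 API, not a drop-in migration: CreateApm is a hypothetical helper, ExperimentalAgc's closest equivalent is the analog gain controller shown below, and ExperimentalNs toggled the transient suppressor rather than regular noise suppression, so check the exact replacement in modules/audio_processing/include/audio_processing.h.

#include "api/scoped_refptr.h"
#include "modules/audio_processing/include/audio_processing.h"

// Hypothetical helper showing the AudioProcessing::Config pattern that
// replaces the old webrtc::Config-based toggles.
rtc::scoped_refptr<webrtc::AudioProcessing> CreateApm() {
  auto apm = webrtc::AudioProcessingBuilder().Create();

  webrtc::AudioProcessing::Config config;
  // Closest surviving equivalent of ExperimentalAgc: the adaptive analog
  // gain controller inside gain_controller1.
  config.gain_controller1.enabled = true;
  config.gain_controller1.analog_gain_controller.enabled = true;
  // Regular noise suppression (note: ExperimentalNs used to toggle the
  // transient suppressor, not this block).
  config.noise_suppression.enabled = true;
  config.noise_suppression.level =
      webrtc::AudioProcessing::Config::NoiseSuppression::kHigh;
  apm->ApplyConfig(config);
  return apm;
}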

View File

@@ -54,7 +54,7 @@ int WebRtcVad_Init(VadInst* handle);
// has not been initialized).
int WebRtcVad_set_mode(VadInst* handle, int mode);
// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
// Calculates a VAD decision for the `audio_frame`. For valid sampling rates
// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
//
// - handle [i/o] : VAD Instance. Needs to be initialized by
@@ -71,7 +71,7 @@ int WebRtcVad_Process(VadInst* handle,
const int16_t* audio_frame,
size_t frame_length);
// Checks for valid combinations of |rate| and |frame_length|. We support 10,
// Checks for valid combinations of `rate` and `frame_length`. We support 10,
// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
//
// - rate [i] : Sampling frequency (Hz).
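The two declarations above, together with the creation and teardown functions declared earlier in the same header (WebRtcVad_Create / WebRtcVad_Free), make up the whole public VAD interface. A minimal usage sketch, assuming 16 kHz input and 30 ms frames (480 samples), one of the valid rate/length combinations; ClassifyFrame is a hypothetical wrapper and the include path assumes the upstream source layout:

#include <stdint.h>
#include "common_audio/vad/include/webrtc_vad.h"

// Returns 1 for active speech, 0 for non-speech, -1 on error.
int ClassifyFrame(const int16_t* frame480) {
  VadInst* vad = WebRtcVad_Create();
  int result = -1;
  if (vad != NULL && WebRtcVad_Init(vad) == 0 &&
      WebRtcVad_set_mode(vad, 2) == 0) {  // Aggressiveness 0..3.
    result = WebRtcVad_Process(vad, 16000, frame480, 480);
  }
  WebRtcVad_Free(vad);
  return result;
}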

View File

@@ -38,7 +38,7 @@ class VadImpl final : public Vad {
case 1:
return kActive;
default:
RTC_NOTREACHED() << "WebRtcVad_Process returned an error.";
RTC_DCHECK_NOTREACHED() << "WebRtcVad_Process returned an error.";
return kError;
}
}

View File

@@ -90,11 +90,11 @@ static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 };
static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 };
static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
// Calculates the weighted average w.r.t. number of Gaussians. The |data| are
// updated with an |offset| before averaging.
// Calculates the weighted average w.r.t. number of Gaussians. The `data` are
// updated with an `offset` before averaging.
//
// - data [i/o] : Data to average.
// - offset [i] : An offset added to |data|.
// - offset [i] : An offset added to `data`.
// - weights [i] : Weights used for averaging.
//
// returns : The weighted average.
@@ -124,7 +124,7 @@ static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
// type of signal is most probable.
//
// - self [i/o] : Pointer to VAD instance
// - features [i] : Feature vector of length |kNumChannels|
// - features [i] : Feature vector of length `kNumChannels`
// = log10(energy in frequency band)
// - total_power [i] : Total power in audio frame.
// - frame_length [i] : Number of input samples
@@ -183,10 +183,10 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
// H1: Speech
//
// We combine a global LRT with local tests, for each frequency sub-band,
// here defined as |channel|.
// here defined as `channel`.
for (channel = 0; channel < kNumChannels; channel++) {
// For each channel we model the probability with a GMM consisting of
// |kNumGaussians|, with different means and standard deviations depending
// `kNumGaussians`, with different means and standard deviations depending
// on H0 or H1.
h0_test = 0;
h1_test = 0;
@@ -234,7 +234,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
}
log_likelihood_ratio = shifts_h0 - shifts_h1;
// Update |sum_log_likelihood_ratios| with spectrum weighting. This is
// Update `sum_log_likelihood_ratios` with spectrum weighting. This is
// used for the global VAD decision.
sum_log_likelihood_ratios +=
(int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]);
@@ -298,8 +298,8 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
nmk2 = nmk;
if (!vadflag) {
// deltaN = (x-mu)/sigma^2
// ngprvec[k] = |noise_probability[k]| /
// (|noise_probability[0]| + |noise_probability[1]|)
// ngprvec[k] = `noise_probability[k]` /
// (`noise_probability[0]` + `noise_probability[1]`)
// (Q14 * Q11 >> 11) = Q14.
delt = (int16_t)((ngprvec[gaussian] * deltaN[gaussian]) >> 11);
@@ -326,9 +326,9 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
if (vadflag) {
// Update speech mean vector:
// |deltaS| = (x-mu)/sigma^2
// sgprvec[k] = |speech_probability[k]| /
// (|speech_probability[0]| + |speech_probability[1]|)
// `deltaS` = (x-mu)/sigma^2
// sgprvec[k] = `speech_probability[k]` /
// (`speech_probability[0]` + `speech_probability[1]`)
// (Q14 * Q11) >> 11 = Q14.
delt = (int16_t)((sgprvec[gaussian] * deltaS[gaussian]) >> 11);
@@ -409,35 +409,35 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
}
// Separate models if they are too close.
// |noise_global_mean| in Q14 (= Q7 * Q7).
// `noise_global_mean` in Q14 (= Q7 * Q7).
noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
&kNoiseDataWeights[channel]);
// |speech_global_mean| in Q14 (= Q7 * Q7).
// `speech_global_mean` in Q14 (= Q7 * Q7).
speech_global_mean = WeightedAverage(&self->speech_means[channel], 0,
&kSpeechDataWeights[channel]);
// |diff| = "global" speech mean - "global" noise mean.
// `diff` = "global" speech mean - "global" noise mean.
// (Q14 >> 9) - (Q14 >> 9) = Q5.
diff = (int16_t) (speech_global_mean >> 9) -
(int16_t) (noise_global_mean >> 9);
if (diff < kMinimumDifference[channel]) {
tmp_s16 = kMinimumDifference[channel] - diff;
// |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7.
// |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7.
// `tmp1_s16` = ~0.8 * (kMinimumDifference - diff) in Q7.
// `tmp2_s16` = ~0.2 * (kMinimumDifference - diff) in Q7.
tmp1_s16 = (int16_t)((13 * tmp_s16) >> 2);
tmp2_s16 = (int16_t)((3 * tmp_s16) >> 2);
// Move Gaussian means for speech model by |tmp1_s16| and update
// |speech_global_mean|. Note that |self->speech_means[channel]| is
// Move Gaussian means for speech model by `tmp1_s16` and update
// `speech_global_mean`. Note that `self->speech_means[channel]` is
// changed after the call.
speech_global_mean = WeightedAverage(&self->speech_means[channel],
tmp1_s16,
&kSpeechDataWeights[channel]);
// Move Gaussian means for noise model by -|tmp2_s16| and update
// |noise_global_mean|. Note that |self->noise_means[channel]| is
// Move Gaussian means for noise model by -`tmp2_s16` and update
// `noise_global_mean`. Note that `self->noise_means[channel]` is
// changed after the call.
noise_global_mean = WeightedAverage(&self->noise_means[channel],
-tmp2_s16,
@@ -534,7 +534,7 @@ int WebRtcVad_InitCore(VadInstT* self) {
self->mean_value[i] = 1600;
}
// Set aggressiveness mode to default (=|kDefaultMode|).
// Set aggressiveness mode to default (=`kDefaultMode`).
if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
return -1;
}
@@ -609,7 +609,7 @@ int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
int vad;
size_t i;
int16_t speech_nb[240]; // 30 ms in 8 kHz.
// |tmp_mem| is a temporary memory used by resample function, length is
// `tmp_mem` is a temporary memory used by resample function, length is
// frame length in 10 ms (480 samples) + 256 extra.
int32_t tmp_mem[480 + 256] = { 0 };
const size_t kFrameLen10ms48khz = 480;
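Spelled out in conventional notation, the decision logic these comments describe is roughly the following; this is a sketch of what GmmProbability() computes in fixed point, not a literal transcription. Each channel c (one of the kNumChannels = 6 bands) models its log-energy feature x_c under H0 (noise) and H1 (speech) with a two-Gaussian mixture:

    p(x_c | H_i) = sum over k of w_{i,k,c} * N(x_c; mu_{i,k,c}, sigma_{i,k,c}^2),   k = 1..kNumGaussians, i in {0, 1}

where the weights come from kNoiseDataWeights / kSpeechDataWeights and the means and standard deviations are the adaptively updated noise_means / speech_means and noise_stds / speech_stds. The local statistic per channel is the log-likelihood ratio

    l_c = log p(x_c | H1) - log p(x_c | H0)

and the global statistic is its spectrum-weighted sum over channels,

    Lambda = sum over c of kSpectrumWeight[c] * l_c   (the sum_log_likelihood_ratios variable).

The frame is flagged as speech when Lambda passes the global threshold or any l_c passes its local threshold, with thresholds chosen per aggressiveness mode (e.g. the kLocalThresholdVAG / kGlobalThresholdVAG tables above), followed by the hangover smoothing driven by over_hang and the kOverHangMax* tables.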

View File

@@ -17,10 +17,19 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
enum { kNumChannels = 6 }; // Number of frequency bands (named channels).
enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal.
// TODO(https://bugs.webrtc.org/14476): When converted to C++, remove the macro.
#if defined(__cplusplus)
#define CONSTEXPR_INT(x) constexpr int x
#else
#define CONSTEXPR_INT(x) enum { x }
#endif
CONSTEXPR_INT(kNumChannels = 6); // Number of frequency bands (named channels).
CONSTEXPR_INT(
kNumGaussians = 2); // Number of Gaussians per channel in the GMM.
CONSTEXPR_INT(kTableSize = kNumChannels * kNumGaussians);
CONSTEXPR_INT(
kMinEnergy = 10); // Minimum energy required to trigger audio signal.
typedef struct VadInstT_ {
int vad;
@@ -30,14 +39,14 @@ typedef struct VadInstT_ {
int16_t speech_means[kTableSize];
int16_t noise_stds[kTableSize];
int16_t speech_stds[kTableSize];
// TODO(bjornv): Change to |frame_count|.
// TODO(bjornv): Change to `frame_count`.
int32_t frame_counter;
int16_t over_hang; // Over Hang
int16_t num_of_speech;
// TODO(bjornv): Change to |age_vector|.
// TODO(bjornv): Change to `age_vector`.
int16_t index_vector[16 * kNumChannels];
int16_t low_value_vector[16 * kNumChannels];
// TODO(bjornv): Change to |median|.
// TODO(bjornv): Change to `median`.
int16_t mean_value[kNumChannels];
int16_t upper_state[5];
int16_t lower_state[5];
@@ -51,7 +60,7 @@ typedef struct VadInstT_ {
} VadInstT;
// Initializes the core VAD component. The default aggressiveness mode is
// controlled by |kDefaultMode| in vad_core.c.
// controlled by `kDefaultMode` in vad_core.c.
//
// - self [i/o] : Instance that should be initialized
//
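The CONSTEXPR_INT macro introduced above exists only so this header can keep being included from both C and C++ translation units until the TODO above lands; the same declaration expands differently per language. For example, for kNumChannels:

    // C++:  CONSTEXPR_INT(kNumChannels = 6);  =>  constexpr int kNumChannels = 6;
    // C:    CONSTEXPR_INT(kNumChannels = 6);  =>  enum { kNumChannels = 6 };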

View File

@@ -28,7 +28,7 @@ static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };
// Adjustment for division with two in SplitFilter.
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
// High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is
// High pass filtering, with a cut-off frequency at 80 Hz, if the `data_in` is
// sampled at 500 Hz.
//
// - data_in [i] : Input audio data sampled at 500 Hz.
@@ -69,9 +69,9 @@ static void HighPassFilter(const int16_t* data_in, size_t data_length,
}
}
// All pass filtering of |data_in|, used before splitting the signal into two
// All pass filtering of `data_in`, used before splitting the signal into two
// frequency bands (low pass vs high pass).
// Note that |data_in| and |data_out| can NOT correspond to the same address.
// Note that `data_in` and `data_out` can NOT correspond to the same address.
//
// - data_in [i] : Input audio signal given in Q0.
// - data_length [i] : Length of input and output data.
@@ -104,17 +104,17 @@ static void AllPassFilter(const int16_t* data_in, size_t data_length,
*filter_state = (int16_t) (state32 >> 16); // Q(-1)
}
// Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to
// Splits `data_in` into `hp_data_out` and `lp_data_out` corresponding to
// an upper (high pass) part and a lower (low pass) part respectively.
//
// - data_in [i] : Input audio data to be split into two frequency bands.
// - data_length [i] : Length of |data_in|.
// - data_length [i] : Length of `data_in`.
// - upper_state [i/o] : State of the upper filter, given in Q(-1).
// - lower_state [i/o] : State of the lower filter, given in Q(-1).
// - hp_data_out [o] : Output audio data of the upper half of the spectrum.
// The length is |data_length| / 2.
// The length is `data_length` / 2.
// - lp_data_out [o] : Output audio data of the lower half of the spectrum.
// The length is |data_length| / 2.
// The length is `data_length` / 2.
static void SplitFilter(const int16_t* data_in, size_t data_length,
int16_t* upper_state, int16_t* lower_state,
int16_t* hp_data_out, int16_t* lp_data_out) {
@@ -138,23 +138,23 @@ static void SplitFilter(const int16_t* data_in, size_t data_length,
}
}
// Calculates the energy of |data_in| in dB, and also updates an overall
// |total_energy| if necessary.
// Calculates the energy of `data_in` in dB, and also updates an overall
// `total_energy` if necessary.
//
// - data_in [i] : Input audio data for energy calculation.
// - data_length [i] : Length of input data.
// - offset [i] : Offset value added to |log_energy|.
// - offset [i] : Offset value added to `log_energy`.
// - total_energy [i/o] : An external energy updated with the energy of
// |data_in|.
// NOTE: |total_energy| is only updated if
// |total_energy| <= |kMinEnergy|.
// - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4.
// `data_in`.
// NOTE: `total_energy` is only updated if
// `total_energy` <= `kMinEnergy`.
// - log_energy [o] : 10 * log10("energy of `data_in`") given in Q4.
static void LogOfEnergy(const int16_t* data_in, size_t data_length,
int16_t offset, int16_t* total_energy,
int16_t* log_energy) {
// |tot_rshifts| accumulates the number of right shifts performed on |energy|.
// `tot_rshifts` accumulates the number of right shifts performed on `energy`.
int tot_rshifts = 0;
// The |energy| will be normalized to 15 bits. We use unsigned integer because
// The `energy` will be normalized to 15 bits. We use unsigned integer because
// we eventually will mask out the fractional part.
uint32_t energy = 0;
@@ -169,14 +169,14 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
// zeros of an unsigned 32 bit value.
int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
// In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is
// (14 << 10), which is what we initialize |log2_energy| with. For a more
// (14 << 10), which is what we initialize `log2_energy` with. For a more
// detailed derivations, see below.
int16_t log2_energy = kLogEnergyIntPart;
tot_rshifts += normalizing_rshifts;
// Normalize |energy| to 15 bits.
// |tot_rshifts| is now the total number of right shifts performed on
// |energy| after normalization. This means that |energy| is in
// Normalize `energy` to 15 bits.
// `tot_rshifts` is now the total number of right shifts performed on
// `energy` after normalization. This means that `energy` is in
// Q(-tot_rshifts).
if (normalizing_rshifts < 0) {
energy <<= -normalizing_rshifts;
@@ -184,30 +184,30 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
energy >>= normalizing_rshifts;
}
// Calculate the energy of |data_in| in dB, in Q4.
// Calculate the energy of `data_in` in dB, in Q4.
//
// 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") =
// 160 * log10(|energy| * 2^|tot_rshifts|) =
// 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) =
// 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) =
// (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
// |kLogConst| * (|log2_energy| + |tot_rshifts|)
// 160 * log10(`energy` * 2^`tot_rshifts`) =
// 160 * log10(2) * log2(`energy` * 2^`tot_rshifts`) =
// 160 * log10(2) * (log2(`energy`) + log2(2^`tot_rshifts`)) =
// (160 * log10(2)) * (log2(`energy`) + `tot_rshifts`) =
// `kLogConst` * (`log2_energy` + `tot_rshifts`)
//
// We know by construction that |energy| is normalized to 15 bits. Hence,
// |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
// Further, we'd like |log2_energy| in Q10
// log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
// We know by construction that `energy` is normalized to 15 bits. Hence,
// `energy` = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
// Further, we'd like `log2_energy` in Q10
// log2(`energy`) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
// 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) =
// 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~=
// (14 << 10) + 2^10 * (frac_Q15 * 2^-14) =
// (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4)
//
// Note that frac_Q15 = (|energy| & 0x00003FFF)
// Note that frac_Q15 = (`energy` & 0x00003FFF)
// Calculate and add the fractional part to |log2_energy|.
// Calculate and add the fractional part to `log2_energy`.
log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);
// |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0.
// `kLogConst` is in Q9, `log2_energy` in Q10 and `tot_rshifts` in Q0.
// Note that we in our derivation above have accounted for an output in Q4.
*log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) +
((tot_rshifts * kLogConst) >> 9));
@@ -222,19 +222,19 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
*log_energy += offset;
// Update the approximate |total_energy| with the energy of |data_in|, if
// |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
// Update the approximate `total_energy` with the energy of `data_in`, if
// `total_energy` has not exceeded `kMinEnergy`. `total_energy` is used as an
// energy indicator in WebRtcVad_GmmProbability() in vad_core.c.
if (*total_energy <= kMinEnergy) {
if (tot_rshifts >= 0) {
// We know by construction that the |energy| > |kMinEnergy| in Q0, so add
// an arbitrary value such that |total_energy| exceeds |kMinEnergy|.
// We know by construction that the `energy` > `kMinEnergy` in Q0, so add
// an arbitrary value such that `total_energy` exceeds `kMinEnergy`.
*total_energy += kMinEnergy + 1;
} else {
// By construction |energy| is represented by 15 bits, hence any number of
// right shifted |energy| will fit in an int16_t. In addition, adding the
// value to |total_energy| is wrap around safe as long as
// |kMinEnergy| < 8192.
// By construction `energy` is represented by 15 bits, hence any number of
// right shifted `energy` will fit in an int16_t. In addition, adding the
// value to `total_energy` is wrap around safe as long as
// `kMinEnergy` < 8192.
*total_energy += (int16_t) (energy >> -tot_rshifts); // Q0.
}
}
@@ -243,14 +243,14 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
size_t data_length, int16_t* features) {
int16_t total_energy = 0;
// We expect |data_length| to be 80, 160 or 240 samples, which corresponds to
// We expect `data_length` to be 80, 160 or 240 samples, which corresponds to
// 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
// have at most 120 samples after the first split and at most 60 samples after
// the second split.
int16_t hp_120[120], lp_120[120];
int16_t hp_60[60], lp_60[60];
const size_t half_data_length = data_length >> 1;
size_t length = half_data_length; // |data_length| / 2, corresponds to
size_t length = half_data_length; // `data_length` / 2, corresponds to
// bandwidth = 2000 Hz after downsampling.
// Initialize variables for the first SplitFilter().
@@ -260,7 +260,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz.
RTC_DCHECK_LE(data_length, 240);
RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum |frequency_band|.
RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum `frequency_band`.
// Split at 2000 Hz and downsample.
SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],
@@ -275,7 +275,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 3000 Hz - 4000 Hz.
length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
length >>= 1; // `data_length` / 4 <=> bandwidth = 1000 Hz.
LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]);
@@ -287,12 +287,12 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
in_ptr = lp_120; // [0 - 2000] Hz.
hp_out_ptr = hp_60; // [1000 - 2000] Hz.
lp_out_ptr = lp_60; // [0 - 1000] Hz.
length = half_data_length; // |data_length| / 2 <=> bandwidth = 2000 Hz.
length = half_data_length; // `data_length` / 2 <=> bandwidth = 2000 Hz.
SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 1000 Hz - 2000 Hz.
length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
length >>= 1; // `data_length` / 4 <=> bandwidth = 1000 Hz.
LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]);
// For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample.
@@ -304,7 +304,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 500 Hz - 1000 Hz.
length >>= 1; // |data_length| / 8 <=> bandwidth = 500 Hz.
length >>= 1; // `data_length` / 8 <=> bandwidth = 500 Hz.
LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]);
// For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample.
@@ -316,7 +316,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 250 Hz - 500 Hz.
length >>= 1; // |data_length| / 16 <=> bandwidth = 250 Hz.
length >>= 1; // `data_length` / 16 <=> bandwidth = 250 Hz.
LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);
// Remove 0 Hz - 80 Hz, by high pass filtering the lower band.
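Condensed into one identity, the LogOfEnergy() derivation above says: if the frame energy is E = e * 2^r, with e the mantissa normalized to 15 bits (2^14 <= e < 2^15) and r = tot_rshifts, then

    10 * log10(E) in Q4  =  160 * log10(2) * (log2(e) + r)  ~=  kLogConst * (log2_energy + tot_rshifts)

where kLogConst holds 160 * log10(2) in Q9 and log2_energy approximates log2(e) in Q10 as (14 << 10) + (frac_Q15 >> 4), exactly as the comment block spells out. The kOffsetVector offset and the total_energy update are then applied on top of that result.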

View File

@@ -17,8 +17,8 @@
#include "common_audio/vad/vad_core.h"
// Takes |data_length| samples of |data_in| and calculates the logarithm of the
// energy of each of the |kNumChannels| = 6 frequency bands used by the VAD:
// Takes `data_length` samples of `data_in` and calculates the logarithm of the
// energy of each of the `kNumChannels` = 6 frequency bands used by the VAD:
// 80 Hz - 250 Hz
// 250 Hz - 500 Hz
// 500 Hz - 1000 Hz
@@ -26,10 +26,10 @@
// 2000 Hz - 3000 Hz
// 3000 Hz - 4000 Hz
//
// The values are given in Q4 and written to |features|. Further, an approximate
// The values are given in Q4 and written to `features`. Further, an approximate
// overall energy is returned. The return value is used in
// WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above
// the threshold |kMinEnergy|.
// the threshold `kMinEnergy`.
//
// - self [i/o] : State information of the VAD.
// - data_in [i] : Input audio data, for feature extraction.

View File

@@ -15,16 +15,16 @@
static const int32_t kCompVar = 22005;
static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12.
// For a normal distribution, the probability of |input| is calculated and
// For a normal distribution, the probability of `input` is calculated and
// returned (in Q20). The formula for normal distributed probability is
//
// 1 / s * exp(-(x - m)^2 / (2 * s^2))
//
// where the parameters are given in the following Q domains:
// m = |mean| (Q7)
// s = |std| (Q7)
// x = |input| (Q4)
// in addition to the probability we output |delta| (in Q11) used when updating
// m = `mean` (Q7)
// s = `std` (Q7)
// x = `input` (Q4)
// in addition to the probability we output `delta` (in Q11) used when updating
// the noise/speech model.
int32_t WebRtcVad_GaussianProbability(int16_t input,
int16_t mean,
@@ -33,13 +33,13 @@ int32_t WebRtcVad_GaussianProbability(int16_t input,
int16_t tmp16, inv_std, inv_std2, exp_value = 0;
int32_t tmp32;
// Calculate |inv_std| = 1 / s, in Q10.
// 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
// Calculate `inv_std` = 1 / s, in Q10.
// 131072 = 1 in Q17, and (`std` >> 1) is for rounding instead of truncation.
// Q-domain: Q17 / Q7 = Q10.
tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);
// Calculate |inv_std2| = 1 / s^2, in Q14.
// Calculate `inv_std2` = 1 / s^2, in Q14.
tmp16 = (inv_std >> 2); // Q10 -> Q8.
// Q-domain: (Q8 * Q8) >> 2 = Q14.
inv_std2 = (int16_t)((tmp16 * tmp16) >> 2);
@@ -51,20 +51,20 @@ int32_t WebRtcVad_GaussianProbability(int16_t input,
tmp16 = tmp16 - mean; // Q7 - Q7 = Q7
// To be used later, when updating noise/speech model.
// |delta| = (x - m) / s^2, in Q11.
// `delta` = (x - m) / s^2, in Q11.
// Q-domain: (Q14 * Q7) >> 10 = Q11.
*delta = (int16_t)((inv_std2 * tmp16) >> 10);
// Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
// Calculate the exponent `tmp32` = (x - m)^2 / (2 * s^2), in Q10. Replacing
// division by two with one shift.
// Q-domain: (Q11 * Q7) >> 8 = Q10.
tmp32 = (*delta * tmp16) >> 9;
// If the exponent is small enough to give a non-zero probability we calculate
// |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
// ~= exp2(-log2(exp(1)) * |tmp32|).
// `exp_value` ~= exp(-(x - m)^2 / (2 * s^2))
// ~= exp2(-log2(exp(1)) * `tmp32`).
if (tmp32 < kCompVar) {
// Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
// Calculate `tmp16` = log2(exp(1)) * `tmp32`, in Q10.
// Q-domain: (Q12 * Q10) >> 12 = Q10.
tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12);
tmp16 = -tmp16;
@@ -72,7 +72,7 @@ int32_t WebRtcVad_GaussianProbability(int16_t input,
tmp16 ^= 0xFFFF;
tmp16 >>= 10;
tmp16 += 1;
// Get |exp_value| = exp(-|tmp32|) in Q10.
// Get `exp_value` = exp(-`tmp32`) in Q10.
exp_value >>= tmp16;
}
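In floating-point terms the routine above evaluates (a sketch; the code stays entirely in the Q domains listed in its comments):

    probability = 1/s * exp(-(x - m)^2 / (2 * s^2))    // returned in Q20
    delta       = (x - m) / s^2                        // returned in Q11

with x = input (Q4), m = mean (Q7) and s = std (Q7). The exponential is computed as exp2(-log2(e) * t), where t = (x - m)^2 / (2 * s^2) in Q10, and is treated as zero once t reaches kCompVar.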

View File

@@ -15,8 +15,8 @@
#include <stdint.h>
// Calculates the probability for |input|, given that |input| comes from a
// normal distribution with mean and standard deviation (|mean|, |std|).
// Calculates the probability for `input`, given that `input` comes from a
// normal distribution with mean and standard deviation (`mean`, `std`).
//
// Inputs:
// - input : input sample in Q4.
@@ -26,11 +26,11 @@
// Output:
//
// - delta : input used when updating the model, Q11.
// |delta| = (|input| - |mean|) / |std|^2.
// `delta` = (`input` - `mean`) / `std`^2.
//
// Return:
// (probability for |input|) =
// 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2));
// (probability for `input`) =
// 1 / `std` * exp(-(`input` - `mean`)^2 / (2 * `std`^2));
int32_t WebRtcVad_GaussianProbability(int16_t input,
int16_t mean,
int16_t std,

View File

@@ -52,7 +52,7 @@ void WebRtcVad_Downsampling(const int16_t* signal_in,
filter_state[1] = tmp32_2;
}
// Inserts |feature_value| into |low_value_vector|, if it is one of the 16
// Inserts `feature_value` into `low_value_vector`, if it is one of the 16
// smallest values the last 100 frames. Then calculates and returns the median
// of the five smallest values.
int16_t WebRtcVad_FindMinimum(VadInstT* self,
@@ -66,13 +66,13 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
int16_t alpha = 0;
int32_t tmp32 = 0;
// Pointer to memory for the 16 minimum values and the age of each value of
// the |channel|.
// the `channel`.
int16_t* age = &self->index_vector[offset];
int16_t* smallest_values = &self->low_value_vector[offset];
RTC_DCHECK_LT(channel, kNumChannels);
// Each value in |smallest_values| is getting 1 loop older. Update |age|, and
// Each value in `smallest_values` is getting 1 loop older. Update `age`, and
// remove old values.
for (i = 0; i < 16; i++) {
if (age[i] != 100) {
@@ -88,9 +88,9 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
}
}
// Check if |feature_value| is smaller than any of the values in
// |smallest_values|. If so, find the |position| where to insert the new value
// (|feature_value|).
// Check if `feature_value` is smaller than any of the values in
// `smallest_values`. If so, find the `position` where to insert the new value
// (`feature_value`).
if (feature_value < smallest_values[7]) {
if (feature_value < smallest_values[3]) {
if (feature_value < smallest_values[1]) {
@@ -152,7 +152,7 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
age[position] = 1;
}
// Get |current_median|.
// Get `current_median`.
if (self->frame_counter > 2) {
current_median = smallest_values[2];
} else if (self->frame_counter > 0) {
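The bookkeeping in WebRtcVad_FindMinimum() is easier to follow stripped of the fixed-point smoothing: per channel it keeps the 16 smallest feature values seen over the last 100 frames, each with an age, and the reported minimum is the median of the five smallest. A simplified sketch of just that bookkeeping; MinTracker / TrackMinimum are hypothetical names, not the real implementation:

#include <stdint.h>

#define kSlots 16
#define kMaxAge 100

// Hypothetical per-channel state mirroring low_value_vector / index_vector.
typedef struct {
  int16_t value[kSlots];  // Sorted ascending; smallest value first.
  int16_t age[kSlots];    // Frames since the value was inserted.
} MinTracker;

static void InitMinTracker(MinTracker* t) {
  for (int i = 0; i < kSlots; i++) {
    t->value[i] = INT16_MAX;  // Empty-slot sentinel.
    t->age[i] = 0;
  }
}

// Inserts `feature` if it is among the 16 smallest values of the last
// 100 frames, then returns the median of the five smallest values.
static int16_t TrackMinimum(MinTracker* t, int16_t feature) {
  // Age every slot; a slot that reaches kMaxAge is dropped by shifting the
  // remaining values down and re-opening the last slot.
  for (int i = 0; i < kSlots; i++) {
    if (t->age[i] != kMaxAge) {
      t->age[i]++;
    } else {
      for (int j = i; j < kSlots - 1; j++) {
        t->value[j] = t->value[j + 1];
        t->age[j] = t->age[j + 1];
      }
      t->value[kSlots - 1] = INT16_MAX;
      t->age[kSlots - 1] = 0;
      i--;  // Re-examine the entry shifted into position i.
    }
  }
  // Keep `feature` only if it beats the largest stored value, inserting it
  // at its sorted position.
  if (feature < t->value[kSlots - 1]) {
    int i = kSlots - 1;
    while (i > 0 && feature < t->value[i - 1]) {
      t->value[i] = t->value[i - 1];
      t->age[i] = t->age[i - 1];
      i--;
    }
    t->value[i] = feature;
    t->age[i] = 1;
  }
  return t->value[2];  // Median of the five smallest values.
}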

View File

@@ -23,11 +23,11 @@
//
// Input & Output:
// - filter_state : Current filter states of the two all-pass filters. The
// |filter_state| is updated after all samples have been
// `filter_state` is updated after all samples have been
// processed.
//
// Output:
// - signal_out : Downsampled signal (of length |in_length| / 2).
// - signal_out : Downsampled signal (of length `in_length` / 2).
void WebRtcVad_Downsampling(const int16_t* signal_in,
int16_t* signal_out,
int32_t* filter_state,
@@ -35,7 +35,7 @@ void WebRtcVad_Downsampling(const int16_t* signal_in,
// Updates and returns the smoothed feature minimum. As minimum we use the
// median of the five smallest feature values in a 100 frames long window.
// As long as |handle->frame_counter| is zero, that is, we haven't received any
// As long as `handle->frame_counter` is zero, that is, we haven't received any
// "valid" data, FindMinimum() outputs the default value of 1600.
//
// Inputs:

View File

@@ -21,7 +21,7 @@ static const int kValidRates[] = { 8000, 16000, 32000, 48000 };
static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);
static const int kMaxFrameLengthMs = 30;
VadInst* WebRtcVad_Create() {
VadInst* WebRtcVad_Create(void) {
VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
self->init_flag = 0;
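The `()` to `(void)` change above is not cosmetic in C: before C23, an empty parameter list leaves the parameter types unspecified, so it provides no prototype and a call with stray arguments is not diagnosed, and newer compilers can warn about such non-prototype declarations and definitions. With `(void)` the no-argument contract is explicit. As seen by a C caller:

    VadInst* WebRtcVad_Create();      /* parameter types unspecified; WebRtcVad_Create(42) still compiles */
    VadInst* WebRtcVad_Create(void);  /* takes no arguments; WebRtcVad_Create(42) is rejected */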