Bump to WebRTC M120 release

Some API deprecations: ExperimentalAgc and ExperimentalNs are gone.
We're continuing to carry iSAC even though it's gone upstream, but we
may want to drop it soon.
Arun Raghavan
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
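For code that used the removed ExperimentalAgc / ExperimentalNs structs, the surviving configuration surface is webrtc::AudioProcessing::Config applied via ApplyConfig(). Below is a minimal sketch against the M120 API, not a drop-in migration: CreateApm is a hypothetical helper, ExperimentalAgc's closest equivalent is the analog gain controller shown below, and ExperimentalNs toggled the transient suppressor rather than regular noise suppression, so check the exact replacement in modules/audio_processing/include/audio_processing.h.

#include "api/scoped_refptr.h"
#include "modules/audio_processing/include/audio_processing.h"

// Hypothetical helper showing the AudioProcessing::Config pattern that
// replaces the old webrtc::Config-based toggles.
rtc::scoped_refptr<webrtc::AudioProcessing> CreateApm() {
  auto apm = webrtc::AudioProcessingBuilder().Create();

  webrtc::AudioProcessing::Config config;
  // Closest surviving equivalent of ExperimentalAgc: the adaptive analog
  // gain controller inside gain_controller1.
  config.gain_controller1.enabled = true;
  config.gain_controller1.analog_gain_controller.enabled = true;
  // Regular noise suppression (note: ExperimentalNs used to toggle the
  // transient suppressor, not this block).
  config.noise_suppression.enabled = true;
  config.noise_suppression.level =
      webrtc::AudioProcessing::Config::NoiseSuppression::kHigh;
  apm->ApplyConfig(config);
  return apm;
}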

View File

@@ -54,7 +54,7 @@ int WebRtcVad_Init(VadInst* handle);
// has not been initialized).
int WebRtcVad_set_mode(VadInst* handle, int mode);
// Calculates a VAD decision for the |audio_frame|. For valid sampling rates
// Calculates a VAD decision for the `audio_frame`. For valid sampling rates
// frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths().
//
// - handle [i/o] : VAD Instance. Needs to be initialized by
@@ -71,7 +71,7 @@ int WebRtcVad_Process(VadInst* handle,
const int16_t* audio_frame,
size_t frame_length);
// Checks for valid combinations of |rate| and |frame_length|. We support 10,
// Checks for valid combinations of `rate` and `frame_length`. We support 10,
// 20 and 30 ms frames and the rates 8000, 16000 and 32000 Hz.
//
// - rate [i] : Sampling frequency (Hz).
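The two declarations above, together with the creation and teardown functions declared earlier in the same header (WebRtcVad_Create / WebRtcVad_Free), make up the whole public VAD interface. A minimal usage sketch, assuming 16 kHz input and 30 ms frames (480 samples), one of the valid rate/length combinations; ClassifyFrame is a hypothetical wrapper and the include path assumes the upstream source layout:

#include <stdint.h>
#include "common_audio/vad/include/webrtc_vad.h"

// Returns 1 for active speech, 0 for non-speech, -1 on error.
int ClassifyFrame(const int16_t* frame480) {
  VadInst* vad = WebRtcVad_Create();
  int result = -1;
  if (vad != NULL && WebRtcVad_Init(vad) == 0 &&
      WebRtcVad_set_mode(vad, 2) == 0) {  // Aggressiveness 0..3.
    result = WebRtcVad_Process(vad, 16000, frame480, 480);
  }
  WebRtcVad_Free(vad);
  return result;
}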

View File

@@ -38,7 +38,7 @@ class VadImpl final : public Vad {
case 1:
return kActive;
default:
RTC_NOTREACHED() << "WebRtcVad_Process returned an error.";
RTC_DCHECK_NOTREACHED() << "WebRtcVad_Process returned an error.";
return kError;
}
}

View File

@@ -90,11 +90,11 @@ static const int16_t kOverHangMax2VAG[3] = { 9, 5, 3 };
static const int16_t kLocalThresholdVAG[3] = { 94, 94, 94 };
static const int16_t kGlobalThresholdVAG[3] = { 1100, 1050, 1100 };
// Calculates the weighted average w.r.t. number of Gaussians. The |data| are
// updated with an |offset| before averaging.
// Calculates the weighted average w.r.t. number of Gaussians. The `data` are
// updated with an `offset` before averaging.
//
// - data [i/o] : Data to average.
// - offset [i] : An offset added to |data|.
// - offset [i] : An offset added to `data`.
// - weights [i] : Weights used for averaging.
//
// returns : The weighted average.
@@ -124,7 +124,7 @@ static inline int32_t RTC_NO_SANITIZE("signed-integer-overflow")
// type of signal is most probable.
//
// - self [i/o] : Pointer to VAD instance
// - features [i] : Feature vector of length |kNumChannels|
// - features [i] : Feature vector of length `kNumChannels`
// = log10(energy in frequency band)
// - total_power [i] : Total power in audio frame.
// - frame_length [i] : Number of input samples
@@ -183,10 +183,10 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
// H1: Speech
//
// We combine a global LRT with local tests, for each frequency sub-band,
// here defined as |channel|.
// here defined as `channel`.
for (channel = 0; channel < kNumChannels; channel++) {
// For each channel we model the probability with a GMM consisting of
// |kNumGaussians|, with different means and standard deviations depending
// `kNumGaussians`, with different means and standard deviations depending
// on H0 or H1.
h0_test = 0;
h1_test = 0;
@@ -234,7 +234,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
}
log_likelihood_ratio = shifts_h0 - shifts_h1;
// Update |sum_log_likelihood_ratios| with spectrum weighting. This is
// Update `sum_log_likelihood_ratios` with spectrum weighting. This is
// used for the global VAD decision.
sum_log_likelihood_ratios +=
(int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]);
@@ -298,8 +298,8 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
nmk2 = nmk;
if (!vadflag) {
// deltaN = (x-mu)/sigma^2
// ngprvec[k] = |noise_probability[k]| /
// (|noise_probability[0]| + |noise_probability[1]|)
// ngprvec[k] = `noise_probability[k]` /
// (`noise_probability[0]` + `noise_probability[1]`)
// (Q14 * Q11 >> 11) = Q14.
delt = (int16_t)((ngprvec[gaussian] * deltaN[gaussian]) >> 11);
@@ -326,9 +326,9 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
if (vadflag) {
// Update speech mean vector:
// |deltaS| = (x-mu)/sigma^2
// sgprvec[k] = |speech_probability[k]| /
// (|speech_probability[0]| + |speech_probability[1]|)
// `deltaS` = (x-mu)/sigma^2
// sgprvec[k] = `speech_probability[k]` /
// (`speech_probability[0]` + `speech_probability[1]`)
// (Q14 * Q11) >> 11 = Q14.
delt = (int16_t)((sgprvec[gaussian] * deltaS[gaussian]) >> 11);
@@ -409,35 +409,35 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
}
// Separate models if they are too close.
// |noise_global_mean| in Q14 (= Q7 * Q7).
// `noise_global_mean` in Q14 (= Q7 * Q7).
noise_global_mean = WeightedAverage(&self->noise_means[channel], 0,
&kNoiseDataWeights[channel]);
// |speech_global_mean| in Q14 (= Q7 * Q7).
// `speech_global_mean` in Q14 (= Q7 * Q7).
speech_global_mean = WeightedAverage(&self->speech_means[channel], 0,
&kSpeechDataWeights[channel]);
// |diff| = "global" speech mean - "global" noise mean.
// `diff` = "global" speech mean - "global" noise mean.
// (Q14 >> 9) - (Q14 >> 9) = Q5.
diff = (int16_t) (speech_global_mean >> 9) -
(int16_t) (noise_global_mean >> 9);
if (diff < kMinimumDifference[channel]) {
tmp_s16 = kMinimumDifference[channel] - diff;
// |tmp1_s16| = ~0.8 * (kMinimumDifference - diff) in Q7.
// |tmp2_s16| = ~0.2 * (kMinimumDifference - diff) in Q7.
// `tmp1_s16` = ~0.8 * (kMinimumDifference - diff) in Q7.
// `tmp2_s16` = ~0.2 * (kMinimumDifference - diff) in Q7.
tmp1_s16 = (int16_t)((13 * tmp_s16) >> 2);
tmp2_s16 = (int16_t)((3 * tmp_s16) >> 2);
// Move Gaussian means for speech model by |tmp1_s16| and update
// |speech_global_mean|. Note that |self->speech_means[channel]| is
// Move Gaussian means for speech model by `tmp1_s16` and update
// `speech_global_mean`. Note that `self->speech_means[channel]` is
// changed after the call.
speech_global_mean = WeightedAverage(&self->speech_means[channel],
tmp1_s16,
&kSpeechDataWeights[channel]);
// Move Gaussian means for noise model by -|tmp2_s16| and update
// |noise_global_mean|. Note that |self->noise_means[channel]| is
// Move Gaussian means for noise model by -`tmp2_s16` and update
// `noise_global_mean`. Note that `self->noise_means[channel]` is
// changed after the call.
noise_global_mean = WeightedAverage(&self->noise_means[channel],
-tmp2_s16,
@@ -534,7 +534,7 @@ int WebRtcVad_InitCore(VadInstT* self) {
self->mean_value[i] = 1600;
}
// Set aggressiveness mode to default (=|kDefaultMode|).
// Set aggressiveness mode to default (=`kDefaultMode`).
if (WebRtcVad_set_mode_core(self, kDefaultMode) != 0) {
return -1;
}
@@ -609,7 +609,7 @@ int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
int vad;
size_t i;
int16_t speech_nb[240]; // 30 ms in 8 kHz.
// |tmp_mem| is a temporary memory used by resample function, length is
// `tmp_mem` is a temporary memory used by resample function, length is
// frame length in 10 ms (480 samples) + 256 extra.
int32_t tmp_mem[480 + 256] = { 0 };
const size_t kFrameLen10ms48khz = 480;
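Spelled out in conventional notation, the decision logic these comments describe is roughly the following; this is a sketch of what GmmProbability() computes in fixed point, not a literal transcription. Each channel c (one of the kNumChannels = 6 bands) models its log-energy feature x_c under H0 (noise) and H1 (speech) with a two-Gaussian mixture:

    p(x_c | H_i) = sum over k of w_{i,k,c} * N(x_c; mu_{i,k,c}, sigma_{i,k,c}^2),   k = 1..kNumGaussians, i in {0, 1}

where the weights come from kNoiseDataWeights / kSpeechDataWeights and the means and standard deviations are the adaptively updated noise_means / speech_means and noise_stds / speech_stds. The local statistic per channel is the log-likelihood ratio

    l_c = log p(x_c | H1) - log p(x_c | H0)

and the global statistic is its spectrum-weighted sum over channels,

    Lambda = sum over c of kSpectrumWeight[c] * l_c   (the sum_log_likelihood_ratios variable).

The frame is flagged as speech when Lambda passes the global threshold or any l_c passes its local threshold, with thresholds chosen per aggressiveness mode (e.g. the kLocalThresholdVAG / kGlobalThresholdVAG tables above), followed by the hangover smoothing driven by over_hang and the kOverHangMax* tables.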

View File

@@ -17,10 +17,19 @@
#include "common_audio/signal_processing/include/signal_processing_library.h"
enum { kNumChannels = 6 }; // Number of frequency bands (named channels).
enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal.
// TODO(https://bugs.webrtc.org/14476): When converted to C++, remove the macro.
#if defined(__cplusplus)
#define CONSTEXPR_INT(x) constexpr int x
#else
#define CONSTEXPR_INT(x) enum { x }
#endif
CONSTEXPR_INT(kNumChannels = 6); // Number of frequency bands (named channels).
CONSTEXPR_INT(
kNumGaussians = 2); // Number of Gaussians per channel in the GMM.
CONSTEXPR_INT(kTableSize = kNumChannels * kNumGaussians);
CONSTEXPR_INT(
kMinEnergy = 10); // Minimum energy required to trigger audio signal.
typedef struct VadInstT_ {
int vad;
@@ -30,14 +39,14 @@ typedef struct VadInstT_ {
int16_t speech_means[kTableSize];
int16_t noise_stds[kTableSize];
int16_t speech_stds[kTableSize];
// TODO(bjornv): Change to |frame_count|.
// TODO(bjornv): Change to `frame_count`.
int32_t frame_counter;
int16_t over_hang; // Over Hang
int16_t num_of_speech;
// TODO(bjornv): Change to |age_vector|.
// TODO(bjornv): Change to `age_vector`.
int16_t index_vector[16 * kNumChannels];
int16_t low_value_vector[16 * kNumChannels];
// TODO(bjornv): Change to |median|.
// TODO(bjornv): Change to `median`.
int16_t mean_value[kNumChannels];
int16_t upper_state[5];
int16_t lower_state[5];
@@ -51,7 +60,7 @@ typedef struct VadInstT_ {
} VadInstT;
// Initializes the core VAD component. The default aggressiveness mode is
// controlled by |kDefaultMode| in vad_core.c.
// controlled by `kDefaultMode` in vad_core.c.
//
// - self [i/o] : Instance that should be initialized
//
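The CONSTEXPR_INT macro introduced above exists only so this header can keep being included from both C and C++ translation units until the TODO above lands; the same declaration expands differently per language. For example, for kNumChannels:

    // C++:  CONSTEXPR_INT(kNumChannels = 6);  =>  constexpr int kNumChannels = 6;
    // C:    CONSTEXPR_INT(kNumChannels = 6);  =>  enum { kNumChannels = 6 };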

View File

@@ -28,7 +28,7 @@ static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };
// Adjustment for division with two in SplitFilter.
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
// High pass filtering, with a cut-off frequency at 80 Hz, if the |data_in| is
// High pass filtering, with a cut-off frequency at 80 Hz, if the `data_in` is
// sampled at 500 Hz.
//
// - data_in [i] : Input audio data sampled at 500 Hz.
@@ -69,9 +69,9 @@ static void HighPassFilter(const int16_t* data_in, size_t data_length,
}
}
// All pass filtering of |data_in|, used before splitting the signal into two
// All pass filtering of `data_in`, used before splitting the signal into two
// frequency bands (low pass vs high pass).
// Note that |data_in| and |data_out| can NOT correspond to the same address.
// Note that `data_in` and `data_out` can NOT correspond to the same address.
//
// - data_in [i] : Input audio signal given in Q0.
// - data_length [i] : Length of input and output data.
@@ -104,17 +104,17 @@ static void AllPassFilter(const int16_t* data_in, size_t data_length,
*filter_state = (int16_t) (state32 >> 16); // Q(-1)
}
// Splits |data_in| into |hp_data_out| and |lp_data_out| corresponding to
// Splits `data_in` into `hp_data_out` and `lp_data_out` corresponding to
// an upper (high pass) part and a lower (low pass) part respectively.
//
// - data_in [i] : Input audio data to be split into two frequency bands.
// - data_length [i] : Length of |data_in|.
// - data_length [i] : Length of `data_in`.
// - upper_state [i/o] : State of the upper filter, given in Q(-1).
// - lower_state [i/o] : State of the lower filter, given in Q(-1).
// - hp_data_out [o] : Output audio data of the upper half of the spectrum.
// The length is |data_length| / 2.
// The length is `data_length` / 2.
// - lp_data_out [o] : Output audio data of the lower half of the spectrum.
// The length is |data_length| / 2.
// The length is `data_length` / 2.
static void SplitFilter(const int16_t* data_in, size_t data_length,
int16_t* upper_state, int16_t* lower_state,
int16_t* hp_data_out, int16_t* lp_data_out) {
@@ -138,23 +138,23 @@ static void SplitFilter(const int16_t* data_in, size_t data_length,
}
}
// Calculates the energy of |data_in| in dB, and also updates an overall
// |total_energy| if necessary.
// Calculates the energy of `data_in` in dB, and also updates an overall
// `total_energy` if necessary.
//
// - data_in [i] : Input audio data for energy calculation.
// - data_length [i] : Length of input data.
// - offset [i] : Offset value added to |log_energy|.
// - offset [i] : Offset value added to `log_energy`.
// - total_energy [i/o] : An external energy updated with the energy of
// |data_in|.
// NOTE: |total_energy| is only updated if
// |total_energy| <= |kMinEnergy|.
// - log_energy [o] : 10 * log10("energy of |data_in|") given in Q4.
// `data_in`.
// NOTE: `total_energy` is only updated if
// `total_energy` <= `kMinEnergy`.
// - log_energy [o] : 10 * log10("energy of `data_in`") given in Q4.
static void LogOfEnergy(const int16_t* data_in, size_t data_length,
int16_t offset, int16_t* total_energy,
int16_t* log_energy) {
// |tot_rshifts| accumulates the number of right shifts performed on |energy|.
// `tot_rshifts` accumulates the number of right shifts performed on `energy`.
int tot_rshifts = 0;
// The |energy| will be normalized to 15 bits. We use unsigned integer because
// The `energy` will be normalized to 15 bits. We use unsigned integer because
// we eventually will mask out the fractional part.
uint32_t energy = 0;
@@ -169,14 +169,14 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
// zeros of an unsigned 32 bit value.
int normalizing_rshifts = 17 - WebRtcSpl_NormU32(energy);
// In a 15 bit representation the leading bit is 2^14. log2(2^14) in Q10 is
// (14 << 10), which is what we initialize |log2_energy| with. For a more
// (14 << 10), which is what we initialize `log2_energy` with. For a more
// detailed derivations, see below.
int16_t log2_energy = kLogEnergyIntPart;
tot_rshifts += normalizing_rshifts;
// Normalize |energy| to 15 bits.
// |tot_rshifts| is now the total number of right shifts performed on
// |energy| after normalization. This means that |energy| is in
// Normalize `energy` to 15 bits.
// `tot_rshifts` is now the total number of right shifts performed on
// `energy` after normalization. This means that `energy` is in
// Q(-tot_rshifts).
if (normalizing_rshifts < 0) {
energy <<= -normalizing_rshifts;
@@ -184,30 +184,30 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
energy >>= normalizing_rshifts;
}
// Calculate the energy of |data_in| in dB, in Q4.
// Calculate the energy of `data_in` in dB, in Q4.
//
// 10 * log10("true energy") in Q4 = 2^4 * 10 * log10("true energy") =
// 160 * log10(|energy| * 2^|tot_rshifts|) =
// 160 * log10(2) * log2(|energy| * 2^|tot_rshifts|) =
// 160 * log10(2) * (log2(|energy|) + log2(2^|tot_rshifts|)) =
// (160 * log10(2)) * (log2(|energy|) + |tot_rshifts|) =
// |kLogConst| * (|log2_energy| + |tot_rshifts|)
// 160 * log10(`energy` * 2^`tot_rshifts`) =
// 160 * log10(2) * log2(`energy` * 2^`tot_rshifts`) =
// 160 * log10(2) * (log2(`energy`) + log2(2^`tot_rshifts`)) =
// (160 * log10(2)) * (log2(`energy`) + `tot_rshifts`) =
// `kLogConst` * (`log2_energy` + `tot_rshifts`)
//
// We know by construction that |energy| is normalized to 15 bits. Hence,
// |energy| = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
// Further, we'd like |log2_energy| in Q10
// log2(|energy|) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
// We know by construction that `energy` is normalized to 15 bits. Hence,
// `energy` = 2^14 + frac_Q15, where frac_Q15 is a fractional part in Q15.
// Further, we'd like `log2_energy` in Q10
// log2(`energy`) in Q10 = 2^10 * log2(2^14 + frac_Q15) =
// 2^10 * log2(2^14 * (1 + frac_Q15 * 2^-14)) =
// 2^10 * (14 + log2(1 + frac_Q15 * 2^-14)) ~=
// (14 << 10) + 2^10 * (frac_Q15 * 2^-14) =
// (14 << 10) + (frac_Q15 * 2^-4) = (14 << 10) + (frac_Q15 >> 4)
//
// Note that frac_Q15 = (|energy| & 0x00003FFF)
// Note that frac_Q15 = (`energy` & 0x00003FFF)
// Calculate and add the fractional part to |log2_energy|.
// Calculate and add the fractional part to `log2_energy`.
log2_energy += (int16_t) ((energy & 0x00003FFF) >> 4);
// |kLogConst| is in Q9, |log2_energy| in Q10 and |tot_rshifts| in Q0.
// `kLogConst` is in Q9, `log2_energy` in Q10 and `tot_rshifts` in Q0.
// Note that we in our derivation above have accounted for an output in Q4.
*log_energy = (int16_t)(((kLogConst * log2_energy) >> 19) +
((tot_rshifts * kLogConst) >> 9));
@@ -222,19 +222,19 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
*log_energy += offset;
// Update the approximate |total_energy| with the energy of |data_in|, if
// |total_energy| has not exceeded |kMinEnergy|. |total_energy| is used as an
// Update the approximate `total_energy` with the energy of `data_in`, if
// `total_energy` has not exceeded `kMinEnergy`. `total_energy` is used as an
// energy indicator in WebRtcVad_GmmProbability() in vad_core.c.
if (*total_energy <= kMinEnergy) {
if (tot_rshifts >= 0) {
// We know by construction that the |energy| > |kMinEnergy| in Q0, so add
// an arbitrary value such that |total_energy| exceeds |kMinEnergy|.
// We know by construction that the `energy` > `kMinEnergy` in Q0, so add
// an arbitrary value such that `total_energy` exceeds `kMinEnergy`.
*total_energy += kMinEnergy + 1;
} else {
// By construction |energy| is represented by 15 bits, hence any number of
// right shifted |energy| will fit in an int16_t. In addition, adding the
// value to |total_energy| is wrap around safe as long as
// |kMinEnergy| < 8192.
// By construction `energy` is represented by 15 bits, hence any number of
// right shifted `energy` will fit in an int16_t. In addition, adding the
// value to `total_energy` is wrap around safe as long as
// `kMinEnergy` < 8192.
*total_energy += (int16_t) (energy >> -tot_rshifts); // Q0.
}
}
@@ -243,14 +243,14 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
size_t data_length, int16_t* features) {
int16_t total_energy = 0;
// We expect |data_length| to be 80, 160 or 240 samples, which corresponds to
// We expect `data_length` to be 80, 160 or 240 samples, which corresponds to
// 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
// have at most 120 samples after the first split and at most 60 samples after
// the second split.
int16_t hp_120[120], lp_120[120];
int16_t hp_60[60], lp_60[60];
const size_t half_data_length = data_length >> 1;
size_t length = half_data_length; // |data_length| / 2, corresponds to
size_t length = half_data_length; // `data_length` / 2, corresponds to
// bandwidth = 2000 Hz after downsampling.
// Initialize variables for the first SplitFilter().
@@ -260,7 +260,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz.
RTC_DCHECK_LE(data_length, 240);
RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum |frequency_band|.
RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum `frequency_band`.
// Split at 2000 Hz and downsample.
SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],
@@ -275,7 +275,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 3000 Hz - 4000 Hz.
length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
length >>= 1; // `data_length` / 4 <=> bandwidth = 1000 Hz.
LogOfEnergy(hp_60, length, kOffsetVector[5], &total_energy, &features[5]);
@@ -287,12 +287,12 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
in_ptr = lp_120; // [0 - 2000] Hz.
hp_out_ptr = hp_60; // [1000 - 2000] Hz.
lp_out_ptr = lp_60; // [0 - 1000] Hz.
length = half_data_length; // |data_length| / 2 <=> bandwidth = 2000 Hz.
length = half_data_length; // `data_length` / 2 <=> bandwidth = 2000 Hz.
SplitFilter(in_ptr, length, &self->upper_state[frequency_band],
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 1000 Hz - 2000 Hz.
length >>= 1; // |data_length| / 4 <=> bandwidth = 1000 Hz.
length >>= 1; // `data_length` / 4 <=> bandwidth = 1000 Hz.
LogOfEnergy(hp_60, length, kOffsetVector[3], &total_energy, &features[3]);
// For the lower band (0 Hz - 1000 Hz) split at 500 Hz and downsample.
@@ -304,7 +304,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 500 Hz - 1000 Hz.
length >>= 1; // |data_length| / 8 <=> bandwidth = 500 Hz.
length >>= 1; // `data_length` / 8 <=> bandwidth = 500 Hz.
LogOfEnergy(hp_120, length, kOffsetVector[2], &total_energy, &features[2]);
// For the lower band (0 Hz - 500 Hz) split at 250 Hz and downsample.
@@ -316,7 +316,7 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
&self->lower_state[frequency_band], hp_out_ptr, lp_out_ptr);
// Energy in 250 Hz - 500 Hz.
length >>= 1; // |data_length| / 16 <=> bandwidth = 250 Hz.
length >>= 1; // `data_length` / 16 <=> bandwidth = 250 Hz.
LogOfEnergy(hp_60, length, kOffsetVector[1], &total_energy, &features[1]);
// Remove 0 Hz - 80 Hz, by high pass filtering the lower band.
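Condensed into one identity, the LogOfEnergy() derivation above says: if the frame energy is E = e * 2^r, with e the mantissa normalized to 15 bits (2^14 <= e < 2^15) and r = tot_rshifts, then

    10 * log10(E) in Q4  =  160 * log10(2) * (log2(e) + r)  ~=  kLogConst * (log2_energy + tot_rshifts)

where kLogConst holds 160 * log10(2) in Q9 and log2_energy approximates log2(e) in Q10 as (14 << 10) + (frac_Q15 >> 4), exactly as the comment block spells out. The kOffsetVector offset and the total_energy update are then applied on top of that result.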

View File

@@ -17,8 +17,8 @@
#include "common_audio/vad/vad_core.h"
// Takes |data_length| samples of |data_in| and calculates the logarithm of the
// energy of each of the |kNumChannels| = 6 frequency bands used by the VAD:
// Takes `data_length` samples of `data_in` and calculates the logarithm of the
// energy of each of the `kNumChannels` = 6 frequency bands used by the VAD:
// 80 Hz - 250 Hz
// 250 Hz - 500 Hz
// 500 Hz - 1000 Hz
@@ -26,10 +26,10 @@
// 2000 Hz - 3000 Hz
// 3000 Hz - 4000 Hz
//
// The values are given in Q4 and written to |features|. Further, an approximate
// The values are given in Q4 and written to `features`. Further, an approximate
// overall energy is returned. The return value is used in
// WebRtcVad_GmmProbability() as a signal indicator, hence it is arbitrary above
// the threshold |kMinEnergy|.
// the threshold `kMinEnergy`.
//
// - self [i/o] : State information of the VAD.
// - data_in [i] : Input audio data, for feature extraction.

View File

@@ -15,16 +15,16 @@
static const int32_t kCompVar = 22005;
static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12.
// For a normal distribution, the probability of |input| is calculated and
// For a normal distribution, the probability of `input` is calculated and
// returned (in Q20). The formula for normal distributed probability is
//
// 1 / s * exp(-(x - m)^2 / (2 * s^2))
//
// where the parameters are given in the following Q domains:
// m = |mean| (Q7)
// s = |std| (Q7)
// x = |input| (Q4)
// in addition to the probability we output |delta| (in Q11) used when updating
// m = `mean` (Q7)
// s = `std` (Q7)
// x = `input` (Q4)
// in addition to the probability we output `delta` (in Q11) used when updating
// the noise/speech model.
int32_t WebRtcVad_GaussianProbability(int16_t input,
int16_t mean,
@@ -33,13 +33,13 @@ int32_t WebRtcVad_GaussianProbability(int16_t input,
int16_t tmp16, inv_std, inv_std2, exp_value = 0;
int32_t tmp32;
// Calculate |inv_std| = 1 / s, in Q10.
// 131072 = 1 in Q17, and (|std| >> 1) is for rounding instead of truncation.
// Calculate `inv_std` = 1 / s, in Q10.
// 131072 = 1 in Q17, and (`std` >> 1) is for rounding instead of truncation.
// Q-domain: Q17 / Q7 = Q10.
tmp32 = (int32_t) 131072 + (int32_t) (std >> 1);
inv_std = (int16_t) WebRtcSpl_DivW32W16(tmp32, std);
// Calculate |inv_std2| = 1 / s^2, in Q14.
// Calculate `inv_std2` = 1 / s^2, in Q14.
tmp16 = (inv_std >> 2); // Q10 -> Q8.
// Q-domain: (Q8 * Q8) >> 2 = Q14.
inv_std2 = (int16_t)((tmp16 * tmp16) >> 2);
@@ -51,20 +51,20 @@ int32_t WebRtcVad_GaussianProbability(int16_t input,
tmp16 = tmp16 - mean; // Q7 - Q7 = Q7
// To be used later, when updating noise/speech model.
// |delta| = (x - m) / s^2, in Q11.
// `delta` = (x - m) / s^2, in Q11.
// Q-domain: (Q14 * Q7) >> 10 = Q11.
*delta = (int16_t)((inv_std2 * tmp16) >> 10);
// Calculate the exponent |tmp32| = (x - m)^2 / (2 * s^2), in Q10. Replacing
// Calculate the exponent `tmp32` = (x - m)^2 / (2 * s^2), in Q10. Replacing
// division by two with one shift.
// Q-domain: (Q11 * Q7) >> 8 = Q10.
tmp32 = (*delta * tmp16) >> 9;
// If the exponent is small enough to give a non-zero probability we calculate
// |exp_value| ~= exp(-(x - m)^2 / (2 * s^2))
// ~= exp2(-log2(exp(1)) * |tmp32|).
// `exp_value` ~= exp(-(x - m)^2 / (2 * s^2))
// ~= exp2(-log2(exp(1)) * `tmp32`).
if (tmp32 < kCompVar) {
// Calculate |tmp16| = log2(exp(1)) * |tmp32|, in Q10.
// Calculate `tmp16` = log2(exp(1)) * `tmp32`, in Q10.
// Q-domain: (Q12 * Q10) >> 12 = Q10.
tmp16 = (int16_t)((kLog2Exp * tmp32) >> 12);
tmp16 = -tmp16;
@@ -72,7 +72,7 @@ int32_t WebRtcVad_GaussianProbability(int16_t input,
tmp16 ^= 0xFFFF;
tmp16 >>= 10;
tmp16 += 1;
// Get |exp_value| = exp(-|tmp32|) in Q10.
// Get `exp_value` = exp(-`tmp32`) in Q10.
exp_value >>= tmp16;
}
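In floating-point terms the routine above evaluates (a sketch; the code stays entirely in the Q domains listed in its comments):

    probability = 1/s * exp(-(x - m)^2 / (2 * s^2))    // returned in Q20
    delta       = (x - m) / s^2                        // returned in Q11

with x = input (Q4), m = mean (Q7) and s = std (Q7). The exponential is computed as exp2(-log2(e) * t), where t = (x - m)^2 / (2 * s^2) in Q10, and is treated as zero once t reaches kCompVar.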

View File

@@ -15,8 +15,8 @@
#include <stdint.h>
// Calculates the probability for |input|, given that |input| comes from a
// normal distribution with mean and standard deviation (|mean|, |std|).
// Calculates the probability for `input`, given that `input` comes from a
// normal distribution with mean and standard deviation (`mean`, `std`).
//
// Inputs:
// - input : input sample in Q4.
@@ -26,11 +26,11 @@
// Output:
//
// - delta : input used when updating the model, Q11.
// |delta| = (|input| - |mean|) / |std|^2.
// `delta` = (`input` - `mean`) / `std`^2.
//
// Return:
// (probability for |input|) =
// 1 / |std| * exp(-(|input| - |mean|)^2 / (2 * |std|^2));
// (probability for `input`) =
// 1 / `std` * exp(-(`input` - `mean`)^2 / (2 * `std`^2));
int32_t WebRtcVad_GaussianProbability(int16_t input,
int16_t mean,
int16_t std,

View File

@@ -52,7 +52,7 @@ void WebRtcVad_Downsampling(const int16_t* signal_in,
filter_state[1] = tmp32_2;
}
// Inserts |feature_value| into |low_value_vector|, if it is one of the 16
// Inserts `feature_value` into `low_value_vector`, if it is one of the 16
// smallest values the last 100 frames. Then calculates and returns the median
// of the five smallest values.
int16_t WebRtcVad_FindMinimum(VadInstT* self,
@@ -66,13 +66,13 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
int16_t alpha = 0;
int32_t tmp32 = 0;
// Pointer to memory for the 16 minimum values and the age of each value of
// the |channel|.
// the `channel`.
int16_t* age = &self->index_vector[offset];
int16_t* smallest_values = &self->low_value_vector[offset];
RTC_DCHECK_LT(channel, kNumChannels);
// Each value in |smallest_values| is getting 1 loop older. Update |age|, and
// Each value in `smallest_values` is getting 1 loop older. Update `age`, and
// remove old values.
for (i = 0; i < 16; i++) {
if (age[i] != 100) {
@@ -88,9 +88,9 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
}
}
// Check if |feature_value| is smaller than any of the values in
// |smallest_values|. If so, find the |position| where to insert the new value
// (|feature_value|).
// Check if `feature_value` is smaller than any of the values in
// `smallest_values`. If so, find the `position` where to insert the new value
// (`feature_value`).
if (feature_value < smallest_values[7]) {
if (feature_value < smallest_values[3]) {
if (feature_value < smallest_values[1]) {
@@ -152,7 +152,7 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
age[position] = 1;
}
// Get |current_median|.
// Get `current_median`.
if (self->frame_counter > 2) {
current_median = smallest_values[2];
} else if (self->frame_counter > 0) {
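The bookkeeping in WebRtcVad_FindMinimum() is easier to follow stripped of the fixed-point smoothing: per channel it keeps the 16 smallest feature values seen over the last 100 frames, each with an age, and the reported minimum is the median of the five smallest. A simplified sketch of just that bookkeeping; MinTracker / TrackMinimum are hypothetical names, not the real implementation:

#include <stdint.h>

#define kSlots 16
#define kMaxAge 100

// Hypothetical per-channel state mirroring low_value_vector / index_vector.
typedef struct {
  int16_t value[kSlots];  // Sorted ascending; smallest value first.
  int16_t age[kSlots];    // Frames since the value was inserted.
} MinTracker;

static void InitMinTracker(MinTracker* t) {
  for (int i = 0; i < kSlots; i++) {
    t->value[i] = INT16_MAX;  // Empty-slot sentinel.
    t->age[i] = 0;
  }
}

// Inserts `feature` if it is among the 16 smallest values of the last
// 100 frames, then returns the median of the five smallest values.
static int16_t TrackMinimum(MinTracker* t, int16_t feature) {
  // Age every slot; a slot that reaches kMaxAge is dropped by shifting the
  // remaining values down and re-opening the last slot.
  for (int i = 0; i < kSlots; i++) {
    if (t->age[i] != kMaxAge) {
      t->age[i]++;
    } else {
      for (int j = i; j < kSlots - 1; j++) {
        t->value[j] = t->value[j + 1];
        t->age[j] = t->age[j + 1];
      }
      t->value[kSlots - 1] = INT16_MAX;
      t->age[kSlots - 1] = 0;
      i--;  // Re-examine the entry shifted into position i.
    }
  }
  // Keep `feature` only if it beats the largest stored value, inserting it
  // at its sorted position.
  if (feature < t->value[kSlots - 1]) {
    int i = kSlots - 1;
    while (i > 0 && feature < t->value[i - 1]) {
      t->value[i] = t->value[i - 1];
      t->age[i] = t->age[i - 1];
      i--;
    }
    t->value[i] = feature;
    t->age[i] = 1;
  }
  return t->value[2];  // Median of the five smallest values.
}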

View File

@@ -23,11 +23,11 @@
//
// Input & Output:
// - filter_state : Current filter states of the two all-pass filters. The
// |filter_state| is updated after all samples have been
// `filter_state` is updated after all samples have been
// processed.
//
// Output:
// - signal_out : Downsampled signal (of length |in_length| / 2).
// - signal_out : Downsampled signal (of length `in_length` / 2).
void WebRtcVad_Downsampling(const int16_t* signal_in,
int16_t* signal_out,
int32_t* filter_state,
@@ -35,7 +35,7 @@ void WebRtcVad_Downsampling(const int16_t* signal_in,
// Updates and returns the smoothed feature minimum. As minimum we use the
// median of the five smallest feature values in a 100 frames long window.
// As long as |handle->frame_counter| is zero, that is, we haven't received any
// As long as `handle->frame_counter` is zero, that is, we haven't received any
// "valid" data, FindMinimum() outputs the default value of 1600.
//
// Inputs:

View File

@@ -21,7 +21,7 @@ static const int kValidRates[] = { 8000, 16000, 32000, 48000 };
static const size_t kRatesSize = sizeof(kValidRates) / sizeof(*kValidRates);
static const int kMaxFrameLengthMs = 30;
VadInst* WebRtcVad_Create() {
VadInst* WebRtcVad_Create(void) {
VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
self->init_flag = 0;
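The `()` to `(void)` change above is not cosmetic in C: before C23, an empty parameter list leaves the parameter types unspecified, so it provides no prototype and a call with stray arguments is not diagnosed, and newer compilers can warn about such non-prototype declarations and definitions. With `(void)` the no-argument contract is explicit. As seen by a C caller:

    VadInst* WebRtcVad_Create();      /* parameter types unspecified; WebRtcVad_Create(42) still compiles */
    VadInst* WebRtcVad_Create(void);  /* takes no arguments; WebRtcVad_Create(42) is rejected */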