Update audio_processing module

Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1 Update notes: * Pull in third party license file * Replace .gypi files with BUILD.gn to keep track of what changes upstream * Bunch of new filse pulled in as dependencies * Won't build yet due to changes needed on top of these
2015-10-13 17:25:22 +05:30
parent 5ae7a5d6cd
commit 753eada3aa
324 changed files with 52533 additions and 16117 deletions
--- a/webrtc/modules/audio_processing/vad/pitch_based_vad.cc
+++ b/webrtc/modules/audio_processing/vad/pitch_based_vad.cc
@@ -0,0 +1,124 @@
+/*
+ *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
+
+#include <assert.h>
+#include <math.h>
+#include <string.h>
+
+#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
+#include "webrtc/modules/audio_processing/vad/common.h"
+#include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
+#include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
+#include "webrtc/modules/interface/module_common_types.h"
+
+namespace webrtc {
+
+static_assert(kNoiseGmmDim == kVoiceGmmDim,
+              "noise and voice gmm dimension not equal");
+
+// These values should match MATLAB counterparts for unit-tests to pass.
+static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
+static const double kInitialPriorProbability = 0.3;
+static const int kTransientWidthThreshold = 7;
+static const double kLowProbabilityThreshold = 0.2;
+
+static double LimitProbability(double p) {
+  const double kLimHigh = 0.99;
+  const double kLimLow = 0.01;
+
+  if (p > kLimHigh)
+    p = kLimHigh;
+  else if (p < kLimLow)
+    p = kLimLow;
+  return p;
+}
+
+PitchBasedVad::PitchBasedVad()
+    : p_prior_(kInitialPriorProbability),
+      circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) {
+  // Setup noise GMM.
+  noise_gmm_.dimension = kNoiseGmmDim;
+  noise_gmm_.num_mixtures = kNoiseGmmNumMixtures;
+  noise_gmm_.weight = kNoiseGmmWeights;
+  noise_gmm_.mean = &kNoiseGmmMean[0][0];
+  noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0];
+
+  // Setup voice GMM.
+  voice_gmm_.dimension = kVoiceGmmDim;
+  voice_gmm_.num_mixtures = kVoiceGmmNumMixtures;
+  voice_gmm_.weight = kVoiceGmmWeights;
+  voice_gmm_.mean = &kVoiceGmmMean[0][0];
+  voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
+}
+
+PitchBasedVad::~PitchBasedVad() {
+}
+
+int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
+                                      double* p_combined) {
+  double p;
+  double gmm_features[3];
+  double pdf_features_given_voice;
+  double pdf_features_given_noise;
+  // These limits are the same in matlab implementation 'VoicingProbGMM().'
+  const double kLimLowLogPitchGain = -2.0;
+  const double kLimHighLogPitchGain = -0.9;
+  const double kLimLowSpectralPeak = 200;
+  const double kLimHighSpectralPeak = 2000;
+  const double kEps = 1e-12;
+  for (size_t n = 0; n < features.num_frames; n++) {
+    gmm_features[0] = features.log_pitch_gain[n];
+    gmm_features[1] = features.spectral_peak[n];
+    gmm_features[2] = features.pitch_lag_hz[n];
+
+    pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_);
+    pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_);
+
+    if (features.spectral_peak[n] < kLimLowSpectralPeak ||
+        features.spectral_peak[n] > kLimHighSpectralPeak ||
+        features.log_pitch_gain[n] < kLimLowLogPitchGain) {
+      pdf_features_given_voice = kEps * pdf_features_given_noise;
+    } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) {
+      pdf_features_given_noise = kEps * pdf_features_given_voice;
+    }
+
+    p = p_prior_ * pdf_features_given_voice /
+        (pdf_features_given_voice * p_prior_ +
+         pdf_features_given_noise * (1 - p_prior_));
+
+    p = LimitProbability(p);
+
+    // Combine pitch-based probability with standalone probability, before
+    // updating prior probabilities.
+    double prod_active = p * p_combined[n];
+    double prod_inactive = (1 - p) * (1 - p_combined[n]);
+    p_combined[n] = prod_active / (prod_active + prod_inactive);
+
+    if (UpdatePrior(p_combined[n]) < 0)
+      return -1;
+    // Limit prior probability. With a zero prior probability the posterior
+    // probability is always zero.
+    p_prior_ = LimitProbability(p_prior_);
+  }
+  return 0;
+}
+
+int PitchBasedVad::UpdatePrior(double p) {
+  circular_buffer_->Insert(p);
+  if (circular_buffer_->RemoveTransient(kTransientWidthThreshold,
+                                        kLowProbabilityThreshold) < 0)
+    return -1;
+  p_prior_ = circular_buffer_->Mean();
+  return 0;
+}
+
+}  // namespace webrtc