Update audio_processing module
Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1 Update notes: * Pull in third party license file * Replace .gypi files with BUILD.gn to keep track of what changes upstream * Bunch of new files pulled in as dependencies * Won't build yet due to changes needed on top of these
This commit is contained in:
		
							
								
								
									
										27
									
								
								webrtc/modules/audio_processing/vad/common.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								webrtc/modules/audio_processing/vad/common.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,27 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_ | ||||
|  | ||||
#include <stddef.h>  // size_t, so this header compiles on its own.

// Sample rate, in Hz.
static const int kSampleRateHz = 16000;
// Number of samples in 10 ms at |kSampleRateHz| (16000 / 100 = 160).
static const size_t kLength10Ms = kSampleRateHz / 100;
// Capacity of each per-frame array in AudioFeatures.
static const size_t kMaxNumFrames = 4;

// Per-frame features handed to the voice-activity detectors. Each array holds
// up to |kMaxNumFrames| entries.
struct AudioFeatures {
  double log_pitch_gain[kMaxNumFrames];
  double pitch_lag_hz[kMaxNumFrames];
  double spectral_peak[kMaxNumFrames];
  double rms[kMaxNumFrames];
  // Number of frames to process; consumers index the arrays above with
  // 0 .. num_frames - 1, so this must not exceed |kMaxNumFrames|.
  size_t num_frames;
  // NOTE(review): not read by the code visible alongside this header;
  // presumably flags an input that was detected as silent -- confirm against
  // the producer of this struct.
  bool silence;
};
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_ | ||||
							
								
								
									
										64
									
								
								webrtc/modules/audio_processing/vad/gmm.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										64
									
								
								webrtc/modules/audio_processing/vad/gmm.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,64 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/gmm.h" | ||||
|  | ||||
| #include <math.h> | ||||
| #include <stdlib.h> | ||||
|  | ||||
| #include "webrtc/typedefs.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| static const int kMaxDimension = 10; | ||||
|  | ||||
// Writes the element-wise difference |in| - |mean_vec| to |out|. All three
// arrays are accessed at indices 0 .. |dimension| - 1.
static void RemoveMean(const double* in,
                       const double* mean_vec,
                       int dimension,
                       double* out) {
  int idx = 0;
  while (idx < dimension) {
    out[idx] = in[idx] - mean_vec[idx];
    ++idx;
  }
}
|  | ||||
// Returns -0.5 * in' * M * in, where M is the |dimension| x |dimension| matrix
// stored row after row in |covar_inv|.
static double ComputeExponent(const double* in,
                              const double* covar_inv,
                              int dimension) {
  double quadratic_form = 0;
  for (int row = 0; row < dimension; ++row) {
    const double* matrix_row = covar_inv + row * dimension;
    // Dot product of the current matrix row with the input vector.
    double row_dot = 0;
    for (int col = 0; col < dimension; ++col)
      row_dot += matrix_row[col] * in[col];
    quadratic_form += row_dot * in[row];
  }
  return quadratic_form * -0.5;
}
|  | ||||
| double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters) { | ||||
|   if (gmm_parameters.dimension > kMaxDimension) { | ||||
|     return -1;  // This is invalid pdf so the caller can check this. | ||||
|   } | ||||
|   double f = 0; | ||||
|   double v[kMaxDimension]; | ||||
|   const double* mean_vec = gmm_parameters.mean; | ||||
|   const double* covar_inv = gmm_parameters.covar_inverse; | ||||
|  | ||||
|   for (int n = 0; n < gmm_parameters.num_mixtures; n++) { | ||||
|     RemoveMean(x, mean_vec, gmm_parameters.dimension, v); | ||||
|     double q = ComputeExponent(v, covar_inv, gmm_parameters.dimension) + | ||||
|                gmm_parameters.weight[n]; | ||||
|     f += exp(q); | ||||
|     mean_vec += gmm_parameters.dimension; | ||||
|     covar_inv += gmm_parameters.dimension * gmm_parameters.dimension; | ||||
|   } | ||||
|   return f; | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
							
								
								
									
										45
									
								
								webrtc/modules/audio_processing/vad/gmm.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										45
									
								
								webrtc/modules/audio_processing/vad/gmm.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,45 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_ | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // A structure that specifies a GMM. | ||||
| // A GMM is formulated as | ||||
| //  f(x) = w[0] * mixture[0] + w[1] * mixture[1] + ... + | ||||
| //         w[num_mixtures - 1] * mixture[num_mixtures - 1]; | ||||
| // Where a 'mixture' is a Gaussian density. | ||||
|  | ||||
struct GmmParameters {
  // One entry per mixture:
  // weight[n] = log(w[n]) - |dimension|/2 * log(2*pi) - 1/2 * log(det(cov[n]));
  // where cov[n] is the covariance matrix of mixture n.
  const double* weight;
  // Pointer to the first element of a |num_mixtures|x|dimension| matrix
  // where the kth row is the mean of the kth mixture.
  const double* mean;
  // Pointer to the first element of a |num_mixtures|x|dimension|x|dimension|
  // 3D-matrix, where the kth 2D-matrix is the inverse of the covariance
  // matrix of the kth mixture.
  const double* covar_inverse;
  // Dimensionality of the mixtures.
  int dimension;
  // Number of mixtures.
  int num_mixtures;
};
|  | ||||
| // Evaluate the given GMM, according to |gmm_parameters|, at the given point | ||||
| // |x|. If the dimensionality of the given GMM is larger than the maximum | ||||
| // dimension supported by this function, -1 is returned. | ||||
| double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters); | ||||
|  | ||||
| }  // namespace webrtc | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_ | ||||
							
								
								
									
										85
									
								
								webrtc/modules/audio_processing/vad/noise_gmm_tables.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								webrtc/modules/audio_processing/vad/noise_gmm_tables.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,85 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| // GMM tables for inactive segments. Generated by MakeGmmTables.m. | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ | ||||
|  | ||||
| static const int kNoiseGmmNumMixtures = 12; | ||||
| static const int kNoiseGmmDim = 3; | ||||
|  | ||||
| static const double | ||||
|     kNoiseGmmCovarInverse[kNoiseGmmNumMixtures][kNoiseGmmDim][kNoiseGmmDim] = { | ||||
|         {{7.36219567592941e+00, 4.83060785179861e-03, 1.23335151497610e-02}, | ||||
|          {4.83060785179861e-03, 1.65289507047817e-04, -2.41490588169997e-04}, | ||||
|          {1.23335151497610e-02, -2.41490588169997e-04, 6.59472060689382e-03}}, | ||||
|         {{8.70265239309140e+00, -5.30636201431086e-04, 5.44014966585347e-03}, | ||||
|          {-5.30636201431086e-04, 3.11095453521008e-04, -1.86287206836035e-04}, | ||||
|          {5.44014966585347e-03, -1.86287206836035e-04, 6.29493388790744e-04}}, | ||||
|         {{4.53467851955055e+00, -3.92977536695197e-03, -2.46521420693317e-03}, | ||||
|          {-3.92977536695197e-03, 4.94650752632750e-05, -1.08587438501826e-05}, | ||||
|          {-2.46521420693317e-03, -1.08587438501826e-05, 9.28793975422261e-05}}, | ||||
|         {{9.26817997114275e-01, -4.03976069276753e-04, -3.56441427392165e-03}, | ||||
|          {-4.03976069276753e-04, 2.51976251631430e-06, 1.46914206734572e-07}, | ||||
|          {-3.56441427392165e-03, 1.46914206734572e-07, 8.19914567685373e-05}}, | ||||
|         {{7.61715986787441e+00, -1.54889041216888e-04, 2.41756280071656e-02}, | ||||
|          {-1.54889041216888e-04, 3.50282550461672e-07, -6.27251196972490e-06}, | ||||
|          {2.41756280071656e-02, -6.27251196972490e-06, 1.45061847649872e-02}}, | ||||
|         {{8.31193642663158e+00, -3.84070508164323e-04, -3.09750630821876e-02}, | ||||
|          {-3.84070508164323e-04, 3.80433432277336e-07, -1.14321142836636e-06}, | ||||
|          {-3.09750630821876e-02, -1.14321142836636e-06, 8.35091486289997e-04}}, | ||||
|         {{9.67283151270894e-01, 5.82465812445039e-05, -3.18350798617053e-03}, | ||||
|          {5.82465812445039e-05, 2.23762672000318e-07, -7.74196587408623e-07}, | ||||
|          {-3.18350798617053e-03, -7.74196587408623e-07, 3.85120938338325e-04}}, | ||||
|         {{8.28066236985388e+00, 5.87634508319763e-05, 6.99303090891743e-03}, | ||||
|          {5.87634508319763e-05, 2.93746018618058e-07, 3.40843332882272e-07}, | ||||
|          {6.99303090891743e-03, 3.40843332882272e-07, 1.99379171190344e-04}}, | ||||
|         {{6.07488998675646e+00, -1.11494526618473e-02, 5.10013111123381e-03}, | ||||
|          {-1.11494526618473e-02, 6.99238879921751e-04, 5.36718550370870e-05}, | ||||
|          {5.10013111123381e-03, 5.36718550370870e-05, 5.26909853276753e-04}}, | ||||
|         {{6.90492021419175e+00, 4.20639355257863e-04, -2.38612752336481e-03}, | ||||
|          {4.20639355257863e-04, 3.31246767338153e-06, -2.42052288150859e-08}, | ||||
|          {-2.38612752336481e-03, -2.42052288150859e-08, 4.46608368363412e-04}}, | ||||
|         {{1.31069150869715e+01, -1.73718583865670e-04, -1.97591814508578e-02}, | ||||
|          {-1.73718583865670e-04, 2.80451716300124e-07, 9.96570755379865e-07}, | ||||
|          {-1.97591814508578e-02, 9.96570755379865e-07, 2.41361900868847e-03}}, | ||||
|         {{4.69566344239814e+00, -2.61077567563690e-04, 5.26359000761433e-03}, | ||||
|          {-2.61077567563690e-04, 1.82420859823767e-06, -7.83645887541601e-07}, | ||||
|          {5.26359000761433e-03, -7.83645887541601e-07, 1.33586288288802e-02}}}; | ||||
|  | ||||
| static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = { | ||||
|     {-2.01386094766163e+00, 1.69702162045397e+02, 7.41715804872181e+01}, | ||||
|     {-1.94684591777290e+00, 1.42398396732668e+02, 1.64186321157831e+02}, | ||||
|     {-2.29319297562437e+00, 3.86415425589868e+02, 2.13452215267125e+02}, | ||||
|     {-3.25487177070268e+00, 1.08668712553616e+03, 2.33119949467419e+02}, | ||||
|     {-2.13159632447467e+00, 4.83821702557717e+03, 6.86786166673740e+01}, | ||||
|     {-2.26171410780526e+00, 4.79420193982422e+03, 1.53222513286450e+02}, | ||||
|     {-3.32166740703185e+00, 4.35161135834358e+03, 1.33206448431316e+02}, | ||||
|     {-2.19290322814343e+00, 3.98325506609408e+03, 2.13249167359934e+02}, | ||||
|     {-2.02898459255404e+00, 7.37039893155007e+03, 1.12518527491926e+02}, | ||||
|     {-2.26150236399500e+00, 1.54896745196145e+03, 1.49717357868579e+02}, | ||||
|     {-2.00417668301790e+00, 3.82434760310304e+03, 1.07438913004312e+02}, | ||||
|     {-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}}; | ||||
|  | ||||
| static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = { | ||||
|     -1.09422832086193e+01, | ||||
|     -1.10847897513425e+01, | ||||
|     -1.36767587732187e+01, | ||||
|     -1.79789356118641e+01, | ||||
|     -1.42830169160894e+01, | ||||
|     -1.56500228061379e+01, | ||||
|     -1.83124990950113e+01, | ||||
|     -1.69979436177477e+01, | ||||
|     -1.12329424387828e+01, | ||||
|     -1.41311785780639e+01, | ||||
|     -1.47171861448585e+01, | ||||
|     -1.35963362781839e+01}; | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_ | ||||
							
								
								
									
										124
									
								
								webrtc/modules/audio_processing/vad/pitch_based_vad.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								webrtc/modules/audio_processing/vad/pitch_based_vad.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,124 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h" | ||||
|  | ||||
| #include <assert.h> | ||||
| #include <math.h> | ||||
| #include <string.h> | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h" | ||||
| #include "webrtc/modules/audio_processing/vad/common.h" | ||||
| #include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h" | ||||
| #include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h" | ||||
| #include "webrtc/modules/interface/module_common_types.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
// Both models are evaluated on the same feature vector, so their dimensions
// must agree.
static_assert(kNoiseGmmDim == kVoiceGmmDim,
              "noise and voice gmm dimension not equal");

// These values should match MATLAB counterparts for unit-tests to pass.
// Size handed to VadCircularBuffer::Create() for the posterior history.
static const int kPosteriorHistorySize = 500;  // 5 sec of 10 ms frames.
// Initial value of the prior voicing probability.
static const double kInitialPriorProbability = 0.3;
// Arguments forwarded to VadCircularBuffer::RemoveTransient() whenever the
// prior is updated.
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
|  | ||||
// Clamps |p| to the closed interval [0.01, 0.99]; values already inside the
// interval are returned unchanged.
static double LimitProbability(double p) {
  const double kLimLow = 0.01;
  const double kLimHigh = 0.99;

  if (p > kLimHigh)
    return kLimHigh;
  if (p < kLimLow)
    return kLimLow;
  return p;
}
|  | ||||
| PitchBasedVad::PitchBasedVad() | ||||
|     : p_prior_(kInitialPriorProbability), | ||||
|       circular_buffer_(VadCircularBuffer::Create(kPosteriorHistorySize)) { | ||||
|   // Setup noise GMM. | ||||
|   noise_gmm_.dimension = kNoiseGmmDim; | ||||
|   noise_gmm_.num_mixtures = kNoiseGmmNumMixtures; | ||||
|   noise_gmm_.weight = kNoiseGmmWeights; | ||||
|   noise_gmm_.mean = &kNoiseGmmMean[0][0]; | ||||
|   noise_gmm_.covar_inverse = &kNoiseGmmCovarInverse[0][0][0]; | ||||
|  | ||||
|   // Setup voice GMM. | ||||
|   voice_gmm_.dimension = kVoiceGmmDim; | ||||
|   voice_gmm_.num_mixtures = kVoiceGmmNumMixtures; | ||||
|   voice_gmm_.weight = kVoiceGmmWeights; | ||||
|   voice_gmm_.mean = &kVoiceGmmMean[0][0]; | ||||
|   voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0]; | ||||
| } | ||||
|  | ||||
| PitchBasedVad::~PitchBasedVad() { | ||||
| } | ||||
|  | ||||
| int PitchBasedVad::VoicingProbability(const AudioFeatures& features, | ||||
|                                       double* p_combined) { | ||||
|   double p; | ||||
|   double gmm_features[3]; | ||||
|   double pdf_features_given_voice; | ||||
|   double pdf_features_given_noise; | ||||
|   // These limits are the same in matlab implementation 'VoicingProbGMM().' | ||||
|   const double kLimLowLogPitchGain = -2.0; | ||||
|   const double kLimHighLogPitchGain = -0.9; | ||||
|   const double kLimLowSpectralPeak = 200; | ||||
|   const double kLimHighSpectralPeak = 2000; | ||||
|   const double kEps = 1e-12; | ||||
|   for (size_t n = 0; n < features.num_frames; n++) { | ||||
|     gmm_features[0] = features.log_pitch_gain[n]; | ||||
|     gmm_features[1] = features.spectral_peak[n]; | ||||
|     gmm_features[2] = features.pitch_lag_hz[n]; | ||||
|  | ||||
|     pdf_features_given_voice = EvaluateGmm(gmm_features, voice_gmm_); | ||||
|     pdf_features_given_noise = EvaluateGmm(gmm_features, noise_gmm_); | ||||
|  | ||||
|     if (features.spectral_peak[n] < kLimLowSpectralPeak || | ||||
|         features.spectral_peak[n] > kLimHighSpectralPeak || | ||||
|         features.log_pitch_gain[n] < kLimLowLogPitchGain) { | ||||
|       pdf_features_given_voice = kEps * pdf_features_given_noise; | ||||
|     } else if (features.log_pitch_gain[n] > kLimHighLogPitchGain) { | ||||
|       pdf_features_given_noise = kEps * pdf_features_given_voice; | ||||
|     } | ||||
|  | ||||
|     p = p_prior_ * pdf_features_given_voice / | ||||
|         (pdf_features_given_voice * p_prior_ + | ||||
|          pdf_features_given_noise * (1 - p_prior_)); | ||||
|  | ||||
|     p = LimitProbability(p); | ||||
|  | ||||
|     // Combine pitch-based probability with standalone probability, before | ||||
|     // updating prior probabilities. | ||||
|     double prod_active = p * p_combined[n]; | ||||
|     double prod_inactive = (1 - p) * (1 - p_combined[n]); | ||||
|     p_combined[n] = prod_active / (prod_active + prod_inactive); | ||||
|  | ||||
|     if (UpdatePrior(p_combined[n]) < 0) | ||||
|       return -1; | ||||
|     // Limit prior probability. With a zero prior probability the posterior | ||||
|     // probability is always zero. | ||||
|     p_prior_ = LimitProbability(p_prior_); | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| int PitchBasedVad::UpdatePrior(double p) { | ||||
|   circular_buffer_->Insert(p); | ||||
|   if (circular_buffer_->RemoveTransient(kTransientWidthThreshold, | ||||
|                                         kLowProbabilityThreshold) < 0) | ||||
|     return -1; | ||||
|   p_prior_ = circular_buffer_->Mean(); | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
							
								
								
									
										57
									
								
								webrtc/modules/audio_processing/vad/pitch_based_vad.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										57
									
								
								webrtc/modules/audio_processing/vad/pitch_based_vad.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,57 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_ | ||||
|  | ||||
| #include "webrtc/base/scoped_ptr.h" | ||||
| #include "webrtc/modules/audio_processing/vad/common.h" | ||||
| #include "webrtc/modules/audio_processing/vad/gmm.h" | ||||
| #include "webrtc/typedefs.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| class AudioFrame; | ||||
| class VadCircularBuffer; | ||||
|  | ||||
| // Computes the probability of the input audio frame to be active given | ||||
| // the corresponding pitch-gain and lag of the frame. | ||||
class PitchBasedVad {
 public:
  PitchBasedVad();
  ~PitchBasedVad();

  // Compute pitch-based voicing probability, given the features.
  //   features: a structure containing features required for computing voicing
  //             probabilities.
  //
  //   p_combined: an array which contains the combined activity probabilities
  //               computed prior to the call of this function. The method,
  //               then, computes the voicing probabilities and combines them
  //               with the given values. The results are written back to
  //               |p_combined|.
  // Returns 0 on success and -1 on failure.
  int VoicingProbability(const AudioFeatures& features, double* p_combined);

 private:
  // Adds |p| to the posterior history and recomputes |p_prior_| from it.
  // Returns 0 on success and -1 on failure.
  int UpdatePrior(double p);

  // TODO(turajs): maybe defining this at a higher level (maybe enum) so that
  // all the code recognize it as "no-error."
  static const int kNoError = 0;

  // Descriptors of the noise and voice mixture models.
  GmmParameters noise_gmm_;
  GmmParameters voice_gmm_;

  // Prior probability of voice used when computing the posterior.
  double p_prior_;

  // History of combined probabilities from which |p_prior_| is derived.
  rtc::scoped_ptr<VadCircularBuffer> circular_buffer_;
};
|  | ||||
| }  // namespace webrtc | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_ | ||||
							
								
								
									
										51
									
								
								webrtc/modules/audio_processing/vad/pitch_internal.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								webrtc/modules/audio_processing/vad/pitch_internal.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,51 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/pitch_internal.h" | ||||
|  | ||||
| #include <cmath> | ||||
|  | ||||
// A 4-to-3 linear interpolation: reads |old_val| and in[0..3], writes
// out[0..2].
// The interpolation constants are derived as follows:
// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
// we are interested in pitch parameters of 0-5 ms, 10-15ms and 20-25ms. This is
// like interpolating 4-to-6 and keeping the odd samples.
// The reason behind this is that LPC coefficients are computed for the first
// half of each 10ms interval.
static void PitchInterpolation(double old_val, const double* in, double* out) {
  const double kOneSixth = 1. / 6.;
  const double kFiveSixths = 5. / 6.;
  const double kHalf = 0.5;

  out[0] = kOneSixth * old_val + kFiveSixths * in[0];
  out[1] = kFiveSixths * in[1] + kOneSixth * in[2];
  out[2] = kHalf * in[2] + kHalf * in[3];
}
|  | ||||
| void GetSubframesPitchParameters(int sampling_rate_hz, | ||||
|                                  double* gains, | ||||
|                                  double* lags, | ||||
|                                  int num_in_frames, | ||||
|                                  int num_out_frames, | ||||
|                                  double* log_old_gain, | ||||
|                                  double* old_lag, | ||||
|                                  double* log_pitch_gain, | ||||
|                                  double* pitch_lag_hz) { | ||||
|   // Gain interpolation is in log-domain, also returned in log-domain. | ||||
|   for (int n = 0; n < num_in_frames; n++) | ||||
|     gains[n] = log(gains[n] + 1e-12); | ||||
|  | ||||
|   // Interpolate lags and gains. | ||||
|   PitchInterpolation(*log_old_gain, gains, log_pitch_gain); | ||||
|   *log_old_gain = gains[num_in_frames - 1]; | ||||
|   PitchInterpolation(*old_lag, lags, pitch_lag_hz); | ||||
|   *old_lag = lags[num_in_frames - 1]; | ||||
|  | ||||
|   // Convert pitch-lags to Hertz. | ||||
|   for (int n = 0; n < num_out_frames; n++) { | ||||
|     pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]); | ||||
|   } | ||||
| } | ||||
							
								
								
									
										26
									
								
								webrtc/modules/audio_processing/vad/pitch_internal.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										26
									
								
								webrtc/modules/audio_processing/vad/pitch_internal.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,26 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ | ||||
|  | ||||
| // TODO(turajs): Write a description of this function. Also be consistent with | ||||
| // usage of |sampling_rate_hz| vs |kSamplingFreqHz|. | ||||
| void GetSubframesPitchParameters(int sampling_rate_hz, | ||||
|                                  double* gains, | ||||
|                                  double* lags, | ||||
|                                  int num_in_frames, | ||||
|                                  int num_out_frames, | ||||
|                                  double* log_old_gain, | ||||
|                                  double* old_lag, | ||||
|                                  double* log_pitch_gain, | ||||
|                                  double* pitch_lag_hz); | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_ | ||||
							
								
								
									
										106
									
								
								webrtc/modules/audio_processing/vad/pole_zero_filter.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										106
									
								
								webrtc/modules/audio_processing/vad/pole_zero_filter.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,106 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/pole_zero_filter.h" | ||||
|  | ||||
| #include <stdlib.h> | ||||
| #include <string.h> | ||||
| #include <algorithm> | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| PoleZeroFilter* PoleZeroFilter::Create(const float* numerator_coefficients, | ||||
|                                        size_t order_numerator, | ||||
|                                        const float* denominator_coefficients, | ||||
|                                        size_t order_denominator) { | ||||
|   if (order_numerator > kMaxFilterOrder || | ||||
|       order_denominator > kMaxFilterOrder || denominator_coefficients[0] == 0 || | ||||
|       numerator_coefficients == NULL || denominator_coefficients == NULL) | ||||
|     return NULL; | ||||
|   return new PoleZeroFilter(numerator_coefficients, order_numerator, | ||||
|                             denominator_coefficients, order_denominator); | ||||
| } | ||||
|  | ||||
| PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients, | ||||
|                                size_t order_numerator, | ||||
|                                const float* denominator_coefficients, | ||||
|                                size_t order_denominator) | ||||
|     : past_input_(), | ||||
|       past_output_(), | ||||
|       numerator_coefficients_(), | ||||
|       denominator_coefficients_(), | ||||
|       order_numerator_(order_numerator), | ||||
|       order_denominator_(order_denominator), | ||||
|       highest_order_(std::max(order_denominator, order_numerator)) { | ||||
|   memcpy(numerator_coefficients_, numerator_coefficients, | ||||
|          sizeof(numerator_coefficients_[0]) * (order_numerator_ + 1)); | ||||
|   memcpy(denominator_coefficients_, denominator_coefficients, | ||||
|          sizeof(denominator_coefficients_[0]) * (order_denominator_ + 1)); | ||||
|  | ||||
|   if (denominator_coefficients_[0] != 1) { | ||||
|     for (size_t n = 0; n <= order_numerator_; n++) | ||||
|       numerator_coefficients_[n] /= denominator_coefficients_[0]; | ||||
|     for (size_t n = 0; n <= order_denominator_; n++) | ||||
|       denominator_coefficients_[n] /= denominator_coefficients_[0]; | ||||
|   } | ||||
| } | ||||
|  | ||||
// Returns the sum over k = 1 .. |order| of coefficients[k] * past[order - k].
// |past| therefore points at the oldest of the |order| history samples and
// coefficients[0] is not used.
template <typename T>
static float FilterArPast(const T* past, size_t order,
                          const float* coefficients) {
  float acc = 0.0f;
  for (size_t lag = 1; lag <= order; ++lag)
    acc += coefficients[lag] * past[order - lag];
  return acc;
}
|  | ||||
| // Filters |num_input_samples| samples of |in| into |output|, carrying the | ||||
| // filter state (previous inputs and outputs) over from one call to the next. | ||||
| // | ||||
| // Returns -1 if |in| or |output| is NULL, 0 otherwise. | ||||
| int PoleZeroFilter::Filter(const int16_t* in, | ||||
|                            size_t num_input_samples, | ||||
|                            float* output) { | ||||
|   if (in == NULL || output == NULL) | ||||
|     return -1; | ||||
|   // Head of the frame: the recursion still reaches back into samples from the | ||||
|   // previous call, so read the history from |past_input_| / |past_output_| and | ||||
|   // append the new samples behind it as we go. | ||||
|   const size_t k = std::min(num_input_samples, highest_order_); | ||||
|   size_t n; | ||||
|   for (n = 0; n < k; n++) { | ||||
|     output[n] = in[n] * numerator_coefficients_[0]; | ||||
|     output[n] += FilterArPast(&past_input_[n], order_numerator_, | ||||
|                               numerator_coefficients_); | ||||
|     output[n] -= FilterArPast(&past_output_[n], order_denominator_, | ||||
|                               denominator_coefficients_); | ||||
|  | ||||
|     past_input_[n + order_numerator_] = in[n]; | ||||
|     past_output_[n + order_denominator_] = output[n]; | ||||
|   } | ||||
|   if (highest_order_ < num_input_samples) { | ||||
|     // Typical case: the rest of the frame only depends on samples of this call. | ||||
|     // Each branch must look back exactly its own order from |n|; anchoring both | ||||
|     // on |highest_order_| would read the wrong window whenever the numerator | ||||
|     // and denominator orders differ. Here n >= highest_order_, so neither | ||||
|     // subtraction can underflow. | ||||
|     for (; n < num_input_samples; n++) { | ||||
|       output[n] = in[n] * numerator_coefficients_[0]; | ||||
|       output[n] += FilterArPast(&in[n - order_numerator_], order_numerator_, | ||||
|                                 numerator_coefficients_); | ||||
|       output[n] -= FilterArPast(&output[n - order_denominator_], | ||||
|                                 order_denominator_, denominator_coefficients_); | ||||
|     } | ||||
|     // Record the tail of this frame as the history for the next call. | ||||
|     memcpy(past_input_, &in[num_input_samples - order_numerator_], | ||||
|            sizeof(in[0]) * order_numerator_); | ||||
|     memcpy(past_output_, &output[num_input_samples - order_denominator_], | ||||
|            sizeof(output[0]) * order_denominator_); | ||||
|   } else { | ||||
|     // Odd case that the length of the input is not longer than the filter | ||||
|     // order: drop the oldest |num_input_samples| entries of the history so that | ||||
|     // the samples appended above become the most recent ones. | ||||
|     memmove(past_input_, &past_input_[num_input_samples], | ||||
|             order_numerator_ * sizeof(past_input_[0])); | ||||
|     memmove(past_output_, &past_output_[num_input_samples], | ||||
|             order_denominator_ * sizeof(past_output_[0])); | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
							
								
								
									
										52
									
								
								webrtc/modules/audio_processing/vad/pole_zero_filter.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										52
									
								
								webrtc/modules/audio_processing/vad/pole_zero_filter.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,52 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ | ||||
|  | ||||
| #include <cstddef> | ||||
|  | ||||
| #include "webrtc/typedefs.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // A filter described by numerator and denominator coefficient arrays that | ||||
| // processes int16_t input into float output and keeps its state (previous | ||||
| // inputs and outputs) between calls to Filter(). | ||||
| class PoleZeroFilter { | ||||
|  public: | ||||
|   ~PoleZeroFilter() {} | ||||
|  | ||||
|   // Factory; the constructor is private, so this is the only public way to | ||||
|   // obtain an instance. The coefficient arrays are expected to hold | ||||
|   // |order_numerator| + 1 and |order_denominator| + 1 values respectively. | ||||
|   // NOTE(review): presumably returns NULL for invalid arguments (e.g. an order | ||||
|   // above kMaxFilterOrder) -- confirm against the definition. | ||||
|   static PoleZeroFilter* Create(const float* numerator_coefficients, | ||||
|                                 size_t order_numerator, | ||||
|                                 const float* denominator_coefficients, | ||||
|                                 size_t order_denominator); | ||||
|  | ||||
|   // Filters |num_input_samples| samples from |in| and writes the same number | ||||
|   // of samples to |output|. Returns -1 if either pointer is NULL, otherwise 0. | ||||
|   int Filter(const int16_t* in, size_t num_input_samples, float* output); | ||||
|  | ||||
|  private: | ||||
|   PoleZeroFilter(const float* numerator_coefficients, | ||||
|                  size_t order_numerator, | ||||
|                  const float* denominator_coefficients, | ||||
|                  size_t order_denominator); | ||||
|  | ||||
|   // Upper bound that sizes the fixed-length coefficient and history arrays. | ||||
|   static const int kMaxFilterOrder = 24; | ||||
|  | ||||
|   // Input/output history. Sized at twice the maximum order because Filter() | ||||
|   // appends up to |highest_order_| new samples behind the stored history | ||||
|   // before shifting it. | ||||
|   int16_t past_input_[kMaxFilterOrder * 2]; | ||||
|   float past_output_[kMaxFilterOrder * 2]; | ||||
|  | ||||
|   // Coefficients, one more than the corresponding order. | ||||
|   float numerator_coefficients_[kMaxFilterOrder + 1]; | ||||
|   float denominator_coefficients_[kMaxFilterOrder + 1]; | ||||
|  | ||||
|   size_t order_numerator_; | ||||
|   size_t order_denominator_; | ||||
|   // The larger of |order_numerator_| and |order_denominator_|. | ||||
|   size_t highest_order_; | ||||
| }; | ||||
|  | ||||
| }  // namespace webrtc | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_ | ||||
							
								
								
									
										93
									
								
								webrtc/modules/audio_processing/vad/standalone_vad.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										93
									
								
								webrtc/modules/audio_processing/vad/standalone_vad.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,93 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/standalone_vad.h" | ||||
|  | ||||
| #include <assert.h> | ||||
|  | ||||
| #include "webrtc/modules/interface/module_common_types.h" | ||||
| #include "webrtc/modules/utility/interface/audio_frame_operations.h" | ||||
| #include "webrtc/typedefs.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // Mode applied to a freshly created VAD instance and reported by mode() until | ||||
| // set_mode() succeeds. | ||||
| static const int kDefaultStandaloneVadMode = 3; | ||||
|  | ||||
| // Wraps an already initialized |vad| instance; the audio buffer starts out | ||||
| // zeroed and empty, and the mode is recorded as the default one. | ||||
| StandaloneVad::StandaloneVad(VadInst* vad) | ||||
|     : vad_(vad), | ||||
|       buffer_(), | ||||
|       index_(0), | ||||
|       mode_(kDefaultStandaloneVadMode) {} | ||||
|  | ||||
| // Hands the VAD instance received by the constructor to WebRtcVad_Free(). | ||||
| StandaloneVad::~StandaloneVad() { | ||||
|   WebRtcVad_Free(vad_); | ||||
| } | ||||
|  | ||||
| // Creates and initializes a VAD instance in the default mode and wraps it. | ||||
| // Returns nullptr if the instance cannot be created or configured; in the | ||||
| // latter case the instance is freed before returning. | ||||
| StandaloneVad* StandaloneVad::Create() { | ||||
|   VadInst* handle = WebRtcVad_Create(); | ||||
|   if (handle == nullptr) | ||||
|     return nullptr; | ||||
|  | ||||
|   // Both calls are always made; a non-zero result from either one is a failure. | ||||
|   const int init_status = WebRtcVad_Init(handle); | ||||
|   const int mode_status = | ||||
|       WebRtcVad_set_mode(handle, kDefaultStandaloneVadMode); | ||||
|   if ((init_status | mode_status) != 0) { | ||||
|     WebRtcVad_Free(handle); | ||||
|     return nullptr; | ||||
|   } | ||||
|   return new StandaloneVad(handle); | ||||
| } | ||||
|  | ||||
| // Appends one 10 ms frame (|kLength10Ms| samples) to the internal buffer. | ||||
| // | ||||
| // Returns -1 if |data| is null or |length| is not exactly |kLength10Ms|, | ||||
| // otherwise 0. If the frame does not fit, the buffered audio is discarded and | ||||
| // the new frame becomes the first one in the buffer. | ||||
| int StandaloneVad::AddAudio(const int16_t* data, size_t length) { | ||||
|   if (data == nullptr || length != kLength10Ms) | ||||
|     return -1; | ||||
|  | ||||
|   if (index_ + length > kLength10Ms * kMaxNum10msFrames) { | ||||
|     // Reset the buffer if it's full. | ||||
|     // TODO(ajm): Instead, consider just processing every 10 ms frame. Then we | ||||
|     // can forgo the buffering. | ||||
|     index_ = 0; | ||||
|   } | ||||
|  | ||||
|   memcpy(&buffer_[index_], data, sizeof(int16_t) * length); | ||||
|   index_ += length; | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Runs the VAD once over all buffered audio and writes one probability per | ||||
| // buffered 10 ms frame to |p| (all frames get the same value). | ||||
| // | ||||
| // Returns -1 if |p| is null, no audio is buffered, |length_p| is smaller than | ||||
| // the number of buffered frames, or the VAD reports an error; |p| is left | ||||
| // untouched in those cases. Otherwise returns the VAD decision and empties the | ||||
| // buffer. | ||||
| int StandaloneVad::GetActivity(double* p, size_t length_p) { | ||||
|   if (p == nullptr || index_ == 0) | ||||
|     return -1; | ||||
|  | ||||
|   const size_t num_frames = index_ / kLength10Ms; | ||||
|   if (num_frames > length_p) | ||||
|     return -1; | ||||
|   assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0); | ||||
|  | ||||
|   int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_); | ||||
|   if (activity < 0) | ||||
|     return -1; | ||||
|   else if (activity == 0) | ||||
|     p[0] = 0.01;  // Arbitrary but small and non-zero. | ||||
|   else | ||||
|     p[0] = 0.5;  // 0.5 is neutral when combined with other probabilities. | ||||
|   for (size_t n = 1; n < num_frames; n++) | ||||
|     p[n] = p[0]; | ||||
|   // Reset the buffer to start from the beginning. | ||||
|   index_ = 0; | ||||
|   return activity; | ||||
| } | ||||
|  | ||||
| // Switches the wrapped VAD to |mode| (valid range 0..3). Returns -1 and keeps | ||||
| // the recorded mode when |mode| is out of range or the VAD rejects it; | ||||
| // returns 0 once the new mode has been applied and recorded. | ||||
| int StandaloneVad::set_mode(int mode) { | ||||
|   const bool in_range = (mode >= 0 && mode <= 3); | ||||
|   // The VAD is only consulted for in-range values. | ||||
|   if (!in_range || WebRtcVad_set_mode(vad_, mode) != 0) | ||||
|     return -1; | ||||
|  | ||||
|   mode_ = mode; | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
							
								
								
									
										70
									
								
								webrtc/modules/audio_processing/vad/standalone_vad.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										70
									
								
								webrtc/modules/audio_processing/vad/standalone_vad.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,70 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_ | ||||
|  | ||||
| #include "webrtc/base/scoped_ptr.h" | ||||
| #include "webrtc/modules/audio_processing/vad/common.h" | ||||
| #include "webrtc/common_audio/vad/include/webrtc_vad.h" | ||||
| #include "webrtc/typedefs.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| class AudioFrame; | ||||
|  | ||||
| // Buffers up to |kMaxNum10msFrames| frames of 10 ms audio and runs the WebRTC | ||||
| // VAD over the buffered audio on request. | ||||
| class StandaloneVad { | ||||
|  public: | ||||
|   // Returns a new instance, or nullptr if the underlying VAD cannot be created | ||||
|   // or initialized. | ||||
|   static StandaloneVad* Create(); | ||||
|   ~StandaloneVad(); | ||||
|  | ||||
|   // Outputs | ||||
|   //   p: a buffer where probabilities are written to, one per buffered 10 ms | ||||
|   //      frame. | ||||
|   //   length_p: number of elements of |p|. | ||||
|   // | ||||
|   // return value: | ||||
|   //    -1: if no audio is stored, |length_p| is smaller than the number of | ||||
|   //        buffered frames, or VAD returns error. | ||||
|   //    otherwise: the (non-negative) decision returned by the VAD; 0 means | ||||
|   //        the audio was classified as passive, non-zero as active. | ||||
|   // In case of error the content of |p| is unchanged. | ||||
|   // | ||||
|   // Note that due to a high false-positive (VAD decision is active while the | ||||
|   // processed audio is just background noise) rate, stand-alone VAD is used as | ||||
|   // a one-sided indicator. The activity probability is 0.5 if the frame is | ||||
|   // classified as active, and the probability is 0.01 if the audio is | ||||
|   // classified as passive. In this way, when probabilities are combined, the | ||||
|   // effect of the stand-alone VAD is neutral if the input is classified as | ||||
|   // active. | ||||
|   int GetActivity(double* p, size_t length_p); | ||||
|  | ||||
|   // Expecting 10 ms of 16 kHz audio to be pushed in. Returns -1 if |length| is | ||||
|   // not |kLength10Ms|, otherwise 0. When the buffer is already full, the | ||||
|   // buffered audio is dropped and buffering restarts with this frame. | ||||
|   int AddAudio(const int16_t* data, size_t length); | ||||
|  | ||||
|   // Set aggressiveness of VAD, 0 is the least aggressive and 3 is the most | ||||
|   // aggressive mode. Returns -1 if the input is less than 0 or larger than 3, | ||||
|   // or if the VAD rejects the mode; otherwise 0 is returned. | ||||
|   int set_mode(int mode); | ||||
|   // Get the aggressiveness of the current VAD. | ||||
|   int mode() const { return mode_; } | ||||
|  | ||||
|  private: | ||||
|   explicit StandaloneVad(VadInst* vad); | ||||
|  | ||||
|   // Capacity of |buffer_| in 10 ms frames. | ||||
|   static const size_t kMaxNum10msFrames = 3; | ||||
|  | ||||
|   // TODO(turajs): Is there a way to use scoped-pointer here? | ||||
|   VadInst* vad_; | ||||
|   int16_t buffer_[kMaxNum10msFrames * kLength10Ms]; | ||||
|   // Number of samples currently stored in |buffer_|. | ||||
|   size_t index_; | ||||
|   // Last mode that was applied successfully. | ||||
|   int mode_; | ||||
| }; | ||||
|  | ||||
| }  // namespace webrtc | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_ | ||||
							
								
								
									
										275
									
								
								webrtc/modules/audio_processing/vad/vad_audio_proc.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										275
									
								
								webrtc/modules/audio_processing/vad/vad_audio_proc.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,275 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" | ||||
|  | ||||
| #include <math.h> | ||||
| #include <stdio.h> | ||||
|  | ||||
| #include "webrtc/common_audio/fft4g.h" | ||||
| #include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h" | ||||
| #include "webrtc/modules/audio_processing/vad/pitch_internal.h" | ||||
| #include "webrtc/modules/audio_processing/vad/pole_zero_filter.h" | ||||
| extern "C" { | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h" | ||||
| #include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h" | ||||
| } | ||||
| #include "webrtc/modules/interface/module_common_types.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // The following structures are declared anonymous in iSAC's structs.h. To | ||||
| // forward declare them, we use this derived class trick. | ||||
| struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {}; | ||||
| struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {}; | ||||
|  | ||||
| // Width of one DFT bin in Hz; used to turn a (fractional) bin index into a | ||||
| // frequency. | ||||
| static const float kFrequencyResolution = | ||||
|     kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize); | ||||
| // A 10 ms sub-frame whose RMS is below this value marks the whole buffer as | ||||
| // silence. | ||||
| static const int kSilenceRms = 5; | ||||
|  | ||||
| // Sets up the audio buffer (initially holding only zeroed "past" samples), the | ||||
| // pitch-tracking state, the FFT work tables, the iSAC analysis structures and | ||||
| // the high-pass filter. | ||||
| // TODO(turajs): Make a Create or Init for VadAudioProc. | ||||
| VadAudioProc::VadAudioProc() | ||||
|     : audio_buffer_(), | ||||
|       num_buffer_samples_(kNumPastSignalSamples), | ||||
|       log_old_gain_(-2), | ||||
|       old_lag_(50),  // Arbitrary but valid as pitch-lag (in samples). | ||||
|       pitch_analysis_handle_(new PitchAnalysisStruct), | ||||
|       pre_filter_handle_(new PreFiltBankstr), | ||||
|       high_pass_filter_(PoleZeroFilter::Create(kCoeffNumerator, | ||||
|                                                kFilterOrder, | ||||
|                                                kCoeffDenominator, | ||||
|                                                kFilterOrder)) { | ||||
|   static_assert(kNumPastSignalSamples + kNumSubframeSamples == | ||||
|                     sizeof(kLpcAnalWin) / sizeof(kLpcAnalWin[0]), | ||||
|                 "lpc analysis window incorrect size"); | ||||
|   static_assert(kLpcOrder + 1 == sizeof(kCorrWeight) / sizeof(kCorrWeight[0]), | ||||
|                 "correlation weight incorrect size"); | ||||
|  | ||||
|   // TODO(turajs): Are we doing too much in the constructor? | ||||
|   // Scratch input for the warm-up transform below. It is zeroed so that the | ||||
|   // transform never reads indeterminate values; its output is discarded. | ||||
|   float data[kDftSize] = {0}; | ||||
|   // Make FFT to initialize: with ip_[0] == 0 the call fills |ip_| and |w_fft_|. | ||||
|   ip_[0] = 0; | ||||
|   WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); | ||||
|   // TODO(turajs): Need to initialize high-pass filter. | ||||
|  | ||||
|   // Initialize iSAC components. | ||||
|   WebRtcIsac_InitPreFilterbank(pre_filter_handle_.get()); | ||||
|   WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get()); | ||||
| } | ||||
|  | ||||
| // Nothing to release by hand: the iSAC structures and the high-pass filter | ||||
| // are held in rtc::scoped_ptr members. | ||||
| VadAudioProc::~VadAudioProc() { | ||||
| } | ||||
|  | ||||
| // Keeps the final |kNumPastSignalSamples| samples of the full buffer as the | ||||
| // "past signal" of the next buffer and marks everything after them as empty. | ||||
| void VadAudioProc::ResetBuffer() { | ||||
|   const float* const tail = &audio_buffer_[kNumSamplesToProcess]; | ||||
|   memcpy(audio_buffer_, tail, | ||||
|          kNumPastSignalSamples * sizeof(audio_buffer_[0])); | ||||
|   num_buffer_samples_ = kNumPastSignalSamples; | ||||
| } | ||||
|  | ||||
| // Consumes one 10 ms sub-frame. |features->num_frames| stays 0 until | ||||
| // |kNum10msSubframes| sub-frames have been collected; then the RMS of every | ||||
| // sub-frame is computed and, unless one of them is silent, the pitch and | ||||
| // spectral-peak features as well. | ||||
| // | ||||
| // Returns -1 if |length| is not |kNumSubframeSamples| or the high-pass filter | ||||
| // fails, otherwise 0. | ||||
| int VadAudioProc::ExtractFeatures(const int16_t* frame, | ||||
|                                   size_t length, | ||||
|                                   AudioFeatures* features) { | ||||
|   features->num_frames = 0; | ||||
|   if (length != kNumSubframeSamples) { | ||||
|     return -1; | ||||
|   } | ||||
|  | ||||
|   // High-pass filter to remove the DC component and very low frequency content. | ||||
|   // We have experienced that this high-pass filtering improves voice/non-voiced | ||||
|   // classification. The filtered samples go straight into the analysis buffer. | ||||
|   float* const write_position = &audio_buffer_[num_buffer_samples_]; | ||||
|   if (high_pass_filter_->Filter(frame, kNumSubframeSamples, write_position) != | ||||
|       0) { | ||||
|     return -1; | ||||
|   } | ||||
|   num_buffer_samples_ += kNumSubframeSamples; | ||||
|  | ||||
|   // Not enough audio yet; report zero frames. | ||||
|   if (num_buffer_samples_ < kBufferLength) { | ||||
|     return 0; | ||||
|   } | ||||
|   assert(num_buffer_samples_ == kBufferLength); | ||||
|  | ||||
|   features->num_frames = kNum10msSubframes; | ||||
|   Rms(features->rms, kMaxNumFrames); | ||||
|  | ||||
|   // PitchAnalysis can cause NaNs in the pitch gain if it's fed silence, so a | ||||
|   // single silent sub-frame is enough to skip the remaining features. | ||||
|   bool any_silent_subframe = false; | ||||
|   for (size_t i = 0; i < kNum10msSubframes && !any_silent_subframe; ++i) { | ||||
|     any_silent_subframe = features->rms[i] < kSilenceRms; | ||||
|   } | ||||
|   features->silence = any_silent_subframe; | ||||
|  | ||||
|   if (!any_silent_subframe) { | ||||
|     PitchAnalysis(features->log_pitch_gain, features->pitch_lag_hz, | ||||
|                   kMaxNumFrames); | ||||
|     FindFirstSpectralPeaks(features->spectral_peak, kMaxNumFrames); | ||||
|   } | ||||
|   ResetBuffer(); | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Writes |kLpcOrder + 1| autocorrelation values of the windowed audio that | ||||
| // belongs to sub-frame |subframe_index| into |corr|. The window covers the | ||||
| // sub-frame's own position in the buffer plus the |kNumPastSignalSamples| | ||||
| // positions that follow it. | ||||
| void VadAudioProc::SubframeCorrelation(double* corr, | ||||
|                                        size_t length_corr, | ||||
|                                        size_t subframe_index) { | ||||
|   assert(length_corr >= kLpcOrder + 1); | ||||
|   const size_t kWindowLength = kNumSubframeSamples + kNumPastSignalSamples; | ||||
|   double windowed_audio[kWindowLength]; | ||||
|   const float* const segment = | ||||
|       &audio_buffer_[subframe_index * kNumSubframeSamples]; | ||||
|  | ||||
|   for (size_t n = 0; n < kWindowLength; ++n) | ||||
|     windowed_audio[n] = segment[n] * kLpcAnalWin[n]; | ||||
|  | ||||
|   WebRtcIsac_AutoCorr(corr, windowed_audio, kWindowLength, kLpcOrder); | ||||
| } | ||||
|  | ||||
| // Compute |kNum10msSubframes| sets of LPC coefficients, one per 10 ms input, | ||||
| // stored back to back in |lpc| with |kLpcOrder + 1| values per set. | ||||
| // The analysis window is 15 ms long and it is centered on the first half of | ||||
| // each 10ms sub-frame. This is equivalent to computing LPC coefficients for the | ||||
| // first half of each 10 ms subframe. | ||||
| void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) { | ||||
|   const size_t kNumCoefficients = kLpcOrder + 1; | ||||
|   assert(length_lpc >= kNum10msSubframes * kNumCoefficients); | ||||
|   double corr[kNumCoefficients]; | ||||
|   double reflec_coeff[kLpcOrder]; | ||||
|   for (size_t subframe = 0; subframe < kNum10msSubframes; ++subframe) { | ||||
|     SubframeCorrelation(corr, kNumCoefficients, subframe); | ||||
|     // Condition the correlation sequence before the recursion: a slight boost | ||||
|     // of the zero-lag term followed by the per-lag weights. This makes | ||||
|     // Lev-Durb a bit more stable. | ||||
|     corr[0] *= 1.0001; | ||||
|     for (size_t lag = 0; lag < kNumCoefficients; ++lag) { | ||||
|       corr[lag] *= kCorrWeight[lag]; | ||||
|     } | ||||
|     WebRtcIsac_LevDurb(&lpc[subframe * kNumCoefficients], reflec_coeff, corr, | ||||
|                        kLpcOrder); | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Fits a parabola through the reciprocals of three equally spaced values and | ||||
| // returns the position of its extremum relative to the middle point (in units | ||||
| // of the point spacing). The interpolation is thus done in |1 / A(z)|^2. | ||||
| static float QuadraticInterpolation(float prev_val, | ||||
|                                     float curr_val, | ||||
|                                     float next_val) { | ||||
|   const float inv_next = 1.0f / next_val; | ||||
|   const float inv_prev = 1.0f / prev_val; | ||||
|   const float inv_curr = 1.0f / curr_val; | ||||
|  | ||||
|   const float fractional_index = | ||||
|       -(inv_next - inv_prev) * 0.5f / (inv_next + inv_prev - 2.f * inv_curr); | ||||
|   // The extremum is expected to lie strictly between the two neighbours. | ||||
|   assert(fabs(fractional_index) < 1); | ||||
|   return fractional_index; | ||||
| } | ||||
|  | ||||
| // 1 / A(z), where A(z) is defined by |lpc| is a model of the spectral envelope | ||||
| // of the input signal. The local maximum of the spectral envelope corresponds | ||||
| // with the local minimum of A(z). It saves complexity, as we save one | ||||
| // inversion. Furthermore, we find the first local maximum of magnitude squared, | ||||
| // to save on one square root. | ||||
| // | ||||
| // Writes one frequency (bin position times |kFrequencyResolution|) per 10 ms | ||||
| // sub-frame to |f_peak|; the value is 0 when no local minimum is found. | ||||
| void VadAudioProc::FindFirstSpectralPeaks(double* f_peak, | ||||
|                                           size_t length_f_peak) { | ||||
|   assert(length_f_peak >= kNum10msSubframes); | ||||
|   double lpc[kNum10msSubframes * (kLpcOrder + 1)]; | ||||
|   // For all sub-frames. | ||||
|   GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1)); | ||||
|  | ||||
|   const size_t kNumDftCoefficients = kDftSize / 2 + 1; | ||||
|   float data[kDftSize]; | ||||
|  | ||||
|   for (size_t i = 0; i < kNum10msSubframes; i++) { | ||||
|     // Convert to float with zero pad. | ||||
|     memset(data, 0, sizeof(data)); | ||||
|     for (size_t n = 0; n < kLpcOrder + 1; n++) { | ||||
|       data[n] = static_cast<float>(lpc[i * (kLpcOrder + 1) + n]); | ||||
|     } | ||||
|     // Transform to frequency domain. | ||||
|     // NOTE(review): the indexing below presumably relies on the packed output | ||||
|     // layout of the transform (data[0] = bin 0, data[1] = last bin, | ||||
|     // data[2 * k] / data[2 * k + 1] = real / imaginary part of bin k) -- | ||||
|     // confirm against the FFT documentation. | ||||
|     WebRtc_rdft(kDftSize, 1, data, ip_, w_fft_); | ||||
|  | ||||
|     size_t index_peak = 0; | ||||
|     float prev_magn_sqr = data[0] * data[0]; | ||||
|     float curr_magn_sqr = data[2] * data[2] + data[3] * data[3]; | ||||
|     float next_magn_sqr; | ||||
|     bool found_peak = false; | ||||
|     // Slide a three-bin window (prev, curr, next) upwards; inside the loop | ||||
|     // |curr_magn_sqr| belongs to bin n - 1 and |next_magn_sqr| to bin n. | ||||
|     for (size_t n = 2; n < kNumDftCoefficients - 1; n++) { | ||||
|       next_magn_sqr = | ||||
|           data[2 * n] * data[2 * n] + data[2 * n + 1] * data[2 * n + 1]; | ||||
|       if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { | ||||
|         found_peak = true; | ||||
|         index_peak = n - 1; | ||||
|         break; | ||||
|       } | ||||
|       prev_magn_sqr = curr_magn_sqr; | ||||
|       curr_magn_sqr = next_magn_sqr; | ||||
|     } | ||||
|     float fractional_index = 0; | ||||
|     if (!found_peak) { | ||||
|       // Checking if |kNumDftCoefficients - 1| is the local minimum. | ||||
|       // NOTE(review): after the loop |curr_magn_sqr| holds bin | ||||
|       // |kNumDftCoefficients - 2| and |next_magn_sqr| (from data[1]) is the | ||||
|       // neighbour above it, so the bin that passes the test is one below the | ||||
|       // index stored here -- confirm whether that is intended. | ||||
|       next_magn_sqr = data[1] * data[1]; | ||||
|       if (curr_magn_sqr < prev_magn_sqr && curr_magn_sqr < next_magn_sqr) { | ||||
|         index_peak = kNumDftCoefficients - 1; | ||||
|       } | ||||
|     } else { | ||||
|       // A peak is found, do a simple quadratic interpolation to get a more | ||||
|       // accurate estimate of the peak location. | ||||
|       fractional_index = | ||||
|           QuadraticInterpolation(prev_magn_sqr, curr_magn_sqr, next_magn_sqr); | ||||
|     } | ||||
|     f_peak[i] = (index_peak + fractional_index) * kFrequencyResolution; | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Estimates pitch gains and lags with the iSAC pitch estimator and converts | ||||
| // them into |kNum10msSubframes| log-gains and lags in Hz. Both output arrays | ||||
| // must hold at least |kNum10msSubframes| elements (|length|). | ||||
| void VadAudioProc::PitchAnalysis(double* log_pitch_gains, | ||||
|                                  double* pitch_lags_hz, | ||||
|                                  size_t length) { | ||||
|   assert(length >= kNum10msSubframes); | ||||
|   // TODO(turajs): This can be "imported" from iSAC & and the next two | ||||
|   // constants. | ||||
|   const int kNumPitchSubframes = 4; | ||||
|   const int kNumSubbandFrameSamples = 240; | ||||
|   const int kNumLookaheadSamples = 24; | ||||
|  | ||||
|   // Outputs of the band split. | ||||
|   float lower[kNumSubbandFrameSamples]; | ||||
|   float upper[kNumSubbandFrameSamples]; | ||||
|   double lower_lookahead[kNumSubbandFrameSamples]; | ||||
|   double upper_lookahead[kNumSubbandFrameSamples]; | ||||
|   // Work/output buffer of the pitch estimator. | ||||
|   double lower_lookahead_pre_filter[kNumSubbandFrameSamples + | ||||
|                                     kNumLookaheadSamples]; | ||||
|   // Raw estimator results, one per iSAC pitch sub-frame. | ||||
|   double raw_gains[kNumPitchSubframes]; | ||||
|   double raw_lags[kNumPitchSubframes]; | ||||
|  | ||||
|   // Split signal to lower and upper bands, skipping the "past" samples at the | ||||
|   // head of the buffer. | ||||
|   WebRtcIsac_SplitAndFilterFloat(&audio_buffer_[kNumPastSignalSamples], lower, | ||||
|                                  upper, lower_lookahead, upper_lookahead, | ||||
|                                  pre_filter_handle_.get()); | ||||
|   WebRtcIsac_PitchAnalysis(lower_lookahead, lower_lookahead_pre_filter, | ||||
|                            pitch_analysis_handle_.get(), raw_lags, raw_gains); | ||||
|  | ||||
|   // Lags are computed on lower-band signal with sampling rate half of the | ||||
|   // input signal. | ||||
|   const int kLowerBandSampleRateHz = kSampleRateHz / 2; | ||||
|   GetSubframesPitchParameters(kLowerBandSampleRateHz, raw_gains, raw_lags, | ||||
|                               kNumPitchSubframes, kNum10msSubframes, | ||||
|                               &log_old_gain_, &old_lag_, log_pitch_gains, | ||||
|                               pitch_lags_hz); | ||||
| } | ||||
|  | ||||
| // Writes the root-mean-square of each 10 ms sub-frame of the buffered audio | ||||
| // (the "past" samples at the head of the buffer are skipped) to |rms|, which | ||||
| // must hold at least |kNum10msSubframes| elements. | ||||
| void VadAudioProc::Rms(double* rms, size_t length_rms) { | ||||
|   assert(length_rms >= kNum10msSubframes); | ||||
|   const float* sample = &audio_buffer_[kNumPastSignalSamples]; | ||||
|   for (size_t subframe = 0; subframe < kNum10msSubframes; ++subframe) { | ||||
|     double sum_of_squares = 0; | ||||
|     for (size_t n = 0; n < kNumSubframeSamples; ++n, ++sample) | ||||
|       sum_of_squares += *sample * *sample; | ||||
|     rms[subframe] = sqrt(sum_of_squares / kNumSubframeSamples); | ||||
|   } | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
							
								
								
									
										89
									
								
								webrtc/modules/audio_processing/vad/vad_audio_proc.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										89
									
								
								webrtc/modules/audio_processing/vad/vad_audio_proc.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,89 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ | ||||
|  | ||||
| #include "webrtc/base/scoped_ptr.h" | ||||
| #include "webrtc/modules/audio_processing/vad/common.h" | ||||
| #include "webrtc/typedefs.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| class AudioFrame; | ||||
| class PoleZeroFilter; | ||||
|  | ||||
| // Collects 10 ms sub-frames of audio and, once |kNum10msSubframes| of them are | ||||
| // available, extracts the features stored in AudioFeatures (RMS, pitch gain | ||||
| // and lag, first spectral peak). | ||||
| class VadAudioProc { | ||||
|  public: | ||||
|   // Forward declare iSAC structs. | ||||
|   struct PitchAnalysisStruct; | ||||
|   struct PreFiltBankstr; | ||||
|  | ||||
|   VadAudioProc(); | ||||
|   ~VadAudioProc(); | ||||
|  | ||||
|   // Feeds one sub-frame of |length| samples; |length| must equal | ||||
|   // |kNumSubframeSamples|. |audio_features->num_frames| is 0 until a full | ||||
|   // buffer has been collected and |kNum10msSubframes| afterwards. Returns -1 | ||||
|   // on a wrong |length| or a filtering failure, otherwise 0. | ||||
|   int ExtractFeatures(const int16_t* audio_frame, | ||||
|                       size_t length, | ||||
|                       AudioFeatures* audio_features); | ||||
|  | ||||
|   // Size of the DFT used for the spectral-peak search. | ||||
|   static const size_t kDftSize = 512; | ||||
|  | ||||
|  private: | ||||
|   void PitchAnalysis(double* pitch_gains, double* pitch_lags_hz, size_t length); | ||||
|   void SubframeCorrelation(double* corr, | ||||
|                            size_t length_corr, | ||||
|                            size_t subframe_index); | ||||
|   void GetLpcPolynomials(double* lpc, size_t length_lpc); | ||||
|   void FindFirstSpectralPeaks(double* f_peak, size_t length_f_peak); | ||||
|   void Rms(double* rms, size_t length_rms); | ||||
|   void ResetBuffer(); | ||||
|  | ||||
|   // To compute spectral peak we perform LPC analysis to get spectral envelope. | ||||
|   // For every 30 ms we compute 3 spectral peaks, therefore 3 LPC analyses. | ||||
|   // LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame | ||||
|   // we need 5 ms of past signal to create the input of LPC analysis. | ||||
|   static const size_t kNumPastSignalSamples = | ||||
|       static_cast<size_t>(kSampleRateHz / 200); | ||||
|  | ||||
|   // TODO(turajs): maybe defining this at a higher level (maybe enum) so that | ||||
|   // all the code recognize it as "no-error." | ||||
|   static const int kNoError = 0; | ||||
|  | ||||
|   static const size_t kNum10msSubframes = 3; | ||||
|   // Samples in 10 ms @ given sampling rate. | ||||
|   static const size_t kNumSubframeSamples = | ||||
|       static_cast<size_t>(kSampleRateHz / 100); | ||||
|   static const size_t kNumSamplesToProcess = | ||||
|       kNum10msSubframes * | ||||
|       kNumSubframeSamples;  // Samples in 30 ms @ given sampling rate. | ||||
|   static const size_t kBufferLength = | ||||
|       kNumPastSignalSamples + kNumSamplesToProcess; | ||||
|   // Lengths of the FFT work arrays |ip_| and |w_fft_|. | ||||
|   static const size_t kIpLength = kDftSize >> 1; | ||||
|   static const size_t kWLength = kDftSize >> 1; | ||||
|  | ||||
|   static const size_t kLpcOrder = 16; | ||||
|  | ||||
|   // Work areas handed to WebRtc_rdft(); filled by the constructor's warm-up | ||||
|   // transform. | ||||
|   size_t ip_[kIpLength]; | ||||
|   float w_fft_[kWLength]; | ||||
|  | ||||
|   // A buffer of 5 ms (past audio) + 30 ms (one iSAC frame ). | ||||
|   float audio_buffer_[kBufferLength]; | ||||
|   // Number of valid samples in |audio_buffer_|, including the past audio. | ||||
|   size_t num_buffer_samples_; | ||||
|  | ||||
|   // State passed to GetSubframesPitchParameters() on every pitch analysis. | ||||
|   double log_old_gain_; | ||||
|   double old_lag_; | ||||
|  | ||||
|   rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_; | ||||
|   rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_; | ||||
|   rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_; | ||||
| }; | ||||
|  | ||||
| }  // namespace webrtc | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_ | ||||
| @@ -0,0 +1,94 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_ | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // These values should match MATLAB counterparts for unit-tests to pass. | ||||
| // Per-lag weights applied to the autocorrelation sequence before the | ||||
| // Levinson-Durbin recursion; entry k equals 0.985^k rounded to six decimals. | ||||
| static const double kCorrWeight[] = {1.000000, | ||||
|                                      0.985000, | ||||
|                                      0.970225, | ||||
|                                      0.955672, | ||||
|                                      0.941337, | ||||
|                                      0.927217, | ||||
|                                      0.913308, | ||||
|                                      0.899609, | ||||
|                                      0.886115, | ||||
|                                      0.872823, | ||||
|                                      0.859730, | ||||
|                                      0.846834, | ||||
|                                      0.834132, | ||||
|                                      0.821620, | ||||
|                                      0.809296, | ||||
|                                      0.797156, | ||||
|                                      0.785199}; | ||||
|  | ||||
| // kLpcAnalWin is a 240-entry window that is symmetric about its centre (the | ||||
| // second half mirrors the first). The entries are consistent with | ||||
| // sin(pi * k / 239) for k = 0..239, rounded to eight decimals. | ||||
| // NOTE(review): the name suggests this is the LPC analysis window; its use is | ||||
| // outside this header -- confirm against the code that includes it. | ||||
| static const double kLpcAnalWin[] = { | ||||
|     0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639, | ||||
|     0.07878670, 0.09188339, 0.10496421, 0.11802689, 0.13106918, 0.14408883, | ||||
|     0.15708358, 0.17005118, 0.18298941, 0.19589602, 0.20876878, 0.22160547, | ||||
|     0.23440387, 0.24716177, 0.25987696, 0.27254725, 0.28517045, 0.29774438, | ||||
|     0.31026687, 0.32273574, 0.33514885, 0.34750406, 0.35979922, 0.37203222, | ||||
|     0.38420093, 0.39630327, 0.40833713, 0.42030043, 0.43219112, 0.44400713, | ||||
|     0.45574642, 0.46740697, 0.47898676, 0.49048379, 0.50189608, 0.51322164, | ||||
|     0.52445853, 0.53560481, 0.54665854, 0.55761782, 0.56848075, 0.57924546, | ||||
|     0.58991008, 0.60047278, 0.61093173, 0.62128512, 0.63153117, 0.64166810, | ||||
|     0.65169416, 0.66160761, 0.67140676, 0.68108990, 0.69065536, 0.70010148, | ||||
|     0.70942664, 0.71862923, 0.72770765, 0.73666033, 0.74548573, 0.75418233, | ||||
|     0.76274862, 0.77118312, 0.77948437, 0.78765094, 0.79568142, 0.80357442, | ||||
|     0.81132858, 0.81894256, 0.82641504, 0.83374472, 0.84093036, 0.84797069, | ||||
|     0.85486451, 0.86161063, 0.86820787, 0.87465511, 0.88095122, 0.88709512, | ||||
|     0.89308574, 0.89892206, 0.90460306, 0.91012776, 0.91549520, 0.92070447, | ||||
|     0.92575465, 0.93064488, 0.93537432, 0.93994213, 0.94434755, 0.94858979, | ||||
|     0.95266814, 0.95658189, 0.96033035, 0.96391289, 0.96732888, 0.97057773, | ||||
|     0.97365889, 0.97657181, 0.97931600, 0.98189099, 0.98429632, 0.98653158, | ||||
|     0.98859639, 0.99049038, 0.99221324, 0.99376466, 0.99514438, 0.99635215, | ||||
|     0.99738778, 0.99825107, 0.99894188, 0.99946010, 0.99980562, 0.99997840, | ||||
|     0.99997840, 0.99980562, 0.99946010, 0.99894188, 0.99825107, 0.99738778, | ||||
|     0.99635215, 0.99514438, 0.99376466, 0.99221324, 0.99049038, 0.98859639, | ||||
|     0.98653158, 0.98429632, 0.98189099, 0.97931600, 0.97657181, 0.97365889, | ||||
|     0.97057773, 0.96732888, 0.96391289, 0.96033035, 0.95658189, 0.95266814, | ||||
|     0.94858979, 0.94434755, 0.93994213, 0.93537432, 0.93064488, 0.92575465, | ||||
|     0.92070447, 0.91549520, 0.91012776, 0.90460306, 0.89892206, 0.89308574, | ||||
|     0.88709512, 0.88095122, 0.87465511, 0.86820787, 0.86161063, 0.85486451, | ||||
|     0.84797069, 0.84093036, 0.83374472, 0.82641504, 0.81894256, 0.81132858, | ||||
|     0.80357442, 0.79568142, 0.78765094, 0.77948437, 0.77118312, 0.76274862, | ||||
|     0.75418233, 0.74548573, 0.73666033, 0.72770765, 0.71862923, 0.70942664, | ||||
|     0.70010148, 0.69065536, 0.68108990, 0.67140676, 0.66160761, 0.65169416, | ||||
|     0.64166810, 0.63153117, 0.62128512, 0.61093173, 0.60047278, 0.58991008, | ||||
|     0.57924546, 0.56848075, 0.55761782, 0.54665854, 0.53560481, 0.52445853, | ||||
|     0.51322164, 0.50189608, 0.49048379, 0.47898676, 0.46740697, 0.45574642, | ||||
|     0.44400713, 0.43219112, 0.42030043, 0.40833713, 0.39630327, 0.38420093, | ||||
|     0.37203222, 0.35979922, 0.34750406, 0.33514885, 0.32273574, 0.31026687, | ||||
|     0.29774438, 0.28517045, 0.27254725, 0.25987696, 0.24716177, 0.23440387, | ||||
|     0.22160547, 0.20876878, 0.19589602, 0.18298941, 0.17005118, 0.15708358, | ||||
|     0.14408883, 0.13106918, 0.11802689, 0.10496421, 0.09188339, 0.07878670, | ||||
|     0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000}; | ||||
|  | ||||
| // Coefficients of a second-order filter; each array has kFilterOrder + 1 | ||||
| // entries. | ||||
| // NOTE(review): the numerator coefficients sum to approximately zero, which | ||||
| // would give zero gain at DC if these are used as a standard IIR transfer | ||||
| // function (i.e. a high-pass filter) -- confirm against the filtering code. | ||||
| static const size_t kFilterOrder = 2; | ||||
| static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, | ||||
|                                                         -1.949650f, | ||||
|                                                         0.974827f}; | ||||
| static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, | ||||
|                                                           -1.971999f, | ||||
|                                                           0.972457f}; | ||||
|  | ||||
| // Compile-time guards: both coefficient arrays must contain exactly | ||||
| // kFilterOrder + 1 elements. | ||||
| static_assert(kFilterOrder + 1 == | ||||
|                   sizeof(kCoeffNumerator) / sizeof(kCoeffNumerator[0]), | ||||
|               "numerator coefficients incorrect size"); | ||||
| static_assert(kFilterOrder + 1 == | ||||
|                   sizeof(kCoeffDenominator) / sizeof(kCoeffDenominator[0]), | ||||
|               "denominator coefficients incorrect size"); | ||||
|  | ||||
| }  // namespace webrtc | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_ | ||||
							
								
								
									
										138
									
								
								webrtc/modules/audio_processing/vad/vad_circular_buffer.cc
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										138
									
								
								webrtc/modules/audio_processing/vad/vad_circular_buffer.cc
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,138 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h" | ||||
|  | ||||
| #include <assert.h> | ||||
| #include <stdlib.h> | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // Allocates storage for |buffer_size| samples and starts out empty. | ||||
| // | ||||
| // The storage is value-initialized (all zeros) so that Oldest(), which reads | ||||
| // slot 0 while the buffer has not wrapped, returns 0.0 on a buffer that has | ||||
| // never been written instead of reading indeterminate memory. | ||||
| // | ||||
| // Create() rejects non-positive sizes before calling this private | ||||
| // constructor, so |buffer_size| is expected to be > 0 here. | ||||
| VadCircularBuffer::VadCircularBuffer(int buffer_size) | ||||
|     : buffer_(new double[buffer_size]()), | ||||
|       is_full_(false), | ||||
|       index_(0), | ||||
|       buffer_size_(buffer_size), | ||||
|       sum_(0) { | ||||
| } | ||||
|  | ||||
| // No explicit cleanup is performed here. | ||||
| // NOTE(review): |buffer_| is declared as rtc::scoped_ptr<double[]> in the | ||||
| // header, which presumably frees the array on destruction -- confirm. | ||||
| VadCircularBuffer::~VadCircularBuffer() {} | ||||
|  | ||||
| // Returns the buffer to its empty state: the running sum is cleared, the | ||||
| // write position rewinds to slot 0 and the wrapped flag is dropped. Stored | ||||
| // samples are not erased. | ||||
| void VadCircularBuffer::Reset() { | ||||
|   sum_ = 0; | ||||
|   index_ = 0; | ||||
|   is_full_ = false; | ||||
| } | ||||
|  | ||||
| // Factory: returns a newly allocated buffer with room for |buffer_size| | ||||
| // samples, or NULL when |buffer_size| is not positive. | ||||
| VadCircularBuffer* VadCircularBuffer::Create(int buffer_size) { | ||||
|   return (buffer_size > 0) ? new VadCircularBuffer(buffer_size) : NULL; | ||||
| } | ||||
|  | ||||
| // Returns the oldest stored sample. | ||||
| double VadCircularBuffer::Oldest() const { | ||||
|   // Before the first wrap-around the oldest sample sits in slot 0; afterwards | ||||
|   // it is the slot that the next Insert() will overwrite. | ||||
|   const int oldest_slot = is_full_ ? index_ : 0; | ||||
|   return buffer_[oldest_slot]; | ||||
| } | ||||
|  | ||||
| // Returns the running sum divided by the number of valid samples, or 0 when | ||||
| // nothing has been inserted yet. | ||||
| double VadCircularBuffer::Mean() { | ||||
|   if (is_full_) | ||||
|     return sum_ / buffer_size_; | ||||
|   if (index_ > 0) | ||||
|     return sum_ / index_; | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Stores |value| at the write position, keeps the running sum up to date and | ||||
| // advances the write position, wrapping at the end of the storage. | ||||
| void VadCircularBuffer::Insert(double value) { | ||||
|   // Once wrapped, the slot being written holds the oldest sample; take it out | ||||
|   // of the running sum before it is overwritten. | ||||
|   if (is_full_) | ||||
|     sum_ -= buffer_[index_]; | ||||
|   buffer_[index_] = value; | ||||
|   sum_ += value; | ||||
|   ++index_; | ||||
|   if (index_ < buffer_size_) | ||||
|     return; | ||||
|   // Reached the end of the storage: wrap around and mark the buffer as full. | ||||
|   index_ = 0; | ||||
|   is_full_ = true; | ||||
| } | ||||
|  | ||||
| // Number of valid samples: the full capacity once the buffer has wrapped, | ||||
| // otherwise the number of insertions since construction or the last Reset(). | ||||
| int VadCircularBuffer::BufferLevel() { | ||||
|   return is_full_ ? buffer_size_ : index_; | ||||
| } | ||||
|  | ||||
| // Reads the sample that is |index| insertions old (0 = most recent) into | ||||
| // |*value|. Returns 0 on success and -1 if |index| does not refer to a valid | ||||
| // sample, in which case |*value| is left untouched. | ||||
| int VadCircularBuffer::Get(int index, double* value) const { | ||||
|   if (ConvertToLinearIndex(&index) < 0) | ||||
|     return -1; | ||||
|   *value = buffer_[index]; | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Overwrites the sample that is |index| insertions old (0 = most recent) with | ||||
| // |value|. Returns 0 on success and -1 if |index| does not refer to a valid | ||||
| // sample. | ||||
| int VadCircularBuffer::Set(int index, double value) { | ||||
|   if (ConvertToLinearIndex(&index) < 0) | ||||
|     return -1; | ||||
|  | ||||
|   // Keep the running sum in step with the replaced sample. | ||||
|   sum_ -= buffer_[index]; | ||||
|   sum_ += value; | ||||
|   buffer_[index] = value; | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Translates an "age" index (0 = most recent insertion) into the matching | ||||
| // slot of the underlying storage, in place. Returns 0 on success; returns -1 | ||||
| // and leaves |*index| unchanged if the age is out of range. | ||||
| int VadCircularBuffer::ConvertToLinearIndex(int* index) const { | ||||
|   const int age = *index; | ||||
|   // Ages outside the storage size can never be valid. | ||||
|   if (age < 0 || age >= buffer_size_) | ||||
|     return -1; | ||||
|  | ||||
|   // Before the first wrap-around only |index_| samples have been written. | ||||
|   if (!is_full_ && age >= index_) | ||||
|     return -1; | ||||
|  | ||||
|   // Step back from the most recently written slot, wrapping below zero. | ||||
|   int slot = index_ - 1 - age; | ||||
|   if (slot < 0) | ||||
|     slot += buffer_size_; | ||||
|   *index = slot; | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| // Zeroes short runs of recent samples. | ||||
| // | ||||
| // Nothing happens unless the most recent sample is below |val_threshold|. In | ||||
| // that case the most recent sample is set to zero, and the samples of age | ||||
| // |width_threshold| + 1 down to 1 are scanned (oldest first) for the first one | ||||
| // below |val_threshold|; that sample and every newer one are set to zero. | ||||
| // | ||||
| // Returns 0 on success (including when there is not yet enough history) and | ||||
| // -1 if a sample could not be read or written. | ||||
| int VadCircularBuffer::RemoveTransient(int width_threshold, | ||||
|                                        double val_threshold) { | ||||
|   // Not enough history yet to make a decision. | ||||
|   if (!is_full_ && index_ < width_threshold + 2) | ||||
|     return 0; | ||||
|  | ||||
|   const int newest = 0; | ||||
|   const int farthest = width_threshold + 1; | ||||
|   double sample = 0; | ||||
|   if (Get(newest, &sample) < 0) | ||||
|     return -1; | ||||
|   if (!(sample < val_threshold)) | ||||
|     return 0; | ||||
|  | ||||
|   Set(newest, 0); | ||||
|   // Walk from the far end of the window towards the newest sample until a | ||||
|   // sample below the threshold is found. | ||||
|   int pos = farthest; | ||||
|   while (pos > newest) { | ||||
|     if (Get(pos, &sample) < 0) | ||||
|       return -1; | ||||
|     if (sample < val_threshold) | ||||
|       break; | ||||
|     --pos; | ||||
|   } | ||||
|   // Clear that sample and everything newer than it. | ||||
|   while (pos > newest) { | ||||
|     if (Set(pos, 0.0) < 0) | ||||
|       return -1; | ||||
|     --pos; | ||||
|   } | ||||
|   return 0; | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
							
								
								
									
										69
									
								
								webrtc/modules/audio_processing/vad/vad_circular_buffer.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										69
									
								
								webrtc/modules/audio_processing/vad/vad_circular_buffer.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,69 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ | ||||
|  | ||||
| #include "webrtc/base/scoped_ptr.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // A circular buffer tailored to the needs of this project. It stores the last | ||||
| // K samples of the input, and keeps track of the mean of the last samples. | ||||
| // | ||||
| // It is used in class "PitchBasedActivity" to keep track of posterior | ||||
| // probabilities in the past few seconds. The posterior probabilities are used | ||||
| // to recursively update prior probabilities. | ||||
| class VadCircularBuffer { | ||||
|  public: | ||||
|   // Returns a new buffer holding |buffer_size| samples, or NULL if | ||||
|   // |buffer_size| is not positive. | ||||
|   static VadCircularBuffer* Create(int buffer_size); | ||||
|   ~VadCircularBuffer(); | ||||
|  | ||||
|   // If buffer is wrapped around. | ||||
|   bool is_full() const { return is_full_; } | ||||
|   // Get the oldest entry in the buffer. | ||||
|   double Oldest() const; | ||||
|   // Insert new value into the buffer. | ||||
|   void Insert(double value); | ||||
|   // Reset buffer, forget the past, start fresh. | ||||
|   void Reset(); | ||||
|  | ||||
|   // The mean value of the elements in the buffer. The return value is zero if | ||||
|   // buffer is empty, i.e. no value is inserted. | ||||
|   double Mean(); | ||||
|   // Remove transients. If the values exceed |val_threshold| for a period | ||||
|   // shorter than or equal to |width_threshold|, then that period is considered | ||||
|   // transient and set to zero. Returns 0 on success and -1 on failure. | ||||
|   int RemoveTransient(int width_threshold, double val_threshold); | ||||
|  | ||||
|  private: | ||||
|   // Private: instances are obtained through Create(). | ||||
|   explicit VadCircularBuffer(int buffer_size); | ||||
|   // Get previous values. |index = 0| corresponds to the most recent | ||||
|   // insertion. |index = 1| is the one before the most recent insertion, and | ||||
|   // so on. Returns 0 on success and -1 if |index| is not valid. | ||||
|   int Get(int index, double* value) const; | ||||
|   // Set a given position to |value|. |index| is interpreted as above. | ||||
|   // Returns 0 on success and -1 if |index| is not valid. | ||||
|   int Set(int index, double value); | ||||
|   // Return the number of valid elements in the buffer. | ||||
|   int BufferLevel(); | ||||
|  | ||||
|   // Convert an index with the interpretation as in Get() to the | ||||
|   // corresponding linear index. Returns 0 on success and -1 if the index is | ||||
|   // out of range. | ||||
|   int ConvertToLinearIndex(int* index) const; | ||||
|  | ||||
|   // Sample storage with |buffer_size_| slots. | ||||
|   rtc::scoped_ptr<double[]> buffer_; | ||||
|   // True once the write position has wrapped around at least once. | ||||
|   bool is_full_; | ||||
|   // Slot that the next Insert() writes to. | ||||
|   int index_; | ||||
|   // Capacity of |buffer_| in samples. | ||||
|   int buffer_size_; | ||||
|   // Running sum of the valid samples; used by Mean(). | ||||
|   double sum_; | ||||
| }; | ||||
|  | ||||
| }  // namespace webrtc | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_ | ||||
| @@ -0,0 +1,85 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #include "webrtc/modules/audio_processing/vad/voice_activity_detector.h" | ||||
|  | ||||
| #include <algorithm> | ||||
|  | ||||
| #include "webrtc/base/checks.h" | ||||
|  | ||||
| namespace webrtc { | ||||
| namespace { | ||||
|  | ||||
| // Upper bound on the number of samples per chunk accepted by ProcessChunk(). | ||||
| // Chunks hold sample_rate_hz / 100 samples, so 320 corresponds to 32 kHz. | ||||
| const size_t kMaxLength = 320; | ||||
| // Channel count handed to the resampler. | ||||
| const int kNumChannels = 1; | ||||
|  | ||||
| // Initial value of |last_voice_probability_|, before any chunk has produced a | ||||
| // result. | ||||
| const double kDefaultVoiceValue = 1.0; | ||||
| // Value the per-chunk probabilities are filled with before the VADs refine | ||||
| // them. | ||||
| const double kNeutralProbability = 0.5; | ||||
| // Probability reported when the extracted features are flagged as silence. | ||||
| const double kLowProbability = 0.01; | ||||
|  | ||||
| }  // namespace | ||||
|  | ||||
| // Starts with the default voice probability and a freshly created | ||||
| // StandaloneVad. | ||||
| // NOTE(review): the result of StandaloneVad::Create() is not checked here -- | ||||
| // confirm whether it can return NULL. | ||||
| VoiceActivityDetector::VoiceActivityDetector() | ||||
|     : last_voice_probability_(kDefaultVoiceValue), | ||||
|       standalone_vad_(StandaloneVad::Create()) {} | ||||
|  | ||||
| // Processes one chunk of single-channel audio sampled at |sample_rate_hz|. | ||||
| // |length| must equal sample_rate_hz / 100 and must not exceed kMaxLength. | ||||
| // Audio that is not already at kSampleRateHz is resampled first. | ||||
| // | ||||
| // Because ISAC has a different chunk length, it updates | ||||
| // |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data. | ||||
| // Otherwise it clears them. | ||||
| void VoiceActivityDetector::ProcessChunk(const int16_t* audio, | ||||
|                                          size_t length, | ||||
|                                          int sample_rate_hz) { | ||||
|   RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100); | ||||
|   RTC_DCHECK_LE(length, kMaxLength); | ||||
|   // Resample to the required rate. | ||||
|   const int16_t* resampled_ptr = audio; | ||||
|   if (sample_rate_hz != kSampleRateHz) { | ||||
|     RTC_CHECK_EQ( | ||||
|         resampler_.ResetIfNeeded(sample_rate_hz, kSampleRateHz, kNumChannels), | ||||
|         0); | ||||
|     // |length| is also passed as the output-length argument, so after this | ||||
|     // call it holds the resampled sample count (verified below). A failed | ||||
|     // resample must not go unnoticed: |resampled_| only has room for | ||||
|     // kLength10Ms samples, and continuing with the original |length| would | ||||
|     // make the consumers below read past its end. | ||||
|     RTC_CHECK_EQ( | ||||
|         resampler_.Push(audio, length, resampled_, kLength10Ms, length), 0); | ||||
|     resampled_ptr = resampled_; | ||||
|   } | ||||
|   RTC_DCHECK_EQ(length, kLength10Ms); | ||||
|  | ||||
|   // Each chunk needs to be passed into |standalone_vad_|, because internally it | ||||
|   // buffers the audio and processes it all at once when GetActivity() is | ||||
|   // called. | ||||
|   RTC_CHECK_EQ(standalone_vad_->AddAudio(resampled_ptr, length), 0); | ||||
|  | ||||
|   audio_processing_.ExtractFeatures(resampled_ptr, length, &features_); | ||||
|  | ||||
|   // One entry per frame reported by the feature extractor; both vectors end | ||||
|   // up empty when no frame is available yet. | ||||
|   chunkwise_voice_probabilities_.resize(features_.num_frames); | ||||
|   chunkwise_rms_.resize(features_.num_frames); | ||||
|   std::copy(features_.rms, features_.rms + chunkwise_rms_.size(), | ||||
|             chunkwise_rms_.begin()); | ||||
|   if (features_.num_frames > 0) { | ||||
|     if (features_.silence) { | ||||
|       // The other features are invalid, so set the voice probabilities to an | ||||
|       // arbitrary low value. | ||||
|       std::fill(chunkwise_voice_probabilities_.begin(), | ||||
|                 chunkwise_voice_probabilities_.end(), kLowProbability); | ||||
|     } else { | ||||
|       // Start from a neutral value and let both VADs update it in turn. | ||||
|       std::fill(chunkwise_voice_probabilities_.begin(), | ||||
|                 chunkwise_voice_probabilities_.end(), kNeutralProbability); | ||||
|       RTC_CHECK_GE( | ||||
|           standalone_vad_->GetActivity(&chunkwise_voice_probabilities_[0], | ||||
|                                        chunkwise_voice_probabilities_.size()), | ||||
|           0); | ||||
|       RTC_CHECK_GE(pitch_based_vad_.VoicingProbability( | ||||
|                        features_, &chunkwise_voice_probabilities_[0]), | ||||
|                    0); | ||||
|     } | ||||
|     last_voice_probability_ = chunkwise_voice_probabilities_.back(); | ||||
|   } | ||||
| } | ||||
|  | ||||
| }  // namespace webrtc | ||||
| @@ -0,0 +1,70 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_ | ||||
|  | ||||
| #include <vector> | ||||
|  | ||||
| #include "webrtc/base/scoped_ptr.h" | ||||
| #include "webrtc/common_audio/resampler/include/resampler.h" | ||||
| #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" | ||||
| #include "webrtc/modules/audio_processing/vad/common.h" | ||||
| #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h" | ||||
| #include "webrtc/modules/audio_processing/vad/standalone_vad.h" | ||||
|  | ||||
| namespace webrtc { | ||||
|  | ||||
| // A Voice Activity Detector (VAD) that combines the voice probability from the | ||||
| // StandaloneVad and PitchBasedVad to get a more robust estimation. | ||||
| class VoiceActivityDetector { | ||||
|  public: | ||||
|   VoiceActivityDetector(); | ||||
|  | ||||
|   // Processes each audio chunk and estimates the voice probability. The maximum | ||||
|   // supported sample rate is 32kHz. | ||||
|   // |audio| points to |length| samples captured at |sample_rate_hz|; |length| | ||||
|   // is expected to equal sample_rate_hz / 100. | ||||
|   void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz); | ||||
|  | ||||
|   // Returns a vector of voice probabilities for each chunk. It can be empty for | ||||
|   // some chunks, but it catches up afterwards returning multiple values at | ||||
|   // once. | ||||
|   const std::vector<double>& chunkwise_voice_probabilities() const { | ||||
|     return chunkwise_voice_probabilities_; | ||||
|   } | ||||
|  | ||||
|   // Returns a vector of RMS values for each chunk. It has the same length as | ||||
|   // chunkwise_voice_probabilities(). | ||||
|   const std::vector<double>& chunkwise_rms() const { return chunkwise_rms_; } | ||||
|  | ||||
|   // Returns the last voice probability, regardless of the internal | ||||
|   // implementation, although it has a few chunks of delay. | ||||
|   float last_voice_probability() const { return last_voice_probability_; } | ||||
|  | ||||
|  private: | ||||
|   // TODO(aluebs): Change these to float. | ||||
|   std::vector<double> chunkwise_voice_probabilities_; | ||||
|   std::vector<double> chunkwise_rms_; | ||||
|  | ||||
|   // Most recent entry of |chunkwise_voice_probabilities_|; keeps its previous | ||||
|   // value while no new probabilities are produced. | ||||
|   float last_voice_probability_; | ||||
|  | ||||
|   // Converts input that is not at kSampleRateHz. | ||||
|   Resampler resampler_; | ||||
|   // Extracts |features_| from the (resampled) audio. | ||||
|   VadAudioProc audio_processing_; | ||||
|  | ||||
|   rtc::scoped_ptr<StandaloneVad> standalone_vad_; | ||||
|   PitchBasedVad pitch_based_vad_; | ||||
|  | ||||
|   // Output buffer of the resampler: one 10 ms chunk at kSampleRateHz. | ||||
|   int16_t resampled_[kLength10Ms]; | ||||
|   AudioFeatures features_; | ||||
| }; | ||||
|  | ||||
| }  // namespace webrtc | ||||
|  | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_ | ||||
							
								
								
									
										85
									
								
								webrtc/modules/audio_processing/vad/voice_gmm_tables.h
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										85
									
								
								webrtc/modules/audio_processing/vad/voice_gmm_tables.h
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,85 @@ | ||||
| /* | ||||
|  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. | ||||
|  * | ||||
|  *  Use of this source code is governed by a BSD-style license | ||||
|  *  that can be found in the LICENSE file in the root of the source | ||||
|  *  tree. An additional intellectual property rights grant can be found | ||||
|  *  in the file PATENTS.  All contributing project authors may | ||||
|  *  be found in the AUTHORS file in the root of the source tree. | ||||
|  */ | ||||
|  | ||||
| // GMM tables for active segments. Generated by MakeGmmTables.m. | ||||
|  | ||||
| #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ | ||||
| #define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ | ||||
|  | ||||
| // Dimensions of the tables in this header: the number of mixture components | ||||
| // and the size of each component's vector/matrix. | ||||
| static const int kVoiceGmmNumMixtures = 12; | ||||
| static const int kVoiceGmmDim = 3; | ||||
|  | ||||
| // One kVoiceGmmDim x kVoiceGmmDim matrix per mixture component. Every matrix | ||||
| // listed here is symmetric. | ||||
| // NOTE(review): going by the name these are the inverse covariance matrices of | ||||
| // the components -- confirm against MakeGmmTables.m. | ||||
| static const double | ||||
|     kVoiceGmmCovarInverse[kVoiceGmmNumMixtures][kVoiceGmmDim][kVoiceGmmDim] = { | ||||
|         {{1.83673825579513e+00, -8.09791637570095e-04, 4.60106414365986e-03}, | ||||
|          {-8.09791637570095e-04, 8.89351738394608e-04, -9.80188953277734e-04}, | ||||
|          {4.60106414365986e-03, -9.80188953277734e-04, 1.38706060206582e-03}}, | ||||
|         {{6.76228912850703e+01, -1.98893120119660e-02, -3.53548357253551e-03}, | ||||
|          {-1.98893120119660e-02, 3.96216858500530e-05, -4.08492938394097e-05}, | ||||
|          {-3.53548357253551e-03, -4.08492938394097e-05, 9.31864352856416e-04}}, | ||||
|         {{9.98612435944558e+00, -5.27880954316893e-03, -6.30342541619017e-03}, | ||||
|          {-5.27880954316893e-03, 4.54359480225226e-05, 6.30804591626044e-05}, | ||||
|          {-6.30342541619017e-03, 6.30804591626044e-05, 5.36466441382942e-04}}, | ||||
|         {{3.39917474216349e+01, -1.56213579433191e-03, -4.01459014990225e-02}, | ||||
|          {-1.56213579433191e-03, 6.40415424897724e-05, 6.20076342427833e-05}, | ||||
|          {-4.01459014990225e-02, 6.20076342427833e-05, 3.51199070103063e-03}}, | ||||
|         {{1.34545062271428e+01, -7.94513610147144e-03, -5.34401019341728e-02}, | ||||
|          {-7.94513610147144e-03, 1.16511820098649e-04, 4.66063702069293e-05}, | ||||
|          {-5.34401019341728e-02, 4.66063702069293e-05, 2.72354323774163e-03}}, | ||||
|         {{1.08557844314806e+02, -1.54885805673668e-02, -1.88029692674851e-02}, | ||||
|          {-1.54885805673668e-02, 1.16404042786406e-04, 6.45579292702802e-06}, | ||||
|          {-1.88029692674851e-02, 6.45579292702802e-06, 4.32330478391416e-04}}, | ||||
|         {{8.22940066541450e+01, -1.15903110231303e-02, -4.92166764865343e-02}, | ||||
|          {-1.15903110231303e-02, 7.42510742165261e-05, 3.73007314191290e-06}, | ||||
|          {-4.92166764865343e-02, 3.73007314191290e-06, 3.64005221593244e-03}}, | ||||
|         {{2.31133605685660e+00, -7.83261568950254e-04, 7.45744012346313e-04}, | ||||
|          {-7.83261568950254e-04, 1.29460648214142e-05, -2.22774455093730e-06}, | ||||
|          {7.45744012346313e-04, -2.22774455093730e-06, 1.05117294093010e-04}}, | ||||
|         {{3.78767849189611e+02, 1.57759761011568e-03, -2.08551217988774e-02}, | ||||
|          {1.57759761011568e-03, 4.76066236886865e-05, -2.33977412299324e-05}, | ||||
|          {-2.08551217988774e-02, -2.33977412299324e-05, 5.24261005371196e-04}}, | ||||
|         {{6.98580096506135e-01, -5.13850255217378e-04, -4.01124551717056e-04}, | ||||
|          {-5.13850255217378e-04, 1.40501021984840e-06, -2.09496928716569e-06}, | ||||
|          {-4.01124551717056e-04, -2.09496928716569e-06, 2.82879357740037e-04}}, | ||||
|         {{2.62770945162399e+00, -2.31825753241430e-03, -5.30447217466318e-03}, | ||||
|          {-2.31825753241430e-03, 4.59108572227649e-05, 7.67631886355405e-05}, | ||||
|          {-5.30447217466318e-03, 7.67631886355405e-05, 2.28521601674098e-03}}, | ||||
|         {{1.89940391362152e+02, -4.23280856852379e-03, -2.70608873541399e-02}, | ||||
|          {-4.23280856852379e-03, 6.77547582742563e-05, 2.69154203800467e-05}, | ||||
|          {-2.70608873541399e-02, 2.69154203800467e-05, 3.88574543373470e-03}}}; | ||||
|  | ||||
| // One kVoiceGmmDim-element vector per mixture component. | ||||
| // NOTE(review): going by the name these are the component means; which | ||||
| // feature each of the three columns corresponds to is not stated in this | ||||
| // header -- confirm against MakeGmmTables.m and the code that uses the table. | ||||
| static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = { | ||||
|     {-2.15020241646536e+00, 4.97079062999877e+02, 4.77078119504505e+02}, | ||||
|     {-8.92097680029190e-01, 5.92064964199921e+02, 1.81045145941059e+02}, | ||||
|     {-1.29435784144398e+00, 4.98450293410611e+02, 1.71991263804064e+02}, | ||||
|     {-1.03925228397884e+00, 4.99511274321571e+02, 1.05838336539105e+02}, | ||||
|     {-1.29229047206129e+00, 4.15026762566707e+02, 1.12861119017125e+02}, | ||||
|     {-7.88748114599810e-01, 4.48739336688113e+02, 1.89784216956337e+02}, | ||||
|     {-8.77777402332642e-01, 4.86620285054533e+02, 1.13477708016491e+02}, | ||||
|     {-2.06465957063057e+00, 6.33385049870607e+02, 2.32758546796149e+02}, | ||||
|     {-6.98893789231685e-01, 5.93622051503385e+02, 1.92536982473203e+02}, | ||||
|     {-2.55901217508894e+00, 1.55914919756205e+03, 1.39769980835570e+02}, | ||||
|     {-1.92070024165837e+00, 4.87983940444185e+02, 1.02745468128289e+02}, | ||||
|     {-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}}; | ||||
|  | ||||
| // One scalar per mixture component. | ||||
| // NOTE(review): all entries are negative, so these are presumably stored in | ||||
| // the log domain rather than as raw mixture weights -- confirm against | ||||
| // MakeGmmTables.m and the GMM evaluation code. | ||||
| static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = { | ||||
|     -1.39789694361035e+01, | ||||
|     -1.19527720202104e+01, | ||||
|     -1.32396317929055e+01, | ||||
|     -1.09436815209238e+01, | ||||
|     -1.13440027478149e+01, | ||||
|     -1.12200721834504e+01, | ||||
|     -1.02537324043693e+01, | ||||
|     -1.60789861938302e+01, | ||||
|     -1.03394494048344e+01, | ||||
|     -1.83207938586818e+01, | ||||
|     -1.31186044948288e+01, | ||||
|     -9.52479998673554e+00}; | ||||
| #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_ | ||||
		Reference in New Issue
	
	Block a user