Update to current webrtc library
This is from the upstream library commit id 3326535126e435f1ba647885ce43a8f0f3d317eb, corresponding to Chromium 88.0.4290.1.
This commit is contained in:
69
webrtc/modules/audio_processing/vad/BUILD.gn
Normal file
69
webrtc/modules/audio_processing/vad/BUILD.gn
Normal file
@ -0,0 +1,69 @@
|
||||
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
||||
#
|
||||
# Use of this source code is governed by a BSD-style license
|
||||
# that can be found in the LICENSE file in the root of the source
|
||||
# tree. An additional intellectual property rights grant can be found
|
||||
# in the file PATENTS. All contributing project authors may
|
||||
# be found in the AUTHORS file in the root of the source tree.
|
||||
|
||||
import("../../../webrtc.gni")
|
||||
rtc_library("vad") {
|
||||
visibility = [
|
||||
"../*",
|
||||
"../../../rtc_tools:*",
|
||||
]
|
||||
sources = [
|
||||
"common.h",
|
||||
"gmm.cc",
|
||||
"gmm.h",
|
||||
"noise_gmm_tables.h",
|
||||
"pitch_based_vad.cc",
|
||||
"pitch_based_vad.h",
|
||||
"pitch_internal.cc",
|
||||
"pitch_internal.h",
|
||||
"pole_zero_filter.cc",
|
||||
"pole_zero_filter.h",
|
||||
"standalone_vad.cc",
|
||||
"standalone_vad.h",
|
||||
"vad_audio_proc.cc",
|
||||
"vad_audio_proc.h",
|
||||
"vad_audio_proc_internal.h",
|
||||
"vad_circular_buffer.cc",
|
||||
"vad_circular_buffer.h",
|
||||
"voice_activity_detector.cc",
|
||||
"voice_activity_detector.h",
|
||||
"voice_gmm_tables.h",
|
||||
]
|
||||
deps = [
|
||||
"../../../audio/utility:audio_frame_operations",
|
||||
"../../../common_audio",
|
||||
"../../../common_audio:common_audio_c",
|
||||
"../../../common_audio/third_party/ooura:fft_size_256",
|
||||
"../../../rtc_base:checks",
|
||||
"../../audio_coding:isac_vad",
|
||||
]
|
||||
}
|
||||
|
||||
if (rtc_include_tests) {
|
||||
rtc_library("vad_unittests") {
|
||||
testonly = true
|
||||
sources = [
|
||||
"gmm_unittest.cc",
|
||||
"pitch_based_vad_unittest.cc",
|
||||
"pitch_internal_unittest.cc",
|
||||
"pole_zero_filter_unittest.cc",
|
||||
"standalone_vad_unittest.cc",
|
||||
"vad_audio_proc_unittest.cc",
|
||||
"vad_circular_buffer_unittest.cc",
|
||||
"voice_activity_detector_unittest.cc",
|
||||
]
|
||||
deps = [
|
||||
":vad",
|
||||
"../../../common_audio",
|
||||
"../../../test:fileutils",
|
||||
"../../../test:test_support",
|
||||
"//testing/gmock",
|
||||
"//testing/gtest",
|
||||
]
|
||||
}
|
||||
}
|
@ -8,8 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
static const int kSampleRateHz = 16000;
|
||||
static const size_t kLength10Ms = kSampleRateHz / 100;
|
||||
@ -24,4 +26,4 @@ struct AudioFeatures {
|
||||
bool silence;
|
||||
};
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
|
||||
|
@ -8,12 +8,9 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/gmm.h"
|
||||
#include "modules/audio_processing/vad/gmm.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
|
@ -8,8 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -42,4 +42,4 @@ struct GmmParameters {
|
||||
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_GMM_H_
|
||||
|
@ -10,8 +10,10 @@
|
||||
|
||||
// GMM tables for inactive segments. Generated by MakeGmmTables.m.
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kNoiseGmmNumMixtures = 12;
|
||||
static const int kNoiseGmmDim = 3;
|
||||
@ -70,16 +72,11 @@ static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = {
|
||||
{-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
|
||||
|
||||
static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
|
||||
-1.09422832086193e+01,
|
||||
-1.10847897513425e+01,
|
||||
-1.36767587732187e+01,
|
||||
-1.79789356118641e+01,
|
||||
-1.42830169160894e+01,
|
||||
-1.56500228061379e+01,
|
||||
-1.83124990950113e+01,
|
||||
-1.69979436177477e+01,
|
||||
-1.12329424387828e+01,
|
||||
-1.41311785780639e+01,
|
||||
-1.47171861448585e+01,
|
||||
-1.35963362781839e+01};
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
-1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
|
||||
-1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
|
||||
-1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
|
||||
-1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
|
||||
|
@ -8,17 +8,14 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
|
||||
#include "modules/audio_processing/vad/pitch_based_vad.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
|
||||
#include "webrtc/modules/audio_processing/vad/common.h"
|
||||
#include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
|
||||
#include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
#include "modules/audio_processing/vad/noise_gmm_tables.h"
|
||||
#include "modules/audio_processing/vad/vad_circular_buffer.h"
|
||||
#include "modules/audio_processing/vad/voice_gmm_tables.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -60,8 +57,7 @@ PitchBasedVad::PitchBasedVad()
|
||||
voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
|
||||
}
|
||||
|
||||
PitchBasedVad::~PitchBasedVad() {
|
||||
}
|
||||
PitchBasedVad::~PitchBasedVad() {}
|
||||
|
||||
int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
|
||||
double* p_combined) {
|
||||
|
@ -8,17 +8,16 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/vad/common.h"
|
||||
#include "webrtc/modules/audio_processing/vad/gmm.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
#include "modules/audio_processing/vad/gmm.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class VadCircularBuffer;
|
||||
|
||||
// Computes the probability of the input audio frame to be active given
|
||||
@ -50,8 +49,9 @@ class PitchBasedVad {
|
||||
|
||||
double p_prior_;
|
||||
|
||||
rtc::scoped_ptr<VadCircularBuffer> circular_buffer_;
|
||||
std::unique_ptr<VadCircularBuffer> circular_buffer_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
|
||||
|
@ -8,10 +8,12 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/pitch_internal.h"
|
||||
#include "modules/audio_processing/vad/pitch_internal.h"
|
||||
|
||||
#include <cmath>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// A 4-to-3 linear interpolation.
|
||||
// The interpolation constants are derived as follows:
|
||||
// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
|
||||
@ -49,3 +51,5 @@ void GetSubframesPitchParameters(int sampling_rate_hz,
|
||||
pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]);
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@ -8,8 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// TODO(turajs): Write a description of this function. Also be consistent with
|
||||
// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
|
||||
@ -23,4 +25,6 @@ void GetSubframesPitchParameters(int sampling_rate_hz,
|
||||
double* log_pitch_gain,
|
||||
double* pitch_lag_hz);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
|
||||
|
@ -8,10 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
|
||||
#include "modules/audio_processing/vad/pole_zero_filter.h"
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
namespace webrtc {
|
||||
@ -53,7 +53,8 @@ PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
static float FilterArPast(const T* past, size_t order,
|
||||
static float FilterArPast(const T* past,
|
||||
size_t order,
|
||||
const float* coefficients) {
|
||||
float sum = 0.0f;
|
||||
size_t past_index = order - 1;
|
||||
|
@ -8,12 +8,11 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
|
||||
#include <cstddef>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -49,4 +48,4 @@ class PoleZeroFilter {
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
|
||||
|
@ -8,21 +8,19 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/standalone_vad.h"
|
||||
#include "modules/audio_processing/vad/standalone_vad.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include "common_audio/vad/include/webrtc_vad.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kDefaultStandaloneVadMode = 3;
|
||||
|
||||
StandaloneVad::StandaloneVad(VadInst* vad)
|
||||
: vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {
|
||||
}
|
||||
: vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {}
|
||||
|
||||
StandaloneVad::~StandaloneVad() {
|
||||
WebRtcVad_Free(vad_);
|
||||
@ -64,7 +62,7 @@ int StandaloneVad::GetActivity(double* p, size_t length_p) {
|
||||
const size_t num_frames = index_ / kLength10Ms;
|
||||
if (num_frames > length_p)
|
||||
return -1;
|
||||
assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0);
|
||||
RTC_DCHECK_EQ(0, WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_));
|
||||
|
||||
int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
|
||||
if (activity < 0)
|
||||
|
@ -8,18 +8,17 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/vad/common.h"
|
||||
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "common_audio/vad/include/webrtc_vad.h"
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
|
||||
class StandaloneVad {
|
||||
public:
|
||||
static StandaloneVad* Create();
|
||||
@ -67,4 +66,4 @@ class StandaloneVad {
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
|
||||
|
@ -8,22 +8,23 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
|
||||
#include "modules/audio_processing/vad/vad_audio_proc.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/common_audio/fft4g.h"
|
||||
#include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h"
|
||||
#include "webrtc/modules/audio_processing/vad/pitch_internal.h"
|
||||
#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
|
||||
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
|
||||
#include "modules/audio_processing/vad/pitch_internal.h"
|
||||
#include "modules/audio_processing/vad/pole_zero_filter.h"
|
||||
#include "modules/audio_processing/vad/vad_audio_proc_internal.h"
|
||||
#include "rtc_base/checks.h"
|
||||
extern "C" {
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
|
||||
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
|
||||
}
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -32,9 +33,9 @@ namespace webrtc {
|
||||
struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
|
||||
struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
|
||||
|
||||
static const float kFrequencyResolution =
|
||||
static constexpr float kFrequencyResolution =
|
||||
kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);
|
||||
static const int kSilenceRms = 5;
|
||||
static constexpr int kSilenceRms = 5;
|
||||
|
||||
// TODO(turajs): Make a Create or Init for VadAudioProc.
|
||||
VadAudioProc::VadAudioProc()
|
||||
@ -66,8 +67,7 @@ VadAudioProc::VadAudioProc()
|
||||
WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
|
||||
}
|
||||
|
||||
VadAudioProc::~VadAudioProc() {
|
||||
}
|
||||
VadAudioProc::~VadAudioProc() {}
|
||||
|
||||
void VadAudioProc::ResetBuffer() {
|
||||
memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
|
||||
@ -95,7 +95,7 @@ int VadAudioProc::ExtractFeatures(const int16_t* frame,
|
||||
if (num_buffer_samples_ < kBufferLength) {
|
||||
return 0;
|
||||
}
|
||||
assert(num_buffer_samples_ == kBufferLength);
|
||||
RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength);
|
||||
features->num_frames = kNum10msSubframes;
|
||||
features->silence = false;
|
||||
|
||||
@ -121,7 +121,7 @@ int VadAudioProc::ExtractFeatures(const int16_t* frame,
|
||||
void VadAudioProc::SubframeCorrelation(double* corr,
|
||||
size_t length_corr,
|
||||
size_t subframe_index) {
|
||||
assert(length_corr >= kLpcOrder + 1);
|
||||
RTC_DCHECK_GE(length_corr, kLpcOrder + 1);
|
||||
double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
|
||||
size_t buffer_index = subframe_index * kNumSubframeSamples;
|
||||
|
||||
@ -137,7 +137,7 @@ void VadAudioProc::SubframeCorrelation(double* corr,
|
||||
// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
|
||||
// first half of each 10 ms subframe.
|
||||
void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
|
||||
assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
|
||||
RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1));
|
||||
double corr[kLpcOrder + 1];
|
||||
double reflec_coeff[kLpcOrder];
|
||||
for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
|
||||
@ -165,7 +165,7 @@ static float QuadraticInterpolation(float prev_val,
|
||||
|
||||
fractional_index =
|
||||
-(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
|
||||
assert(fabs(fractional_index) < 1);
|
||||
RTC_DCHECK_LT(fabs(fractional_index), 1);
|
||||
return fractional_index;
|
||||
}
|
||||
|
||||
@ -176,7 +176,7 @@ static float QuadraticInterpolation(float prev_val,
|
||||
// to save on one square root.
|
||||
void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
|
||||
size_t length_f_peak) {
|
||||
assert(length_f_peak >= kNum10msSubframes);
|
||||
RTC_DCHECK_GE(length_f_peak, kNum10msSubframes);
|
||||
double lpc[kNum10msSubframes * (kLpcOrder + 1)];
|
||||
// For all sub-frames.
|
||||
GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
|
||||
@ -232,7 +232,7 @@ void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
|
||||
size_t length) {
|
||||
// TODO(turajs): This can be "imported" from iSAC, along with the next two
|
||||
// constants.
|
||||
assert(length >= kNum10msSubframes);
|
||||
RTC_DCHECK_GE(length, kNum10msSubframes);
|
||||
const int kNumPitchSubframes = 4;
|
||||
double gains[kNumPitchSubframes];
|
||||
double lags[kNumPitchSubframes];
|
||||
@ -262,7 +262,7 @@ void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
|
||||
}
|
||||
|
||||
void VadAudioProc::Rms(double* rms, size_t length_rms) {
|
||||
assert(length_rms >= kNum10msSubframes);
|
||||
RTC_DCHECK_GE(length_rms, kNum10msSubframes);
|
||||
size_t offset = kNumPastSignalSamples;
|
||||
for (size_t i = 0; i < kNum10msSubframes; i++) {
|
||||
rms[i] = 0;
|
||||
|
@ -8,16 +8,18 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/vad/common.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class PoleZeroFilter;
|
||||
|
||||
class VadAudioProc {
|
||||
@ -49,25 +51,28 @@ class VadAudioProc {
|
||||
// For every 30 ms we compute 3 spectral peaks, and therefore 3 LPC analyses.
|
||||
// LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
|
||||
// we need 5 ms of past signal to create the input of LPC analysis.
|
||||
static const size_t kNumPastSignalSamples =
|
||||
static_cast<size_t>(kSampleRateHz / 200);
|
||||
enum : size_t {
|
||||
kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
|
||||
};
|
||||
|
||||
// TODO(turajs): maybe defining this at a higher level (maybe enum) so that
|
||||
// all the code recognizes it as "no-error."
|
||||
static const int kNoError = 0;
|
||||
enum : int { kNoError = 0 };
|
||||
|
||||
static const size_t kNum10msSubframes = 3;
|
||||
static const size_t kNumSubframeSamples =
|
||||
static_cast<size_t>(kSampleRateHz / 100);
|
||||
static const size_t kNumSamplesToProcess =
|
||||
kNum10msSubframes *
|
||||
kNumSubframeSamples; // Samples in 30 ms @ given sampling rate.
|
||||
static const size_t kBufferLength =
|
||||
kNumPastSignalSamples + kNumSamplesToProcess;
|
||||
static const size_t kIpLength = kDftSize >> 1;
|
||||
static const size_t kWLength = kDftSize >> 1;
|
||||
|
||||
static const size_t kLpcOrder = 16;
|
||||
enum : size_t { kNum10msSubframes = 3 };
|
||||
enum : size_t {
|
||||
kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
|
||||
};
|
||||
enum : size_t {
|
||||
// Samples in 30 ms @ given sampling rate.
|
||||
kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
|
||||
};
|
||||
enum : size_t {
|
||||
kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
|
||||
};
|
||||
enum : size_t { kIpLength = kDftSize >> 1 };
|
||||
enum : size_t { kWLength = kDftSize >> 1 };
|
||||
enum : size_t { kLpcOrder = 16 };
|
||||
|
||||
size_t ip_[kIpLength];
|
||||
float w_fft_[kWLength];
|
||||
@ -79,11 +84,11 @@ class VadAudioProc {
|
||||
double log_old_gain_;
|
||||
double old_lag_;
|
||||
|
||||
rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
|
||||
rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_;
|
||||
rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_;
|
||||
std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
|
||||
std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
|
||||
std::unique_ptr<PoleZeroFilter> high_pass_filter_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
|
||||
|
@ -8,29 +8,16 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// These values should match MATLAB counterparts for unit-tests to pass.
|
||||
static const double kCorrWeight[] = {1.000000,
|
||||
0.985000,
|
||||
0.970225,
|
||||
0.955672,
|
||||
0.941337,
|
||||
0.927217,
|
||||
0.913308,
|
||||
0.899609,
|
||||
0.886115,
|
||||
0.872823,
|
||||
0.859730,
|
||||
0.846834,
|
||||
0.834132,
|
||||
0.821620,
|
||||
0.809296,
|
||||
0.797156,
|
||||
0.785199};
|
||||
static const double kCorrWeight[] = {
|
||||
1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217,
|
||||
0.913308, 0.899609, 0.886115, 0.872823, 0.859730, 0.846834,
|
||||
0.834132, 0.821620, 0.809296, 0.797156, 0.785199};
|
||||
|
||||
static const double kLpcAnalWin[] = {
|
||||
0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639,
|
||||
@ -75,11 +62,9 @@ static const double kLpcAnalWin[] = {
|
||||
0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000};
|
||||
|
||||
static const size_t kFilterOrder = 2;
|
||||
static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f,
|
||||
-1.949650f,
|
||||
static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f,
|
||||
0.974827f};
|
||||
static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f,
|
||||
-1.971999f,
|
||||
static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f,
|
||||
0.972457f};
|
||||
|
||||
static_assert(kFilterOrder + 1 ==
|
||||
@ -91,4 +76,4 @@ static_assert(kFilterOrder + 1 ==
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROCESSING_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
|
||||
|
@ -8,9 +8,8 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
|
||||
#include "modules/audio_processing/vad/vad_circular_buffer.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace webrtc {
|
||||
@ -20,11 +19,9 @@ VadCircularBuffer::VadCircularBuffer(int buffer_size)
|
||||
is_full_(false),
|
||||
index_(0),
|
||||
buffer_size_(buffer_size),
|
||||
sum_(0) {
|
||||
}
|
||||
sum_(0) {}
|
||||
|
||||
VadCircularBuffer::~VadCircularBuffer() {
|
||||
}
|
||||
VadCircularBuffer::~VadCircularBuffer() {}
|
||||
|
||||
void VadCircularBuffer::Reset() {
|
||||
is_full_ = false;
|
||||
|
@ -8,10 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include <memory>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -58,7 +58,7 @@ class VadCircularBuffer {
|
||||
// corresponding linear index.
|
||||
int ConvertToLinearIndex(int* index) const;
|
||||
|
||||
rtc::scoped_ptr<double[]> buffer_;
|
||||
std::unique_ptr<double[]> buffer_;
|
||||
bool is_full_;
|
||||
int index_;
|
||||
int buffer_size_;
|
||||
@ -66,4 +66,4 @@ class VadCircularBuffer {
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
|
||||
|
@ -8,17 +8,16 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
|
||||
#include "modules/audio_processing/vad/voice_activity_detector.h"
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
|
||||
const size_t kMaxLength = 320;
|
||||
const int kNumChannels = 1;
|
||||
const size_t kNumChannels = 1;
|
||||
|
||||
const double kDefaultVoiceValue = 1.0;
|
||||
const double kNeutralProbability = 0.5;
|
||||
@ -28,8 +27,9 @@ const double kLowProbability = 0.01;
|
||||
|
||||
VoiceActivityDetector::VoiceActivityDetector()
|
||||
: last_voice_probability_(kDefaultVoiceValue),
|
||||
standalone_vad_(StandaloneVad::Create()) {
|
||||
}
|
||||
standalone_vad_(StandaloneVad::Create()) {}
|
||||
|
||||
VoiceActivityDetector::~VoiceActivityDetector() = default;
|
||||
|
||||
// Because ISAC has a different chunk length, it updates
|
||||
// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
|
||||
@ -37,8 +37,7 @@ VoiceActivityDetector::VoiceActivityDetector()
|
||||
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
|
||||
size_t length,
|
||||
int sample_rate_hz) {
|
||||
RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100);
|
||||
RTC_DCHECK_LE(length, kMaxLength);
|
||||
RTC_DCHECK_EQ(length, sample_rate_hz / 100);
|
||||
// Resample to the required rate.
|
||||
const int16_t* resampled_ptr = audio;
|
||||
if (sample_rate_hz != kSampleRateHz) {
|
||||
|
@ -8,17 +8,20 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/common_audio/resampler/include/resampler.h"
|
||||
#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
|
||||
#include "webrtc/modules/audio_processing/vad/common.h"
|
||||
#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
|
||||
#include "webrtc/modules/audio_processing/vad/standalone_vad.h"
|
||||
#include "common_audio/resampler/include/resampler.h"
|
||||
#include "modules/audio_processing/vad/common.h"
|
||||
#include "modules/audio_processing/vad/pitch_based_vad.h"
|
||||
#include "modules/audio_processing/vad/standalone_vad.h"
|
||||
#include "modules/audio_processing/vad/vad_audio_proc.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
@ -27,10 +30,9 @@ namespace webrtc {
|
||||
class VoiceActivityDetector {
|
||||
public:
|
||||
VoiceActivityDetector();
|
||||
~VoiceActivityDetector();
|
||||
|
||||
// Processes each audio chunk and estimates the voice probability. The maximum
|
||||
// supported sample rate is 32kHz.
|
||||
// TODO(aluebs): Change |length| to size_t.
|
||||
// Processes each audio chunk and estimates the voice probability.
|
||||
void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
|
||||
|
||||
// Returns a vector of voice probabilities for each chunk. It can be empty for
|
||||
@ -58,7 +60,7 @@ class VoiceActivityDetector {
|
||||
Resampler resampler_;
|
||||
VadAudioProc audio_processing_;
|
||||
|
||||
rtc::scoped_ptr<StandaloneVad> standalone_vad_;
|
||||
std::unique_ptr<StandaloneVad> standalone_vad_;
|
||||
PitchBasedVad pitch_based_vad_;
|
||||
|
||||
int16_t resampled_[kLength10Ms];
|
||||
@ -67,4 +69,4 @@ class VoiceActivityDetector {
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
|
||||
|
@ -10,8 +10,8 @@
|
||||
|
||||
// GMM tables for active segments. Generated by MakeGmmTables.m.
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
|
||||
static const int kVoiceGmmNumMixtures = 12;
|
||||
static const int kVoiceGmmDim = 3;
|
||||
@ -70,16 +70,8 @@ static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
|
||||
{-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
|
||||
|
||||
static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
|
||||
-1.39789694361035e+01,
|
||||
-1.19527720202104e+01,
|
||||
-1.32396317929055e+01,
|
||||
-1.09436815209238e+01,
|
||||
-1.13440027478149e+01,
|
||||
-1.12200721834504e+01,
|
||||
-1.02537324043693e+01,
|
||||
-1.60789861938302e+01,
|
||||
-1.03394494048344e+01,
|
||||
-1.83207938586818e+01,
|
||||
-1.31186044948288e+01,
|
||||
-9.52479998673554e+00};
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
-1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
|
||||
-1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
|
||||
-1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
|
||||
-1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
|
||||
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
|
||||
|
Reference in New Issue
Block a user