Update to current webrtc library

This is from the upstream library commit id
3326535126e435f1ba647885ce43a8f0f3d317eb, corresponding to Chromium
88.0.4290.1.
This commit is contained in:
Arun Raghavan
2020-10-12 18:08:02 -04:00
parent b1b02581d3
commit bcec8b0b21
859 changed files with 76187 additions and 49580 deletions

View File

@ -0,0 +1,69 @@
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("../../../webrtc.gni")
# Library target built from the sources of this directory. Judging by the
# file names it bundles the GMM evaluation code and tables, the pitch-based
# VAD, the pole-zero filter, the standalone VAD wrapper, the audio feature
# extraction, a circular buffer and the top-level voice activity detector.
rtc_library("vad") {
  # Only targets in the parent directory tree, and targets declared in the
  # rtc_tools build file, are allowed to depend on this library.
  visibility = [
    "../*",
    "../../../rtc_tools:*",
  ]

  # Headers and implementation files compiled into the library.
  sources = [
    "common.h",
    "gmm.cc",
    "gmm.h",
    "noise_gmm_tables.h",
    "pitch_based_vad.cc",
    "pitch_based_vad.h",
    "pitch_internal.cc",
    "pitch_internal.h",
    "pole_zero_filter.cc",
    "pole_zero_filter.h",
    "standalone_vad.cc",
    "standalone_vad.h",
    "vad_audio_proc.cc",
    "vad_audio_proc.h",
    "vad_audio_proc_internal.h",
    "vad_circular_buffer.cc",
    "vad_circular_buffer.h",
    "voice_activity_detector.cc",
    "voice_activity_detector.h",
    "voice_gmm_tables.h",
  ]

  # Targets this library links against. fft_size_256 supplies the fft4g.h
  # header, rtc_base:checks the RTC_DCHECK macros, and isac_vad the iSAC
  # headers that vad_audio_proc.cc includes.
  deps = [
    "../../../audio/utility:audio_frame_operations",
    "../../../common_audio",
    "../../../common_audio:common_audio_c",
    "../../../common_audio/third_party/ooura:fft_size_256",
    "../../../rtc_base:checks",
    "../../audio_coding:isac_vad",
  ]
}
# The unit-test target is only declared when the build is configured with
# rtc_include_tests.
if (rtc_include_tests) {
  rtc_library("vad_unittests") {
    # Marks the target as test-only, so non-test targets cannot depend on it.
    testonly = true

    # One *_unittest.cc file per component of the ":vad" library.
    sources = [
      "gmm_unittest.cc",
      "pitch_based_vad_unittest.cc",
      "pitch_internal_unittest.cc",
      "pole_zero_filter_unittest.cc",
      "standalone_vad_unittest.cc",
      "vad_audio_proc_unittest.cc",
      "vad_circular_buffer_unittest.cc",
      "voice_activity_detector_unittest.cc",
    ]

    # The library under test, followed by the test helper targets and the
    # gmock/gtest frameworks.
    deps = [
      ":vad",
      "../../../common_audio",
      "../../../test:fileutils",
      "../../../test:test_support",
      "//testing/gmock",
      "//testing/gtest",
    ]
  }
}

View File

@ -8,8 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
#define MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
#include <stddef.h>
static const int kSampleRateHz = 16000;
static const size_t kLength10Ms = kSampleRateHz / 100;
@ -24,4 +26,4 @@ struct AudioFeatures {
bool silence;
};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_COMMON_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_COMMON_H_

View File

@ -8,12 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/gmm.h"
#include "modules/audio_processing/vad/gmm.h"
#include <math.h>
#include <stdlib.h>
#include "webrtc/typedefs.h"
namespace webrtc {

View File

@ -8,8 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_GMM_H_
#define MODULES_AUDIO_PROCESSING_VAD_GMM_H_
namespace webrtc {
@ -42,4 +42,4 @@ struct GmmParameters {
double EvaluateGmm(const double* x, const GmmParameters& gmm_parameters);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_GMM_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_GMM_H_

View File

@ -10,8 +10,10 @@
// GMM tables for inactive segments. Generated by MakeGmmTables.m.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
#define MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
namespace webrtc {
static const int kNoiseGmmNumMixtures = 12;
static const int kNoiseGmmDim = 3;
@ -70,16 +72,11 @@ static const double kNoiseGmmMean[kNoiseGmmNumMixtures][kNoiseGmmDim] = {
{-2.30193040814533e+00, 1.43953696546439e+03, 7.04085275122649e+01}};
static const double kNoiseGmmWeights[kNoiseGmmNumMixtures] = {
-1.09422832086193e+01,
-1.10847897513425e+01,
-1.36767587732187e+01,
-1.79789356118641e+01,
-1.42830169160894e+01,
-1.56500228061379e+01,
-1.83124990950113e+01,
-1.69979436177477e+01,
-1.12329424387828e+01,
-1.41311785780639e+01,
-1.47171861448585e+01,
-1.35963362781839e+01};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_
-1.09422832086193e+01, -1.10847897513425e+01, -1.36767587732187e+01,
-1.79789356118641e+01, -1.42830169160894e+01, -1.56500228061379e+01,
-1.83124990950113e+01, -1.69979436177477e+01, -1.12329424387828e+01,
-1.41311785780639e+01, -1.47171861448585e+01, -1.35963362781839e+01};
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_NOISE_GMM_TABLES_H_

View File

@ -8,17 +8,14 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
#include "modules/audio_processing/vad/pitch_based_vad.h"
#include <assert.h>
#include <math.h>
#include <string.h>
#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
#include "webrtc/modules/audio_processing/vad/common.h"
#include "webrtc/modules/audio_processing/vad/noise_gmm_tables.h"
#include "webrtc/modules/audio_processing/vad/voice_gmm_tables.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/noise_gmm_tables.h"
#include "modules/audio_processing/vad/vad_circular_buffer.h"
#include "modules/audio_processing/vad/voice_gmm_tables.h"
namespace webrtc {
@ -60,8 +57,7 @@ PitchBasedVad::PitchBasedVad()
voice_gmm_.covar_inverse = &kVoiceGmmCovarInverse[0][0][0];
}
PitchBasedVad::~PitchBasedVad() {
}
PitchBasedVad::~PitchBasedVad() {}
int PitchBasedVad::VoicingProbability(const AudioFeatures& features,
double* p_combined) {

View File

@ -8,17 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/vad/common.h"
#include "webrtc/modules/audio_processing/vad/gmm.h"
#include "webrtc/typedefs.h"
#include <memory>
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/gmm.h"
namespace webrtc {
class AudioFrame;
class VadCircularBuffer;
// Computes the probability of the input audio frame to be active given
@ -50,8 +49,9 @@ class PitchBasedVad {
double p_prior_;
rtc::scoped_ptr<VadCircularBuffer> circular_buffer_;
std::unique_ptr<VadCircularBuffer> circular_buffer_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_BASED_VAD_H_

View File

@ -8,10 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/pitch_internal.h"
#include "modules/audio_processing/vad/pitch_internal.h"
#include <cmath>
namespace webrtc {
// A 4-to-3 linear interpolation.
// The interpolation constants are derived as follows:
// Input pitch parameters are updated every 7.5 ms. Within a 30-ms interval
@ -49,3 +51,5 @@ void GetSubframesPitchParameters(int sampling_rate_hz,
pitch_lag_hz[n] = (sampling_rate_hz) / (pitch_lag_hz[n]);
}
}
} // namespace webrtc

View File

@ -8,8 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
#define MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
namespace webrtc {
// TODO(turajs): Write a description of this function. Also be consistent with
// usage of |sampling_rate_hz| vs |kSamplingFreqHz|.
@ -23,4 +25,6 @@ void GetSubframesPitchParameters(int sampling_rate_hz,
double* log_pitch_gain,
double* pitch_lag_hz);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_
} // namespace webrtc
#endif // MODULES_AUDIO_PROCESSING_VAD_PITCH_INTERNAL_H_

View File

@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
#include "modules/audio_processing/vad/pole_zero_filter.h"
#include <stdlib.h>
#include <string.h>
#include <algorithm>
namespace webrtc {
@ -53,7 +53,8 @@ PoleZeroFilter::PoleZeroFilter(const float* numerator_coefficients,
}
template <typename T>
static float FilterArPast(const T* past, size_t order,
static float FilterArPast(const T* past,
size_t order,
const float* coefficients) {
float sum = 0.0f;
size_t past_index = order - 1;

View File

@ -8,12 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
#define MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
#include <cstddef>
#include "webrtc/typedefs.h"
#include <stddef.h>
#include <stdint.h>
namespace webrtc {
@ -49,4 +48,4 @@ class PoleZeroFilter {
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_POLE_ZERO_FILTER_H_

View File

@ -8,21 +8,19 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/standalone_vad.h"
#include "modules/audio_processing/vad/standalone_vad.h"
#include <assert.h>
#include <string.h>
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/modules/utility/interface/audio_frame_operations.h"
#include "webrtc/typedefs.h"
#include "common_audio/vad/include/webrtc_vad.h"
#include "rtc_base/checks.h"
namespace webrtc {
static const int kDefaultStandaloneVadMode = 3;
StandaloneVad::StandaloneVad(VadInst* vad)
: vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {
}
: vad_(vad), buffer_(), index_(0), mode_(kDefaultStandaloneVadMode) {}
StandaloneVad::~StandaloneVad() {
WebRtcVad_Free(vad_);
@ -64,7 +62,7 @@ int StandaloneVad::GetActivity(double* p, size_t length_p) {
const size_t num_frames = index_ / kLength10Ms;
if (num_frames > length_p)
return -1;
assert(WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_) == 0);
RTC_DCHECK_EQ(0, WebRtcVad_ValidRateAndFrameLength(kSampleRateHz, index_));
int activity = WebRtcVad_Process(vad_, kSampleRateHz, buffer_, index_);
if (activity < 0)

View File

@ -8,18 +8,17 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#ifndef MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#define MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/vad/common.h"
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/typedefs.h"
#include <stddef.h>
#include <stdint.h>
#include "common_audio/vad/include/webrtc_vad.h"
#include "modules/audio_processing/vad/common.h"
namespace webrtc {
class AudioFrame;
class StandaloneVad {
public:
static StandaloneVad* Create();
@ -67,4 +66,4 @@ class StandaloneVad {
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_
#endif // MODULES_AUDIO_PROCESSING_AGC_STANDALONE_VAD_H_

View File

@ -8,22 +8,23 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
#include "modules/audio_processing/vad/vad_audio_proc.h"
#include <math.h>
#include <stdio.h>
#include <string.h>
#include "webrtc/common_audio/fft4g.h"
#include "webrtc/modules/audio_processing/vad/vad_audio_proc_internal.h"
#include "webrtc/modules/audio_processing/vad/pitch_internal.h"
#include "webrtc/modules/audio_processing/vad/pole_zero_filter.h"
#include "common_audio/third_party/ooura/fft_size_256/fft4g.h"
#include "modules/audio_processing/vad/pitch_internal.h"
#include "modules/audio_processing/vad/pole_zero_filter.h"
#include "modules/audio_processing/vad/vad_audio_proc_internal.h"
#include "rtc_base/checks.h"
extern "C" {
#include "webrtc/modules/audio_coding/codecs/isac/main/source/codec.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/source/lpc_analysis.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "webrtc/modules/audio_coding/codecs/isac/main/source/structs.h"
#include "modules/audio_coding/codecs/isac/main/source/filter_functions.h"
#include "modules/audio_coding/codecs/isac/main/source/isac_vad.h"
#include "modules/audio_coding/codecs/isac/main/source/pitch_estimator.h"
#include "modules/audio_coding/codecs/isac/main/source/structs.h"
}
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
@ -32,9 +33,9 @@ namespace webrtc {
struct VadAudioProc::PitchAnalysisStruct : public ::PitchAnalysisStruct {};
struct VadAudioProc::PreFiltBankstr : public ::PreFiltBankstr {};
static const float kFrequencyResolution =
static constexpr float kFrequencyResolution =
kSampleRateHz / static_cast<float>(VadAudioProc::kDftSize);
static const int kSilenceRms = 5;
static constexpr int kSilenceRms = 5;
// TODO(turajs): Make a Create or Init for VadAudioProc.
VadAudioProc::VadAudioProc()
@ -66,8 +67,7 @@ VadAudioProc::VadAudioProc()
WebRtcIsac_InitPitchAnalysis(pitch_analysis_handle_.get());
}
VadAudioProc::~VadAudioProc() {
}
VadAudioProc::~VadAudioProc() {}
void VadAudioProc::ResetBuffer() {
memcpy(audio_buffer_, &audio_buffer_[kNumSamplesToProcess],
@ -95,7 +95,7 @@ int VadAudioProc::ExtractFeatures(const int16_t* frame,
if (num_buffer_samples_ < kBufferLength) {
return 0;
}
assert(num_buffer_samples_ == kBufferLength);
RTC_DCHECK_EQ(num_buffer_samples_, kBufferLength);
features->num_frames = kNum10msSubframes;
features->silence = false;
@ -121,7 +121,7 @@ int VadAudioProc::ExtractFeatures(const int16_t* frame,
void VadAudioProc::SubframeCorrelation(double* corr,
size_t length_corr,
size_t subframe_index) {
assert(length_corr >= kLpcOrder + 1);
RTC_DCHECK_GE(length_corr, kLpcOrder + 1);
double windowed_audio[kNumSubframeSamples + kNumPastSignalSamples];
size_t buffer_index = subframe_index * kNumSubframeSamples;
@ -137,7 +137,7 @@ void VadAudioProc::SubframeCorrelation(double* corr,
// each 10ms sub-frame. This is equivalent to computing LPC coefficients for the
// first half of each 10 ms subframe.
void VadAudioProc::GetLpcPolynomials(double* lpc, size_t length_lpc) {
assert(length_lpc >= kNum10msSubframes * (kLpcOrder + 1));
RTC_DCHECK_GE(length_lpc, kNum10msSubframes * (kLpcOrder + 1));
double corr[kLpcOrder + 1];
double reflec_coeff[kLpcOrder];
for (size_t i = 0, offset_lpc = 0; i < kNum10msSubframes;
@ -165,7 +165,7 @@ static float QuadraticInterpolation(float prev_val,
fractional_index =
-(next_val - prev_val) * 0.5f / (next_val + prev_val - 2.f * curr_val);
assert(fabs(fractional_index) < 1);
RTC_DCHECK_LT(fabs(fractional_index), 1);
return fractional_index;
}
@ -176,7 +176,7 @@ static float QuadraticInterpolation(float prev_val,
// to save on one square root.
void VadAudioProc::FindFirstSpectralPeaks(double* f_peak,
size_t length_f_peak) {
assert(length_f_peak >= kNum10msSubframes);
RTC_DCHECK_GE(length_f_peak, kNum10msSubframes);
double lpc[kNum10msSubframes * (kLpcOrder + 1)];
// For all sub-frames.
GetLpcPolynomials(lpc, kNum10msSubframes * (kLpcOrder + 1));
@ -232,7 +232,7 @@ void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
size_t length) {
// TODO(turajs): This, and the next two constants, can be "imported" from
// iSAC.
assert(length >= kNum10msSubframes);
RTC_DCHECK_GE(length, kNum10msSubframes);
const int kNumPitchSubframes = 4;
double gains[kNumPitchSubframes];
double lags[kNumPitchSubframes];
@ -262,7 +262,7 @@ void VadAudioProc::PitchAnalysis(double* log_pitch_gains,
}
void VadAudioProc::Rms(double* rms, size_t length_rms) {
assert(length_rms >= kNum10msSubframes);
RTC_DCHECK_GE(length_rms, kNum10msSubframes);
size_t offset = kNumPastSignalSamples;
for (size_t i = 0; i < kNum10msSubframes; i++) {
rms[i] = 0;

View File

@ -8,16 +8,18 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/vad/common.h"
#include "webrtc/typedefs.h"
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include "modules/audio_processing/vad/common.h" // AudioFeatures, kSampleR...
namespace webrtc {
class AudioFrame;
class PoleZeroFilter;
class VadAudioProc {
@ -49,25 +51,28 @@ class VadAudioProc {
// For every 30 ms we compute 3 spectral peaks, therefore 3 LPC analyses.
// LPC is computed over 15 ms of windowed audio. For every 10 ms sub-frame
// we need 5 ms of past signal to create the input of LPC analysis.
static const size_t kNumPastSignalSamples =
static_cast<size_t>(kSampleRateHz / 200);
enum : size_t {
kNumPastSignalSamples = static_cast<size_t>(kSampleRateHz / 200)
};
// TODO(turajs): maybe define this at a higher level (maybe as an enum) so that
// all the code recognizes it as "no-error."
static const int kNoError = 0;
enum : int { kNoError = 0 };
static const size_t kNum10msSubframes = 3;
static const size_t kNumSubframeSamples =
static_cast<size_t>(kSampleRateHz / 100);
static const size_t kNumSamplesToProcess =
kNum10msSubframes *
kNumSubframeSamples; // Samples in 30 ms @ given sampling rate.
static const size_t kBufferLength =
kNumPastSignalSamples + kNumSamplesToProcess;
static const size_t kIpLength = kDftSize >> 1;
static const size_t kWLength = kDftSize >> 1;
static const size_t kLpcOrder = 16;
enum : size_t { kNum10msSubframes = 3 };
enum : size_t {
kNumSubframeSamples = static_cast<size_t>(kSampleRateHz / 100)
};
enum : size_t {
// Samples in 30 ms @ given sampling rate.
kNumSamplesToProcess = kNum10msSubframes * kNumSubframeSamples
};
enum : size_t {
kBufferLength = kNumPastSignalSamples + kNumSamplesToProcess
};
enum : size_t { kIpLength = kDftSize >> 1 };
enum : size_t { kWLength = kDftSize >> 1 };
enum : size_t { kLpcOrder = 16 };
size_t ip_[kIpLength];
float w_fft_[kWLength];
@ -79,11 +84,11 @@ class VadAudioProc {
double log_old_gain_;
double old_lag_;
rtc::scoped_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
rtc::scoped_ptr<PreFiltBankstr> pre_filter_handle_;
rtc::scoped_ptr<PoleZeroFilter> high_pass_filter_;
std::unique_ptr<PitchAnalysisStruct> pitch_analysis_handle_;
std::unique_ptr<PreFiltBankstr> pre_filter_handle_;
std::unique_ptr<PoleZeroFilter> high_pass_filter_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_H_

View File

@ -8,29 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
#define MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_
namespace webrtc {
// These values should match MATLAB counterparts for unit-tests to pass.
static const double kCorrWeight[] = {1.000000,
0.985000,
0.970225,
0.955672,
0.941337,
0.927217,
0.913308,
0.899609,
0.886115,
0.872823,
0.859730,
0.846834,
0.834132,
0.821620,
0.809296,
0.797156,
0.785199};
static const double kCorrWeight[] = {
1.000000, 0.985000, 0.970225, 0.955672, 0.941337, 0.927217,
0.913308, 0.899609, 0.886115, 0.872823, 0.859730, 0.846834,
0.834132, 0.821620, 0.809296, 0.797156, 0.785199};
static const double kLpcAnalWin[] = {
0.00000000, 0.01314436, 0.02628645, 0.03942400, 0.05255473, 0.06567639,
@ -75,11 +62,9 @@ static const double kLpcAnalWin[] = {
0.06567639, 0.05255473, 0.03942400, 0.02628645, 0.01314436, 0.00000000};
static const size_t kFilterOrder = 2;
static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f,
-1.949650f,
static const float kCoeffNumerator[kFilterOrder + 1] = {0.974827f, -1.949650f,
0.974827f};
static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f,
-1.971999f,
static const float kCoeffDenominator[kFilterOrder + 1] = {1.0f, -1.971999f,
0.972457f};
static_assert(kFilterOrder + 1 ==
@ -91,4 +76,4 @@ static_assert(kFilterOrder + 1 ==
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROCESSING_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_AUDIO_PROC_INTERNAL_H_

View File

@ -8,9 +8,8 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/vad_circular_buffer.h"
#include "modules/audio_processing/vad/vad_circular_buffer.h"
#include <assert.h>
#include <stdlib.h>
namespace webrtc {
@ -20,11 +19,9 @@ VadCircularBuffer::VadCircularBuffer(int buffer_size)
is_full_(false),
index_(0),
buffer_size_(buffer_size),
sum_(0) {
}
sum_(0) {}
VadCircularBuffer::~VadCircularBuffer() {
}
VadCircularBuffer::~VadCircularBuffer() {}
void VadCircularBuffer::Reset() {
is_full_ = false;

View File

@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
#define MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
#include "webrtc/base/scoped_ptr.h"
#include <memory>
namespace webrtc {
@ -58,7 +58,7 @@ class VadCircularBuffer {
// corresponding linear index.
int ConvertToLinearIndex(int* index) const;
rtc::scoped_ptr<double[]> buffer_;
std::unique_ptr<double[]> buffer_;
bool is_full_;
int index_;
int buffer_size_;
@ -66,4 +66,4 @@ class VadCircularBuffer {
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_VAD_CIRCULAR_BUFFER_H_

View File

@ -8,17 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
#include "modules/audio_processing/vad/voice_activity_detector.h"
#include <algorithm>
#include "webrtc/base/checks.h"
#include "rtc_base/checks.h"
namespace webrtc {
namespace {
const size_t kMaxLength = 320;
const int kNumChannels = 1;
const size_t kNumChannels = 1;
const double kDefaultVoiceValue = 1.0;
const double kNeutralProbability = 0.5;
@ -28,8 +27,9 @@ const double kLowProbability = 0.01;
VoiceActivityDetector::VoiceActivityDetector()
: last_voice_probability_(kDefaultVoiceValue),
standalone_vad_(StandaloneVad::Create()) {
}
standalone_vad_(StandaloneVad::Create()) {}
VoiceActivityDetector::~VoiceActivityDetector() = default;
// Because ISAC has a different chunk length, it updates
// |chunkwise_voice_probabilities_| and |chunkwise_rms_| when there is new data.
@ -37,8 +37,7 @@ VoiceActivityDetector::VoiceActivityDetector()
void VoiceActivityDetector::ProcessChunk(const int16_t* audio,
size_t length,
int sample_rate_hz) {
RTC_DCHECK_EQ(static_cast<int>(length), sample_rate_hz / 100);
RTC_DCHECK_LE(length, kMaxLength);
RTC_DCHECK_EQ(length, sample_rate_hz / 100);
// Resample to the required rate.
const int16_t* resampled_ptr = audio;
if (sample_rate_hz != kSampleRateHz) {

View File

@ -8,17 +8,20 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#include <stddef.h>
#include <stdint.h>
#include <memory>
#include <vector>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/resampler/include/resampler.h"
#include "webrtc/modules/audio_processing/vad/vad_audio_proc.h"
#include "webrtc/modules/audio_processing/vad/common.h"
#include "webrtc/modules/audio_processing/vad/pitch_based_vad.h"
#include "webrtc/modules/audio_processing/vad/standalone_vad.h"
#include "common_audio/resampler/include/resampler.h"
#include "modules/audio_processing/vad/common.h"
#include "modules/audio_processing/vad/pitch_based_vad.h"
#include "modules/audio_processing/vad/standalone_vad.h"
#include "modules/audio_processing/vad/vad_audio_proc.h"
namespace webrtc {
@ -27,10 +30,9 @@ namespace webrtc {
class VoiceActivityDetector {
public:
VoiceActivityDetector();
~VoiceActivityDetector();
// Processes each audio chunk and estimates the voice probability. The maximum
// supported sample rate is 32kHz.
// TODO(aluebs): Change |length| to size_t.
// Processes each audio chunk and estimates the voice probability.
void ProcessChunk(const int16_t* audio, size_t length, int sample_rate_hz);
// Returns a vector of voice probabilities for each chunk. It can be empty for
@ -58,7 +60,7 @@ class VoiceActivityDetector {
Resampler resampler_;
VadAudioProc audio_processing_;
rtc::scoped_ptr<StandaloneVad> standalone_vad_;
std::unique_ptr<StandaloneVad> standalone_vad_;
PitchBasedVad pitch_based_vad_;
int16_t resampled_[kLength10Ms];
@ -67,4 +69,4 @@ class VoiceActivityDetector {
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_ACTIVITY_DETECTOR_H_

View File

@ -10,8 +10,8 @@
// GMM tables for active segments. Generated by MakeGmmTables.m.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
#ifndef MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
#define MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
static const int kVoiceGmmNumMixtures = 12;
static const int kVoiceGmmDim = 3;
@ -70,16 +70,8 @@ static const double kVoiceGmmMean[kVoiceGmmNumMixtures][kVoiceGmmDim] = {
{-7.29187507662854e-01, 5.22717685022855e+02, 1.16377942283991e+02}};
static const double kVoiceGmmWeights[kVoiceGmmNumMixtures] = {
-1.39789694361035e+01,
-1.19527720202104e+01,
-1.32396317929055e+01,
-1.09436815209238e+01,
-1.13440027478149e+01,
-1.12200721834504e+01,
-1.02537324043693e+01,
-1.60789861938302e+01,
-1.03394494048344e+01,
-1.83207938586818e+01,
-1.31186044948288e+01,
-9.52479998673554e+00};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_
-1.39789694361035e+01, -1.19527720202104e+01, -1.32396317929055e+01,
-1.09436815209238e+01, -1.13440027478149e+01, -1.12200721834504e+01,
-1.02537324043693e+01, -1.60789861938302e+01, -1.03394494048344e+01,
-1.83207938586818e+01, -1.31186044948288e+01, -9.52479998673554e+00};
#endif // MODULES_AUDIO_PROCESSING_VAD_VOICE_GMM_TABLES_H_