Update to current webrtc library

This is from the upstream library commit id
3326535126e435f1ba647885ce43a8f0f3d317eb, corresponding to Chromium
88.0.4290.1.
This commit is contained in:
Arun Raghavan
2020-10-12 18:08:02 -04:00
parent b1b02581d3
commit bcec8b0b21
859 changed files with 76187 additions and 49580 deletions

View File

@ -8,13 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_VAD_H_
#define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_VAD_H_
#ifndef COMMON_AUDIO_VAD_INCLUDE_VAD_H_
#define COMMON_AUDIO_VAD_INCLUDE_VAD_H_
#include "webrtc/base/checks.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "webrtc/typedefs.h"
#include <memory>
#include "common_audio/vad/include/webrtc_vad.h"
#include "rtc_base/checks.h"
namespace webrtc {
@ -43,8 +43,8 @@ class Vad {
};
// Returns a Vad instance that's implemented on top of WebRtcVad.
rtc::scoped_ptr<Vad> CreateVad(Vad::Aggressiveness aggressiveness);
std::unique_ptr<Vad> CreateVad(Vad::Aggressiveness aggressiveness);
} // namespace webrtc
#endif // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_VAD_H_
#endif // COMMON_AUDIO_VAD_INCLUDE_VAD_H_

View File

@ -8,17 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the VAD API calls. Specific function calls are given below.
* This header file includes the VAD API calls. Specific function calls are
* given below.
*/
#ifndef WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
#define WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
#ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
#define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
#include <stdint.h>
typedef struct WebRtcVadInst VadInst;
@ -27,7 +26,7 @@ extern "C" {
#endif
// Creates an instance to the VAD structure.
VadInst* WebRtcVad_Create();
VadInst* WebRtcVad_Create(void);
// Frees the dynamic memory of a specified VAD instance.
//
@ -39,7 +38,7 @@ void WebRtcVad_Free(VadInst* handle);
// - handle [i/o] : Instance that should be initialized.
//
// returns : 0 - (OK),
// -1 - (NULL pointer or Default mode could not be set).
// -1 - (null pointer or Default mode could not be set).
int WebRtcVad_Init(VadInst* handle);
// Sets the VAD operating mode. A more aggressive (higher mode) VAD is more
@ -51,7 +50,7 @@ int WebRtcVad_Init(VadInst* handle);
// - mode [i] : Aggressiveness mode (0, 1, 2, or 3).
//
// returns : 0 - (OK),
// -1 - (NULL pointer, mode could not be set or the VAD instance
// -1 - (null pointer, mode could not be set or the VAD instance
// has not been initialized).
int WebRtcVad_set_mode(VadInst* handle, int mode);
@ -67,7 +66,9 @@ int WebRtcVad_set_mode(VadInst* handle, int mode);
// returns : 1 - (Active Voice),
// 0 - (Non-active Voice),
// -1 - (Error)
int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame,
int WebRtcVad_Process(VadInst* handle,
int fs,
const int16_t* audio_frame,
size_t frame_length);
// Checks for valid combinations of |rate| and |frame_length|. We support 10,
@ -83,4 +84,4 @@ int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
}
#endif
#endif // WEBRTC_COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT
#endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT

View File

@ -8,9 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/include/vad.h"
#include "common_audio/vad/include/vad.h"
#include "webrtc/base/checks.h"
#include <memory>
#include "common_audio/vad/include/webrtc_vad.h"
#include "rtc_base/checks.h"
namespace webrtc {
@ -35,7 +38,7 @@ class VadImpl final : public Vad {
case 1:
return kActive;
default:
RTC_DCHECK(false) << "WebRtcVad_Process returned an error.";
RTC_NOTREACHED() << "WebRtcVad_Process returned an error.";
return kError;
}
}
@ -56,8 +59,8 @@ class VadImpl final : public Vad {
} // namespace
rtc::scoped_ptr<Vad> CreateVad(Vad::Aggressiveness aggressiveness) {
return rtc::scoped_ptr<Vad>(new VadImpl(aggressiveness));
std::unique_ptr<Vad> CreateVad(Vad::Aggressiveness aggressiveness) {
return std::unique_ptr<Vad>(new VadImpl(aggressiveness));
}
} // namespace webrtc

View File

@ -8,13 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_core.h"
#include "common_audio/vad/vad_core.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/vad/vad_filterbank.h"
#include "webrtc/common_audio/vad/vad_gmm.h"
#include "webrtc/common_audio/vad/vad_sp.h"
#include "webrtc/typedefs.h"
#include "rtc_base/sanitizer.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/vad/vad_filterbank.h"
#include "common_audio/vad/vad_gmm.h"
#include "common_audio/vad/vad_sp.h"
// Spectrum Weighting
static const int16_t kSpectrumWeight[kNumChannels] = { 6, 8, 10, 12, 14, 16 };
@ -110,6 +110,15 @@ static int32_t WeightedAverage(int16_t* data, int16_t offset,
return weighted_average;
}
// An s16 x s32 -> s32 multiplication that is allowed to overflow. The product
// is formed in unsigned 32-bit arithmetic, where overflow is well defined and
// wraps modulo 2^32. The result is therefore the low 32 bits of the
// mathematical product (what a wrapping two's-complement multiply yields),
// obtained without signed-integer overflow, so no sanitizer suppression is
// needed on this function.
//
// - a [i] : 16-bit signed factor.
// - b [i] : 32-bit signed factor.
//
// returns : Low 32 bits of a * b, interpreted as a signed 32-bit value.
static inline int32_t OverflowingMulS16ByS32ToS32(int16_t a, int32_t b) {
  // Signed -> unsigned conversion is modular, so negative inputs keep their
  // two's-complement bit pattern (|a| is effectively sign-extended).
  const uint32_t wrapped_product = (uint32_t)a * (uint32_t)b;
  // Converting a value above INT32_MAX back to int32_t is
  // implementation-defined rather than undefined; GCC, Clang and MSVC all
  // document it as modular (bit-preserving).
  return (int32_t)wrapped_product;
}
// Calculates the probabilities for both speech and background noise using
// Gaussian Mixture Models (GMM). A hypothesis-test is performed to decide which
// type of signal is most probable.
@ -231,7 +240,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
(int32_t) (log_likelihood_ratio * kSpectrumWeight[channel]);
// Local VAD decision.
if ((log_likelihood_ratio << 2) > individualTest) {
if ((log_likelihood_ratio * 4) > individualTest) {
vadflag = 1;
}
@ -378,7 +387,7 @@ static int16_t GmmProbability(VadInstT* self, int16_t* features,
// (Q14 >> 2) * Q12 = Q24.
tmp_s16 = (ngprvec[gaussian] + 2) >> 2;
tmp2_s32 = tmp_s16 * tmp1_s32;
tmp2_s32 = OverflowingMulS16ByS32ToS32(tmp_s16, tmp1_s32);
// Q20 * approx 0.001 (2^-10=0.0009766), hence,
// (Q24 >> 14) = (Q24 >> 4) / 2^10 = Q20.
tmp1_s32 = tmp2_s32 >> 14;

View File

@ -8,51 +8,46 @@
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the descriptions of the core VAD calls.
*/
#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
#define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
#ifndef COMMON_AUDIO_VAD_VAD_CORE_H_
#define COMMON_AUDIO_VAD_VAD_CORE_H_
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
enum { kNumChannels = 6 }; // Number of frequency bands (named channels).
enum { kNumChannels = 6 }; // Number of frequency bands (named channels).
enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM.
enum { kTableSize = kNumChannels * kNumGaussians };
enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal.
typedef struct VadInstT_
{
int vad;
int32_t downsampling_filter_states[4];
WebRtcSpl_State48khzTo8khz state_48_to_8;
int16_t noise_means[kTableSize];
int16_t speech_means[kTableSize];
int16_t noise_stds[kTableSize];
int16_t speech_stds[kTableSize];
// TODO(bjornv): Change to |frame_count|.
int32_t frame_counter;
int16_t over_hang; // Over Hang
int16_t num_of_speech;
// TODO(bjornv): Change to |age_vector|.
int16_t index_vector[16 * kNumChannels];
int16_t low_value_vector[16 * kNumChannels];
// TODO(bjornv): Change to |median|.
int16_t mean_value[kNumChannels];
int16_t upper_state[5];
int16_t lower_state[5];
int16_t hp_filter_state[4];
int16_t over_hang_max_1[3];
int16_t over_hang_max_2[3];
int16_t individual[3];
int16_t total[3];
int init_flag;
typedef struct VadInstT_ {
int vad;
int32_t downsampling_filter_states[4];
WebRtcSpl_State48khzTo8khz state_48_to_8;
int16_t noise_means[kTableSize];
int16_t speech_means[kTableSize];
int16_t noise_stds[kTableSize];
int16_t speech_stds[kTableSize];
// TODO(bjornv): Change to |frame_count|.
int32_t frame_counter;
int16_t over_hang; // Over Hang
int16_t num_of_speech;
// TODO(bjornv): Change to |age_vector|.
int16_t index_vector[16 * kNumChannels];
int16_t low_value_vector[16 * kNumChannels];
// TODO(bjornv): Change to |median|.
int16_t mean_value[kNumChannels];
int16_t upper_state[5];
int16_t lower_state[5];
int16_t hp_filter_state[4];
int16_t over_hang_max_1[3];
int16_t over_hang_max_2[3];
int16_t individual[3];
int16_t total[3];
int init_flag;
} VadInstT;
// Initializes the core VAD component. The default aggressiveness mode is
@ -60,7 +55,7 @@ typedef struct VadInstT_
//
// - self [i/o] : Instance that should be initialized
//
// returns : 0 (OK), -1 (NULL pointer in or if the default mode can't be
// returns : 0 (OK), -1 (null pointer in or if the default mode can't be
// set)
int WebRtcVad_InitCore(VadInstT* self);
@ -103,13 +98,17 @@ int WebRtcVad_set_mode_core(VadInstT* self, int mode);
* 0 - No active speech
* 1-6 - Active speech
*/
int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
int WebRtcVad_CalcVad48khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
int WebRtcVad_CalcVad32khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
int WebRtcVad_CalcVad16khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
int WebRtcVad_CalcVad8khz(VadInstT* inst,
const int16_t* speech_frame,
size_t frame_length);
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
#endif // COMMON_AUDIO_VAD_VAD_CORE_H_

View File

@ -8,12 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_filterbank.h"
#include "common_audio/vad/vad_filterbank.h"
#include <assert.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
// Constants used in LogOfEnergy().
static const int16_t kLogConst = 24660; // 160*log10(2) in Q9.
@ -92,14 +90,14 @@ static void AllPassFilter(const int16_t* data_in, size_t data_length,
size_t i;
int16_t tmp16 = 0;
int32_t tmp32 = 0;
int32_t state32 = ((int32_t) (*filter_state) << 16); // Q15
int32_t state32 = ((int32_t) (*filter_state) * (1 << 16)); // Q15
for (i = 0; i < data_length; i++) {
tmp32 = state32 + filter_coefficient * *data_in;
tmp16 = (int16_t) (tmp32 >> 16); // Q(-1)
*data_out++ = tmp16;
state32 = (*data_in << 14) - filter_coefficient * tmp16; // Q14
state32 <<= 1; // Q15.
state32 = (*data_in * (1 << 14)) - filter_coefficient * tmp16; // Q14
state32 *= 2; // Q15.
data_in += 2;
}
@ -160,8 +158,8 @@ static void LogOfEnergy(const int16_t* data_in, size_t data_length,
// we eventually will mask out the fractional part.
uint32_t energy = 0;
assert(data_in != NULL);
assert(data_length > 0);
RTC_DCHECK(data_in);
RTC_DCHECK_GT(data_length, 0);
energy = (uint32_t) WebRtcSpl_Energy((int16_t*) data_in, data_length,
&tot_rshifts);
@ -261,8 +259,8 @@ int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
int16_t* hp_out_ptr = hp_120; // [2000 - 4000] Hz.
int16_t* lp_out_ptr = lp_120; // [0 - 2000] Hz.
assert(data_length <= 240);
assert(4 < kNumChannels - 1); // Checking maximum |frequency_band|.
RTC_DCHECK_LE(data_length, 240);
RTC_DCHECK_LT(4, kNumChannels - 1); // Checking maximum |frequency_band|.
// Split at 2000 Hz and downsample.
SplitFilter(in_ptr, data_length, &self->upper_state[frequency_band],

View File

@ -12,11 +12,10 @@
* This file includes feature calculating functionality used in vad_core.c.
*/
#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
#define WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
#ifndef COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
#define COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
#include "webrtc/common_audio/vad/vad_core.h"
#include "webrtc/typedefs.h"
#include "common_audio/vad/vad_core.h"
// Takes |data_length| samples of |data_in| and calculates the logarithm of the
// energy of each of the |kNumChannels| = 6 frequency bands used by the VAD:
@ -38,7 +37,9 @@
// - features [o] : 10 * log10(energy in each frequency band), Q4.
// - returns : Total energy of the signal (NOTE! This value is not
// exact. It is only used in a comparison.)
int16_t WebRtcVad_CalculateFeatures(VadInstT* self, const int16_t* data_in,
size_t data_length, int16_t* features);
int16_t WebRtcVad_CalculateFeatures(VadInstT* self,
const int16_t* data_in,
size_t data_length,
int16_t* features);
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
#endif // COMMON_AUDIO_VAD_VAD_FILTERBANK_H_

View File

@ -8,10 +8,9 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_gmm.h"
#include "common_audio/vad/vad_gmm.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
static const int32_t kCompVar = 22005;
static const int16_t kLog2Exp = 5909; // log2(exp(1)) in Q12.

View File

@ -10,10 +10,10 @@
// Gaussian probability calculations internally used in vad_core.c.
#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_
#define WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_
#ifndef COMMON_AUDIO_VAD_VAD_GMM_H_
#define COMMON_AUDIO_VAD_VAD_GMM_H_
#include "webrtc/typedefs.h"
#include <stdint.h>
// Calculates the probability for |input|, given that |input| comes from a
// normal distribution with mean and standard deviation (|mean|, |std|).
@ -36,4 +36,4 @@ int32_t WebRtcVad_GaussianProbability(int16_t input,
int16_t std,
int16_t* delta);
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_GMM_H_
#endif // COMMON_AUDIO_VAD_VAD_GMM_H_

View File

@ -8,13 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/vad_sp.h"
#include "common_audio/vad/vad_sp.h"
#include <assert.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/vad/vad_core.h"
#include "webrtc/typedefs.h"
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/vad/vad_core.h"
// Allpass filter coefficients, upper and lower, in Q13.
// Upper: 0.64, Lower: 0.17.
@ -72,7 +70,7 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
int16_t* age = &self->index_vector[offset];
int16_t* smallest_values = &self->low_value_vector[offset];
assert(channel < kNumChannels);
RTC_DCHECK_LT(channel, kNumChannels);
// Each value in |smallest_values| is getting 1 loop older. Update |age|, and
// remove old values.
@ -81,7 +79,7 @@ int16_t WebRtcVad_FindMinimum(VadInstT* self,
age[i]++;
} else {
// Too old value. Remove from memory and shift larger values downwards.
for (j = i; j < 16; j++) {
for (j = i; j < 15; j++) {
smallest_values[j] = smallest_values[j + 1];
age[j] = age[j + 1];
}

View File

@ -8,14 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
// This file includes specific signal processing tools used in vad_core.c.
#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
#define WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
#ifndef COMMON_AUDIO_VAD_VAD_SP_H_
#define COMMON_AUDIO_VAD_VAD_SP_H_
#include "webrtc/common_audio/vad/vad_core.h"
#include "webrtc/typedefs.h"
#include "common_audio/vad/vad_core.h"
// Downsamples the signal by a factor 2, eg. 32->16 or 16->8.
//
@ -53,4 +51,4 @@ int16_t WebRtcVad_FindMinimum(VadInstT* handle,
int16_t feature_value,
int channel);
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_SP_H_
#endif // COMMON_AUDIO_VAD_VAD_SP_H_

View File

@ -8,14 +8,13 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/common_audio/vad/include/webrtc_vad.h"
#include "common_audio/vad/include/webrtc_vad.h"
#include <stdlib.h>
#include <string.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/vad/vad_core.h"
#include "webrtc/typedefs.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include "common_audio/vad/vad_core.h"
static const int kInitCheck = 42;
static const int kValidRates[] = { 8000, 16000, 32000, 48000 };
@ -25,7 +24,6 @@ static const int kMaxFrameLengthMs = 30;
VadInst* WebRtcVad_Create() {
VadInstT* self = (VadInstT*)malloc(sizeof(VadInstT));
WebRtcSpl_Init();
self->init_flag = 0;
return (VadInst*)self;