Update common_audio
Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1.
Update notes:
* Moved src/ to webrtc/ to easily diff against the third_party/webrtc in the chromium tree
* ARM/NEON/MIPS support is not yet hooked up
* Tests have not been copied
This commit is contained in:
115
webrtc/common_audio/vad/vad_core.h
Normal file
115
webrtc/common_audio/vad/vad_core.h
Normal file
@ -0,0 +1,115 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This header file includes the descriptions of the core VAD calls.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
|
||||
#define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
|
||||
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Compile-time constants shared by the core VAD declarations below.
enum { kNumChannels = 6 }; // Number of frequency bands (named channels).
|
||||
enum { kNumGaussians = 2 }; // Number of Gaussians per channel in the GMM.
|
||||
// Number of entries in each per-Gaussian table: one entry for every
// (channel, Gaussian) pair, i.e. 6 * 2 = 12. Used as the length of the
// noise/speech mean and std arrays in VadInstT.
enum { kTableSize = kNumChannels * kNumGaussians };
|
||||
enum { kMinEnergy = 10 }; // Minimum energy required to trigger audio signal.
|
||||
|
||||
// State of one core VAD instance. Every function declared in this header takes
// a pointer to this struct. Field semantics are defined by vad_core.c, which
// is not visible from this header; notes marked NOTE(review) are assumptions
// to be confirmed there.
typedef struct VadInstT_
|
||||
{
|
||||
|
||||
// NOTE(review): presumably the most recent VAD decision - confirm.
int vad;
|
||||
// NOTE(review): presumably filter memory used when downsampling the input -
// confirm.
int32_t downsampling_filter_states[4];
|
||||
// State object of type WebRtcSpl_State48khzTo8khz; going by the type name it
// belongs to a 48 kHz -> 8 kHz resampler - TODO confirm.
WebRtcSpl_State48khzTo8khz state_48_to_8;
|
||||
// GMM tables, kTableSize entries each (one per channel/Gaussian pair).
// NOTE(review): presumably means and standard deviations of the noise and
// speech models - confirm.
int16_t noise_means[kTableSize];
|
||||
int16_t speech_means[kTableSize];
|
||||
int16_t noise_stds[kTableSize];
|
||||
int16_t speech_stds[kTableSize];
|
||||
// TODO(bjornv): Change to |frame_count|.
|
||||
int32_t frame_counter;
|
||||
// NOTE(review): presumably a hangover counter (frames still reported as
// speech after speech has ended) - confirm.
int16_t over_hang;
|
||||
// NOTE(review): presumably a count of consecutive speech frames - confirm.
int16_t num_of_speech;
|
||||
// TODO(bjornv): Change to |age_vector|.
|
||||
// 16 entries per channel (16 * kNumChannels).
int16_t index_vector[16 * kNumChannels];
|
||||
// 16 entries per channel (16 * kNumChannels).
int16_t low_value_vector[16 * kNumChannels];
|
||||
// TODO(bjornv): Change to |median|.
|
||||
// One entry per channel.
int16_t mean_value[kNumChannels];
|
||||
// NOTE(review): |upper_state|, |lower_state| and |hp_filter_state| look like
// filter memories - confirm which filters use them.
int16_t upper_state[5];
|
||||
int16_t lower_state[5];
|
||||
int16_t hp_filter_state[4];
|
||||
// Three-entry tables.
// NOTE(review): presumably thresholds/limits configured by
// WebRtcVad_set_mode_core() - confirm what the three entries index.
int16_t over_hang_max_1[3];
|
||||
int16_t over_hang_max_2[3];
|
||||
int16_t individual[3];
|
||||
int16_t total[3];
|
||||
|
||||
// NOTE(review): presumably marks the instance as initialized - confirm the
// value written by WebRtcVad_InitCore().
int init_flag;
|
||||
|
||||
} VadInstT;
|
||||
|
||||
// Initializes the core VAD component. The default aggressiveness mode is
|
||||
// controlled by |kDefaultMode| in vad_core.c.
|
||||
//
|
||||
// - self [i/o] : Instance that should be initialized
|
||||
//
|
||||
// returns : 0 (OK), -1 (|self| is a NULL pointer, or the default mode
|
||||
// can't be set)
|
||||
int WebRtcVad_InitCore(VadInstT* self);
|
||||
|
||||
/****************************************************************************
 * WebRtcVad_set_mode_core(...)
 *
 * This function changes the VAD settings.
 *
 * Input:
 *      - self      : VAD instance
 *      - mode      : Aggressiveness degree
 *                    0 (High quality) - 3 (Highly aggressive)
 *
 * Output:
 *      - self      : Changed instance
 *
 * Return value     :  0 - Ok
 *                    -1 - Error
 */
|
||||
|
||||
int WebRtcVad_set_mode_core(VadInstT* self, int mode);
|
||||
|
||||
/****************************************************************************
 * WebRtcVad_CalcVad48khz(...)
 * WebRtcVad_CalcVad32khz(...)
 * WebRtcVad_CalcVad16khz(...)
 * WebRtcVad_CalcVad8khz(...)
 *
 * Calculate probability for active speech and make VAD decision. There is one
 * entry point per input sampling rate, as given by the function name.
 *
 * Input:
 *      - inst          : VAD instance (its state is updated, see Output)
 *      - speech_frame  : Input speech frame
 *      - frame_length  : Number of input samples
 *
 * Output:
 *      - inst          : Updated filter states etc.
 *
 * Return value         : VAD decision
 *                        0 - No active speech
 *                        1-6 - Active speech
 */
|
||||
int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
|
||||
size_t frame_length);
|
||||
int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
|
||||
size_t frame_length);
|
||||
int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
|
||||
size_t frame_length);
|
||||
int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
|
||||
size_t frame_length);
|
||||
|
||||
#endif // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
|
Reference in New Issue
Block a user