Update to current webrtc library
This is from the upstream library commit id 3326535126e435f1ba647885ce43a8f0f3d317eb, corresponding to Chromium 88.0.4290.1.
116  webrtc/modules/audio_processing/agc/BUILD.gn  Normal file
@@ -0,0 +1,116 @@
# Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.

import("../../../webrtc.gni")

rtc_source_set("gain_control_interface") {
  sources = [ "gain_control.h" ]
}

rtc_library("agc") {
  sources = [
    "agc_manager_direct.cc",
    "agc_manager_direct.h",
  ]
  configs += [ "..:apm_debug_dump" ]
  deps = [
    ":gain_control_interface",
    ":gain_map",
    ":level_estimation",
    "..:apm_logging",
    "..:audio_buffer",
    "../../../common_audio",
    "../../../common_audio:common_audio_c",
    "../../../rtc_base:checks",
    "../../../rtc_base:gtest_prod",
    "../../../rtc_base:logging",
    "../../../rtc_base:rtc_base_approved",
    "../../../rtc_base:safe_minmax",
    "../../../system_wrappers:field_trial",
    "../../../system_wrappers:metrics",
    "../agc2:level_estimation_agc",
    "../vad",
  ]
  absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}

rtc_library("level_estimation") {
  sources = [
    "agc.cc",
    "agc.h",
    "loudness_histogram.cc",
    "loudness_histogram.h",
    "utility.cc",
    "utility.h",
  ]
  deps = [
    "../../../rtc_base:checks",
    "../vad",
  ]
}

rtc_library("legacy_agc") {
  visibility = [
    ":*",
    "..:*",
  ]  # Only targets in this file and in
     # audio_processing can depend on
     # this.

  sources = [
    "legacy/analog_agc.cc",
    "legacy/analog_agc.h",
    "legacy/digital_agc.cc",
    "legacy/digital_agc.h",
    "legacy/gain_control.h",
  ]

  deps = [
    "../../../common_audio",
    "../../../common_audio:common_audio_c",
    "../../../common_audio/third_party/ooura:fft_size_256",
    "../../../rtc_base:checks",
    "../../../rtc_base:rtc_base_approved",
    "../../../system_wrappers",
  ]

  if (rtc_build_with_neon) {
    if (current_cpu != "arm64") {
      # Enable compilation for the NEON instruction set.
      suppressed_configs += [ "//build/config/compiler:compiler_arm_fpu" ]
      cflags = [ "-mfpu=neon" ]
    }
  }
}

rtc_source_set("gain_map") {
  sources = [ "gain_map_internal.h" ]
}

if (rtc_include_tests) {
  rtc_library("agc_unittests") {
    testonly = true
    sources = [
      "agc_manager_direct_unittest.cc",
      "loudness_histogram_unittest.cc",
      "mock_agc.h",
    ]
    configs += [ "..:apm_debug_dump" ]

    deps = [
      ":agc",
      ":gain_control_interface",
      ":level_estimation",
      "..:mocks",
      "../../../test:field_trial",
      "../../../test:fileutils",
      "../../../test:test_support",
      "//testing/gtest",
    ]
  }
}
@@ -8,18 +8,15 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
#include "modules/audio_processing/agc/agc.h"
|
||||
|
||||
#include <cmath>
|
||||
#include <cstdlib>
|
||||
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/base/checks.h"
|
||||
#include "webrtc/modules/audio_processing/agc/histogram.h"
|
||||
#include "webrtc/modules/audio_processing/agc/utility.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "modules/audio_processing/agc/loudness_histogram.h"
|
||||
#include "modules/audio_processing/agc/utility.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
namespace {
|
||||
@@ -33,23 +30,12 @@ const double kActivityThreshold = 0.3;
|
||||
Agc::Agc()
|
||||
: target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
|
||||
target_level_dbfs_(kDefaultLevelDbfs),
|
||||
histogram_(Histogram::Create(kNumAnalysisFrames)),
|
||||
inactive_histogram_(Histogram::Create()) {
|
||||
}
|
||||
histogram_(LoudnessHistogram::Create(kNumAnalysisFrames)),
|
||||
inactive_histogram_(LoudnessHistogram::Create()) {}
|
||||
|
||||
Agc::~Agc() {}
|
||||
Agc::~Agc() = default;
|
||||
|
||||
float Agc::AnalyzePreproc(const int16_t* audio, size_t length) {
|
||||
assert(length > 0);
|
||||
size_t num_clipped = 0;
|
||||
for (size_t i = 0; i < length; ++i) {
|
||||
if (audio[i] == 32767 || audio[i] == -32768)
|
||||
++num_clipped;
|
||||
}
|
||||
return 1.0f * num_clipped / length;
|
||||
}
|
||||
|
||||
int Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) {
|
||||
void Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) {
|
||||
vad_.ProcessChunk(audio, length, sample_rate_hz);
|
||||
const std::vector<double>& rms = vad_.chunkwise_rms();
|
||||
const std::vector<double>& probabilities =
|
||||
@@ -58,12 +44,11 @@ int Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) {
|
||||
for (size_t i = 0; i < rms.size(); ++i) {
|
||||
histogram_->Update(rms[i], probabilities[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool Agc::GetRmsErrorDb(int* error) {
|
||||
if (!error) {
|
||||
assert(false);
|
||||
RTC_NOTREACHED();
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -98,4 +83,12 @@ int Agc::set_target_level_dbfs(int level) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
int Agc::target_level_dbfs() const {
|
||||
return target_level_dbfs_;
|
||||
}
|
||||
|
||||
float Agc::voice_probability() const {
|
||||
return vad_.last_voice_probability();
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
@@ -8,29 +8,25 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include <memory>
|
||||
|
||||
#include "modules/audio_processing/vad/voice_activity_detector.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class Histogram;
|
||||
class LoudnessHistogram;
|
||||
|
||||
class Agc {
|
||||
public:
|
||||
Agc();
|
||||
virtual ~Agc();
|
||||
|
||||
// Returns the proportion of samples in the buffer which are at full-scale
|
||||
// (and presumably clipped).
|
||||
virtual float AnalyzePreproc(const int16_t* audio, size_t length);
|
||||
// |audio| must be mono; in a multi-channel stream, provide the first (usually
|
||||
// left) channel.
|
||||
virtual int Process(const int16_t* audio, size_t length, int sample_rate_hz);
|
||||
virtual void Process(const int16_t* audio, size_t length, int sample_rate_hz);
|
||||
|
||||
// Retrieves the difference between the target RMS level and the current
|
||||
// signal RMS level in dB. Returns true if an update is available and false
|
||||
@@ -39,20 +35,17 @@ class Agc {
|
||||
virtual void Reset();
|
||||
|
||||
virtual int set_target_level_dbfs(int level);
|
||||
virtual int target_level_dbfs() const { return target_level_dbfs_; }
|
||||
|
||||
virtual float voice_probability() const {
|
||||
return vad_.last_voice_probability();
|
||||
}
|
||||
virtual int target_level_dbfs() const;
|
||||
virtual float voice_probability() const;
|
||||
|
||||
private:
|
||||
double target_level_loudness_;
|
||||
int target_level_dbfs_;
|
||||
rtc::scoped_ptr<Histogram> histogram_;
|
||||
rtc::scoped_ptr<Histogram> inactive_histogram_;
|
||||
std::unique_ptr<LoudnessHistogram> histogram_;
|
||||
std::unique_ptr<LoudnessHistogram> inactive_histogram_;
|
||||
VoiceActivityDetector vad_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_AGC_H_
|
||||
|
@@ -8,26 +8,26 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
|
||||
#include "modules/audio_processing/agc/agc_manager_direct.h"
|
||||
|
||||
#include <cassert>
|
||||
#include <algorithm>
|
||||
#include <cmath>
|
||||
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
#include <cstdio>
|
||||
#endif
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
|
||||
#include "webrtc/modules/audio_processing/gain_control_impl.h"
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "webrtc/system_wrappers/include/logging.h"
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/agc/gain_control.h"
|
||||
#include "modules/audio_processing/agc/gain_map_internal.h"
|
||||
#include "modules/audio_processing/agc2/adaptive_mode_level_estimator_agc.h"
|
||||
#include "rtc_base/atomic_ops.h"
|
||||
#include "rtc_base/checks.h"
|
||||
#include "rtc_base/logging.h"
|
||||
#include "rtc_base/numerics/safe_minmax.h"
|
||||
#include "system_wrappers/include/field_trial.h"
|
||||
#include "system_wrappers/include/metrics.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Lowest the microphone level can be lowered due to clipping.
|
||||
const int kClippedLevelMin = 170;
|
||||
// Amount the microphone level is lowered with every clipping event.
|
||||
const int kClippedLevelStep = 15;
|
||||
// Proportion of clipped samples required to declare a clipping event.
|
||||
@@ -56,185 +56,123 @@ const int kMaxResidualGainChange = 15;
|
||||
// restrictions from clipping events.
|
||||
const int kSurplusCompressionGain = 6;
|
||||
|
||||
int ClampLevel(int mic_level) {
|
||||
return std::min(std::max(kMinMicLevel, mic_level), kMaxMicLevel);
|
||||
// Returns whether a fall-back solution to choose the maximum level should be
|
||||
// chosen.
|
||||
bool UseMaxAnalogChannelLevel() {
|
||||
return field_trial::IsEnabled("WebRTC-UseMaxAnalogAgcChannelLevel");
|
||||
}
|
||||
|
||||
int LevelFromGainError(int gain_error, int level) {
|
||||
assert(level >= 0 && level <= kMaxMicLevel);
|
||||
// Returns kMinMicLevel if no field trial exists or if it has been disabled.
|
||||
// Returns a value between 0 and 255 depending on the field-trial string.
|
||||
// Example: 'WebRTC-Audio-AgcMinMicLevelExperiment/Enabled-80' => returns 80.
|
||||
int GetMinMicLevel() {
|
||||
RTC_LOG(LS_INFO) << "[agc] GetMinMicLevel";
|
||||
constexpr char kMinMicLevelFieldTrial[] =
|
||||
"WebRTC-Audio-AgcMinMicLevelExperiment";
|
||||
if (!webrtc::field_trial::IsEnabled(kMinMicLevelFieldTrial)) {
|
||||
RTC_LOG(LS_INFO) << "[agc] Using default min mic level: " << kMinMicLevel;
|
||||
return kMinMicLevel;
|
||||
}
|
||||
const auto field_trial_string =
|
||||
webrtc::field_trial::FindFullName(kMinMicLevelFieldTrial);
|
||||
int min_mic_level = -1;
|
||||
sscanf(field_trial_string.c_str(), "Enabled-%d", &min_mic_level);
|
||||
if (min_mic_level >= 0 && min_mic_level <= 255) {
|
||||
RTC_LOG(LS_INFO) << "[agc] Experimental min mic level: " << min_mic_level;
|
||||
return min_mic_level;
|
||||
} else {
|
||||
RTC_LOG(LS_WARNING) << "[agc] Invalid parameter for "
|
||||
<< kMinMicLevelFieldTrial << ", ignored.";
|
||||
return kMinMicLevel;
|
||||
}
|
||||
}
|
||||
|
||||
int ClampLevel(int mic_level, int min_mic_level) {
|
||||
return rtc::SafeClamp(mic_level, min_mic_level, kMaxMicLevel);
|
||||
}
|
||||
|
||||
int LevelFromGainError(int gain_error, int level, int min_mic_level) {
|
||||
RTC_DCHECK_GE(level, 0);
|
||||
RTC_DCHECK_LE(level, kMaxMicLevel);
|
||||
if (gain_error == 0) {
|
||||
return level;
|
||||
}
|
||||
// TODO(ajm): Could be made more efficient with a binary search.
|
||||
|
||||
int new_level = level;
|
||||
if (gain_error > 0) {
|
||||
while (kGainMap[new_level] - kGainMap[level] < gain_error &&
|
||||
new_level < kMaxMicLevel) {
|
||||
new_level < kMaxMicLevel) {
|
||||
++new_level;
|
||||
}
|
||||
} else {
|
||||
while (kGainMap[new_level] - kGainMap[level] > gain_error &&
|
||||
new_level > kMinMicLevel) {
|
||||
new_level > min_mic_level) {
|
||||
--new_level;
|
||||
}
|
||||
}
|
||||
return new_level;
|
||||
}
|
||||
|
||||
// Returns the proportion of samples in the buffer which are at full-scale
|
||||
// (and presumably clipped).
|
||||
float ComputeClippedRatio(const float* const* audio,
|
||||
size_t num_channels,
|
||||
size_t samples_per_channel) {
|
||||
RTC_DCHECK_GT(samples_per_channel, 0);
|
||||
int num_clipped = 0;
|
||||
for (size_t ch = 0; ch < num_channels; ++ch) {
|
||||
int num_clipped_in_ch = 0;
|
||||
for (size_t i = 0; i < samples_per_channel; ++i) {
|
||||
RTC_DCHECK(audio[ch]);
|
||||
if (audio[ch][i] >= 32767.f || audio[ch][i] <= -32768.f) {
|
||||
++num_clipped_in_ch;
|
||||
}
|
||||
}
|
||||
num_clipped = std::max(num_clipped, num_clipped_in_ch);
|
||||
}
|
||||
return static_cast<float>(num_clipped) / (samples_per_channel);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Facility for dumping debug audio files. All methods are no-ops in the
|
||||
// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
|
||||
class DebugFile {
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
public:
|
||||
explicit DebugFile(const char* filename)
|
||||
: file_(fopen(filename, "wb")) {
|
||||
assert(file_);
|
||||
}
|
||||
~DebugFile() {
|
||||
fclose(file_);
|
||||
}
|
||||
void Write(const int16_t* data, size_t length_samples) {
|
||||
fwrite(data, 1, length_samples * sizeof(int16_t), file_);
|
||||
}
|
||||
private:
|
||||
FILE* file_;
|
||||
#else
|
||||
public:
|
||||
explicit DebugFile(const char* filename) {
|
||||
}
|
||||
~DebugFile() {
|
||||
}
|
||||
void Write(const int16_t* data, size_t length_samples) {
|
||||
}
|
||||
#endif // WEBRTC_AGC_DEBUG_DUMP
|
||||
};
|
||||
|
||||
AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level)
|
||||
: agc_(new Agc()),
|
||||
gctrl_(gctrl),
|
||||
volume_callbacks_(volume_callbacks),
|
||||
frames_since_clipped_(kClippedWaitFrames),
|
||||
level_(0),
|
||||
MonoAgc::MonoAgc(ApmDataDumper* data_dumper,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
bool use_agc2_level_estimation,
|
||||
bool disable_digital_adaptive,
|
||||
int min_mic_level)
|
||||
: min_mic_level_(min_mic_level),
|
||||
disable_digital_adaptive_(disable_digital_adaptive),
|
||||
max_level_(kMaxMicLevel),
|
||||
max_compression_gain_(kMaxCompressionGain),
|
||||
target_compression_(kDefaultCompressionGain),
|
||||
compression_(target_compression_),
|
||||
compression_accumulator_(compression_),
|
||||
capture_muted_(false),
|
||||
check_volume_on_next_process_(true), // Check at startup.
|
||||
startup_(true),
|
||||
startup_min_level_(ClampLevel(startup_min_level)),
|
||||
file_preproc_(new DebugFile("agc_preproc.pcm")),
|
||||
file_postproc_(new DebugFile("agc_postproc.pcm")) {
|
||||
startup_min_level_(ClampLevel(startup_min_level, min_mic_level_)),
|
||||
clipped_level_min_(clipped_level_min) {
|
||||
if (use_agc2_level_estimation) {
|
||||
agc_ = std::make_unique<AdaptiveModeLevelEstimatorAgc>(data_dumper);
|
||||
} else {
|
||||
agc_ = std::make_unique<Agc>();
|
||||
}
|
||||
}
|
||||
|
||||
AgcManagerDirect::AgcManagerDirect(Agc* agc,
|
||||
GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level)
|
||||
: agc_(agc),
|
||||
gctrl_(gctrl),
|
||||
volume_callbacks_(volume_callbacks),
|
||||
frames_since_clipped_(kClippedWaitFrames),
|
||||
level_(0),
|
||||
max_level_(kMaxMicLevel),
|
||||
max_compression_gain_(kMaxCompressionGain),
|
||||
target_compression_(kDefaultCompressionGain),
|
||||
compression_(target_compression_),
|
||||
compression_accumulator_(compression_),
|
||||
capture_muted_(false),
|
||||
check_volume_on_next_process_(true), // Check at startup.
|
||||
startup_(true),
|
||||
startup_min_level_(ClampLevel(startup_min_level)),
|
||||
file_preproc_(new DebugFile("agc_preproc.pcm")),
|
||||
file_postproc_(new DebugFile("agc_postproc.pcm")) {
|
||||
}
|
||||
MonoAgc::~MonoAgc() = default;
|
||||
|
||||
AgcManagerDirect::~AgcManagerDirect() {}
|
||||
|
||||
int AgcManagerDirect::Initialize() {
|
||||
void MonoAgc::Initialize() {
|
||||
max_level_ = kMaxMicLevel;
|
||||
max_compression_gain_ = kMaxCompressionGain;
|
||||
target_compression_ = kDefaultCompressionGain;
|
||||
compression_ = target_compression_;
|
||||
target_compression_ = disable_digital_adaptive_ ? 0 : kDefaultCompressionGain;
|
||||
compression_ = disable_digital_adaptive_ ? 0 : target_compression_;
|
||||
compression_accumulator_ = compression_;
|
||||
capture_muted_ = false;
|
||||
check_volume_on_next_process_ = true;
|
||||
// TODO(bjornv): Investigate if we need to reset |startup_| as well. For
|
||||
// example, what happens when we change devices.
|
||||
|
||||
if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital);
|
||||
return -1;
|
||||
}
|
||||
if (gctrl_->set_target_level_dbfs(2) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2);
|
||||
return -1;
|
||||
}
|
||||
if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain);
|
||||
return -1;
|
||||
}
|
||||
if (gctrl_->enable_limiter(true) != 0) {
|
||||
LOG_FERR1(LS_ERROR, enable_limiter, true);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
|
||||
int num_channels,
|
||||
size_t samples_per_channel) {
|
||||
size_t length = num_channels * samples_per_channel;
|
||||
if (capture_muted_) {
|
||||
return;
|
||||
}
|
||||
|
||||
file_preproc_->Write(audio, length);
|
||||
|
||||
if (frames_since_clipped_ < kClippedWaitFrames) {
|
||||
++frames_since_clipped_;
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for clipped samples, as the AGC has difficulty detecting pitch
|
||||
// under clipping distortion. We do this in the preprocessing phase in order
|
||||
// to catch clipped echo as well.
|
||||
//
|
||||
// If we find a sufficiently clipped frame, drop the current microphone level
|
||||
// and enforce a new maximum level, dropped the same amount from the current
|
||||
// maximum. This harsh treatment is an effort to avoid repeated clipped echo
|
||||
// events. As compensation for this restriction, the maximum compression
|
||||
// gain is increased, through SetMaxLevel().
|
||||
float clipped_ratio = agc_->AnalyzePreproc(audio, length);
|
||||
if (clipped_ratio > kClippedRatioThreshold) {
|
||||
LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
|
||||
<< clipped_ratio;
|
||||
// Always decrease the maximum level, even if the current level is below
|
||||
// threshold.
|
||||
SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep));
|
||||
if (level_ > kClippedLevelMin) {
|
||||
// Don't try to adjust the level if we're already below the limit. As
|
||||
// a consequence, if the user has brought the level above the limit, we
|
||||
// will still not react until the postproc updates the level.
|
||||
SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep));
|
||||
// Reset the AGC since the level has changed.
|
||||
agc_->Reset();
|
||||
}
|
||||
frames_since_clipped_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void AgcManagerDirect::Process(const int16_t* audio,
|
||||
size_t length,
|
||||
int sample_rate_hz) {
|
||||
if (capture_muted_) {
|
||||
return;
|
||||
}
|
||||
void MonoAgc::Process(const int16_t* audio,
|
||||
size_t samples_per_channel,
|
||||
int sample_rate_hz) {
|
||||
new_compression_to_set_ = absl::nullopt;
|
||||
|
||||
if (check_volume_on_next_process_) {
|
||||
check_volume_on_next_process_ = false;
|
||||
@@ -243,35 +181,50 @@ void AgcManagerDirect::Process(const int16_t* audio,
|
||||
CheckVolumeAndReset();
|
||||
}
|
||||
|
||||
if (agc_->Process(audio, length, sample_rate_hz) != 0) {
|
||||
LOG_FERR0(LS_ERROR, Agc::Process);
|
||||
assert(false);
|
||||
}
|
||||
agc_->Process(audio, samples_per_channel, sample_rate_hz);
|
||||
|
||||
UpdateGain();
|
||||
UpdateCompressor();
|
||||
|
||||
file_postproc_->Write(audio, length);
|
||||
if (!disable_digital_adaptive_) {
|
||||
UpdateCompressor();
|
||||
}
|
||||
}
|
||||
|
||||
void AgcManagerDirect::SetLevel(int new_level) {
|
||||
int voe_level = volume_callbacks_->GetMicVolume();
|
||||
if (voe_level < 0) {
|
||||
return;
|
||||
void MonoAgc::HandleClipping() {
|
||||
// Always decrease the maximum level, even if the current level is below
|
||||
// threshold.
|
||||
SetMaxLevel(std::max(clipped_level_min_, max_level_ - kClippedLevelStep));
|
||||
if (log_to_histograms_) {
|
||||
RTC_HISTOGRAM_BOOLEAN("WebRTC.Audio.AgcClippingAdjustmentAllowed",
|
||||
level_ - kClippedLevelStep >= clipped_level_min_);
|
||||
}
|
||||
if (level_ > clipped_level_min_) {
|
||||
// Don't try to adjust the level if we're already below the limit. As
|
||||
// a consequence, if the user has brought the level above the limit, we
|
||||
// will still not react until the postproc updates the level.
|
||||
SetLevel(std::max(clipped_level_min_, level_ - kClippedLevelStep));
|
||||
// Reset the AGCs for all channels since the level has changed.
|
||||
agc_->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
void MonoAgc::SetLevel(int new_level) {
|
||||
int voe_level = stream_analog_level_;
|
||||
if (voe_level == 0) {
|
||||
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
|
||||
RTC_DLOG(LS_INFO)
|
||||
<< "[agc] VolumeCallbacks returned level=0, taking no action.";
|
||||
return;
|
||||
}
|
||||
if (voe_level > kMaxMicLevel) {
|
||||
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level;
|
||||
if (voe_level < 0 || voe_level > kMaxMicLevel) {
|
||||
RTC_LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level="
|
||||
<< voe_level;
|
||||
return;
|
||||
}
|
||||
|
||||
if (voe_level > level_ + kLevelQuantizationSlack ||
|
||||
voe_level < level_ - kLevelQuantizationSlack) {
|
||||
LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
|
||||
<< "stored level from " << level_ << " to " << voe_level;
|
||||
RTC_DLOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
|
||||
"stored level from "
|
||||
<< level_ << " to " << voe_level;
|
||||
level_ = voe_level;
|
||||
// Always allow the user to increase the volume.
|
||||
if (level_ > max_level_) {
|
||||
@@ -281,6 +234,7 @@ void AgcManagerDirect::SetLevel(int new_level) {
|
||||
// was manually adjusted. The compressor will still provide some of the
|
||||
// desired gain change.
|
||||
agc_->Reset();
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -289,26 +243,27 @@ void AgcManagerDirect::SetLevel(int new_level) {
|
||||
return;
|
||||
}
|
||||
|
||||
volume_callbacks_->SetMicVolume(new_level);
|
||||
LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", "
|
||||
<< "level_=" << level_ << ", "
|
||||
<< "new_level=" << new_level;
|
||||
stream_analog_level_ = new_level;
|
||||
RTC_DLOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", level_=" << level_
|
||||
<< ", new_level=" << new_level;
|
||||
level_ = new_level;
|
||||
}
|
||||
|
||||
void AgcManagerDirect::SetMaxLevel(int level) {
|
||||
assert(level >= kClippedLevelMin);
|
||||
void MonoAgc::SetMaxLevel(int level) {
|
||||
RTC_DCHECK_GE(level, clipped_level_min_);
|
||||
max_level_ = level;
|
||||
// Scale the |kSurplusCompressionGain| linearly across the restricted
|
||||
// level range.
|
||||
max_compression_gain_ = kMaxCompressionGain + std::floor(
|
||||
(1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin) *
|
||||
kSurplusCompressionGain + 0.5f);
|
||||
LOG(LS_INFO) << "[agc] max_level_=" << max_level_
|
||||
<< ", max_compression_gain_=" << max_compression_gain_;
|
||||
max_compression_gain_ =
|
||||
kMaxCompressionGain + std::floor((1.f * kMaxMicLevel - max_level_) /
|
||||
(kMaxMicLevel - clipped_level_min_) *
|
||||
kSurplusCompressionGain +
|
||||
0.5f);
|
||||
RTC_DLOG(LS_INFO) << "[agc] max_level_=" << max_level_
|
||||
<< ", max_compression_gain_=" << max_compression_gain_;
|
||||
}
|
||||
|
||||
void AgcManagerDirect::SetCaptureMuted(bool muted) {
|
||||
void MonoAgc::SetCaptureMuted(bool muted) {
|
||||
if (capture_muted_ == muted) {
|
||||
return;
|
||||
}
|
||||
@@ -320,34 +275,29 @@ void AgcManagerDirect::SetCaptureMuted(bool muted) {
|
||||
}
|
||||
}
|
||||
|
||||
float AgcManagerDirect::voice_probability() {
|
||||
return agc_->voice_probability();
|
||||
}
|
||||
|
||||
int AgcManagerDirect::CheckVolumeAndReset() {
|
||||
int level = volume_callbacks_->GetMicVolume();
|
||||
if (level < 0) {
|
||||
return -1;
|
||||
}
|
||||
int MonoAgc::CheckVolumeAndReset() {
|
||||
int level = stream_analog_level_;
|
||||
// Reasons for taking action at startup:
|
||||
// 1) A person starting a call is expected to be heard.
|
||||
// 2) Independent of interpretation of |level| == 0 we should raise it so the
|
||||
// AGC can do its job properly.
|
||||
if (level == 0 && !startup_) {
|
||||
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
|
||||
RTC_DLOG(LS_INFO)
|
||||
<< "[agc] VolumeCallbacks returned level=0, taking no action.";
|
||||
return 0;
|
||||
}
|
||||
if (level > kMaxMicLevel) {
|
||||
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level;
|
||||
if (level < 0 || level > kMaxMicLevel) {
|
||||
RTC_LOG(LS_ERROR) << "[agc] VolumeCallbacks returned an invalid level="
|
||||
<< level;
|
||||
return -1;
|
||||
}
|
||||
LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
|
||||
RTC_DLOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
|
||||
|
||||
int minLevel = startup_ ? startup_min_level_ : kMinMicLevel;
|
||||
int minLevel = startup_ ? startup_min_level_ : min_mic_level_;
|
||||
if (level < minLevel) {
|
||||
level = minLevel;
|
||||
LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
|
||||
volume_callbacks_->SetMicVolume(level);
|
||||
RTC_DLOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
|
||||
stream_analog_level_ = level;
|
||||
}
|
||||
agc_->Reset();
|
||||
level_ = level;
|
||||
@@ -362,7 +312,7 @@ int AgcManagerDirect::CheckVolumeAndReset() {
|
||||
//
|
||||
// If the slider needs to be moved, we check first if the user has adjusted
|
||||
// it, in which case we take no action and cache the updated level.
|
||||
void AgcManagerDirect::UpdateGain() {
|
||||
void MonoAgc::UpdateGain() {
|
||||
int rms_error = 0;
|
||||
if (!agc_->GetRmsErrorDb(&rms_error)) {
|
||||
// No error update ready.
|
||||
@@ -374,39 +324,55 @@ void AgcManagerDirect::UpdateGain() {
|
||||
rms_error += kMinCompressionGain;
|
||||
|
||||
// Handle as much error as possible with the compressor first.
|
||||
int raw_compression = std::max(std::min(rms_error, max_compression_gain_),
|
||||
kMinCompressionGain);
|
||||
int raw_compression =
|
||||
rtc::SafeClamp(rms_error, kMinCompressionGain, max_compression_gain_);
|
||||
|
||||
// Deemphasize the compression gain error. Move halfway between the current
|
||||
// target and the newly received target. This serves to soften perceptible
|
||||
// intra-talkspurt adjustments, at the cost of some adaptation speed.
|
||||
if ((raw_compression == max_compression_gain_ &&
|
||||
target_compression_ == max_compression_gain_ - 1) ||
|
||||
target_compression_ == max_compression_gain_ - 1) ||
|
||||
(raw_compression == kMinCompressionGain &&
|
||||
target_compression_ == kMinCompressionGain + 1)) {
|
||||
target_compression_ == kMinCompressionGain + 1)) {
|
||||
// Special case to allow the target to reach the endpoints of the
|
||||
// compression range. The deemphasis would otherwise halt it at 1 dB shy.
|
||||
target_compression_ = raw_compression;
|
||||
} else {
|
||||
target_compression_ = (raw_compression - target_compression_) / 2
|
||||
+ target_compression_;
|
||||
target_compression_ =
|
||||
(raw_compression - target_compression_) / 2 + target_compression_;
|
||||
}
|
||||
|
||||
// Residual error will be handled by adjusting the volume slider. Use the
|
||||
// raw rather than deemphasized compression here as we would otherwise
|
||||
// shrink the amount of slack the compressor provides.
|
||||
int residual_gain = rms_error - raw_compression;
|
||||
residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange),
|
||||
kMaxResidualGainChange);
|
||||
LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", "
|
||||
<< "target_compression=" << target_compression_ << ", "
|
||||
<< "residual_gain=" << residual_gain;
|
||||
const int residual_gain =
|
||||
rtc::SafeClamp(rms_error - raw_compression, -kMaxResidualGainChange,
|
||||
kMaxResidualGainChange);
|
||||
RTC_DLOG(LS_INFO) << "[agc] rms_error=" << rms_error
|
||||
<< ", target_compression=" << target_compression_
|
||||
<< ", residual_gain=" << residual_gain;
|
||||
if (residual_gain == 0)
|
||||
return;
|
||||
|
||||
SetLevel(LevelFromGainError(residual_gain, level_));
|
||||
int old_level = level_;
|
||||
SetLevel(LevelFromGainError(residual_gain, level_, min_mic_level_));
|
||||
if (old_level != level_) {
|
||||
// level_ was updated by SetLevel; log the new value.
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.AgcSetLevel", level_, 1,
|
||||
kMaxMicLevel, 50);
|
||||
// Reset the AGC since the level has changed.
|
||||
agc_->Reset();
|
||||
}
|
||||
}
|
||||
|
||||
void AgcManagerDirect::UpdateCompressor() {
|
||||
void MonoAgc::UpdateCompressor() {
|
||||
calls_since_last_gain_log_++;
|
||||
if (calls_since_last_gain_log_ == 100) {
|
||||
calls_since_last_gain_log_ = 0;
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc.DigitalGainApplied",
|
||||
compression_, 0, kMaxCompressionGain,
|
||||
kMaxCompressionGain + 1);
|
||||
}
|
||||
if (compression_ == target_compression_) {
|
||||
return;
|
||||
}
|
||||
@@ -431,10 +397,209 @@ void AgcManagerDirect::UpdateCompressor() {
|
||||
|
||||
// Set the new compression gain.
|
||||
if (new_compression != compression_) {
|
||||
RTC_HISTOGRAM_COUNTS_LINEAR("WebRTC.Audio.Agc.DigitalGainUpdated",
|
||||
new_compression, 0, kMaxCompressionGain,
|
||||
kMaxCompressionGain + 1);
|
||||
compression_ = new_compression;
|
||||
compression_accumulator_ = new_compression;
|
||||
if (gctrl_->set_compression_gain_db(compression_) != 0) {
|
||||
LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_);
|
||||
new_compression_to_set_ = compression_;
|
||||
}
|
||||
}
|
||||
|
||||
int AgcManagerDirect::instance_counter_ = 0;
|
||||
|
||||
AgcManagerDirect::AgcManagerDirect(Agc* agc,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
int sample_rate_hz)
|
||||
: AgcManagerDirect(/*num_capture_channels*/ 1,
|
||||
startup_min_level,
|
||||
clipped_level_min,
|
||||
/*use_agc2_level_estimation*/ false,
|
||||
/*disable_digital_adaptive*/ false,
|
||||
sample_rate_hz) {
|
||||
RTC_DCHECK(channel_agcs_[0]);
|
||||
RTC_DCHECK(agc);
|
||||
channel_agcs_[0]->set_agc(agc);
|
||||
}
|
||||
|
||||
AgcManagerDirect::AgcManagerDirect(int num_capture_channels,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
bool use_agc2_level_estimation,
|
||||
bool disable_digital_adaptive,
|
||||
int sample_rate_hz)
|
||||
: data_dumper_(
|
||||
new ApmDataDumper(rtc::AtomicOps::Increment(&instance_counter_))),
|
||||
use_min_channel_level_(!UseMaxAnalogChannelLevel()),
|
||||
sample_rate_hz_(sample_rate_hz),
|
||||
num_capture_channels_(num_capture_channels),
|
||||
disable_digital_adaptive_(disable_digital_adaptive),
|
||||
frames_since_clipped_(kClippedWaitFrames),
|
||||
capture_muted_(false),
|
||||
channel_agcs_(num_capture_channels),
|
||||
new_compressions_to_set_(num_capture_channels) {
|
||||
const int min_mic_level = GetMinMicLevel();
|
||||
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
|
||||
ApmDataDumper* data_dumper_ch = ch == 0 ? data_dumper_.get() : nullptr;
|
||||
|
||||
channel_agcs_[ch] = std::make_unique<MonoAgc>(
|
||||
data_dumper_ch, startup_min_level, clipped_level_min,
|
||||
use_agc2_level_estimation, disable_digital_adaptive_, min_mic_level);
|
||||
}
|
||||
RTC_DCHECK_LT(0, channel_agcs_.size());
|
||||
channel_agcs_[0]->ActivateLogging();
|
||||
}
|
||||
|
||||
AgcManagerDirect::~AgcManagerDirect() {}
|
||||
|
||||
void AgcManagerDirect::Initialize() {
|
||||
RTC_DLOG(LS_INFO) << "AgcManagerDirect::Initialize";
|
||||
data_dumper_->InitiateNewSetOfRecordings();
|
||||
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
|
||||
channel_agcs_[ch]->Initialize();
|
||||
}
|
||||
capture_muted_ = false;
|
||||
|
||||
AggregateChannelLevels();
|
||||
}
|
||||
|
||||
void AgcManagerDirect::SetupDigitalGainControl(
|
||||
GainControl* gain_control) const {
|
||||
RTC_DCHECK(gain_control);
|
||||
if (gain_control->set_mode(GainControl::kFixedDigital) != 0) {
|
||||
RTC_LOG(LS_ERROR) << "set_mode(GainControl::kFixedDigital) failed.";
|
||||
}
|
||||
const int target_level_dbfs = disable_digital_adaptive_ ? 0 : 2;
|
||||
if (gain_control->set_target_level_dbfs(target_level_dbfs) != 0) {
|
||||
RTC_LOG(LS_ERROR) << "set_target_level_dbfs() failed.";
|
||||
}
|
||||
const int compression_gain_db =
|
||||
disable_digital_adaptive_ ? 0 : kDefaultCompressionGain;
|
||||
if (gain_control->set_compression_gain_db(compression_gain_db) != 0) {
|
||||
RTC_LOG(LS_ERROR) << "set_compression_gain_db() failed.";
|
||||
}
|
||||
const bool enable_limiter = !disable_digital_adaptive_;
|
||||
if (gain_control->enable_limiter(enable_limiter) != 0) {
|
||||
RTC_LOG(LS_ERROR) << "enable_limiter() failed.";
|
||||
}
|
||||
}
|
||||
|
||||
void AgcManagerDirect::AnalyzePreProcess(const AudioBuffer* audio) {
|
||||
RTC_DCHECK(audio);
|
||||
AnalyzePreProcess(audio->channels_const(), audio->num_frames());
|
||||
}
|
||||
|
||||
void AgcManagerDirect::AnalyzePreProcess(const float* const* audio,
|
||||
size_t samples_per_channel) {
|
||||
RTC_DCHECK(audio);
|
||||
AggregateChannelLevels();
|
||||
if (capture_muted_) {
|
||||
return;
|
||||
}
|
||||
|
||||
if (frames_since_clipped_ < kClippedWaitFrames) {
|
||||
++frames_since_clipped_;
|
||||
return;
|
||||
}
|
||||
|
||||
// Check for clipped samples, as the AGC has difficulty detecting pitch
|
||||
// under clipping distortion. We do this in the preprocessing phase in order
|
||||
// to catch clipped echo as well.
|
||||
//
|
||||
// If we find a sufficiently clipped frame, drop the current microphone level
|
||||
// and enforce a new maximum level, dropped the same amount from the current
|
||||
// maximum. This harsh treatment is an effort to avoid repeated clipped echo
|
||||
// events. As compensation for this restriction, the maximum compression
|
||||
// gain is increased, through SetMaxLevel().
|
||||
float clipped_ratio =
|
||||
ComputeClippedRatio(audio, num_capture_channels_, samples_per_channel);
|
||||
|
||||
if (clipped_ratio > kClippedRatioThreshold) {
|
||||
RTC_DLOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
|
||||
<< clipped_ratio;
|
||||
for (auto& state_ch : channel_agcs_) {
|
||||
state_ch->HandleClipping();
|
||||
}
|
||||
frames_since_clipped_ = 0;
|
||||
}
|
||||
AggregateChannelLevels();
|
||||
}
|
||||
|
||||
void AgcManagerDirect::Process(const AudioBuffer* audio) {
|
||||
AggregateChannelLevels();
|
||||
|
||||
if (capture_muted_) {
|
||||
return;
|
||||
}
|
||||
|
||||
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
|
||||
int16_t* audio_use = nullptr;
|
||||
std::array<int16_t, AudioBuffer::kMaxSampleRate / 100> audio_data;
|
||||
int num_frames_per_band;
|
||||
if (audio) {
|
||||
FloatS16ToS16(audio->split_bands_const_f(ch)[0],
|
||||
audio->num_frames_per_band(), audio_data.data());
|
||||
audio_use = audio_data.data();
|
||||
num_frames_per_band = audio->num_frames_per_band();
|
||||
} else {
|
||||
// Only used for testing.
|
||||
// TODO(peah): Change unittests to only allow on non-null audio input.
|
||||
num_frames_per_band = 320;
|
||||
}
|
||||
channel_agcs_[ch]->Process(audio_use, num_frames_per_band, sample_rate_hz_);
|
||||
new_compressions_to_set_[ch] = channel_agcs_[ch]->new_compression();
|
||||
}
|
||||
|
||||
AggregateChannelLevels();
|
||||
}
|
||||
|
||||
absl::optional<int> AgcManagerDirect::GetDigitalComressionGain() {
|
||||
return new_compressions_to_set_[channel_controlling_gain_];
|
||||
}
|
||||
|
||||
void AgcManagerDirect::SetCaptureMuted(bool muted) {
|
||||
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
|
||||
channel_agcs_[ch]->SetCaptureMuted(muted);
|
||||
}
|
||||
capture_muted_ = muted;
|
||||
}
|
||||
|
||||
float AgcManagerDirect::voice_probability() const {
|
||||
float max_prob = 0.f;
|
||||
for (const auto& state_ch : channel_agcs_) {
|
||||
max_prob = std::max(max_prob, state_ch->voice_probability());
|
||||
}
|
||||
|
||||
return max_prob;
|
||||
}
|
||||
|
||||
void AgcManagerDirect::set_stream_analog_level(int level) {
|
||||
for (size_t ch = 0; ch < channel_agcs_.size(); ++ch) {
|
||||
channel_agcs_[ch]->set_stream_analog_level(level);
|
||||
}
|
||||
|
||||
AggregateChannelLevels();
|
||||
}
|
||||
|
||||
void AgcManagerDirect::AggregateChannelLevels() {
|
||||
stream_analog_level_ = channel_agcs_[0]->stream_analog_level();
|
||||
channel_controlling_gain_ = 0;
|
||||
if (use_min_channel_level_) {
|
||||
for (size_t ch = 1; ch < channel_agcs_.size(); ++ch) {
|
||||
int level = channel_agcs_[ch]->stream_analog_level();
|
||||
if (level < stream_analog_level_) {
|
||||
stream_analog_level_ = level;
|
||||
channel_controlling_gain_ = static_cast<int>(ch);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
for (size_t ch = 1; ch < channel_agcs_.size(); ++ch) {
|
||||
int level = channel_agcs_[ch]->stream_analog_level();
|
||||
if (level > stream_analog_level_) {
|
||||
stream_analog_level_ = level;
|
||||
channel_controlling_gain_ = static_cast<int>(ch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@@ -8,29 +8,22 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/agc/agc.h"
|
||||
#include <memory>
|
||||
|
||||
#include "absl/types/optional.h"
|
||||
#include "modules/audio_processing/agc/agc.h"
|
||||
#include "modules/audio_processing/audio_buffer.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/gtest_prod_util.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class AudioFrame;
|
||||
class DebugFile;
|
||||
class MonoAgc;
|
||||
class GainControl;
|
||||
|
||||
// Callbacks that need to be injected into AgcManagerDirect to read and control
|
||||
// the volume values. This is done to remove the VoiceEngine dependency in
|
||||
// AgcManagerDirect.
|
||||
// TODO(aluebs): Remove VolumeCallbacks.
|
||||
class VolumeCallbacks {
|
||||
public:
|
||||
virtual ~VolumeCallbacks() {}
|
||||
virtual void SetMicVolume(int volume) = 0;
|
||||
virtual int GetMicVolume() = 0;
|
||||
};
|
||||
|
||||
// Direct interface to use AGC to set volume and compression values.
|
||||
// AudioProcessing uses this interface directly to integrate the callback-less
|
||||
// AGC.
|
||||
@@ -42,30 +35,105 @@ class AgcManagerDirect final {
|
||||
// responsible for processing the audio using it after the call to Process.
|
||||
// The operating range of startup_min_level is [12, 255] and any input value
|
||||
// outside that range will be clamped.
|
||||
AgcManagerDirect(GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level);
|
||||
// Dependency injection for testing. Don't delete |agc| as the memory is owned
|
||||
// by the manager.
|
||||
AgcManagerDirect(Agc* agc,
|
||||
GainControl* gctrl,
|
||||
VolumeCallbacks* volume_callbacks,
|
||||
int startup_min_level);
|
||||
~AgcManagerDirect();
|
||||
AgcManagerDirect(int num_capture_channels,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
bool use_agc2_level_estimation,
|
||||
bool disable_digital_adaptive,
|
||||
int sample_rate_hz);
|
||||
|
||||
int Initialize();
|
||||
void AnalyzePreProcess(int16_t* audio,
|
||||
int num_channels,
|
||||
size_t samples_per_channel);
|
||||
void Process(const int16_t* audio, size_t length, int sample_rate_hz);
|
||||
~AgcManagerDirect();
|
||||
AgcManagerDirect(const AgcManagerDirect&) = delete;
|
||||
AgcManagerDirect& operator=(const AgcManagerDirect&) = delete;
|
||||
|
||||
void Initialize();
|
||||
void SetupDigitalGainControl(GainControl* gain_control) const;
|
||||
|
||||
void AnalyzePreProcess(const AudioBuffer* audio);
|
||||
void Process(const AudioBuffer* audio);
|
||||
|
||||
// Call when the capture stream has been muted/unmuted. This causes the
|
||||
// manager to disregard all incoming audio; chances are good it's background
|
||||
// noise to which we'd like to avoid adapting.
|
||||
void SetCaptureMuted(bool muted);
|
||||
bool capture_muted() { return capture_muted_; }
|
||||
float voice_probability() const;
|
||||
|
||||
float voice_probability();
|
||||
int stream_analog_level() const { return stream_analog_level_; }
|
||||
void set_stream_analog_level(int level);
|
||||
int num_channels() const { return num_capture_channels_; }
|
||||
int sample_rate_hz() const { return sample_rate_hz_; }
|
||||
|
||||
// If available, returns a new compression gain for the digital gain control.
|
||||
absl::optional<int> GetDigitalComressionGain();
|
||||
|
||||
private:
|
||||
friend class AgcManagerDirectTest;
|
||||
|
||||
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest,
|
||||
DisableDigitalDisablesDigital);
|
||||
FRIEND_TEST_ALL_PREFIXES(AgcManagerDirectStandaloneTest,
|
||||
AgcMinMicLevelExperiment);
|
||||
|
||||
// Dependency injection for testing. Don't delete |agc| as the memory is owned
|
||||
// by the manager.
|
||||
AgcManagerDirect(Agc* agc,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
int sample_rate_hz);
|
||||
|
||||
void AnalyzePreProcess(const float* const* audio, size_t samples_per_channel);
|
||||
|
||||
void AggregateChannelLevels();
|
||||
|
||||
std::unique_ptr<ApmDataDumper> data_dumper_;
|
||||
static int instance_counter_;
|
||||
const bool use_min_channel_level_;
|
||||
const int sample_rate_hz_;
|
||||
const int num_capture_channels_;
|
||||
const bool disable_digital_adaptive_;
|
||||
|
||||
int frames_since_clipped_;
|
||||
int stream_analog_level_ = 0;
|
||||
bool capture_muted_;
|
||||
int channel_controlling_gain_ = 0;
|
||||
|
||||
std::vector<std::unique_ptr<MonoAgc>> channel_agcs_;
|
||||
std::vector<absl::optional<int>> new_compressions_to_set_;
|
||||
};
|
||||
|
||||
class MonoAgc {
|
||||
public:
|
||||
MonoAgc(ApmDataDumper* data_dumper,
|
||||
int startup_min_level,
|
||||
int clipped_level_min,
|
||||
bool use_agc2_level_estimation,
|
||||
bool disable_digital_adaptive,
|
||||
int min_mic_level);
|
||||
~MonoAgc();
|
||||
MonoAgc(const MonoAgc&) = delete;
|
||||
MonoAgc& operator=(const MonoAgc&) = delete;
|
||||
|
||||
void Initialize();
|
||||
void SetCaptureMuted(bool muted);
|
||||
|
||||
void HandleClipping();
|
||||
|
||||
void Process(const int16_t* audio,
|
||||
size_t samples_per_channel,
|
||||
int sample_rate_hz);
|
||||
|
||||
void set_stream_analog_level(int level) { stream_analog_level_ = level; }
|
||||
int stream_analog_level() const { return stream_analog_level_; }
|
||||
float voice_probability() const { return agc_->voice_probability(); }
|
||||
void ActivateLogging() { log_to_histograms_ = true; }
|
||||
absl::optional<int> new_compression() const {
|
||||
return new_compression_to_set_;
|
||||
}
|
||||
|
||||
// Only used for testing.
|
||||
void set_agc(Agc* agc) { agc_.reset(agc); }
|
||||
int min_mic_level() const { return min_mic_level_; }
|
||||
int startup_min_level() const { return startup_min_level_; }
|
||||
|
||||
private:
|
||||
// Sets a new microphone level, after first checking that it hasn't been
|
||||
@@ -81,28 +149,26 @@ class AgcManagerDirect final {
|
||||
void UpdateGain();
|
||||
void UpdateCompressor();
|
||||
|
||||
rtc::scoped_ptr<Agc> agc_;
|
||||
GainControl* gctrl_;
|
||||
VolumeCallbacks* volume_callbacks_;
|
||||
|
||||
int frames_since_clipped_;
|
||||
int level_;
|
||||
const int min_mic_level_;
|
||||
const bool disable_digital_adaptive_;
|
||||
std::unique_ptr<Agc> agc_;
|
||||
int level_ = 0;
|
||||
int max_level_;
|
||||
int max_compression_gain_;
|
||||
int target_compression_;
|
||||
int compression_;
|
||||
float compression_accumulator_;
|
||||
bool capture_muted_;
|
||||
bool check_volume_on_next_process_;
|
||||
bool startup_;
|
||||
bool capture_muted_ = false;
|
||||
bool check_volume_on_next_process_ = true;
|
||||
bool startup_ = true;
|
||||
int startup_min_level_;
|
||||
|
||||
rtc::scoped_ptr<DebugFile> file_preproc_;
|
||||
rtc::scoped_ptr<DebugFile> file_postproc_;
|
||||
|
||||
RTC_DISALLOW_COPY_AND_ASSIGN(AgcManagerDirect);
|
||||
int calls_since_last_gain_log_ = 0;
|
||||
int stream_analog_level_ = 0;
|
||||
absl::optional<int> new_compression_to_set_;
|
||||
bool log_to_histograms_ = false;
|
||||
const int clipped_level_min_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
|
||||
|
105  webrtc/modules/audio_processing/agc/gain_control.h  Normal file
@@ -0,0 +1,105 @@
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_
#define MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_

namespace webrtc {

// The automatic gain control (AGC) component brings the signal to an
// appropriate range. This is done by applying a digital gain directly and, in
// the analog mode, prescribing an analog gain to be applied at the audio HAL.
//
// Recommended to be enabled on the client-side.
class GainControl {
 public:
  // When an analog mode is set, this must be called prior to |ProcessStream()|
  // to pass the current analog level from the audio HAL. Must be within the
  // range provided to |set_analog_level_limits()|.
  virtual int set_stream_analog_level(int level) = 0;

  // When an analog mode is set, this should be called after |ProcessStream()|
  // to obtain the recommended new analog level for the audio HAL. It is the
  // users responsibility to apply this level.
  virtual int stream_analog_level() const = 0;

  enum Mode {
    // Adaptive mode intended for use if an analog volume control is available
    // on the capture device. It will require the user to provide coupling
    // between the OS mixer controls and AGC through the |stream_analog_level()|
    // functions.
    //
    // It consists of an analog gain prescription for the audio device and a
    // digital compression stage.
    kAdaptiveAnalog,

    // Adaptive mode intended for situations in which an analog volume control
    // is unavailable. It operates in a similar fashion to the adaptive analog
    // mode, but with scaling instead applied in the digital domain. As with
    // the analog mode, it additionally uses a digital compression stage.
    kAdaptiveDigital,

    // Fixed mode which enables only the digital compression stage also used by
    // the two adaptive modes.
    //
    // It is distinguished from the adaptive modes by considering only a
    // short time-window of the input signal. It applies a fixed gain through
    // most of the input level range, and compresses (gradually reduces gain
    // with increasing level) the input signal at higher levels. This mode is
    // preferred on embedded devices where the capture signal level is
    // predictable, so that a known gain can be applied.
    kFixedDigital
  };

  virtual int set_mode(Mode mode) = 0;
  virtual Mode mode() const = 0;

  // Sets the target peak |level| (or envelope) of the AGC in dBFs (decibels
  // from digital full-scale). The convention is to use positive values. For
  // instance, passing in a value of 3 corresponds to -3 dBFs, or a target
  // level 3 dB below full-scale. Limited to [0, 31].
  //
  // TODO(ajm): use a negative value here instead, if/when VoE will similarly
  // update its interface.
  virtual int set_target_level_dbfs(int level) = 0;
  virtual int target_level_dbfs() const = 0;

  // Sets the maximum |gain| the digital compression stage may apply, in dB. A
  // higher number corresponds to greater compression, while a value of 0 will
  // leave the signal uncompressed. Limited to [0, 90].
  virtual int set_compression_gain_db(int gain) = 0;
  virtual int compression_gain_db() const = 0;

  // When enabled, the compression stage will hard limit the signal to the
  // target level. Otherwise, the signal will be compressed but not limited
  // above the target level.
  virtual int enable_limiter(bool enable) = 0;
  virtual bool is_limiter_enabled() const = 0;

  // Sets the |minimum| and |maximum| analog levels of the audio capture device.
  // Must be set if and only if an analog mode is used. Limited to [0, 65535].
  virtual int set_analog_level_limits(int minimum, int maximum) = 0;
  virtual int analog_level_minimum() const = 0;
  virtual int analog_level_maximum() const = 0;

  // Returns true if the AGC has detected a saturation event (period where the
  // signal reaches digital full-scale) in the current frame and the analog
  // level cannot be reduced.
  //
  // This could be used as an indicator to reduce or disable analog mic gain at
  // the audio HAL.
  virtual bool stream_is_saturated() const = 0;

 protected:
  virtual ~GainControl() {}
};
}  // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_AGC_GAIN_CONTROL_H_
@@ -8,268 +8,33 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kGainMapSize = 256;
|
||||
// Uses parameters: si = 2, sf = 0.25, D = 8/256
|
||||
static const int kGainMap[kGainMapSize] = {
|
||||
-56,
|
||||
-54,
|
||||
-52,
|
||||
-50,
|
||||
-48,
|
||||
-47,
|
||||
-45,
|
||||
-43,
|
||||
-42,
|
||||
-40,
|
||||
-38,
|
||||
-37,
|
||||
-35,
|
||||
-34,
|
||||
-33,
|
||||
-31,
|
||||
-30,
|
||||
-29,
|
||||
-27,
|
||||
-26,
|
||||
-25,
|
||||
-24,
|
||||
-23,
|
||||
-22,
|
||||
-20,
|
||||
-19,
|
||||
-18,
|
||||
-17,
|
||||
-16,
|
||||
-15,
|
||||
-14,
|
||||
-14,
|
||||
-13,
|
||||
-12,
|
||||
-11,
|
||||
-10,
|
||||
-9,
|
||||
-8,
|
||||
-8,
|
||||
-7,
|
||||
-6,
|
||||
-5,
|
||||
-5,
|
||||
-4,
|
||||
-3,
|
||||
-2,
|
||||
-2,
|
||||
-1,
|
||||
0,
|
||||
0,
|
||||
1,
|
||||
1,
|
||||
2,
|
||||
3,
|
||||
3,
|
||||
4,
|
||||
4,
|
||||
5,
|
||||
5,
|
||||
6,
|
||||
6,
|
||||
7,
|
||||
7,
|
||||
8,
|
||||
8,
|
||||
9,
|
||||
9,
|
||||
10,
|
||||
10,
|
||||
11,
|
||||
11,
|
||||
12,
|
||||
12,
|
||||
13,
|
||||
13,
|
||||
13,
|
||||
14,
|
||||
14,
|
||||
15,
|
||||
15,
|
||||
15,
|
||||
16,
|
||||
16,
|
||||
17,
|
||||
17,
|
||||
17,
|
||||
18,
|
||||
18,
|
||||
18,
|
||||
19,
|
||||
19,
|
||||
19,
|
||||
20,
|
||||
20,
|
||||
21,
|
||||
21,
|
||||
21,
|
||||
22,
|
||||
22,
|
||||
22,
|
||||
23,
|
||||
23,
|
||||
23,
|
||||
24,
|
||||
24,
|
||||
24,
|
||||
24,
|
||||
25,
|
||||
25,
|
||||
25,
|
||||
26,
|
||||
26,
|
||||
26,
|
||||
27,
|
||||
27,
|
||||
27,
|
||||
28,
|
||||
28,
|
||||
28,
|
||||
28,
|
||||
29,
|
||||
29,
|
||||
29,
|
||||
30,
|
||||
30,
|
||||
30,
|
||||
30,
|
||||
31,
|
||||
31,
|
||||
31,
|
||||
32,
|
||||
32,
|
||||
32,
|
||||
32,
|
||||
33,
|
||||
33,
|
||||
33,
|
||||
33,
|
||||
34,
|
||||
34,
|
||||
34,
|
||||
35,
|
||||
35,
|
||||
35,
|
||||
35,
|
||||
36,
|
||||
36,
|
||||
36,
|
||||
36,
|
||||
37,
|
||||
37,
|
||||
37,
|
||||
38,
|
||||
38,
|
||||
38,
|
||||
38,
|
||||
39,
|
||||
39,
|
||||
39,
|
||||
39,
|
||||
40,
|
||||
40,
|
||||
40,
|
||||
40,
|
||||
41,
|
||||
41,
|
||||
41,
|
||||
41,
|
||||
42,
|
||||
42,
|
||||
42,
|
||||
42,
|
||||
43,
|
||||
43,
|
||||
43,
|
||||
44,
|
||||
44,
|
||||
44,
|
||||
44,
|
||||
45,
|
||||
45,
|
||||
45,
|
||||
45,
|
||||
46,
|
||||
46,
|
||||
46,
|
||||
46,
|
||||
47,
|
||||
47,
|
||||
47,
|
||||
47,
|
||||
48,
|
||||
48,
|
||||
48,
|
||||
48,
|
||||
49,
|
||||
49,
|
||||
49,
|
||||
49,
|
||||
50,
|
||||
50,
|
||||
50,
|
||||
50,
|
||||
51,
|
||||
51,
|
||||
51,
|
||||
51,
|
||||
52,
|
||||
52,
|
||||
52,
|
||||
52,
|
||||
53,
|
||||
53,
|
||||
53,
|
||||
53,
|
||||
54,
|
||||
54,
|
||||
54,
|
||||
54,
|
||||
55,
|
||||
55,
|
||||
55,
|
||||
55,
|
||||
56,
|
||||
56,
|
||||
56,
|
||||
56,
|
||||
57,
|
||||
57,
|
||||
57,
|
||||
57,
|
||||
58,
|
||||
58,
|
||||
58,
|
||||
58,
|
||||
59,
|
||||
59,
|
||||
59,
|
||||
59,
|
||||
60,
|
||||
60,
|
||||
60,
|
||||
60,
|
||||
61,
|
||||
61,
|
||||
61,
|
||||
61,
|
||||
62,
|
||||
62,
|
||||
62,
|
||||
62,
|
||||
63,
|
||||
63,
|
||||
63,
|
||||
63,
|
||||
64
|
||||
};
|
||||
-56, -54, -52, -50, -48, -47, -45, -43, -42, -40, -38, -37, -35, -34, -33,
|
||||
-31, -30, -29, -27, -26, -25, -24, -23, -22, -20, -19, -18, -17, -16, -15,
|
||||
-14, -14, -13, -12, -11, -10, -9, -8, -8, -7, -6, -5, -5, -4, -3,
|
||||
-2, -2, -1, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6,
|
||||
6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13,
|
||||
13, 14, 14, 15, 15, 15, 16, 16, 17, 17, 17, 18, 18, 18, 19,
|
||||
19, 19, 20, 20, 21, 21, 21, 22, 22, 22, 23, 23, 23, 24, 24,
|
||||
24, 24, 25, 25, 25, 26, 26, 26, 27, 27, 27, 28, 28, 28, 28,
|
||||
29, 29, 29, 30, 30, 30, 30, 31, 31, 31, 32, 32, 32, 32, 33,
|
||||
33, 33, 33, 34, 34, 34, 35, 35, 35, 35, 36, 36, 36, 36, 37,
|
||||
37, 37, 38, 38, 38, 38, 39, 39, 39, 39, 40, 40, 40, 40, 41,
|
||||
41, 41, 41, 42, 42, 42, 42, 43, 43, 43, 44, 44, 44, 44, 45,
|
||||
45, 45, 45, 46, 46, 46, 46, 47, 47, 47, 47, 48, 48, 48, 48,
|
||||
49, 49, 49, 49, 50, 50, 50, 50, 51, 51, 51, 51, 52, 52, 52,
|
||||
52, 53, 53, 53, 53, 54, 54, 54, 54, 55, 55, 55, 55, 56, 56,
|
||||
56, 56, 57, 57, 57, 57, 58, 58, 58, 58, 59, 59, 59, 59, 60,
|
||||
60, 60, 60, 61, 61, 61, 61, 62, 62, 62, 62, 63, 63, 63, 63,
|
||||
64};
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
|
||||
|
1238  webrtc/modules/audio_processing/agc/legacy/analog_agc.cc  Normal file
File diff suppressed because it is too large
@@ -8,17 +8,14 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
|
||||
|
||||
//#define MIC_LEVEL_FEEDBACK
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
|
||||
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include "modules/audio_processing/agc/legacy/digital_agc.h"
|
||||
#include "modules/audio_processing/agc/legacy/gain_control.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
/* Analog Automatic Gain Control variables:
|
||||
* Constant declarations (inner limits inside which no changes are done)
|
||||
@ -32,102 +29,90 @@
|
||||
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
|
||||
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
|
||||
*/
|
||||
#define RXX_BUFFER_LEN 10
|
||||
constexpr int16_t kRxxBufferLen = 10;
|
||||
|
||||
static const int16_t kMsecSpeechInner = 520;
|
||||
static const int16_t kMsecSpeechOuter = 340;
|
||||
|
||||
static const int16_t kNormalVadThreshold = 400;
|
||||
|
||||
static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156
|
||||
static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977
|
||||
static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156
|
||||
static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977
|
||||
|
||||
typedef struct
|
||||
{
|
||||
// Configurable parameters/variables
|
||||
uint32_t fs; // Sampling frequency
|
||||
int16_t compressionGaindB; // Fixed gain level in dB
|
||||
int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
|
||||
int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
|
||||
uint8_t limiterEnable; // Enabling limiter (on/off (default off))
|
||||
WebRtcAgcConfig defaultConfig;
|
||||
WebRtcAgcConfig usedConfig;
|
||||
typedef struct {
|
||||
// Configurable parameters/variables
|
||||
uint32_t fs; // Sampling frequency
|
||||
int16_t compressionGaindB; // Fixed gain level in dB
|
||||
int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
|
||||
int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
|
||||
uint8_t limiterEnable; // Enabling limiter (on/off (default off))
|
||||
WebRtcAgcConfig defaultConfig;
|
||||
WebRtcAgcConfig usedConfig;
|
||||
|
||||
// General variables
|
||||
int16_t initFlag;
|
||||
int16_t lastError;
|
||||
// General variables
|
||||
int16_t initFlag;
|
||||
int16_t lastError;
|
||||
|
||||
// Target level parameters
|
||||
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
|
||||
int32_t analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs
|
||||
int32_t startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs
|
||||
int32_t startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs
|
||||
int32_t upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs
|
||||
int32_t lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs
|
||||
int32_t upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs
|
||||
int32_t lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs
|
||||
uint16_t targetIdx; // Table index for corresponding target level
|
||||
#ifdef MIC_LEVEL_FEEDBACK
|
||||
uint16_t targetIdxOffset; // Table index offset for level compensation
|
||||
#endif
|
||||
int16_t analogTarget; // Digital reference level in ENV scale
|
||||
// Target level parameters
|
||||
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
|
||||
int32_t analogTargetLevel; // = kRxxBufferLen * 846805; -22 dBfs
|
||||
int32_t startUpperLimit; // = kRxxBufferLen * 1066064; -21 dBfs
|
||||
int32_t startLowerLimit; // = kRxxBufferLen * 672641; -23 dBfs
|
||||
int32_t upperPrimaryLimit; // = kRxxBufferLen * 1342095; -20 dBfs
|
||||
int32_t lowerPrimaryLimit; // = kRxxBufferLen * 534298; -24 dBfs
|
||||
int32_t upperSecondaryLimit; // = kRxxBufferLen * 2677832; -17 dBfs
|
||||
int32_t lowerSecondaryLimit; // = kRxxBufferLen * 267783; -27 dBfs
|
||||
uint16_t targetIdx; // Table index for corresponding target level
|
||||
int16_t analogTarget; // Digital reference level in ENV scale
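
The -22 dBFS constant quoted in the comments above follows directly from the stated formula; a standalone check (illustrative only, not part of this header):

#include <cmath>
#include <cstdio>

int main() {
  // round((32767 * 10^(-22/20))^2 * 16 / 2^7): full-scale amplitude attenuated
  // by 22 dB, squared to an energy, expressed per 16-sample block in Q(-7).
  const double amplitude = 32767.0 * std::pow(10.0, -22.0 / 20.0);
  const long per_block = std::lround(amplitude * amplitude * 16.0 / 128.0);
  std::printf("%ld\n", per_block);  // 846805, i.e. analogTargetLevel / kRxxBufferLen
  return 0;
}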
|
||||
|
||||
// Analog AGC specific variables
|
||||
int32_t filterState[8]; // For downsampling wb to nb
|
||||
int32_t upperLimit; // Upper limit for mic energy
|
||||
int32_t lowerLimit; // Lower limit for mic energy
|
||||
int32_t Rxx160w32; // Average energy for one frame
|
||||
int32_t Rxx16_LPw32; // Low pass filtered subframe energies
|
||||
int32_t Rxx160_LPw32; // Low pass filtered frame energies
|
||||
int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe
|
||||
int32_t Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies
|
||||
int32_t Rxx16w32_array[2][5];// Energy values of microphone signal
|
||||
int32_t env[2][10]; // Envelope values of subframes
|
||||
// Analog AGC specific variables
|
||||
int32_t filterState[8]; // For downsampling wb to nb
|
||||
int32_t upperLimit; // Upper limit for mic energy
|
||||
int32_t lowerLimit; // Lower limit for mic energy
|
||||
int32_t Rxx160w32; // Average energy for one frame
|
||||
int32_t Rxx16_LPw32; // Low pass filtered subframe energies
|
||||
int32_t Rxx160_LPw32; // Low pass filtered frame energies
|
||||
int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe
|
||||
int32_t Rxx16_vectorw32[kRxxBufferLen]; // Array with subframe energies
|
||||
int32_t Rxx16w32_array[2][5]; // Energy values of microphone signal
|
||||
int32_t env[2][10]; // Envelope values of subframes
|
||||
|
||||
int16_t Rxx16pos; // Current position in the Rxx16_vectorw32
|
||||
int16_t envSum; // Filtered scaled envelope in subframes
|
||||
int16_t vadThreshold; // Threshold for VAD decision
|
||||
int16_t inActive; // Inactive time in milliseconds
|
||||
int16_t msTooLow; // Milliseconds of speech at a too low level
|
||||
int16_t msTooHigh; // Milliseconds of speech at a too high level
|
||||
int16_t changeToSlowMode; // Change to slow mode after some time at target
|
||||
int16_t firstCall; // First call to the process-function
|
||||
int16_t msZero; // Milliseconds of zero input
|
||||
int16_t msecSpeechOuterChange;// Min ms of speech between volume changes
|
||||
int16_t msecSpeechInnerChange;// Min ms of speech between volume changes
|
||||
int16_t activeSpeech; // Milliseconds of active speech
|
||||
int16_t muteGuardMs; // Counter to prevent mute action
|
||||
int16_t inQueue; // 10 ms batch indicator
|
||||
int16_t Rxx16pos; // Current position in the Rxx16_vectorw32
|
||||
int16_t envSum; // Filtered scaled envelope in subframes
|
||||
int16_t vadThreshold; // Threshold for VAD decision
|
||||
int16_t inActive; // Inactive time in milliseconds
|
||||
int16_t msTooLow; // Milliseconds of speech at a too low level
|
||||
int16_t msTooHigh; // Milliseconds of speech at a too high level
|
||||
int16_t changeToSlowMode; // Change to slow mode after some time at target
|
||||
int16_t firstCall; // First call to the process-function
|
||||
int16_t msZero; // Milliseconds of zero input
|
||||
int16_t msecSpeechOuterChange; // Min ms of speech between volume changes
|
||||
int16_t msecSpeechInnerChange; // Min ms of speech between volume changes
|
||||
int16_t activeSpeech; // Milliseconds of active speech
|
||||
int16_t muteGuardMs; // Counter to prevent mute action
|
||||
int16_t inQueue; // 10 ms batch indicator
|
||||
|
||||
// Microphone level variables
|
||||
int32_t micRef; // Remember ref. mic level for virtual mic
|
||||
uint16_t gainTableIdx; // Current position in virtual gain table
|
||||
int32_t micGainIdx; // Gain index of mic level to increase slowly
|
||||
int32_t micVol; // Remember volume between frames
|
||||
int32_t maxLevel; // Max possible vol level, incl dig gain
|
||||
int32_t maxAnalog; // Maximum possible analog volume level
|
||||
int32_t maxInit; // Initial value of "max"
|
||||
int32_t minLevel; // Minimum possible volume level
|
||||
int32_t minOutput; // Minimum output volume level
|
||||
int32_t zeroCtrlMax; // Remember max gain => don't amp low input
|
||||
int32_t lastInMicLevel;
|
||||
// Microphone level variables
|
||||
int32_t micRef; // Remember ref. mic level for virtual mic
|
||||
uint16_t gainTableIdx; // Current position in virtual gain table
|
||||
int32_t micGainIdx; // Gain index of mic level to increase slowly
|
||||
int32_t micVol; // Remember volume between frames
|
||||
int32_t maxLevel; // Max possible vol level, incl dig gain
|
||||
int32_t maxAnalog; // Maximum possible analog volume level
|
||||
int32_t maxInit; // Initial value of "max"
|
||||
int32_t minLevel; // Minimum possible volume level
|
||||
int32_t minOutput; // Minimum output volume level
|
||||
int32_t zeroCtrlMax; // Remember max gain => don't amp low input
|
||||
int32_t lastInMicLevel;
|
||||
|
||||
int16_t scale; // Scale factor for internal volume levels
|
||||
#ifdef MIC_LEVEL_FEEDBACK
|
||||
int16_t numBlocksMicLvlSat;
|
||||
uint8_t micLvlSat;
|
||||
#endif
|
||||
// Structs for VAD and digital_agc
|
||||
AgcVad vadMic;
|
||||
DigitalAgc digitalAgc;
|
||||
int16_t scale; // Scale factor for internal volume levels
|
||||
// Structs for VAD and digital_agc
|
||||
AgcVad vadMic;
|
||||
DigitalAgc digitalAgc;
|
||||
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
FILE* fpt;
|
||||
FILE* agcLog;
|
||||
int32_t fcount;
|
||||
#endif
|
||||
|
||||
int16_t lowLevelSignal;
|
||||
int16_t lowLevelSignal;
|
||||
} LegacyAgc;
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
|
||||
|
@ -1,772 +0,0 @@
|
||||
/*
|
||||
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
/* digital_agc.c
|
||||
*
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
|
||||
|
||||
// To generate the gaintable, copy&paste the following lines to a Matlab window:
|
||||
// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
|
||||
// zeros = 0:31; lvl = 2.^(1-zeros);
|
||||
// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
|
||||
// B = MaxGain - MinGain;
|
||||
// gains = round(2^16*10.^(0.05 * (MinGain + B * ( log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / log(1/(1+exp(Knee*B))))));
|
||||
// fprintf(1, '\t%i, %i, %i, %i,\n', gains);
|
||||
// % Matlab code for plotting the gain and input/output level characteristic (copy/paste the following 3 lines):
|
||||
// in = 10*log10(lvl); out = 20*log10(gains/65536);
|
||||
// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input (dB)'); ylabel('Gain (dB)');
|
||||
// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on; xlabel('Input (dB)'); ylabel('Output (dB)');
|
||||
// zoom on;
|
||||
|
||||
// Generator table for y=log2(1+e^x) in Q8.
|
||||
enum { kGenFuncTableSize = 128 };
|
||||
static const uint16_t kGenFuncTable[kGenFuncTableSize] = {
|
||||
256, 485, 786, 1126, 1484, 1849, 2217, 2586,
|
||||
2955, 3324, 3693, 4063, 4432, 4801, 5171, 5540,
|
||||
5909, 6279, 6648, 7017, 7387, 7756, 8125, 8495,
|
||||
8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449,
|
||||
11819, 12188, 12557, 12927, 13296, 13665, 14035, 14404,
|
||||
14773, 15143, 15512, 15881, 16251, 16620, 16989, 17359,
|
||||
17728, 18097, 18466, 18836, 19205, 19574, 19944, 20313,
|
||||
20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268,
|
||||
23637, 24006, 24376, 24745, 25114, 25484, 25853, 26222,
|
||||
26592, 26961, 27330, 27700, 28069, 28438, 28808, 29177,
|
||||
29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132,
|
||||
32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086,
|
||||
35456, 35825, 36194, 36564, 36933, 37302, 37672, 38041,
|
||||
38410, 38780, 39149, 39518, 39888, 40257, 40626, 40996,
|
||||
41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950,
|
||||
44320, 44689, 45058, 45428, 45797, 46166, 46536, 46905
|
||||
};
|
||||
|
||||
static const int16_t kAvgDecayTime = 250; // frames; < 3000
|
||||
|
||||
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
|
||||
int16_t digCompGaindB, // Q0
|
||||
int16_t targetLevelDbfs,// Q0
|
||||
uint8_t limiterEnable,
|
||||
int16_t analogTarget) // Q0
|
||||
{
|
||||
// This function generates the compressor gain table used in the fixed digital part.
|
||||
uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox;
|
||||
int32_t inLevel, limiterLvl;
|
||||
int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
|
||||
const uint16_t kLog10 = 54426; // log2(10) in Q14
|
||||
const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14
|
||||
const uint16_t kLogE_1 = 23637; // log2(e) in Q14
|
||||
uint16_t constMaxGain;
|
||||
uint16_t tmpU16, intPart, fracPart;
|
||||
const int16_t kCompRatio = 3;
|
||||
const int16_t kSoftLimiterLeft = 1;
|
||||
int16_t limiterOffset = 0; // Limiter offset
|
||||
int16_t limiterIdx, limiterLvlX;
|
||||
int16_t constLinApprox, zeroGainLvl, maxGain, diffGain;
|
||||
int16_t i, tmp16, tmp16no1;
|
||||
int zeros, zerosScale;
|
||||
|
||||
// Constants
|
||||
// kLogE_1 = 23637; // log2(e) in Q14
|
||||
// kLog10 = 54426; // log2(10) in Q14
|
||||
// kLog10_2 = 49321; // 10*log10(2) in Q14
|
||||
|
||||
// Calculate maximum digital gain and zero gain level
|
||||
tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1);
|
||||
tmp16no1 = analogTarget - targetLevelDbfs;
|
||||
tmp16no1 += WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
|
||||
maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs));
|
||||
tmp32no1 = maxGain * kCompRatio;
|
||||
zeroGainLvl = digCompGaindB;
|
||||
zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1),
|
||||
kCompRatio - 1);
|
||||
if ((digCompGaindB <= analogTarget) && (limiterEnable))
|
||||
{
|
||||
zeroGainLvl += (analogTarget - digCompGaindB + kSoftLimiterLeft);
|
||||
limiterOffset = 0;
|
||||
}
|
||||
|
||||
// Calculate the difference between maximum gain and gain at 0dB0v:
|
||||
// diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio
|
||||
// = (compRatio-1)*digCompGaindB/compRatio
|
||||
tmp32no1 = digCompGaindB * (kCompRatio - 1);
|
||||
diffGain = WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
|
||||
if (diffGain < 0 || diffGain >= kGenFuncTableSize)
|
||||
{
|
||||
assert(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Calculate the limiter level and index:
|
||||
// limiterLvlX = analogTarget - limiterOffset
|
||||
// limiterLvl = targetLevelDbfs + limiterOffset/compRatio
|
||||
limiterLvlX = analogTarget - limiterOffset;
|
||||
limiterIdx =
|
||||
2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX << 13, kLog10_2 / 2);
|
||||
tmp16no1 = WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio);
|
||||
limiterLvl = targetLevelDbfs + tmp16no1;
|
||||
|
||||
// Calculate (through table lookup):
|
||||
// constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8)
|
||||
constMaxGain = kGenFuncTable[diffGain]; // in Q8
|
||||
|
||||
// Calculate a parameter used to approximate the fractional part of 2^x with a
|
||||
// piecewise linear function in Q14:
|
||||
// constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14);
|
||||
constLinApprox = 22817; // in Q14
|
||||
|
||||
// Calculate a denominator used in the exponential part to convert from dB to linear scale:
|
||||
// den = 20*constMaxGain (in Q8)
|
||||
den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8
|
||||
|
||||
for (i = 0; i < 32; i++)
|
||||
{
|
||||
// Calculate scaled input level (compressor):
|
||||
// inLevel = fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio)
|
||||
tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0
|
||||
tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14
|
||||
inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14
|
||||
|
||||
// Calculate diffGain-inLevel, to map using the genFuncTable
|
||||
inLevel = ((int32_t)diffGain << 14) - inLevel; // Q14
|
||||
|
||||
// Make calculations on abs(inLevel) and compensate for the sign afterwards.
|
||||
absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14
|
||||
|
||||
// LUT with interpolation
|
||||
intPart = (uint16_t)(absInLevel >> 14);
|
||||
fracPart = (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part
|
||||
tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8
|
||||
tmpU32no1 = tmpU16 * fracPart; // Q22
|
||||
tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22
|
||||
logApprox = tmpU32no1 >> 8; // Q14
|
||||
// Compensate for negative exponent using the relation:
|
||||
// log2(1 + 2^-x) = log2(1 + 2^x) - x
|
||||
if (inLevel < 0)
|
||||
{
|
||||
zeros = WebRtcSpl_NormU32(absInLevel);
|
||||
zerosScale = 0;
|
||||
if (zeros < 15)
|
||||
{
|
||||
// Not enough space for multiplication
|
||||
tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1)
|
||||
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13)
|
||||
if (zeros < 9)
|
||||
{
|
||||
zerosScale = 9 - zeros;
|
||||
tmpU32no1 >>= zerosScale; // Q(zeros+13)
|
||||
} else
|
||||
{
|
||||
tmpU32no2 >>= zeros - 9; // Q22
|
||||
}
|
||||
} else
|
||||
{
|
||||
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28
|
||||
tmpU32no2 >>= 6; // Q22
|
||||
}
|
||||
logApprox = 0;
|
||||
if (tmpU32no2 < tmpU32no1)
|
||||
{
|
||||
logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); //Q14
|
||||
}
|
||||
}
|
||||
numFIX = (maxGain * constMaxGain) << 6; // Q14
|
||||
numFIX -= (int32_t)logApprox * diffGain; // Q14
|
||||
|
||||
// Calculate ratio
|
||||
// Shift |numFIX| as much as possible.
|
||||
// Ensure we avoid wrap-around in |den| as well.
|
||||
if (numFIX > (den >> 8)) // |den| is Q8.
|
||||
{
|
||||
zeros = WebRtcSpl_NormW32(numFIX);
|
||||
} else
|
||||
{
|
||||
zeros = WebRtcSpl_NormW32(den) + 8;
|
||||
}
|
||||
numFIX <<= zeros; // Q(14+zeros)
|
||||
|
||||
// Shift den so we end up in Qy1
|
||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 8); // Q(zeros)
|
||||
if (numFIX < 0)
|
||||
{
|
||||
numFIX -= tmp32no1 / 2;
|
||||
} else
|
||||
{
|
||||
numFIX += tmp32no1 / 2;
|
||||
}
|
||||
y32 = numFIX / tmp32no1; // in Q14
|
||||
if (limiterEnable && (i < limiterIdx))
|
||||
{
|
||||
tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14
|
||||
tmp32 -= limiterLvl << 14; // Q14
|
||||
y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20);
|
||||
}
|
||||
if (y32 > 39000)
|
||||
{
|
||||
tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27
|
||||
tmp32 >>= 13; // In Q14.
|
||||
} else
|
||||
{
|
||||
tmp32 = y32 * kLog10 + 8192; // in Q28
|
||||
tmp32 >>= 14; // In Q14.
|
||||
}
|
||||
tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16)
|
||||
|
||||
// Calculate power
|
||||
if (tmp32 > 0)
|
||||
{
|
||||
intPart = (int16_t)(tmp32 >> 14);
|
||||
fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14
|
||||
if ((fracPart >> 13) != 0)
|
||||
{
|
||||
tmp16 = (2 << 14) - constLinApprox;
|
||||
tmp32no2 = (1 << 14) - fracPart;
|
||||
tmp32no2 *= tmp16;
|
||||
tmp32no2 >>= 13;
|
||||
tmp32no2 = (1 << 14) - tmp32no2;
|
||||
} else
|
||||
{
|
||||
tmp16 = constLinApprox - (1 << 14);
|
||||
tmp32no2 = (fracPart * tmp16) >> 13;
|
||||
}
|
||||
fracPart = (uint16_t)tmp32no2;
|
||||
gainTable[i] =
|
||||
(1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
|
||||
} else
|
||||
{
|
||||
gainTable[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) {
|
||||
if (agcMode == kAgcModeFixedDigital)
|
||||
{
|
||||
// start at minimum to find correct gain faster
|
||||
stt->capacitorSlow = 0;
|
||||
} else
|
||||
{
|
||||
// start out with 0 dB gain
|
||||
stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f);
|
||||
}
|
||||
stt->capacitorFast = 0;
|
||||
stt->gain = 65536;
|
||||
stt->gatePrevious = 0;
|
||||
stt->agcMode = agcMode;
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
stt->frameCounter = 0;
|
||||
#endif
|
||||
|
||||
// initialize VADs
|
||||
WebRtcAgc_InitVad(&stt->vadNearend);
|
||||
WebRtcAgc_InitVad(&stt->vadFarend);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt,
|
||||
const int16_t* in_far,
|
||||
size_t nrSamples) {
|
||||
assert(stt != NULL);
|
||||
// VAD for far end
|
||||
WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* stt,
|
||||
const int16_t* const* in_near,
|
||||
size_t num_bands,
|
||||
int16_t* const* out,
|
||||
uint32_t FS,
|
||||
int16_t lowlevelSignal) {
|
||||
// array for gains (one value per ms, incl start & end)
|
||||
int32_t gains[11];
|
||||
|
||||
int32_t out_tmp, tmp32;
|
||||
int32_t env[10];
|
||||
int32_t max_nrg;
|
||||
int32_t cur_level;
|
||||
int32_t gain32, delta;
|
||||
int16_t logratio;
|
||||
int16_t lower_thr, upper_thr;
|
||||
int16_t zeros = 0, zeros_fast, frac = 0;
|
||||
int16_t decay;
|
||||
int16_t gate, gain_adj;
|
||||
int16_t k;
|
||||
size_t n, i, L;
|
||||
int16_t L2; // samples/subframe
|
||||
|
||||
// determine number of samples per ms
|
||||
if (FS == 8000)
|
||||
{
|
||||
L = 8;
|
||||
L2 = 3;
|
||||
} else if (FS == 16000 || FS == 32000 || FS == 48000)
|
||||
{
|
||||
L = 16;
|
||||
L2 = 4;
|
||||
} else
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < num_bands; ++i)
|
||||
{
|
||||
if (in_near[i] != out[i])
|
||||
{
|
||||
// Only needed if they don't already point to the same place.
|
||||
memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
|
||||
}
|
||||
}
|
||||
// VAD for near end
|
||||
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10);
|
||||
|
||||
// Account for far end VAD
|
||||
if (stt->vadFarend.counter > 10)
|
||||
{
|
||||
tmp32 = 3 * logratio;
|
||||
logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2);
|
||||
}
|
||||
|
||||
// Determine decay factor depending on VAD
|
||||
// upper_thr = 1.0f;
|
||||
// lower_thr = 0.25f;
|
||||
upper_thr = 1024; // Q10
|
||||
lower_thr = 0; // Q10
|
||||
if (logratio > upper_thr)
|
||||
{
|
||||
// decay = -2^17 / DecayTime; -> -65
|
||||
decay = -65;
|
||||
} else if (logratio < lower_thr)
|
||||
{
|
||||
decay = 0;
|
||||
} else
|
||||
{
|
||||
// decay = (int16_t)(((lower_thr - logratio)
|
||||
// * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10);
|
||||
// SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65
|
||||
tmp32 = (lower_thr - logratio) * 65;
|
||||
decay = (int16_t)(tmp32 >> 10);
|
||||
}
|
||||
|
||||
// adjust decay factor for long silence (detected as low standard deviation)
|
||||
// This is only done in the adaptive modes
|
||||
if (stt->agcMode != kAgcModeFixedDigital)
|
||||
{
|
||||
if (stt->vadNearend.stdLongTerm < 4000)
|
||||
{
|
||||
decay = 0;
|
||||
} else if (stt->vadNearend.stdLongTerm < 8096)
|
||||
{
|
||||
// decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12);
|
||||
tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay;
|
||||
decay = (int16_t)(tmp32 >> 12);
|
||||
}
|
||||
|
||||
if (lowlevelSignal != 0)
|
||||
{
|
||||
decay = 0;
|
||||
}
|
||||
}
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
stt->frameCounter++;
|
||||
fprintf(stt->logFile,
|
||||
"%5.2f\t%d\t%d\t%d\t",
|
||||
(float)(stt->frameCounter) / 100,
|
||||
logratio,
|
||||
decay,
|
||||
stt->vadNearend.stdLongTerm);
|
||||
#endif
|
||||
// Find max amplitude per sub frame
|
||||
// iterate over sub frames
|
||||
for (k = 0; k < 10; k++)
|
||||
{
|
||||
// iterate over samples
|
||||
max_nrg = 0;
|
||||
for (n = 0; n < L; n++)
|
||||
{
|
||||
int32_t nrg = out[0][k * L + n] * out[0][k * L + n];
|
||||
if (nrg > max_nrg)
|
||||
{
|
||||
max_nrg = nrg;
|
||||
}
|
||||
}
|
||||
env[k] = max_nrg;
|
||||
}
|
||||
|
||||
// Calculate gain per sub frame
|
||||
gains[0] = stt->gain;
|
||||
for (k = 0; k < 10; k++)
|
||||
{
|
||||
// Fast envelope follower
|
||||
// decay time = -131000 / -1000 = 131 (ms)
|
||||
stt->capacitorFast = AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast);
|
||||
if (env[k] > stt->capacitorFast)
|
||||
{
|
||||
stt->capacitorFast = env[k];
|
||||
}
|
||||
// Slow envelope follower
|
||||
if (env[k] > stt->capacitorSlow)
|
||||
{
|
||||
// increase capacitorSlow
|
||||
stt->capacitorSlow
|
||||
= AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow), stt->capacitorSlow);
|
||||
} else
|
||||
{
|
||||
// decrease capacitorSlow
|
||||
stt->capacitorSlow
|
||||
= AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow);
|
||||
}
|
||||
|
||||
// use maximum of both capacitors as current level
|
||||
if (stt->capacitorFast > stt->capacitorSlow)
|
||||
{
|
||||
cur_level = stt->capacitorFast;
|
||||
} else
|
||||
{
|
||||
cur_level = stt->capacitorSlow;
|
||||
}
|
||||
// Translate signal level into gain, using a piecewise linear approximation
|
||||
// find number of leading zeros
|
||||
zeros = WebRtcSpl_NormU32((uint32_t)cur_level);
|
||||
if (cur_level == 0)
|
||||
{
|
||||
zeros = 31;
|
||||
}
|
||||
tmp32 = (cur_level << zeros) & 0x7FFFFFFF;
|
||||
frac = (int16_t)(tmp32 >> 19); // Q12.
|
||||
tmp32 = (stt->gainTable[zeros-1] - stt->gainTable[zeros]) * frac;
|
||||
gains[k + 1] = stt->gainTable[zeros] + (tmp32 >> 12);
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
if (k == 0) {
|
||||
fprintf(stt->logFile,
|
||||
"%d\t%d\t%d\t%d\t%d\n",
|
||||
env[0],
|
||||
cur_level,
|
||||
stt->capacitorFast,
|
||||
stt->capacitorSlow,
|
||||
zeros);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
// Gate processing (lower gain during absence of speech)
|
||||
zeros = (zeros << 9) - (frac >> 3);
|
||||
// find number of leading zeros
|
||||
zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast);
|
||||
if (stt->capacitorFast == 0)
|
||||
{
|
||||
zeros_fast = 31;
|
||||
}
|
||||
tmp32 = (stt->capacitorFast << zeros_fast) & 0x7FFFFFFF;
|
||||
zeros_fast <<= 9;
|
||||
zeros_fast -= (int16_t)(tmp32 >> 22);
|
||||
|
||||
gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm;
|
||||
|
||||
if (gate < 0)
|
||||
{
|
||||
stt->gatePrevious = 0;
|
||||
} else
|
||||
{
|
||||
tmp32 = stt->gatePrevious * 7;
|
||||
gate = (int16_t)((gate + tmp32) >> 3);
|
||||
stt->gatePrevious = gate;
|
||||
}
|
||||
// gate < 0 -> no gate
|
||||
// gate > 2500 -> max gate
|
||||
if (gate > 0)
|
||||
{
|
||||
if (gate < 2500)
|
||||
{
|
||||
gain_adj = (2500 - gate) >> 5;
|
||||
} else
|
||||
{
|
||||
gain_adj = 0;
|
||||
}
|
||||
for (k = 0; k < 10; k++)
|
||||
{
|
||||
if ((gains[k + 1] - stt->gainTable[0]) > 8388608)
|
||||
{
|
||||
// To prevent wraparound
|
||||
tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8;
|
||||
tmp32 *= 178 + gain_adj;
|
||||
} else
|
||||
{
|
||||
tmp32 = (gains[k+1] - stt->gainTable[0]) * (178 + gain_adj);
|
||||
tmp32 >>= 8;
|
||||
}
|
||||
gains[k + 1] = stt->gainTable[0] + tmp32;
|
||||
}
|
||||
}
|
||||
|
||||
// Limit gain to avoid overload distortion
|
||||
for (k = 0; k < 10; k++)
|
||||
{
|
||||
// To prevent wrap around
|
||||
zeros = 10;
|
||||
if (gains[k + 1] > 47453132)
|
||||
{
|
||||
zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]);
|
||||
}
|
||||
gain32 = (gains[k + 1] >> zeros) + 1;
|
||||
gain32 *= gain32;
|
||||
// check for overflow
|
||||
while (AGC_MUL32((env[k] >> 12) + 1, gain32)
|
||||
> WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10)))
|
||||
{
|
||||
// multiply by 253/256 ==> -0.1 dB
|
||||
if (gains[k + 1] > 8388607)
|
||||
{
|
||||
// Prevent wrap around
|
||||
gains[k + 1] = (gains[k+1] / 256) * 253;
|
||||
} else
|
||||
{
|
||||
gains[k + 1] = (gains[k+1] * 253) / 256;
|
||||
}
|
||||
gain32 = (gains[k + 1] >> zeros) + 1;
|
||||
gain32 *= gain32;
|
||||
}
|
||||
}
|
||||
// gain reductions should be done 1 ms earlier than gain increases
|
||||
for (k = 1; k < 10; k++)
|
||||
{
|
||||
if (gains[k] > gains[k + 1])
|
||||
{
|
||||
gains[k] = gains[k + 1];
|
||||
}
|
||||
}
|
||||
// save start gain for next frame
|
||||
stt->gain = gains[10];
|
||||
|
||||
// Apply gain
|
||||
// handle first sub frame separately
|
||||
delta = (gains[1] - gains[0]) << (4 - L2);
|
||||
gain32 = gains[0] << 4;
|
||||
// iterate over samples
|
||||
for (n = 0; n < L; n++)
|
||||
{
|
||||
for (i = 0; i < num_bands; ++i)
|
||||
{
|
||||
tmp32 = out[i][n] * ((gain32 + 127) >> 7);
|
||||
out_tmp = tmp32 >> 16;
|
||||
if (out_tmp > 4095)
|
||||
{
|
||||
out[i][n] = (int16_t)32767;
|
||||
} else if (out_tmp < -4096)
|
||||
{
|
||||
out[i][n] = (int16_t)-32768;
|
||||
} else
|
||||
{
|
||||
tmp32 = out[i][n] * (gain32 >> 4);
|
||||
out[i][n] = (int16_t)(tmp32 >> 16);
|
||||
}
|
||||
}
|
||||
//
|
||||
|
||||
gain32 += delta;
|
||||
}
|
||||
// iterate over subframes
|
||||
for (k = 1; k < 10; k++)
|
||||
{
|
||||
delta = (gains[k+1] - gains[k]) << (4 - L2);
|
||||
gain32 = gains[k] << 4;
|
||||
// iterate over samples
|
||||
for (n = 0; n < L; n++)
|
||||
{
|
||||
for (i = 0; i < num_bands; ++i)
|
||||
{
|
||||
tmp32 = out[i][k * L + n] * (gain32 >> 4);
|
||||
out[i][k * L + n] = (int16_t)(tmp32 >> 16);
|
||||
}
|
||||
gain32 += delta;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void WebRtcAgc_InitVad(AgcVad* state) {
|
||||
int16_t k;
|
||||
|
||||
state->HPstate = 0; // state of high pass filter
|
||||
state->logRatio = 0; // log( P(active) / P(inactive) )
|
||||
// average input level (Q10)
|
||||
state->meanLongTerm = 15 << 10;
|
||||
|
||||
// variance of input level (Q8)
|
||||
state->varianceLongTerm = 500 << 8;
|
||||
|
||||
state->stdLongTerm = 0; // standard deviation of input level in dB
|
||||
// short-term average input level (Q10)
|
||||
state->meanShortTerm = 15 << 10;
|
||||
|
||||
// short-term variance of input level (Q8)
|
||||
state->varianceShortTerm = 500 << 8;
|
||||
|
||||
state->stdShortTerm = 0; // short-term standard deviation of input level in dB
|
||||
state->counter = 3; // counts updates
|
||||
for (k = 0; k < 8; k++)
|
||||
{
|
||||
// downsampling filter
|
||||
state->downState[k] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state
|
||||
const int16_t* in, // (i) Speech signal
|
||||
size_t nrSamples) // (i) number of samples
|
||||
{
|
||||
int32_t out, nrg, tmp32, tmp32b;
|
||||
uint16_t tmpU16;
|
||||
int16_t k, subfr, tmp16;
|
||||
int16_t buf1[8];
|
||||
int16_t buf2[4];
|
||||
int16_t HPstate;
|
||||
int16_t zeros, dB;
|
||||
|
||||
// process in 10 sub frames of 1 ms (to save on memory)
|
||||
nrg = 0;
|
||||
HPstate = state->HPstate;
|
||||
for (subfr = 0; subfr < 10; subfr++)
|
||||
{
|
||||
// downsample to 4 kHz
|
||||
if (nrSamples == 160)
|
||||
{
|
||||
for (k = 0; k < 8; k++)
|
||||
{
|
||||
tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1];
|
||||
tmp32 >>= 1;
|
||||
buf1[k] = (int16_t)tmp32;
|
||||
}
|
||||
in += 16;
|
||||
|
||||
WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState);
|
||||
} else
|
||||
{
|
||||
WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState);
|
||||
in += 8;
|
||||
}
|
||||
|
||||
// high pass filter and compute energy
|
||||
for (k = 0; k < 4; k++)
|
||||
{
|
||||
out = buf2[k] + HPstate;
|
||||
tmp32 = 600 * out;
|
||||
HPstate = (int16_t)((tmp32 >> 10) - buf2[k]);
|
||||
nrg += (out * out) >> 6;
|
||||
}
|
||||
}
|
||||
state->HPstate = HPstate;
|
||||
|
||||
// find number of leading zeros
|
||||
if (!(0xFFFF0000 & nrg))
|
||||
{
|
||||
zeros = 16;
|
||||
} else
|
||||
{
|
||||
zeros = 0;
|
||||
}
|
||||
if (!(0xFF000000 & (nrg << zeros)))
|
||||
{
|
||||
zeros += 8;
|
||||
}
|
||||
if (!(0xF0000000 & (nrg << zeros)))
|
||||
{
|
||||
zeros += 4;
|
||||
}
|
||||
if (!(0xC0000000 & (nrg << zeros)))
|
||||
{
|
||||
zeros += 2;
|
||||
}
|
||||
if (!(0x80000000 & (nrg << zeros)))
|
||||
{
|
||||
zeros += 1;
|
||||
}
|
||||
|
||||
// energy level (range {-32..30}) (Q10)
|
||||
dB = (15 - zeros) << 11;
|
||||
|
||||
// Update statistics
|
||||
|
||||
if (state->counter < kAvgDecayTime)
|
||||
{
|
||||
// decay time = AvgDecTime * 10 ms
|
||||
state->counter++;
|
||||
}
|
||||
|
||||
// update short-term estimate of mean energy level (Q10)
|
||||
tmp32 = state->meanShortTerm * 15 + dB;
|
||||
state->meanShortTerm = (int16_t)(tmp32 >> 4);
|
||||
|
||||
// update short-term estimate of variance in energy level (Q8)
|
||||
tmp32 = (dB * dB) >> 12;
|
||||
tmp32 += state->varianceShortTerm * 15;
|
||||
state->varianceShortTerm = tmp32 / 16;
|
||||
|
||||
// update short-term estimate of standard deviation in energy level (Q10)
|
||||
tmp32 = state->meanShortTerm * state->meanShortTerm;
|
||||
tmp32 = (state->varianceShortTerm << 12) - tmp32;
|
||||
state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
|
||||
|
||||
// update long-term estimate of mean energy level (Q10)
|
||||
tmp32 = state->meanLongTerm * state->counter + dB;
|
||||
state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(
|
||||
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
|
||||
|
||||
// update long-term estimate of variance in energy level (Q8)
|
||||
tmp32 = (dB * dB) >> 12;
|
||||
tmp32 += state->varianceLongTerm * state->counter;
|
||||
state->varianceLongTerm = WebRtcSpl_DivW32W16(
|
||||
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
|
||||
|
||||
// update long-term estimate of standard deviation in energy level (Q10)
|
||||
tmp32 = state->meanLongTerm * state->meanLongTerm;
|
||||
tmp32 = (state->varianceLongTerm << 12) - tmp32;
|
||||
state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
|
||||
|
||||
// update voice activity measure (Q10)
|
||||
tmp16 = 3 << 12;
|
||||
// TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in
|
||||
// ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16()
|
||||
// was used, which did an intermediate cast to (int16_t), hence losing
|
||||
// significant bits. This cause logRatio to max out positive, rather than
|
||||
// negative. This is a bug, but has very little significance.
|
||||
tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm);
|
||||
tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm);
|
||||
tmpU16 = (13 << 12);
|
||||
tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16);
|
||||
tmp32 += tmp32b >> 10;
|
||||
|
||||
state->logRatio = (int16_t)(tmp32 >> 6);
|
||||
|
||||
// limit
|
||||
if (state->logRatio > 2048)
|
||||
{
|
||||
state->logRatio = 2048;
|
||||
}
|
||||
if (state->logRatio < -2048)
|
||||
{
|
||||
state->logRatio = -2048;
|
||||
}
|
||||
|
||||
return state->logRatio; // Q10
|
||||
}
|
714
webrtc/modules/audio_processing/agc/legacy/digital_agc.cc
Normal file
@ -0,0 +1,714 @@
/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "modules/audio_processing/agc/legacy/digital_agc.h"

#include <string.h>

#include "modules/audio_processing/agc/legacy/gain_control.h"
#include "rtc_base/checks.h"

namespace webrtc {

namespace {

// To generate the gaintable, copy&paste the following lines to a Matlab window:
// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
// zeros = 0:31; lvl = 2.^(1-zeros);
// A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
// B = MaxGain - MinGain;
// gains = round(2^16*10.^(0.05 * (MinGain + B * (
//         log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) /
//         log(1/(1+exp(Knee*B))))));
// fprintf(1, '\t%i, %i, %i, %i,\n', gains);
// % Matlab code for plotting the gain and input/output level characteristic
// (copy/paste the following 3 lines):
// in = 10*log10(lvl); out = 20*log10(gains/65536);
// subplot(121); plot(in, out); axis([-30, 0, -5, 20]); grid on; xlabel('Input
// (dB)'); ylabel('Gain (dB)');
// subplot(122); plot(in, in+out); axis([-30, 0, -30, 5]); grid on;
// xlabel('Input (dB)'); ylabel('Output (dB)');
// zoom on;
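
For readers without Matlab, the recipe above translates one-for-one into C++; a rough standalone sketch (not part of the upstream file) that prints the 32 Q16 compressor gains for the default MaxGain/MinGain/CompRatio/Knee values:

#include <cmath>
#include <cstdio>

int main() {
  const double MaxGain = 6.0, MinGain = 0.0, CompRatio = 3.0, Knee = 1.0;
  const double B = MaxGain - MinGain;
  for (int z = 0; z < 32; ++z) {
    const double lvl = std::pow(2.0, 1.0 - z);  // input level, one octave per entry
    const double A = -10.0 * std::log10(lvl) * (CompRatio - 1.0) / CompRatio;
    const double gain_db =
        MinGain + B *
            (std::log(std::exp(-Knee * A) + std::exp(-Knee * B)) -
             std::log(1.0 + std::exp(-Knee * B))) /
            std::log(1.0 / (1.0 + std::exp(Knee * B)));
    // Convert from dB to a linear Q16 gain, as the Matlab line does.
    std::printf("%ld, ", std::lround(65536.0 * std::pow(10.0, 0.05 * gain_db)));
  }
  std::printf("\n");
  return 0;
}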
|
||||
// Generator table for y=log2(1+e^x) in Q8.
|
||||
enum { kGenFuncTableSize = 128 };
|
||||
static const uint16_t kGenFuncTable[kGenFuncTableSize] = {
|
||||
256, 485, 786, 1126, 1484, 1849, 2217, 2586, 2955, 3324, 3693,
|
||||
4063, 4432, 4801, 5171, 5540, 5909, 6279, 6648, 7017, 7387, 7756,
|
||||
8125, 8495, 8864, 9233, 9603, 9972, 10341, 10711, 11080, 11449, 11819,
|
||||
12188, 12557, 12927, 13296, 13665, 14035, 14404, 14773, 15143, 15512, 15881,
|
||||
16251, 16620, 16989, 17359, 17728, 18097, 18466, 18836, 19205, 19574, 19944,
|
||||
20313, 20682, 21052, 21421, 21790, 22160, 22529, 22898, 23268, 23637, 24006,
|
||||
24376, 24745, 25114, 25484, 25853, 26222, 26592, 26961, 27330, 27700, 28069,
|
||||
28438, 28808, 29177, 29546, 29916, 30285, 30654, 31024, 31393, 31762, 32132,
|
||||
32501, 32870, 33240, 33609, 33978, 34348, 34717, 35086, 35456, 35825, 36194,
|
||||
36564, 36933, 37302, 37672, 38041, 38410, 38780, 39149, 39518, 39888, 40257,
|
||||
40626, 40996, 41365, 41734, 42104, 42473, 42842, 43212, 43581, 43950, 44320,
|
||||
44689, 45058, 45428, 45797, 46166, 46536, 46905};
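
Per the comment above, the table holds y = log2(1 + e^x) in Q8; a quick standalone sketch (not part of the upstream file) that regenerates all 128 entries:

#include <cmath>
#include <cstdio>

int main() {
  // kGenFuncTable[x] = round(2^8 * log2(1 + e^x)) for x = 0..127.
  for (int x = 0; x < 128; ++x) {
    std::printf("%ld, ", std::lround(256.0 * std::log2(1.0 + std::exp(x))));
  }
  std::printf("\n");
  return 0;
}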
|
||||
|
||||
static const int16_t kAvgDecayTime = 250; // frames; < 3000
|
||||
|
||||
// the 32 most significant bits of A(19) * B(26) >> 13
|
||||
#define AGC_MUL32(A, B) (((B) >> 13) * (A) + (((0x00001FFF & (B)) * (A)) >> 13))
|
||||
// C + the 32 most significant bits of A * B
|
||||
#define AGC_SCALEDIFF32(A, B, C) \
|
||||
((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16))
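
These helpers emulate a wide multiply without 64-bit math by splitting B into its high and low 16-bit halves. A small standalone check against a 64-bit reference (hypothetical, not in the upstream file; assumes arithmetic right shift for negative operands):

#include <cstdint>
#include <cstdio>

// Same definition as above: approximately C + ((A * B) >> 16) in 32-bit math.
#define AGC_SCALEDIFF32(A, B, C) \
  ((C) + ((B) >> 16) * (A) + (((0x0000FFFF & (B)) * (A)) >> 16))

int main() {
  const int32_t a = -1000;      // e.g. the fast-follower decay factor
  const int32_t b = 123456789;  // e.g. a capacitor value
  const int32_t split = AGC_SCALEDIFF32(a, b, b);
  const int32_t ref = b + static_cast<int32_t>((static_cast<int64_t>(a) * b) >> 16);
  std::printf("split=%d ref=%d\n", static_cast<int>(split), static_cast<int>(ref));
  return 0;  // the two values agree for this input
}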
|
||||
|
||||
} // namespace
|
||||
|
||||
int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16
|
||||
int16_t digCompGaindB, // Q0
|
||||
int16_t targetLevelDbfs, // Q0
|
||||
uint8_t limiterEnable,
|
||||
int16_t analogTarget) { // Q0
|
||||
// This function generates the compressor gain table used in the fixed digital
|
||||
// part.
|
||||
uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox;
|
||||
int32_t inLevel, limiterLvl;
|
||||
int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
|
||||
const uint16_t kLog10 = 54426; // log2(10) in Q14
|
||||
const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14
|
||||
const uint16_t kLogE_1 = 23637; // log2(e) in Q14
|
||||
uint16_t constMaxGain;
|
||||
uint16_t tmpU16, intPart, fracPart;
|
||||
const int16_t kCompRatio = 3;
|
||||
const int16_t kSoftLimiterLeft = 1;
|
||||
int16_t limiterOffset = 0; // Limiter offset
|
||||
int16_t limiterIdx, limiterLvlX;
|
||||
int16_t constLinApprox, zeroGainLvl, maxGain, diffGain;
|
||||
int16_t i, tmp16, tmp16no1;
|
||||
int zeros, zerosScale;
|
||||
|
||||
// Constants
|
||||
// kLogE_1 = 23637; // log2(e) in Q14
|
||||
// kLog10 = 54426; // log2(10) in Q14
|
||||
// kLog10_2 = 49321; // 10*log10(2) in Q14
|
||||
|
||||
// Calculate maximum digital gain and zero gain level
|
||||
tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1);
|
||||
tmp16no1 = analogTarget - targetLevelDbfs;
|
||||
tmp16no1 +=
|
||||
WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
|
||||
maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs));
|
||||
tmp32no1 = maxGain * kCompRatio;
|
||||
zeroGainLvl = digCompGaindB;
|
||||
zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1),
|
||||
kCompRatio - 1);
|
||||
if ((digCompGaindB <= analogTarget) && (limiterEnable)) {
|
||||
zeroGainLvl += (analogTarget - digCompGaindB + kSoftLimiterLeft);
|
||||
limiterOffset = 0;
|
||||
}
|
||||
|
||||
// Calculate the difference between maximum gain and gain at 0dB0v:
|
||||
// diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio
|
||||
// = (compRatio-1)*digCompGaindB/compRatio
|
||||
tmp32no1 = digCompGaindB * (kCompRatio - 1);
|
||||
diffGain =
|
||||
WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
|
||||
if (diffGain < 0 || diffGain >= kGenFuncTableSize) {
|
||||
RTC_DCHECK(0);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Calculate the limiter level and index:
|
||||
// limiterLvlX = analogTarget - limiterOffset
|
||||
// limiterLvl = targetLevelDbfs + limiterOffset/compRatio
|
||||
limiterLvlX = analogTarget - limiterOffset;
|
||||
limiterIdx = 2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX * (1 << 13),
|
||||
kLog10_2 / 2);
|
||||
tmp16no1 =
|
||||
WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio);
|
||||
limiterLvl = targetLevelDbfs + tmp16no1;
|
||||
|
||||
// Calculate (through table lookup):
|
||||
// constMaxGain = log2(1+2^(log2(e)*diffGain)); (in Q8)
|
||||
constMaxGain = kGenFuncTable[diffGain]; // in Q8
|
||||
|
||||
// Calculate a parameter used to approximate the fractional part of 2^x with a
|
||||
// piecewise linear function in Q14:
|
||||
// constLinApprox = round(3/2*(4*(3-2*sqrt(2))/(log(2)^2)-0.5)*2^14);
|
||||
constLinApprox = 22817; // in Q14
|
||||
|
||||
// Calculate a denominator used in the exponential part to convert from dB to
|
||||
// linear scale:
|
||||
// den = 20*constMaxGain (in Q8)
|
||||
den = WEBRTC_SPL_MUL_16_U16(20, constMaxGain); // in Q8
|
||||
|
||||
for (i = 0; i < 32; i++) {
|
||||
// Calculate scaled input level (compressor):
|
||||
// inLevel =
|
||||
// fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio)
|
||||
tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0
|
||||
tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14
|
||||
inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14
|
||||
|
||||
// Calculate diffGain-inLevel, to map using the genFuncTable
|
||||
inLevel = (int32_t)diffGain * (1 << 14) - inLevel; // Q14
|
||||
|
||||
// Make calculations on abs(inLevel) and compensate for the sign afterwards.
|
||||
absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14
|
||||
|
||||
// LUT with interpolation
|
||||
intPart = (uint16_t)(absInLevel >> 14);
|
||||
fracPart =
|
||||
(uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part
|
||||
tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8
|
||||
tmpU32no1 = tmpU16 * fracPart; // Q22
|
||||
tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22
|
||||
logApprox = tmpU32no1 >> 8; // Q14
|
||||
// Compensate for negative exponent using the relation:
|
||||
// log2(1 + 2^-x) = log2(1 + 2^x) - x
|
||||
if (inLevel < 0) {
|
||||
zeros = WebRtcSpl_NormU32(absInLevel);
|
||||
zerosScale = 0;
|
||||
if (zeros < 15) {
|
||||
// Not enough space for multiplication
|
||||
tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1)
|
||||
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13)
|
||||
if (zeros < 9) {
|
||||
zerosScale = 9 - zeros;
|
||||
tmpU32no1 >>= zerosScale; // Q(zeros+13)
|
||||
} else {
|
||||
tmpU32no2 >>= zeros - 9; // Q22
|
||||
}
|
||||
} else {
|
||||
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28
|
||||
tmpU32no2 >>= 6; // Q22
|
||||
}
|
||||
logApprox = 0;
|
||||
if (tmpU32no2 < tmpU32no1) {
|
||||
logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); // Q14
|
||||
}
|
||||
}
|
||||
numFIX = (maxGain * constMaxGain) * (1 << 6); // Q14
|
||||
numFIX -= (int32_t)logApprox * diffGain; // Q14
|
||||
|
||||
// Calculate ratio
|
||||
// Shift |numFIX| as much as possible.
|
||||
// Ensure we avoid wrap-around in |den| as well.
|
||||
if (numFIX > (den >> 8) || -numFIX > (den >> 8)) { // |den| is Q8.
|
||||
zeros = WebRtcSpl_NormW32(numFIX);
|
||||
} else {
|
||||
zeros = WebRtcSpl_NormW32(den) + 8;
|
||||
}
|
||||
numFIX *= 1 << zeros; // Q(14+zeros)
|
||||
|
||||
// Shift den so we end up in Qy1
|
||||
tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 9); // Q(zeros - 1)
|
||||
y32 = numFIX / tmp32no1; // in Q15
|
||||
// This is to do rounding in Q14.
|
||||
y32 = y32 >= 0 ? (y32 + 1) >> 1 : -((-y32 + 1) >> 1);
|
||||
|
||||
if (limiterEnable && (i < limiterIdx)) {
|
||||
tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14
|
||||
tmp32 -= limiterLvl * (1 << 14); // Q14
|
||||
y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20);
|
||||
}
|
||||
if (y32 > 39000) {
|
||||
tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27
|
||||
tmp32 >>= 13; // In Q14.
|
||||
} else {
|
||||
tmp32 = y32 * kLog10 + 8192; // in Q28
|
||||
tmp32 >>= 14; // In Q14.
|
||||
}
|
||||
tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16)
|
||||
|
||||
// Calculate power
|
||||
if (tmp32 > 0) {
|
||||
intPart = (int16_t)(tmp32 >> 14);
|
||||
fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14
|
||||
if ((fracPart >> 13) != 0) {
|
||||
tmp16 = (2 << 14) - constLinApprox;
|
||||
tmp32no2 = (1 << 14) - fracPart;
|
||||
tmp32no2 *= tmp16;
|
||||
tmp32no2 >>= 13;
|
||||
tmp32no2 = (1 << 14) - tmp32no2;
|
||||
} else {
|
||||
tmp16 = constLinApprox - (1 << 14);
|
||||
tmp32no2 = (fracPart * tmp16) >> 13;
|
||||
}
|
||||
fracPart = (uint16_t)tmp32no2;
|
||||
gainTable[i] =
|
||||
(1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
|
||||
} else {
|
||||
gainTable[i] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) {
|
||||
if (agcMode == kAgcModeFixedDigital) {
|
||||
// start at minimum to find correct gain faster
|
||||
stt->capacitorSlow = 0;
|
||||
} else {
|
||||
// start out with 0 dB gain
|
||||
stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f);
|
||||
}
|
||||
stt->capacitorFast = 0;
|
||||
stt->gain = 65536;
|
||||
stt->gatePrevious = 0;
|
||||
stt->agcMode = agcMode;
|
||||
|
||||
// initialize VADs
|
||||
WebRtcAgc_InitVad(&stt->vadNearend);
|
||||
WebRtcAgc_InitVad(&stt->vadFarend);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt,
|
||||
const int16_t* in_far,
|
||||
size_t nrSamples) {
|
||||
RTC_DCHECK(stt);
|
||||
// VAD for far end
|
||||
WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Gains is an 11 element long array (one value per ms, incl start & end).
|
||||
int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* stt,
|
||||
const int16_t* const* in_near,
|
||||
size_t num_bands,
|
||||
uint32_t FS,
|
||||
int16_t lowlevelSignal,
|
||||
int32_t gains[11]) {
|
||||
int32_t tmp32;
|
||||
int32_t env[10];
|
||||
int32_t max_nrg;
|
||||
int32_t cur_level;
|
||||
int32_t gain32;
|
||||
int16_t logratio;
|
||||
int16_t lower_thr, upper_thr;
|
||||
int16_t zeros = 0, zeros_fast, frac = 0;
|
||||
int16_t decay;
|
||||
int16_t gate, gain_adj;
|
||||
int16_t k;
|
||||
size_t n, L;
|
||||
int16_t L2; // samples/subframe
|
||||
|
||||
// determine number of samples per ms
|
||||
if (FS == 8000) {
|
||||
L = 8;
|
||||
L2 = 3;
|
||||
} else if (FS == 16000 || FS == 32000 || FS == 48000) {
|
||||
L = 16;
|
||||
L2 = 4;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// VAD for near end
|
||||
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, in_near[0], L * 10);
|
||||
|
||||
// Account for far end VAD
|
||||
if (stt->vadFarend.counter > 10) {
|
||||
tmp32 = 3 * logratio;
|
||||
logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2);
|
||||
}
|
||||
|
||||
// Determine decay factor depending on VAD
|
||||
// upper_thr = 1.0f;
|
||||
// lower_thr = 0.25f;
|
||||
upper_thr = 1024; // Q10
|
||||
lower_thr = 0; // Q10
|
||||
if (logratio > upper_thr) {
|
||||
// decay = -2^17 / DecayTime; -> -65
|
||||
decay = -65;
|
||||
} else if (logratio < lower_thr) {
|
||||
decay = 0;
|
||||
} else {
|
||||
// decay = (int16_t)(((lower_thr - logratio)
|
||||
// * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10);
|
||||
// SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65
|
||||
tmp32 = (lower_thr - logratio) * 65;
|
||||
decay = (int16_t)(tmp32 >> 10);
|
||||
}
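
A compact model of the three branches above (illustrative, accurate to within one LSB of the fixed-point rounding): the decay ramps linearly from 0 to -65, i.e. -2^17 divided by the decay time, as the VAD log-likelihood ratio moves from the lower to the upper Q10 threshold, and saturates outside that range.

#include <algorithm>
#include <cstdint>

int16_t ModelDecay(int16_t logratio_q10) {
  const int16_t clamped = std::clamp<int16_t>(logratio_q10, 0, 1024);
  return static_cast<int16_t>(-65 * clamped / 1024);
}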
|
||||
|
||||
// adjust decay factor for long silence (detected as low standard deviation)
|
||||
// This is only done in the adaptive modes
|
||||
if (stt->agcMode != kAgcModeFixedDigital) {
|
||||
if (stt->vadNearend.stdLongTerm < 4000) {
|
||||
decay = 0;
|
||||
} else if (stt->vadNearend.stdLongTerm < 8096) {
|
||||
// decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >>
|
||||
// 12);
|
||||
tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay;
|
||||
decay = (int16_t)(tmp32 >> 12);
|
||||
}
|
||||
|
||||
if (lowlevelSignal != 0) {
|
||||
decay = 0;
|
||||
}
|
||||
}
|
||||
// Find max amplitude per sub frame
|
||||
// iterate over sub frames
|
||||
for (k = 0; k < 10; k++) {
|
||||
// iterate over samples
|
||||
max_nrg = 0;
|
||||
for (n = 0; n < L; n++) {
|
||||
int32_t nrg = in_near[0][k * L + n] * in_near[0][k * L + n];
|
||||
if (nrg > max_nrg) {
|
||||
max_nrg = nrg;
|
||||
}
|
||||
}
|
||||
env[k] = max_nrg;
|
||||
}
|
||||
|
||||
// Calculate gain per sub frame
|
||||
gains[0] = stt->gain;
|
||||
for (k = 0; k < 10; k++) {
|
||||
// Fast envelope follower
|
||||
// decay time = -131000 / -1000 = 131 (ms)
|
||||
stt->capacitorFast =
|
||||
AGC_SCALEDIFF32(-1000, stt->capacitorFast, stt->capacitorFast);
|
||||
if (env[k] > stt->capacitorFast) {
|
||||
stt->capacitorFast = env[k];
|
||||
}
|
||||
// Slow envelope follower
|
||||
if (env[k] > stt->capacitorSlow) {
|
||||
// increase capacitorSlow
|
||||
stt->capacitorSlow = AGC_SCALEDIFF32(500, (env[k] - stt->capacitorSlow),
|
||||
stt->capacitorSlow);
|
||||
} else {
|
||||
// decrease capacitorSlow
|
||||
stt->capacitorSlow =
|
||||
AGC_SCALEDIFF32(decay, stt->capacitorSlow, stt->capacitorSlow);
|
||||
}
|
||||
|
||||
// use maximum of both capacitors as current level
|
||||
if (stt->capacitorFast > stt->capacitorSlow) {
|
||||
cur_level = stt->capacitorFast;
|
||||
} else {
|
||||
cur_level = stt->capacitorSlow;
|
||||
}
|
||||
// Translate signal level into gain, using a piecewise linear approximation
|
||||
// find number of leading zeros
|
||||
zeros = WebRtcSpl_NormU32((uint32_t)cur_level);
|
||||
if (cur_level == 0) {
|
||||
zeros = 31;
|
||||
}
|
||||
tmp32 = ((uint32_t)cur_level << zeros) & 0x7FFFFFFF;
|
||||
frac = (int16_t)(tmp32 >> 19); // Q12.
|
||||
// Interpolate between gainTable[zeros] and gainTable[zeros-1].
|
||||
tmp32 =
|
||||
((stt->gainTable[zeros - 1] - stt->gainTable[zeros]) * (int64_t)frac) >>
|
||||
12;
|
||||
gains[k + 1] = stt->gainTable[zeros] + tmp32;
|
||||
}
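
The level-to-gain lookup inside the loop above is easier to see on its own: the leading-zero count of the envelope selects a table entry (roughly the level's octave), and the next 12 bits linearly interpolate toward the previous entry. A rough standalone equivalent (illustrative only, assuming a 32-entry Q16 table as built by WebRtcAgc_CalculateGainTable):

#include <cstdint>

// Mirrors the fixed-point steps above: normalize, take the top fractional
// bits as a Q12 mantissa, then lerp between adjacent table entries.
int32_t LookUpGain(const int32_t gain_table[32], uint32_t cur_level) {
  int zeros = 31;
  if (cur_level != 0) {
    zeros = 0;
    while (!(cur_level & 0x80000000u)) {
      cur_level <<= 1;
      ++zeros;
    }
  }
  const int32_t frac = (cur_level & 0x7FFFFFFF) >> 19;  // Q12 mantissa
  // zeros is at least 1 when cur_level came from a non-negative int32_t,
  // so gain_table[zeros - 1] stays in bounds.
  return gain_table[zeros] +
         (((int64_t)(gain_table[zeros - 1] - gain_table[zeros]) * frac) >> 12);
}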
|
||||
|
||||
// Gate processing (lower gain during absence of speech)
|
||||
zeros = (zeros << 9) - (frac >> 3);
|
||||
// find number of leading zeros
|
||||
zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast);
|
||||
if (stt->capacitorFast == 0) {
|
||||
zeros_fast = 31;
|
||||
}
|
||||
tmp32 = ((uint32_t)stt->capacitorFast << zeros_fast) & 0x7FFFFFFF;
|
||||
zeros_fast <<= 9;
|
||||
zeros_fast -= (int16_t)(tmp32 >> 22);
|
||||
|
||||
gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm;
|
||||
|
||||
if (gate < 0) {
|
||||
stt->gatePrevious = 0;
|
||||
} else {
|
||||
tmp32 = stt->gatePrevious * 7;
|
||||
gate = (int16_t)((gate + tmp32) >> 3);
|
||||
stt->gatePrevious = gate;
|
||||
}
|
||||
// gate < 0 -> no gate
|
||||
// gate > 2500 -> max gate
|
||||
if (gate > 0) {
|
||||
if (gate < 2500) {
|
||||
gain_adj = (2500 - gate) >> 5;
|
||||
} else {
|
||||
gain_adj = 0;
|
||||
}
|
||||
for (k = 0; k < 10; k++) {
|
||||
if ((gains[k + 1] - stt->gainTable[0]) > 8388608) {
|
||||
// To prevent wraparound
|
||||
tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8;
|
||||
tmp32 *= 178 + gain_adj;
|
||||
} else {
|
||||
tmp32 = (gains[k + 1] - stt->gainTable[0]) * (178 + gain_adj);
|
||||
tmp32 >>= 8;
|
||||
}
|
||||
gains[k + 1] = stt->gainTable[0] + tmp32;
|
||||
}
|
||||
}
|
||||
|
||||
// Limit gain to avoid overload distortion
|
||||
for (k = 0; k < 10; k++) {
|
||||
// Find a shift of gains[k + 1] such that it can be squared without
|
||||
// overflow, but at least by 10 bits.
|
||||
zeros = 10;
|
||||
if (gains[k + 1] > 47452159) {
|
||||
zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]);
|
||||
}
|
||||
gain32 = (gains[k + 1] >> zeros) + 1;
|
||||
gain32 *= gain32;
|
||||
// check for overflow
|
||||
while (AGC_MUL32((env[k] >> 12) + 1, gain32) >
|
||||
WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10))) {
|
||||
// multiply by 253/256 ==> -0.1 dB
|
||||
if (gains[k + 1] > 8388607) {
|
||||
// Prevent wrap around
|
||||
gains[k + 1] = (gains[k + 1] / 256) * 253;
|
||||
} else {
|
||||
gains[k + 1] = (gains[k + 1] * 253) / 256;
|
||||
}
|
||||
gain32 = (gains[k + 1] >> zeros) + 1;
|
||||
gain32 *= gain32;
|
||||
}
|
||||
}
|
||||
// gain reductions should be done 1 ms earlier than gain increases
|
||||
for (k = 1; k < 10; k++) {
|
||||
if (gains[k] > gains[k + 1]) {
|
||||
gains[k] = gains[k + 1];
|
||||
}
|
||||
}
|
||||
// save start gain for next frame
|
||||
stt->gain = gains[10];
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11],
|
||||
size_t num_bands,
|
||||
uint32_t FS,
|
||||
const int16_t* const* in_near,
|
||||
int16_t* const* out) {
|
||||
// Apply gain
|
||||
// handle first sub frame separately
|
||||
size_t L;
|
||||
int16_t L2; // samples/subframe
|
||||
|
||||
// determine number of samples per ms
|
||||
if (FS == 8000) {
|
||||
L = 8;
|
||||
L2 = 3;
|
||||
} else if (FS == 16000 || FS == 32000 || FS == 48000) {
|
||||
L = 16;
|
||||
L2 = 4;
|
||||
} else {
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < num_bands; ++i) {
|
||||
if (in_near[i] != out[i]) {
|
||||
// Only needed if they don't already point to the same place.
|
||||
memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
|
||||
}
|
||||
}
|
||||
|
||||
// iterate over samples
|
||||
int32_t delta = (gains[1] - gains[0]) * (1 << (4 - L2));
|
||||
int32_t gain32 = gains[0] * (1 << 4);
|
||||
for (size_t n = 0; n < L; n++) {
|
||||
for (size_t i = 0; i < num_bands; ++i) {
|
||||
int32_t out_tmp = (int64_t)out[i][n] * ((gain32 + 127) >> 7) >> 16;
|
||||
if (out_tmp > 4095) {
|
||||
out[i][n] = (int16_t)32767;
|
||||
} else if (out_tmp < -4096) {
|
||||
out[i][n] = (int16_t)-32768;
|
||||
} else {
|
||||
int32_t tmp32 = ((int64_t)out[i][n] * (gain32 >> 4)) >> 16;
|
||||
out[i][n] = (int16_t)tmp32;
|
||||
}
|
||||
}
|
||||
|
||||
gain32 += delta;
|
||||
}
|
||||
// iterate over subframes
|
||||
for (int k = 1; k < 10; k++) {
|
||||
delta = (gains[k + 1] - gains[k]) * (1 << (4 - L2));
|
||||
gain32 = gains[k] * (1 << 4);
|
||||
// iterate over samples
|
||||
for (size_t n = 0; n < L; n++) {
|
||||
for (size_t i = 0; i < num_bands; ++i) {
|
||||
int64_t tmp64 = ((int64_t)(out[i][k * L + n])) * (gain32 >> 4);
|
||||
tmp64 = tmp64 >> 16;
|
||||
if (tmp64 > 32767) {
|
||||
out[i][k * L + n] = 32767;
|
||||
} else if (tmp64 < -32768) {
|
||||
out[i][k * L + n] = -32768;
|
||||
} else {
|
||||
out[i][k * L + n] = (int16_t)(tmp64);
|
||||
}
|
||||
}
|
||||
gain32 += delta;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
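
The old WebRtcAgc_ProcessDigital path is now split into a gain-computation step and a gain-application step. A hypothetical caller sketch (the function names and parameter orders come from the declarations visible in this diff; the gain-table setup values are placeholders, since that table is normally filled in by the analog AGC wrapper):

#include <cstdint>

#include "modules/audio_processing/agc/legacy/digital_agc.h"

int main() {
  // One 10 ms mono frame at 16 kHz (160 samples), silence for brevity.
  int16_t frame[160] = {0};
  int16_t* bands[1] = {frame};

  webrtc::DigitalAgc agc;
  webrtc::WebRtcAgc_InitDigital(&agc, webrtc::kAgcModeFixedDigital);
  webrtc::WebRtcAgc_CalculateGainTable(agc.gainTable, /*digCompGaindB=*/9,
                                       /*targetLevelDbfs=*/3,
                                       /*limiterEnable=*/1, /*analogTarget=*/9);

  int32_t gains[11];
  webrtc::WebRtcAgc_ComputeDigitalGains(&agc, bands, /*num_bands=*/1,
                                        /*FS=*/16000, /*lowlevelSignal=*/0,
                                        gains);
  webrtc::WebRtcAgc_ApplyDigitalGains(gains, /*num_bands=*/1, /*FS=*/16000,
                                      bands, bands);
  return 0;
}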
|
||||
|
||||
void WebRtcAgc_InitVad(AgcVad* state) {
|
||||
int16_t k;
|
||||
|
||||
state->HPstate = 0; // state of high pass filter
|
||||
state->logRatio = 0; // log( P(active) / P(inactive) )
|
||||
// average input level (Q10)
|
||||
state->meanLongTerm = 15 << 10;
|
||||
|
||||
// variance of input level (Q8)
|
||||
state->varianceLongTerm = 500 << 8;
|
||||
|
||||
state->stdLongTerm = 0; // standard deviation of input level in dB
|
||||
// short-term average input level (Q10)
|
||||
state->meanShortTerm = 15 << 10;
|
||||
|
||||
// short-term variance of input level (Q8)
|
||||
state->varianceShortTerm = 500 << 8;
|
||||
|
||||
state->stdShortTerm =
|
||||
0; // short-term standard deviation of input level in dB
|
||||
state->counter = 3; // counts updates
|
||||
for (k = 0; k < 8; k++) {
|
||||
// downsampling filter
|
||||
state->downState[k] = 0;
|
||||
}
|
||||
}
|
||||
|
||||
int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state
|
||||
const int16_t* in, // (i) Speech signal
|
||||
size_t nrSamples) { // (i) number of samples
|
||||
uint32_t nrg;
|
||||
int32_t out, tmp32, tmp32b;
|
||||
uint16_t tmpU16;
|
||||
int16_t k, subfr, tmp16;
|
||||
int16_t buf1[8];
|
||||
int16_t buf2[4];
|
||||
int16_t HPstate;
|
||||
int16_t zeros, dB;
|
||||
int64_t tmp64;
|
||||
|
||||
// process in 10 sub frames of 1 ms (to save on memory)
|
||||
nrg = 0;
|
||||
HPstate = state->HPstate;
|
||||
for (subfr = 0; subfr < 10; subfr++) {
|
||||
// downsample to 4 kHz
|
||||
if (nrSamples == 160) {
|
||||
for (k = 0; k < 8; k++) {
|
||||
tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1];
|
||||
tmp32 >>= 1;
|
||||
buf1[k] = (int16_t)tmp32;
|
||||
}
|
||||
in += 16;
|
||||
|
||||
WebRtcSpl_DownsampleBy2(buf1, 8, buf2, state->downState);
|
||||
} else {
|
||||
WebRtcSpl_DownsampleBy2(in, 8, buf2, state->downState);
|
||||
in += 8;
|
||||
}
|
||||
|
||||
// high pass filter and compute energy
|
||||
for (k = 0; k < 4; k++) {
|
||||
out = buf2[k] + HPstate;
|
||||
tmp32 = 600 * out;
|
||||
HPstate = (int16_t)((tmp32 >> 10) - buf2[k]);
|
||||
|
||||
// Add 'out * out / 2**6' to 'nrg' in a non-overflowing
|
||||
// way. Guaranteed to work as long as 'out * out / 2**6' fits in
|
||||
// an int32_t.
|
||||
nrg += out * (out / (1 << 6));
|
||||
nrg += out * (out % (1 << 6)) / (1 << 6);
|
||||
}
|
||||
}
|
||||
state->HPstate = HPstate;
|
||||
|
||||
// find number of leading zeros
|
||||
if (!(0xFFFF0000 & nrg)) {
|
||||
zeros = 16;
|
||||
} else {
|
||||
zeros = 0;
|
||||
}
|
||||
if (!(0xFF000000 & (nrg << zeros))) {
|
||||
zeros += 8;
|
||||
}
|
||||
if (!(0xF0000000 & (nrg << zeros))) {
|
||||
zeros += 4;
|
||||
}
|
||||
if (!(0xC0000000 & (nrg << zeros))) {
|
||||
zeros += 2;
|
||||
}
|
||||
if (!(0x80000000 & (nrg << zeros))) {
|
||||
zeros += 1;
|
||||
}
|
||||
|
||||
// energy level (range {-32..30}) (Q10)
|
||||
dB = (15 - zeros) * (1 << 11);
|
||||
|
||||
// Update statistics
|
||||
|
||||
if (state->counter < kAvgDecayTime) {
|
||||
// decay time = AvgDecTime * 10 ms
|
||||
state->counter++;
|
||||
}
|
||||
|
||||
// update short-term estimate of mean energy level (Q10)
|
||||
tmp32 = state->meanShortTerm * 15 + dB;
|
||||
state->meanShortTerm = (int16_t)(tmp32 >> 4);
|
||||
|
||||
// update short-term estimate of variance in energy level (Q8)
|
||||
tmp32 = (dB * dB) >> 12;
|
||||
tmp32 += state->varianceShortTerm * 15;
|
||||
state->varianceShortTerm = tmp32 / 16;
|
||||
|
||||
// update short-term estimate of standard deviation in energy level (Q10)
|
||||
tmp32 = state->meanShortTerm * state->meanShortTerm;
|
||||
tmp32 = (state->varianceShortTerm << 12) - tmp32;
|
||||
state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
|
||||
|
||||
// update long-term estimate of mean energy level (Q10)
|
||||
tmp32 = state->meanLongTerm * state->counter + dB;
|
||||
state->meanLongTerm =
|
||||
WebRtcSpl_DivW32W16ResW16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
|
||||
|
||||
// update long-term estimate of variance in energy level (Q8)
|
||||
tmp32 = (dB * dB) >> 12;
|
||||
tmp32 += state->varianceLongTerm * state->counter;
|
||||
state->varianceLongTerm =
|
||||
WebRtcSpl_DivW32W16(tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
|
||||
|
||||
// update long-term estimate of standard deviation in energy level (Q10)
|
||||
tmp32 = state->meanLongTerm * state->meanLongTerm;
|
||||
tmp32 = (state->varianceLongTerm << 12) - tmp32;
|
||||
state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
|
||||
|
||||
// update voice activity measure (Q10)
|
||||
tmp16 = 3 << 12;
|
||||
// TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in
|
||||
// ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16()
|
||||
// was used, which did an intermediate cast to (int16_t), hence losing
|
||||
// significant bits. This cause logRatio to max out positive, rather than
|
||||
// negative. This is a bug, but has very little significance.
|
||||
tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm);
|
||||
tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm);
|
||||
tmpU16 = (13 << 12);
|
||||
tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16);
|
||||
tmp64 = tmp32;
|
||||
tmp64 += tmp32b >> 10;
|
||||
tmp64 >>= 6;
|
||||
|
||||
// limit
|
||||
if (tmp64 > 2048) {
|
||||
tmp64 = 2048;
|
||||
} else if (tmp64 < -2048) {
|
||||
tmp64 = -2048;
|
||||
}
|
||||
state->logRatio = (int16_t)tmp64;
|
||||
|
||||
return state->logRatio; // Q10
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
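The energy update in WebRtcAgc_ProcessVad above splits out * out / 2^6 into two partial products so that neither one can overflow a 32-bit accumulator. A minimal standalone sketch, not part of the commit, comparing that split accumulation against a 64-bit reference on arbitrary sample values:

#include <cstdint>
#include <cstdio>

int main() {
  const int32_t samples[] = {12345, -23456, 65000, -1, 0, 30000};
  uint32_t nrg = 0;       // 32-bit accumulator, as in the VAD code
  int64_t reference = 0;  // wide reference for comparison
  for (int32_t out : samples) {
    // Each partial product stays well inside the int32_t range even though
    // out * out itself would not.
    nrg += out * (out / (1 << 6));
    nrg += out * (out % (1 << 6)) / (1 << 6);
    reference += static_cast<int64_t>(out) * out / (1 << 6);
  }
  // The two sums agree up to the rounding of the truncated divisions.
  std::printf("nrg=%u reference=%lld\n", nrg, static_cast<long long>(reference));
  return 0;
}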
@ -8,58 +8,51 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
|
||||
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
#include <stdio.h>
|
||||
#endif
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include "common_audio/signal_processing/include/signal_processing_library.h"
|
||||
|
||||
// the 32 most significant bits of A(19) * B(26) >> 13
|
||||
#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))
|
||||
// C + the 32 most significant bits of A * B
|
||||
#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 ))
|
||||
namespace webrtc {
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int32_t downState[8];
|
||||
int16_t HPstate;
|
||||
int16_t counter;
|
||||
int16_t logRatio; // log( P(active) / P(inactive) ) (Q10)
|
||||
int16_t meanLongTerm; // Q10
|
||||
int32_t varianceLongTerm; // Q8
|
||||
int16_t stdLongTerm; // Q10
|
||||
int16_t meanShortTerm; // Q10
|
||||
int32_t varianceShortTerm; // Q8
|
||||
int16_t stdShortTerm; // Q10
|
||||
} AgcVad; // total = 54 bytes
|
||||
typedef struct {
|
||||
int32_t downState[8];
|
||||
int16_t HPstate;
|
||||
int16_t counter;
|
||||
int16_t logRatio; // log( P(active) / P(inactive) ) (Q10)
|
||||
int16_t meanLongTerm; // Q10
|
||||
int32_t varianceLongTerm; // Q8
|
||||
int16_t stdLongTerm; // Q10
|
||||
int16_t meanShortTerm; // Q10
|
||||
int32_t varianceShortTerm; // Q8
|
||||
int16_t stdShortTerm; // Q10
|
||||
} AgcVad; // total = 54 bytes
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int32_t capacitorSlow;
|
||||
int32_t capacitorFast;
|
||||
int32_t gain;
|
||||
int32_t gainTable[32];
|
||||
int16_t gatePrevious;
|
||||
int16_t agcMode;
|
||||
AgcVad vadNearend;
|
||||
AgcVad vadFarend;
|
||||
#ifdef WEBRTC_AGC_DEBUG_DUMP
|
||||
FILE* logFile;
|
||||
int frameCounter;
|
||||
#endif
|
||||
typedef struct {
|
||||
int32_t capacitorSlow;
|
||||
int32_t capacitorFast;
|
||||
int32_t gain;
|
||||
int32_t gainTable[32];
|
||||
int16_t gatePrevious;
|
||||
int16_t agcMode;
|
||||
AgcVad vadNearend;
|
||||
AgcVad vadFarend;
|
||||
} DigitalAgc;
|
||||
|
||||
int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode);
|
||||
|
||||
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* digitalAgcInst,
|
||||
const int16_t* const* inNear,
|
||||
size_t num_bands,
|
||||
int16_t* const* out,
|
||||
uint32_t FS,
|
||||
int16_t lowLevelSignal);
|
||||
int32_t WebRtcAgc_ComputeDigitalGains(DigitalAgc* digitalAgcInst,
|
||||
const int16_t* const* inNear,
|
||||
size_t num_bands,
|
||||
uint32_t FS,
|
||||
int16_t lowLevelSignal,
|
||||
int32_t gains[11]);
|
||||
|
||||
int32_t WebRtcAgc_ApplyDigitalGains(const int32_t gains[11],
|
||||
size_t num_bands,
|
||||
uint32_t FS,
|
||||
const int16_t* const* in_near,
|
||||
int16_t* const* out);
|
||||
|
||||
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst,
|
||||
const int16_t* inFar,
|
||||
@ -67,14 +60,16 @@ int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst,
|
||||
|
||||
void WebRtcAgc_InitVad(AgcVad* vadInst);
|
||||
|
||||
int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state
|
||||
const int16_t* in, // (i) Speech signal
|
||||
int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state
|
||||
const int16_t* in, // (i) Speech signal
|
||||
size_t nrSamples); // (i) number of samples
|
||||
|
||||
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
|
||||
int16_t compressionGaindB, // Q0 (in dB)
|
||||
int16_t targetLevelDbfs,// Q0 (in dB)
|
||||
int32_t WebRtcAgc_CalculateGainTable(int32_t* gainTable, // Q16
|
||||
int16_t compressionGaindB, // Q0 (in dB)
|
||||
int16_t targetLevelDbfs, // Q0 (in dB)
|
||||
uint8_t limiterEnable,
|
||||
int16_t analogTarget);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
|
||||
|
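With the header above, the digital path is split into a gain-computation step and a gain-application step. A hedged sketch of one 10 ms, 16 kHz mono frame going through that split; the function names and signatures come from the declarations above, while the sample rate, single band, in-place output and lowLevelSignal value are illustrative assumptions:

#include <cstdint>

#include "modules/audio_processing/agc/legacy/digital_agc.h"

// |agc| is assumed to have been set up earlier with WebRtcAgc_InitDigital().
int RunDigitalAgcFrame(webrtc::DigitalAgc* agc, int16_t* frame160) {
  const int16_t* in_bands[1] = {frame160};  // single band
  int16_t* out_bands[1] = {frame160};       // ApplyDigitalGains may work in place
  int32_t gains[11] = {0};
  if (webrtc::WebRtcAgc_ComputeDigitalGains(agc, in_bands, /*num_bands=*/1,
                                            /*FS=*/16000, /*lowLevelSignal=*/0,
                                            gains) != 0) {
    return -1;
  }
  return webrtc::WebRtcAgc_ApplyDigitalGains(gains, /*num_bands=*/1, /*FS=*/16000,
                                             in_bands, out_bands);
}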
@ -8,46 +8,39 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
namespace webrtc {
|
||||
|
||||
// Errors
|
||||
#define AGC_UNSPECIFIED_ERROR 18000
|
||||
#define AGC_UNSUPPORTED_FUNCTION_ERROR 18001
|
||||
#define AGC_UNINITIALIZED_ERROR 18002
|
||||
#define AGC_NULL_POINTER_ERROR 18003
|
||||
#define AGC_BAD_PARAMETER_ERROR 18004
|
||||
|
||||
// Warnings
|
||||
#define AGC_BAD_PARAMETER_WARNING 18050
|
||||
|
||||
enum
|
||||
{
|
||||
kAgcModeUnchanged,
|
||||
kAgcModeAdaptiveAnalog,
|
||||
kAgcModeAdaptiveDigital,
|
||||
kAgcModeFixedDigital
|
||||
enum {
|
||||
kAgcModeUnchanged,
|
||||
kAgcModeAdaptiveAnalog,
|
||||
kAgcModeAdaptiveDigital,
|
||||
kAgcModeFixedDigital
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
kAgcFalse = 0,
|
||||
kAgcTrue
|
||||
};
|
||||
enum { kAgcFalse = 0, kAgcTrue };
|
||||
|
||||
typedef struct
|
||||
{
|
||||
int16_t targetLevelDbfs; // default 3 (-3 dBOv)
|
||||
int16_t compressionGaindB; // default 9 dB
|
||||
uint8_t limiterEnable; // default kAgcTrue (on)
|
||||
typedef struct {
|
||||
int16_t targetLevelDbfs; // default 3 (-3 dBOv)
|
||||
int16_t compressionGaindB; // default 9 dB
|
||||
uint8_t limiterEnable; // default kAgcTrue (on)
|
||||
} WebRtcAgcConfig;
|
||||
|
||||
#if defined(__cplusplus)
|
||||
extern "C"
|
||||
{
|
||||
#endif
|
||||
/*
|
||||
* This function analyses the number of samples passed to
|
||||
* farend and produces any error code that could arise.
|
||||
*
|
||||
* Input:
|
||||
* - agcInst : AGC instance.
|
||||
* - samples : Number of samples in input vector.
|
||||
*
|
||||
* Return value:
|
||||
* : 0 - Normal operation.
|
||||
* : -1 - Error.
|
||||
*/
|
||||
int WebRtcAgc_GetAddFarendError(void* state, size_t samples);
|
||||
|
||||
/*
|
||||
* This function processes a 10 ms frame of far-end speech to determine
|
||||
@ -64,9 +57,7 @@ extern "C"
|
||||
* : 0 - Normal operation.
|
||||
* : -1 - Error
|
||||
*/
|
||||
int WebRtcAgc_AddFarend(void* agcInst,
|
||||
const int16_t* inFar,
|
||||
size_t samples);
|
||||
int WebRtcAgc_AddFarend(void* agcInst, const int16_t* inFar, size_t samples);
|
||||
|
||||
/*
|
||||
* This function processes a 10 ms frame of microphone speech to determine
|
||||
@ -124,12 +115,12 @@ int WebRtcAgc_VirtualMic(void* agcInst,
|
||||
int32_t* micLevelOut);
|
||||
|
||||
/*
|
||||
* This function processes a 10 ms frame and adjusts (normalizes) the gain both
|
||||
* analog and digitally. The gain adjustments are done only during active
|
||||
* periods of speech. The length of the speech vectors must be given in samples
|
||||
* (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
|
||||
* parameter can be used to ensure the AGC will not adjust upward in the
|
||||
* presence of echo.
|
||||
* This function analyses a 10 ms frame and produces the analog and digital
|
||||
* gains required to normalize the signal. The gain adjustments are done only
|
||||
* during active periods of speech. The length of the speech vectors must be
|
||||
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
|
||||
* FS=48000). The echo parameter can be used to ensure the AGC will not adjust
|
||||
* upward in the presence of echo.
|
||||
*
|
||||
* This function should be called after processing the near-end microphone
|
||||
* signal, in any case after any echo cancellation.
|
||||
@ -147,25 +138,47 @@ int WebRtcAgc_VirtualMic(void* agcInst,
|
||||
*
|
||||
* Output:
|
||||
* - outMicLevel : Adjusted microphone volume level
|
||||
* - out : Gain-adjusted near-end speech vector
|
||||
* : May be the same vector as the input.
|
||||
* - saturationWarning : A returned value of 1 indicates a saturation event
|
||||
* has occurred and the volume cannot be further
|
||||
* reduced. Otherwise will be set to 0.
|
||||
* - gains : Vector of gains to apply for digital normalization
|
||||
*
|
||||
* Return value:
|
||||
* : 0 - Normal operation.
|
||||
* : -1 - Error
|
||||
*/
|
||||
int WebRtcAgc_Process(void* agcInst,
|
||||
int WebRtcAgc_Analyze(void* agcInst,
|
||||
const int16_t* const* inNear,
|
||||
size_t num_bands,
|
||||
size_t samples,
|
||||
int16_t* const* out,
|
||||
int32_t inMicLevel,
|
||||
int32_t* outMicLevel,
|
||||
int16_t echo,
|
||||
uint8_t* saturationWarning);
|
||||
uint8_t* saturationWarning,
|
||||
int32_t gains[11]);
|
||||
|
||||
/*
|
||||
* This function processes a 10 ms frame by applying precomputed digital gains.
|
||||
*
|
||||
* Input:
|
||||
* - agcInst : AGC instance
|
||||
* - gains : Vector of gains to apply for digital normalization
|
||||
* - in_near : Near-end input speech vector for each band
|
||||
* - num_bands : Number of bands in input/output vector
|
||||
*
|
||||
* Output:
|
||||
* - out : Gain-adjusted near-end speech vector
|
||||
* : May be the same vector as the input.
|
||||
*
|
||||
* Return value:
|
||||
* : 0 - Normal operation.
|
||||
* : -1 - Error
|
||||
*/
|
||||
int WebRtcAgc_Process(const void* agcInst,
|
||||
const int32_t gains[11],
|
||||
const int16_t* const* in_near,
|
||||
size_t num_bands,
|
||||
int16_t* const* out);
|
||||
|
||||
/*
|
||||
* This function sets the config parameters (targetLevelDbfs,
|
||||
@ -203,7 +216,7 @@ int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config);
|
||||
* This function creates and returns an AGC instance, which will contain the
|
||||
* state information for one (duplex) channel.
|
||||
*/
|
||||
void* WebRtcAgc_Create();
|
||||
void* WebRtcAgc_Create(void);
|
||||
|
||||
/*
|
||||
* This function frees the AGC instance created at the beginning.
|
||||
@ -229,14 +242,12 @@ void WebRtcAgc_Free(void* agcInst);
|
||||
* Return value : 0 - Ok
|
||||
* -1 - Error
|
||||
*/
|
||||
int WebRtcAgc_Init(void *agcInst,
|
||||
int WebRtcAgc_Init(void* agcInst,
|
||||
int32_t minLevel,
|
||||
int32_t maxLevel,
|
||||
int16_t agcMode,
|
||||
uint32_t fs);
|
||||
|
||||
#if defined(__cplusplus)
|
||||
}
|
||||
#endif
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
|
||||
|
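The public C-style API in this header now mirrors the same split: WebRtcAgc_Analyze() produces the recommended analog level and the 11-entry digital gain vector, and the reworked WebRtcAgc_Process() only applies those gains. A condensed, hedged sketch of the surrounding lifecycle; the 0..255 minLevel/maxLevel range and the 16 kHz mono frame are illustrative assumptions, and the gains vector is assumed to come from a matching WebRtcAgc_Analyze() call:

#include <cstdint>

#include "modules/audio_processing/agc/legacy/gain_control.h"

int ApplyAgcFrame(const int32_t gains[11], int16_t* frame160) {
  // In real code the instance would be created and initialized once, not per frame.
  void* agc = webrtc::WebRtcAgc_Create();
  if (webrtc::WebRtcAgc_Init(agc, /*minLevel=*/0, /*maxLevel=*/255,
                             webrtc::kAgcModeAdaptiveDigital, /*fs=*/16000) != 0) {
    webrtc::WebRtcAgc_Free(agc);
    return -1;
  }
  const int16_t* in_bands[1] = {frame160};
  int16_t* out_bands[1] = {frame160};  // the output may alias the input
  int err = webrtc::WebRtcAgc_Process(agc, gains, in_bands, /*num_bands=*/1,
                                      out_bands);
  webrtc::WebRtcAgc_Free(agc);
  return err;
}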
@ -8,57 +8,58 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/histogram.h"
|
||||
#include "modules/audio_processing/agc/loudness_histogram.h"
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include <cmath>
|
||||
#include <cstring>
|
||||
|
||||
#include "webrtc/modules/interface/module_common_types.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const double kHistBinCenters[] = {
|
||||
7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
|
||||
1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
|
||||
2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
|
||||
3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
|
||||
5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
|
||||
1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
|
||||
1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
|
||||
2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
|
||||
4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
|
||||
7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
|
||||
1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
|
||||
2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
|
||||
3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
|
||||
6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
|
||||
1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
|
||||
1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
|
||||
2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
|
||||
4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
|
||||
8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
|
||||
1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
|
||||
2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
|
||||
3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
|
||||
6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
|
||||
1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
|
||||
1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
|
||||
3.00339145144454e+04, 3.56647189489147e+04};
|
||||
7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
|
||||
1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
|
||||
2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
|
||||
3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
|
||||
5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
|
||||
1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
|
||||
1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
|
||||
2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
|
||||
4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
|
||||
7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
|
||||
1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
|
||||
2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
|
||||
3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
|
||||
6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
|
||||
1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
|
||||
1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
|
||||
2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
|
||||
4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
|
||||
8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
|
||||
1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
|
||||
2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
|
||||
3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
|
||||
6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
|
||||
1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
|
||||
1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
|
||||
3.00339145144454e+04, 3.56647189489147e+04};
|
||||
|
||||
static const double kProbQDomain = 1024.0;
|
||||
// Loudness of -15 dB (smallest expected loudness) in log domain,
|
||||
// loudness_db = 13.5 * log10(rms);
|
||||
static const double kLogDomainMinBinCenter = -2.57752062648587;
|
||||
// Loudness step of 1 dB in log domain
|
||||
static const double kLogDomainStepSizeInverse = 5.81954605750359;
|
||||
static const double kLogDomainStepSizeInverse = 5.81954605750359;
|
||||
|
||||
static const int kTransientWidthThreshold = 7;
|
||||
static const double kLowProbabilityThreshold = 0.2;
|
||||
|
||||
static const int kLowProbThresholdQ10 = static_cast<int>(
|
||||
kLowProbabilityThreshold * kProbQDomain);
|
||||
static const int kLowProbThresholdQ10 =
|
||||
static_cast<int>(kLowProbabilityThreshold * kProbQDomain);
|
||||
|
||||
Histogram::Histogram()
|
||||
LoudnessHistogram::LoudnessHistogram()
|
||||
: num_updates_(0),
|
||||
audio_content_q10_(0),
|
||||
bin_count_q10_(),
|
||||
@ -73,7 +74,7 @@ Histogram::Histogram()
|
||||
"histogram bin centers incorrect size");
|
||||
}
|
||||
|
||||
Histogram::Histogram(int window_size)
|
||||
LoudnessHistogram::LoudnessHistogram(int window_size)
|
||||
: num_updates_(0),
|
||||
audio_content_q10_(0),
|
||||
bin_count_q10_(),
|
||||
@ -84,9 +85,9 @@ Histogram::Histogram(int window_size)
|
||||
len_circular_buffer_(window_size),
|
||||
len_high_activity_(0) {}
|
||||
|
||||
Histogram::~Histogram() {}
|
||||
LoudnessHistogram::~LoudnessHistogram() {}
|
||||
|
||||
void Histogram::Update(double rms, double activity_probaility) {
|
||||
void LoudnessHistogram::Update(double rms, double activity_probaility) {
|
||||
// If circular histogram is activated then remove the oldest entry.
|
||||
if (len_circular_buffer_ > 0)
|
||||
RemoveOldestEntryAndUpdate();
|
||||
@ -94,14 +95,14 @@ void Histogram::Update(double rms, double activity_probaility) {
|
||||
// Find the corresponding bin.
|
||||
int hist_index = GetBinIndex(rms);
|
||||
// To Q10 domain.
|
||||
int prob_q10 = static_cast<int16_t>(floor(activity_probaility *
|
||||
kProbQDomain));
|
||||
int prob_q10 =
|
||||
static_cast<int16_t>(floor(activity_probaility * kProbQDomain));
|
||||
InsertNewestEntryAndUpdate(prob_q10, hist_index);
|
||||
}
|
||||
|
||||
// Doing nothing if buffer is not full, yet.
|
||||
void Histogram::RemoveOldestEntryAndUpdate() {
|
||||
assert(len_circular_buffer_ > 0);
|
||||
void LoudnessHistogram::RemoveOldestEntryAndUpdate() {
|
||||
RTC_DCHECK_GT(len_circular_buffer_, 0);
|
||||
// Do nothing if circular buffer is not full.
|
||||
if (!buffer_is_full_)
|
||||
return;
|
||||
@ -111,12 +112,12 @@ void Histogram::RemoveOldestEntryAndUpdate() {
|
||||
UpdateHist(-oldest_prob, oldest_hist_index);
|
||||
}
|
||||
|
||||
void Histogram::RemoveTransient() {
|
||||
void LoudnessHistogram::RemoveTransient() {
|
||||
// Don't expect to be here if high-activity region is longer than
|
||||
// |kTransientWidthThreshold| or there has not been any transient.
|
||||
assert(len_high_activity_ <= kTransientWidthThreshold);
|
||||
int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
|
||||
len_circular_buffer_ - 1;
|
||||
RTC_DCHECK_LE(len_high_activity_, kTransientWidthThreshold);
|
||||
int index =
|
||||
(buffer_index_ > 0) ? (buffer_index_ - 1) : len_circular_buffer_ - 1;
|
||||
while (len_high_activity_ > 0) {
|
||||
UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
|
||||
activity_probability_[index] = 0;
|
||||
@ -125,8 +126,8 @@ void Histogram::RemoveTransient() {
|
||||
}
|
||||
}
|
||||
|
||||
void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
|
||||
int hist_index) {
|
||||
void LoudnessHistogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
|
||||
int hist_index) {
|
||||
// Update the circular buffer if it is enabled.
|
||||
if (len_circular_buffer_ > 0) {
|
||||
// Removing transient.
|
||||
@ -158,26 +159,26 @@ void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
|
||||
UpdateHist(activity_prob_q10, hist_index);
|
||||
}
|
||||
|
||||
void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
|
||||
void LoudnessHistogram::UpdateHist(int activity_prob_q10, int hist_index) {
|
||||
bin_count_q10_[hist_index] += activity_prob_q10;
|
||||
audio_content_q10_ += activity_prob_q10;
|
||||
}
|
||||
|
||||
double Histogram::AudioContent() const {
|
||||
double LoudnessHistogram::AudioContent() const {
|
||||
return audio_content_q10_ / kProbQDomain;
|
||||
}
|
||||
|
||||
Histogram* Histogram::Create() {
|
||||
return new Histogram;
|
||||
LoudnessHistogram* LoudnessHistogram::Create() {
|
||||
return new LoudnessHistogram;
|
||||
}
|
||||
|
||||
Histogram* Histogram::Create(int window_size) {
|
||||
LoudnessHistogram* LoudnessHistogram::Create(int window_size) {
|
||||
if (window_size < 0)
|
||||
return NULL;
|
||||
return new Histogram(window_size);
|
||||
return new LoudnessHistogram(window_size);
|
||||
}
|
||||
|
||||
void Histogram::Reset() {
|
||||
void LoudnessHistogram::Reset() {
|
||||
// Reset the histogram, audio-content and number of updates.
|
||||
memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
|
||||
audio_content_q10_ = 0;
|
||||
@ -188,7 +189,7 @@ void Histogram::Reset() {
|
||||
len_high_activity_ = 0;
|
||||
}
|
||||
|
||||
int Histogram::GetBinIndex(double rms) {
|
||||
int LoudnessHistogram::GetBinIndex(double rms) {
|
||||
// First exclude overload cases.
|
||||
if (rms <= kHistBinCenters[0]) {
|
||||
return 0;
|
||||
@ -199,8 +200,8 @@ int Histogram::GetBinIndex(double rms) {
|
||||
// search in linear domain.
|
||||
double rms_log = log(rms);
|
||||
|
||||
int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
|
||||
kLogDomainStepSizeInverse));
|
||||
int index = static_cast<int>(
|
||||
floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse));
|
||||
// The final decision is in linear domain.
|
||||
double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
|
||||
if (rms > b) {
|
||||
@ -210,7 +211,7 @@ int Histogram::GetBinIndex(double rms) {
|
||||
}
|
||||
}
|
||||
|
||||
double Histogram::CurrentRms() const {
|
||||
double LoudnessHistogram::CurrentRms() const {
|
||||
double p;
|
||||
double mean_val = 0;
|
||||
if (audio_content_q10_ > 0) {
|
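Probabilities and histogram counts in this file are kept in Q10: an activity probability is scaled by kProbQDomain = 1024 before it is accumulated, and divided back out when audio content is reported. A tiny standalone illustration of that convention, not taken from the commit:

#include <cmath>
#include <cstdio>

int main() {
  const double kProbQDomain = 1024.0;  // same constant as in the file above
  double activity_probability = 0.73;  // arbitrary example value
  int prob_q10 =
      static_cast<int>(std::floor(activity_probability * kProbQDomain));
  std::printf("p=%.2f -> Q10=%d -> back=%.4f\n", activity_probability, prob_q10,
              prob_q10 / kProbQDomain);  // 0.73 -> 747 -> 0.7295
  return 0;
}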
@ -8,27 +8,26 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
|
||||
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
#include <memory>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This class implements the histogram of loudness with circular buffers so that
|
||||
// the histogram tracks the last T seconds of the loudness.
|
||||
class Histogram {
|
||||
class LoudnessHistogram {
|
||||
public:
|
||||
// Create a non-sliding Histogram.
|
||||
static Histogram* Create();
|
||||
// Create a non-sliding LoudnessHistogram.
|
||||
static LoudnessHistogram* Create();
|
||||
|
||||
// Create a sliding Histogram, i.e. the histogram represents the last
|
||||
// Create a sliding LoudnessHistogram, i.e. the histogram represents the last
|
||||
// |window_size| samples.
|
||||
static Histogram* Create(int window_size);
|
||||
~Histogram();
|
||||
static LoudnessHistogram* Create(int window_size);
|
||||
~LoudnessHistogram();
|
||||
|
||||
// Insert RMS and the corresponding activity probability.
|
||||
void Update(double rms, double activity_probability);
|
||||
@ -47,8 +46,8 @@ class Histogram {
|
||||
int num_updates() const { return num_updates_; }
|
||||
|
||||
private:
|
||||
Histogram();
|
||||
explicit Histogram(int window);
|
||||
LoudnessHistogram();
|
||||
explicit LoudnessHistogram(int window);
|
||||
|
||||
// Find the histogram bin associated with the given |rms|.
|
||||
int GetBinIndex(double rms);
|
||||
@ -67,15 +66,15 @@ class Histogram {
|
||||
// |bin_count_q10_|.
|
||||
int64_t audio_content_q10_;
|
||||
|
||||
// Histogram of input RMS in Q10 with |kHistSize_| bins. In each 'Update(),'
|
||||
// we increment the associated histogram-bin with the given probability. The
|
||||
// increment is implemented in Q10 to avoid rounding errors.
|
||||
// LoudnessHistogram of input RMS in Q10 with |kHistSize_| bins. In each
|
||||
// 'Update(),' we increment the associated histogram-bin with the given
|
||||
// probability. The increment is implemented in Q10 to avoid rounding errors.
|
||||
int64_t bin_count_q10_[kHistSize];
|
||||
|
||||
// Circular buffer for probabilities
|
||||
rtc::scoped_ptr<int[]> activity_probability_;
|
||||
std::unique_ptr<int[]> activity_probability_;
|
||||
// Circular buffer for histogram-indices of probabilities.
|
||||
rtc::scoped_ptr<int[]> hist_bin_index_;
|
||||
std::unique_ptr<int[]> hist_bin_index_;
|
||||
// Current index of circular buffer, where the newest data will be written to,
|
||||
// therefore, pointing to the oldest data if buffer is full.
|
||||
int buffer_index_;
|
||||
@ -88,4 +87,4 @@ class Histogram {
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_LOUDNESS_HISTOGRAM_H_
|
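A hedged usage sketch of the LoudnessHistogram interface declared above; the window size and input values are arbitrary, and CurrentRms() is taken from the .cc file earlier in this commit:

#include <memory>

#include "modules/audio_processing/agc/loudness_histogram.h"

double TrackLoudness() {
  // Sliding histogram over the last 100 updates (e.g. one update per frame).
  std::unique_ptr<webrtc::LoudnessHistogram> hist(
      webrtc::LoudnessHistogram::Create(/*window_size=*/100));
  // Feed the per-frame RMS together with the VAD's activity probability.
  hist->Update(/*rms=*/350.0, /*activity_probability=*/0.9);
  hist->Update(/*rms=*/20.0, /*activity_probability=*/0.1);
  return hist->CurrentRms();  // activity-weighted RMS estimate
}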
34
webrtc/modules/audio_processing/agc/mock_agc.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*
|
||||
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
|
||||
|
||||
#include "modules/audio_processing/agc/agc.h"
|
||||
#include "test/gmock.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class MockAgc : public Agc {
|
||||
public:
|
||||
virtual ~MockAgc() {}
|
||||
MOCK_METHOD(void,
|
||||
Process,
|
||||
(const int16_t* audio, size_t length, int sample_rate_hz),
|
||||
(override));
|
||||
MOCK_METHOD(bool, GetRmsErrorDb, (int* error), (override));
|
||||
MOCK_METHOD(void, Reset, (), (override));
|
||||
MOCK_METHOD(int, set_target_level_dbfs, (int level), (override));
|
||||
MOCK_METHOD(int, target_level_dbfs, (), (const, override));
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_MOCK_AGC_H_
|
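A sketch of how the new MockAgc could be exercised with GoogleMock in a unit test such as agc_manager_direct_unittest.cc; the test body itself is hypothetical, not taken from the commit:

#include "modules/audio_processing/agc/mock_agc.h"
#include "test/gmock.h"
#include "test/gtest.h"

namespace webrtc {

TEST(MockAgcSketch, ReportsRmsError) {
  ::testing::NiceMock<MockAgc> agc;
  // Pretend the level estimator reports a 5 dB error on every query.
  ON_CALL(agc, GetRmsErrorDb(::testing::_))
      .WillByDefault(::testing::DoAll(::testing::SetArgPointee<0>(5),
                                      ::testing::Return(true)));
  int error = 0;
  EXPECT_TRUE(agc.GetRmsErrorDb(&error));
  EXPECT_EQ(5, error);
}

}  // namespace webrtc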
@ -8,10 +8,12 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/agc/utility.h"
|
||||
#include "modules/audio_processing/agc/utility.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const double kLog10 = 2.30258509299;
|
||||
static const double kLinear2DbScale = 20.0 / kLog10;
|
||||
static const double kLinear2LoudnessScale = 13.4 / kLog10;
|
||||
@ -33,3 +35,5 @@ double Db2Loudness(double db) {
|
||||
double Dbfs2Loudness(double dbfs) {
|
||||
return Db2Loudness(90 + dbfs);
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
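The conversion constants above all divide by kLog10 = ln(10), so multiplying a natural logarithm by kLinear2DbScale yields the familiar 20 * log10(x) decibel value (kLinear2LoudnessScale does the same for the 13.4-based loudness scale). A quick numeric check, not part of the commit:

#include <cmath>
#include <cstdio>

int main() {
  const double kLog10 = 2.30258509299;           // ln(10), as in utility.cc
  const double kLinear2DbScale = 20.0 / kLog10;  // scale * ln(x) == 20 * log10(x)
  const double x = 0.5;                          // arbitrary linear amplitude
  std::printf("%.6f vs %.6f\n",
              kLinear2DbScale * std::log(x),  // -6.020600
              20.0 * std::log10(x));          // -6.020600
  return 0;
}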
@ -8,8 +8,10 @@
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
#ifndef MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
#define MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// TODO(turajs): Add description of function.
|
||||
double Loudness2Db(double loudness);
|
||||
@ -20,4 +22,6 @@ double Db2Loudness(double db);
|
||||
|
||||
double Dbfs2Loudness(double dbfs);
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
|
||||
|