Update audio_processing module

Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1

Update notes:

 * Pull in third party license file

 * Replace .gypi files with BUILD.gn to keep track of what changes
   upstream

 * Bunch of new files pulled in as dependencies

 * Won't build yet due to changes needed on top of these
This commit is contained in:
Arun Raghavan
2015-10-13 17:25:22 +05:30
parent 5ae7a5d6cd
commit 753eada3aa
324 changed files with 52533 additions and 16117 deletions

View File

@ -1,10 +0,0 @@
# Build the AGC sources as a libtool library listed under the noinst_ prefix
# (built, but not installed).
noinst_LTLIBRARIES = libagc.la
# Public interface header plus the analog and digital AGC sources.
libagc_la_SOURCES = interface/gain_control.h \
analog_agc.c \
analog_agc.h \
digital_agc.c \
digital_agc.h
# Common flags plus include paths for the signal processing library interface
# and the audio_processing utility directory.
libagc_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility

View File

@ -0,0 +1,101 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc.h"
#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include "webrtc/modules/audio_processing/agc/utility.h"
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
namespace {
// Target level, in dBFS, that a newly constructed Agc aims for.
const int kDefaultLevelDbfs = -18;
// Argument passed to Histogram::Create() for the main histogram, and the
// number of histogram updates required before GetRmsErrorDb() reports a value.
const int kNumAnalysisFrames = 100;
// Multiplied by kNumAnalysisFrames to obtain the minimum histogram
// AudioContent(); below it GetRmsErrorDb() treats the segment as inactive.
const double kActivityThreshold = 0.3;
} // namespace
// Starts out targeting kDefaultLevelDbfs (stored both in dBFS and converted
// to loudness via Dbfs2Loudness()) and creates the two histograms.
Agc::Agc()
: target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
target_level_dbfs_(kDefaultLevelDbfs),
// Main histogram, created with kNumAnalysisFrames.
histogram_(Histogram::Create(kNumAnalysisFrames)),
// NOTE(review): created here but not referenced by any other method visible
// in this file.
inactive_histogram_(Histogram::Create()) {
}
// Nothing to release by hand; the members clean up after themselves.
Agc::~Agc() = default;
// Returns the fraction of the |length| samples in |audio| that sit at either
// int16_t extreme (32767 or -32768), i.e. that are presumably clipped.
//
// |length| is expected to be positive. An empty buffer still trips the assert
// in debug builds, and yields 0 in builds where assert() is compiled out.
float Agc::AnalyzePreproc(const int16_t* audio, size_t length) {
  assert(length > 0);
  if (length == 0) {
    // Without this guard the division below would be 0/0 and return NaN.
    return 0.0f;
  }
  size_t num_clipped = 0;
  for (size_t i = 0; i < length; ++i) {
    if (audio[i] == 32767 || audio[i] == -32768)
      ++num_clipped;
  }
  return 1.0f * num_clipped / length;
}
// Runs the voice activity detector over |audio| and records every resulting
// (RMS, voice probability) pair in the histogram. Always returns 0.
int Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) {
  vad_.ProcessChunk(audio, length, sample_rate_hz);

  // Per-chunk results of the call above; the two vectors run in parallel.
  const std::vector<double>& rms = vad_.chunkwise_rms();
  const std::vector<double>& probabilities =
      vad_.chunkwise_voice_probabilities();
  RTC_DCHECK_EQ(rms.size(), probabilities.size());

  const size_t num_chunks = rms.size();
  size_t chunk = 0;
  while (chunk < num_chunks) {
    histogram_->Update(rms[chunk], probabilities[chunk]);
    ++chunk;
  }
  return 0;
}
// Writes to |error| the rounded dB difference between the target loudness and
// the loudness of the histogram's current RMS, then resets the histogram.
// Returns false, leaving |error| untouched, when |error| is null, when fewer
// than kNumAnalysisFrames updates have been collected, or when the histogram
// holds too little audio content to be considered active.
bool Agc::GetRmsErrorDb(int* error) {
  if (error == nullptr) {
    assert(false);
    return false;
  }

  // Not enough frames yet, or we are likely in an inactive segment. The
  // second check only runs once the first one has passed.
  if (histogram_->num_updates() < kNumAnalysisFrames ||
      histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
    return false;
  }

  const double loudness = Linear2Loudness(histogram_->CurrentRms());
  // floor(x + 0.5) rounds to the nearest integer dB.
  *error = static_cast<int>(
      std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5));
  histogram_->Reset();
  return true;
}
// Resets the main histogram. The target level and the VAD are left untouched.
void Agc::Reset() {
histogram_->Reset();
}
// Sets the target level to |level| dBFS and updates the cached loudness
// equivalent. Accepts only values strictly between -100 and 0; returns 0 on
// success and -1 (leaving the target unchanged) otherwise.
int Agc::set_target_level_dbfs(int level) {
  // TODO(turajs): just some arbitrary sanity check. We can come up with better
  // limits. The upper limit should be chosen such that the risk of clipping is
  // low. The lower limit should not result in a too quiet signal.
  const bool within_limits = level < 0 && level > -100;
  if (!within_limits) {
    return -1;
  }
  target_level_loudness_ = Dbfs2Loudness(level);
  target_level_dbfs_ = level;
  return 0;
}
} // namespace webrtc

View File

@ -1,34 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
# GYP description of the 'agc' library target.
{
'targets': [
{
'target_name': 'agc',
# The library type comes from the 'library' build variable.
'type': '<(library)',
# Needs the 'spl' target from common_audio.
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
# Headers under interface/ are on the include path for this target...
'include_dirs': [
'interface',
],
# ...and for every target that depends directly on it.
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
# Public interface header plus the analog and digital AGC sources.
'sources': [
'interface/gain_control.h',
'analog_agc.c',
'analog_agc.h',
'digital_agc.c',
'digital_agc.h',
],
},
],
}

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class AudioFrame;
class Histogram;
// Measures how far the level of the processed audio is from a target level.
// Process() feeds per-chunk RMS values and voice probabilities from the voice
// activity detector into a histogram; GetRmsErrorDb() converts the histogram's
// current RMS into a dB error relative to the target. All methods are virtual,
// which allows a replacement to be injected (see AgcManagerDirect).
class Agc {
public:
Agc();
virtual ~Agc();
// Returns the proportion of samples in the buffer which are at full-scale
// (and presumably clipped).
virtual float AnalyzePreproc(const int16_t* audio, size_t length);
// |audio| must be mono; in a multi-channel stream, provide the first (usually
// left) channel.
virtual int Process(const int16_t* audio, size_t length, int sample_rate_hz);
// Retrieves the difference between the target RMS level and the current
// signal RMS level in dB. Returns true if an update is available and false
// otherwise, in which case |error| should be ignored and no action taken.
virtual bool GetRmsErrorDb(int* error);
// Discards the statistics accumulated so far.
virtual void Reset();
// Sets the target level in dBFS. Returns 0 on success and -1 if |level| is
// rejected by the range check in the implementation.
virtual int set_target_level_dbfs(int level);
// Returns the target level most recently accepted, in dBFS.
virtual int target_level_dbfs() const { return target_level_dbfs_; }
// Returns the voice activity detector's most recent voice probability.
virtual float voice_probability() const {
return vad_.last_voice_probability();
}
private:
// Target level expressed as loudness; kept in sync with target_level_dbfs_.
double target_level_loudness_;
int target_level_dbfs_;
// Accumulates the RMS / voice probability pairs supplied by Process().
rtc::scoped_ptr<Histogram> histogram_;
rtc::scoped_ptr<Histogram> inactive_histogram_;
VoiceActivityDetector vad_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_

View File

@ -0,0 +1,442 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
#include <algorithm>
#include <cassert>
#include <cmath>
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <cstdio>
#endif
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
namespace {
// Lowest the microphone level can be lowered due to clipping.
const int kClippedLevelMin = 170;
// Amount the microphone level is lowered with every clipping event.
const int kClippedLevelStep = 15;
// Proportion of clipped samples required to declare a clipping event.
const float kClippedRatioThreshold = 0.1f;
// Time in frames to wait after a clipping event before checking again.
const int kClippedWaitFrames = 300;
// Amount of error we tolerate in the microphone level (presumably due to OS
// quantization) before we assume the user has manually adjusted the microphone.
const int kLevelQuantizationSlack = 25;
// Compression gain (dB) applied at construction and by Initialize().
const int kDefaultCompressionGain = 7;
// Upper bound on the compression gain (dB) while the maximum mic level is
// unrestricted; SetMaxLevel() may raise the effective bound above this.
const int kMaxCompressionGain = 12;
// Lower bound on the compression gain (dB).
const int kMinCompressionGain = 2;
// Controls the rate of compression changes towards the target.
const float kCompressionGainStep = 0.05f;
// Highest microphone level; also the highest index used into kGainMap.
const int kMaxMicLevel = 255;
static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
// Lowest microphone level the manager will move to on its own.
const int kMinMicLevel = 12;
// Prevent very large microphone level changes.
const int kMaxResidualGainChange = 15;
// Maximum additional gain allowed to compensate for microphone level
// restrictions from clipping events.
const int kSurplusCompressionGain = 6;
// Restricts |mic_level| to the range [kMinMicLevel, kMaxMicLevel].
int ClampLevel(int mic_level) {
  if (mic_level < kMinMicLevel) {
    return kMinMicLevel;
  }
  if (mic_level > kMaxMicLevel) {
    return kMaxMicLevel;
  }
  return mic_level;
}
// Walks kGainMap from |level| in the direction of |gain_error| and returns
// the first level whose gain differs from that of |level| by at least
// |gain_error|. The walk stops at kMaxMicLevel going up and at kMinMicLevel
// going down. A zero |gain_error| returns |level| unchanged.
int LevelFromGainError(int gain_error, int level) {
  assert(level >= 0 && level <= kMaxMicLevel);
  if (gain_error == 0) {
    return level;
  }
  // TODO(ajm): Could be made more efficient with a binary search.
  const bool raise_level = gain_error > 0;
  int candidate = level;
  if (raise_level) {
    for (; kGainMap[candidate] - kGainMap[level] < gain_error &&
           candidate < kMaxMicLevel;
         ++candidate) {
    }
  } else {
    for (; kGainMap[candidate] - kGainMap[level] > gain_error &&
           candidate > kMinMicLevel;
         --candidate) {
    }
  }
  return candidate;
}
} // namespace
// Facility for dumping debug audio files. All methods are no-ops in the
// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
//
// With WEBRTC_AGC_DEBUG_DUMP defined, the constructor opens |filename| for
// binary writing and Write() appends raw int16_t samples to it. A file that
// could not be opened trips an assert in debug builds; when asserts are
// compiled out the object degrades to a no-op instead of handing a null
// FILE* to fwrite()/fclose().
class DebugFile {
#ifdef WEBRTC_AGC_DEBUG_DUMP
 public:
  explicit DebugFile(const char* filename)
      : file_(fopen(filename, "wb")) {
    assert(file_);
  }
  ~DebugFile() {
    if (file_) {
      fclose(file_);
    }
  }
  // Appends |length_samples| samples from |data| to the dump file.
  void Write(const int16_t* data, size_t length_samples) {
    if (!file_) {
      return;
    }
    fwrite(data, 1, length_samples * sizeof(int16_t), file_);
  }

 private:
  // The object owns |file_|; a copy would close it twice.
  DebugFile(const DebugFile&) = delete;
  DebugFile& operator=(const DebugFile&) = delete;

  FILE* file_;
#else
 public:
  explicit DebugFile(const char* filename) {
  }
  ~DebugFile() {
  }
  void Write(const int16_t* data, size_t length_samples) {
  }
#endif  // WEBRTC_AGC_DEBUG_DUMP
};
// Production constructor: creates the Agc instance itself and forwards it,
// together with the remaining arguments, to the injection constructor so
// that the member initialization lives in exactly one place.
AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
                                   VolumeCallbacks* volume_callbacks,
                                   int startup_min_level)
    : AgcManagerDirect(new Agc(), gctrl, volume_callbacks, startup_min_level) {
}
// Injection constructor: stores |agc| in the scoped_ptr member (so the manager
// owns it), keeps the raw |gctrl| and |volume_callbacks| pointers, and puts
// every piece of adaptive state at its default.
AgcManagerDirect::AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level)
: agc_(agc),
gctrl_(gctrl),
volume_callbacks_(volume_callbacks),
// Starting at the wait limit means clipping is checked from the first frame.
frames_since_clipped_(kClippedWaitFrames),
// Placeholder until CheckVolumeAndReset() reads the real level.
level_(0),
max_level_(kMaxMicLevel),
max_compression_gain_(kMaxCompressionGain),
target_compression_(kDefaultCompressionGain),
compression_(target_compression_),
compression_accumulator_(compression_),
capture_muted_(false),
check_volume_on_next_process_(true), // Check at startup.
startup_(true),
// Clamped to [kMinMicLevel, kMaxMicLevel].
startup_min_level_(ClampLevel(startup_min_level)),
// No-op objects unless WEBRTC_AGC_DEBUG_DUMP is defined.
file_preproc_(new DebugFile("agc_preproc.pcm")),
file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
// Defaulted out of line, in the translation unit that defines DebugFile.
AgcManagerDirect::~AgcManagerDirect() = default;
// Puts the adaptive state back to its defaults and configures the injected
// GainControl: fixed digital mode, target level 2, default compression gain,
// limiter enabled. Returns 0 on success; returns -1 (after logging) as soon
// as one of the GainControl calls fails. The state reset happens first and
// is not undone on failure.
int AgcManagerDirect::Initialize() {
  capture_muted_ = false;
  check_volume_on_next_process_ = true;
  max_level_ = kMaxMicLevel;
  max_compression_gain_ = kMaxCompressionGain;
  target_compression_ = kDefaultCompressionGain;
  compression_ = target_compression_;
  compression_accumulator_ = compression_;
  // TODO(bjornv): Investigate if we need to reset |startup_| as well. For
  // example, what happens when we change devices.

  const int mode_status = gctrl_->set_mode(GainControl::kFixedDigital);
  if (mode_status != 0) {
    LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital);
    return -1;
  }

  const int target_status = gctrl_->set_target_level_dbfs(2);
  if (target_status != 0) {
    LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2);
    return -1;
  }

  const int gain_status =
      gctrl_->set_compression_gain_db(kDefaultCompressionGain);
  if (gain_status != 0) {
    LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain);
    return -1;
  }

  const int limiter_status = gctrl_->enable_limiter(true);
  if (limiter_status != 0) {
    LOG_FERR1(LS_ERROR, enable_limiter, true);
    return -1;
  }
  return 0;
}
// Inspects the unprocessed capture audio for clipping. |audio| holds
// |num_channels| * |samples_per_channel| samples. Does nothing while capture
// is muted. After a clipping event the check is skipped for
// kClippedWaitFrames calls.
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
                                         int num_channels,
                                         size_t samples_per_channel) {
  if (capture_muted_) {
    return;
  }

  const size_t length = num_channels * samples_per_channel;
  file_preproc_->Write(audio, length);

  // Still inside the wait period that follows a clipping event.
  if (frames_since_clipped_ < kClippedWaitFrames) {
    ++frames_since_clipped_;
    return;
  }

  // Check for clipped samples, as the AGC has difficulty detecting pitch
  // under clipping distortion. We do this in the preprocessing phase in order
  // to catch clipped echo as well.
  //
  // If we find a sufficiently clipped frame, drop the current microphone level
  // and enforce a new maximum level, dropped the same amount from the current
  // maximum. This harsh treatment is an effort to avoid repeated clipped echo
  // events. As compensation for this restriction, the maximum compression
  // gain is increased, through SetMaxLevel().
  const float clipped_ratio = agc_->AnalyzePreproc(audio, length);
  const bool clipping_detected = clipped_ratio > kClippedRatioThreshold;
  if (!clipping_detected) {
    return;
  }

  LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
               << clipped_ratio;
  // Always decrease the maximum level, even if the current level is below
  // threshold.
  const int lowered_max = max_level_ - kClippedLevelStep;
  SetMaxLevel(std::max(kClippedLevelMin, lowered_max));
  if (level_ > kClippedLevelMin) {
    // Don't try to adjust the level if we're already below the limit. As
    // a consequence, if the user has brought the level above the limit, we
    // will still not react until the postproc updates the level.
    const int lowered_level = level_ - kClippedLevelStep;
    SetLevel(std::max(kClippedLevelMin, lowered_level));
    // Reset the AGC since the level has changed.
    agc_->Reset();
  }
  frames_since_clipped_ = 0;
}
// Feeds one buffer of capture audio to the AGC, then updates the microphone
// level and the compression gain. Does nothing while capture is muted.
void AgcManagerDirect::Process(const int16_t* audio,
                               size_t length,
                               int sample_rate_hz) {
  if (capture_muted_) {
    return;
  }

  // We have to wait until the first process call to check the volume,
  // because Chromium doesn't guarantee it to be valid any earlier.
  const bool volume_check_pending = check_volume_on_next_process_;
  if (volume_check_pending) {
    check_volume_on_next_process_ = false;
    CheckVolumeAndReset();
  }

  const int agc_status = agc_->Process(audio, length, sample_rate_hz);
  if (agc_status != 0) {
    LOG_FERR0(LS_ERROR, Agc::Process);
    assert(false);
  }

  UpdateGain();
  UpdateCompressor();
  file_postproc_->Write(audio, length);
}
// Moves the microphone to |new_level| (capped at |max_level_|), unless the
// level reported by the callbacks is unusable or shows that the user moved
// the slider, in which case only the cached level is refreshed.
void AgcManagerDirect::SetLevel(int new_level) {
  const int voe_level = volume_callbacks_->GetMicVolume();
  if (voe_level < 0) {
    return;
  }
  if (voe_level == 0) {
    LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
    return;
  }
  if (voe_level > kMaxMicLevel) {
    LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level;
    return;
  }

  // A reported level outside the slack band around the cached level is taken
  // to mean a manual adjustment.
  const bool moved_up = voe_level > level_ + kLevelQuantizationSlack;
  const bool moved_down = voe_level < level_ - kLevelQuantizationSlack;
  if (moved_up || moved_down) {
    LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
                 << "stored level from " << level_ << " to " << voe_level;
    level_ = voe_level;
    // Always allow the user to increase the volume.
    if (level_ > max_level_) {
      SetMaxLevel(level_);
    }
    // Take no action in this case, since we can't be sure when the volume
    // was manually adjusted. The compressor will still provide some of the
    // desired gain change.
    agc_->Reset();
    return;
  }

  if (max_level_ < new_level) {
    new_level = max_level_;
  }
  if (new_level == level_) {
    return;
  }

  volume_callbacks_->SetMicVolume(new_level);
  LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", "
               << "level_=" << level_ << ", "
               << "new_level=" << new_level;
  level_ = new_level;
}
// Records |level| as the highest microphone level the manager may apply and
// raises the maximum compression gain to compensate: no surplus at
// kMaxMicLevel, the full kSurplusCompressionGain at kClippedLevelMin.
void AgcManagerDirect::SetMaxLevel(int level) {
  assert(level >= kClippedLevelMin);
  max_level_ = level;

  // Scale the |kSurplusCompressionGain| linearly across the restricted
  // level range.
  const float restricted_fraction =
      (1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin);
  // floor(x + 0.5f) rounds the surplus to the nearest whole dB.
  const float surplus_gain =
      std::floor(restricted_fraction * kSurplusCompressionGain + 0.5f);
  max_compression_gain_ = kMaxCompressionGain + surplus_gain;

  LOG(LS_INFO) << "[agc] max_level_=" << max_level_
               << ", max_compression_gain_=" << max_compression_gain_;
}
// Records the mute state of the capture stream. A call that does not change
// the state is ignored.
void AgcManagerDirect::SetCaptureMuted(bool muted) {
  const bool state_changed = muted != capture_muted_;
  if (!state_changed) {
    return;
  }

  capture_muted_ = muted;
  if (capture_muted_) {
    return;
  }
  // When we unmute, we should reset things to be safe.
  check_volume_on_next_process_ = true;
}
// Forwards to the owned Agc and returns its current voice probability.
float AgcManagerDirect::voice_probability() {
return agc_->voice_probability();
}
int AgcManagerDirect::CheckVolumeAndReset() {
int level = volume_callbacks_->GetMicVolume();
if (level < 0) {
return -1;
}
// Reasons for taking action at startup:
// 1) A person starting a call is expected to be heard.
// 2) Independent of interpretation of |level| == 0 we should raise it so the
// AGC can do its job properly.
if (level == 0 && !startup_) {
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
return 0;
}
if (level > kMaxMicLevel) {
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level;
return -1;
}
LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
int minLevel = startup_ ? startup_min_level_ : kMinMicLevel;
if (level < minLevel) {
level = minLevel;
LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
volume_callbacks_->SetMicVolume(level);
}
agc_->Reset();
level_ = level;
startup_ = false;
return 0;
}
// Requests the RMS error from AGC and distributes the required gain change
// between the digital compression stage and volume slider. We use the
// compressor first, providing a slack region around the current slider
// position to reduce movement.
//
// If the slider needs to be moved, we check first if the user has adjusted
// it, in which case we take no action and cache the updated level.
void AgcManagerDirect::UpdateGain() {
  int rms_error = 0;
  const bool error_available = agc_->GetRmsErrorDb(&rms_error);
  if (!error_available) {
    // No error update ready.
    return;
  }

  // The compressor will always add at least kMinCompressionGain. In effect,
  // this adjusts our target gain upward by the same amount and rms_error
  // needs to reflect that.
  rms_error += kMinCompressionGain;

  // Handle as much error as possible with the compressor first: restrict the
  // error to [kMinCompressionGain, max_compression_gain_].
  int raw_compression = rms_error;
  if (max_compression_gain_ < raw_compression) {
    raw_compression = max_compression_gain_;
  }
  if (raw_compression < kMinCompressionGain) {
    raw_compression = kMinCompressionGain;
  }

  // Deemphasize the compression gain error. Move halfway between the current
  // target and the newly received target. This serves to soften perceptible
  // intra-talkspurt adjustments, at the cost of some adaptation speed.
  //
  // Special case to allow the target to reach the endpoints of the
  // compression range. The deemphasis would otherwise halt it at 1 dB shy.
  const bool one_below_max =
      raw_compression == max_compression_gain_ &&
      target_compression_ == max_compression_gain_ - 1;
  const bool one_above_min =
      raw_compression == kMinCompressionGain &&
      target_compression_ == kMinCompressionGain + 1;
  if (one_below_max || one_above_min) {
    target_compression_ = raw_compression;
  } else {
    target_compression_ += (raw_compression - target_compression_) / 2;
  }

  // Residual error will be handled by adjusting the volume slider. Use the
  // raw rather than deemphasized compression here as we would otherwise
  // shrink the amount of slack the compressor provides.
  int residual_gain = rms_error - raw_compression;
  if (residual_gain < -kMaxResidualGainChange) {
    residual_gain = -kMaxResidualGainChange;
  } else if (residual_gain > kMaxResidualGainChange) {
    residual_gain = kMaxResidualGainChange;
  }
  LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", "
               << "target_compression=" << target_compression_ << ", "
               << "residual_gain=" << residual_gain;

  if (residual_gain != 0) {
    SetLevel(LevelFromGainError(residual_gain, level_));
  }
}
// Moves the applied compression gain one small step towards the target and,
// once the running value lands on a whole dB different from the applied gain,
// passes that value to the GainControl.
void AgcManagerDirect::UpdateCompressor() {
  if (compression_ == target_compression_) {
    return;
  }

  // Adapt the compression gain slowly towards the target, in order to avoid
  // highly perceptible changes.
  const bool increasing = target_compression_ > compression_;
  compression_accumulator_ +=
      increasing ? kCompressionGainStep : -kCompressionGainStep;

  // The compressor accepts integer gains in dB. Adjust the gain when
  // we've come within half a stepsize of the nearest integer. (We don't
  // check for equality due to potential floating point imprecision).
  const int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
  const bool at_integer_gain =
      std::fabs(compression_accumulator_ - nearest_neighbor) <
      kCompressionGainStep / 2;
  if (!at_integer_gain || nearest_neighbor == compression_) {
    return;
  }

  // Set the new compression gain.
  compression_ = nearest_neighbor;
  compression_accumulator_ = nearest_neighbor;
  if (gctrl_->set_compression_gain_db(compression_) != 0) {
    LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_);
  }
}
} // namespace webrtc

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/agc/agc.h"
namespace webrtc {
class AudioFrame;
class DebugFile;
class GainControl;
// Callbacks that need to be injected into AgcManagerDirect to read and control
// the volume values. This is done to remove the VoiceEngine dependency in
// AgcManagerDirect.
// TODO(aluebs): Remove VolumeCallbacks.
class VolumeCallbacks {
public:
virtual ~VolumeCallbacks() {}
// Requests that the microphone volume be set to |volume|.
virtual void SetMicVolume(int volume) = 0;
// Returns the current microphone volume. AgcManagerDirect treats a negative
// value as a failure and a value above 255 as invalid.
virtual int GetMicVolume() = 0;
};
// Direct interface to use AGC to set volume and compression values.
// AudioProcessing uses this interface directly to integrate the callback-less
// AGC.
//
// This class is not thread-safe.
class AgcManagerDirect final {
 public:
  // AgcManagerDirect will configure GainControl internally. The user is
  // responsible for processing the audio using it after the call to Process.
  // The operating range of startup_min_level is [12, 255] and any input value
  // outside that range will be clamped.
  AgcManagerDirect(GainControl* gctrl,
                   VolumeCallbacks* volume_callbacks,
                   int startup_min_level);
  // Dependency injection for testing. Don't delete |agc| as the memory is owned
  // by the manager.
  AgcManagerDirect(Agc* agc,
                   GainControl* gctrl,
                   VolumeCallbacks* volume_callbacks,
                   int startup_min_level);
  ~AgcManagerDirect();

  // Resets the adaptive state and configures the GainControl. Returns 0 on
  // success and -1 if any GainControl call fails.
  int Initialize();
  // Checks the unprocessed capture audio for clipping; |audio| holds
  // |num_channels| * |samples_per_channel| samples.
  void AnalyzePreProcess(int16_t* audio,
                         int num_channels,
                         size_t samples_per_channel);
  // Feeds |length| samples of capture audio to the AGC and updates the
  // microphone level and compression gain.
  void Process(const int16_t* audio, size_t length, int sample_rate_hz);

  // Call when the capture stream has been muted/unmuted. This causes the
  // manager to disregard all incoming audio; chances are good it's background
  // noise to which we'd like to avoid adapting.
  void SetCaptureMuted(bool muted);
  // Returns the mute state most recently passed to SetCaptureMuted().
  bool capture_muted() const { return capture_muted_; }

  // Returns the voice probability reported by the underlying Agc.
  float voice_probability();

 private:
  // Sets a new microphone level, after first checking that it hasn't been
  // updated by the user, in which case no action is taken.
  void SetLevel(int new_level);

  // Set the maximum level the AGC is allowed to apply. Also updates the
  // maximum compression gain to compensate. The level must be at least
  // |kClippedLevelMin|.
  void SetMaxLevel(int level);

  // Reads the current microphone level, enforces the minimum level and resets
  // the AGC. Returns 0 on success and -1 if the reported level is invalid.
  int CheckVolumeAndReset();
  // Splits the AGC's RMS error between the compressor and the volume slider.
  void UpdateGain();
  // Steps the applied compression gain towards |target_compression_|.
  void UpdateCompressor();

  rtc::scoped_ptr<Agc> agc_;
  GainControl* gctrl_;
  VolumeCallbacks* volume_callbacks_;

  // Calls to AnalyzePreProcess() since the last clipping event.
  int frames_since_clipped_;
  // Microphone level the manager believes is currently applied.
  int level_;
  int max_level_;
  int max_compression_gain_;
  int target_compression_;
  int compression_;
  // Fractional running value behind |compression_|.
  float compression_accumulator_;
  bool capture_muted_;
  bool check_volume_on_next_process_;
  // True until the first successful CheckVolumeAndReset().
  bool startup_;
  int startup_min_level_;

  rtc::scoped_ptr<DebugFile> file_preproc_;
  rtc::scoped_ptr<DebugFile> file_postproc_;

  RTC_DISALLOW_COPY_AND_ASSIGN(AgcManagerDirect);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_

View File

@ -1,133 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
#include "typedefs.h"
#include "gain_control.h"
#include "digital_agc.h"
//#define AGC_DEBUG
//#define MIC_LEVEL_FEEDBACK
#ifdef AGC_DEBUG
#include <stdio.h>
#endif
/* Analog Automatic Gain Control variables:
* Constant declarations (inner limits inside which no changes are done)
* In the beginning the range is narrower to widen as soon as the measure
* 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
* and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
* go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
* The limits are created by running the AGC with a file having the desired
* signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
* by out=10*log10(in/260537279.7); Set the target level to the average level
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
*/
// Length of the Rxx16_vectorw32 subframe-energy buffer; also the multiplier
// in the level-limit formulas noted on the Agc_t fields.
#define RXX_BUFFER_LEN 10
static const WebRtc_Word16 kMsecSpeechInner = 520;
static const WebRtc_Word16 kMsecSpeechOuter = 340;
static const WebRtc_Word16 kNormalVadThreshold = 400;
static const WebRtc_Word16 kAlphaShortTerm = 6; // 2^-6 = 0.0156
static const WebRtc_Word16 kAlphaLongTerm = 10; // 2^-10 = 0.000977
// Complete state of one analog AGC instance: configuration, target-level
// limits, analog (microphone) tracking variables, microphone level
// bookkeeping, and the embedded VAD and digital AGC state.
typedef struct
{
// Configurable parameters/variables
WebRtc_UWord32 fs; // Sampling frequency
WebRtc_Word16 compressionGaindB; // Fixed gain level in dB
WebRtc_Word16 targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
WebRtc_Word16 agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
WebRtc_UWord8 limiterEnable; // Enabling limiter (on/off (default off))
WebRtcAgc_config_t defaultConfig;
WebRtcAgc_config_t usedConfig;
// General variables
WebRtc_Word16 initFlag;
WebRtc_Word16 lastError;
// Target level parameters
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
WebRtc_Word32 analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs
WebRtc_Word32 startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs
WebRtc_Word32 startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs
WebRtc_Word32 upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs
WebRtc_Word32 lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs
WebRtc_Word32 upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs
WebRtc_Word32 lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs
WebRtc_UWord16 targetIdx; // Table index for corresponding target level
// Only present when MIC_LEVEL_FEEDBACK is defined.
#ifdef MIC_LEVEL_FEEDBACK
WebRtc_UWord16 targetIdxOffset; // Table index offset for level compensation
#endif
WebRtc_Word16 analogTarget; // Digital reference level in ENV scale
// Analog AGC specific variables
WebRtc_Word32 filterState[8]; // For downsampling wb to nb
WebRtc_Word32 upperLimit; // Upper limit for mic energy
WebRtc_Word32 lowerLimit; // Lower limit for mic energy
WebRtc_Word32 Rxx160w32; // Average energy for one frame
WebRtc_Word32 Rxx16_LPw32; // Low pass filtered subframe energies
WebRtc_Word32 Rxx160_LPw32; // Low pass filtered frame energies
WebRtc_Word32 Rxx16_LPw32Max; // Keeps track of largest energy subframe
WebRtc_Word32 Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies
WebRtc_Word32 Rxx16w32_array[2][5];// Energy values of microphone signal
WebRtc_Word32 env[2][10]; // Envelope values of subframes
WebRtc_Word16 Rxx16pos; // Current position in the Rxx16_vectorw32
WebRtc_Word16 envSum; // Filtered scaled envelope in subframes
WebRtc_Word16 vadThreshold; // Threshold for VAD decision
WebRtc_Word16 inActive; // Inactive time in milliseconds
WebRtc_Word16 msTooLow; // Milliseconds of speech at a too low level
WebRtc_Word16 msTooHigh; // Milliseconds of speech at a too high level
WebRtc_Word16 changeToSlowMode; // Change to slow mode after some time at target
WebRtc_Word16 firstCall; // First call to the process-function
WebRtc_Word16 msZero; // Milliseconds of zero input
WebRtc_Word16 msecSpeechOuterChange;// Min ms of speech between volume changes
WebRtc_Word16 msecSpeechInnerChange;// Min ms of speech between volume changes
WebRtc_Word16 activeSpeech; // Milliseconds of active speech
WebRtc_Word16 muteGuardMs; // Counter to prevent mute action
WebRtc_Word16 inQueue; // 10 ms batch indicator
// Microphone level variables
WebRtc_Word32 micRef; // Remember ref. mic level for virtual mic
WebRtc_UWord16 gainTableIdx; // Current position in virtual gain table
WebRtc_Word32 micGainIdx; // Gain index of mic level to increase slowly
WebRtc_Word32 micVol; // Remember volume between frames
WebRtc_Word32 maxLevel; // Max possible vol level, incl dig gain
WebRtc_Word32 maxAnalog; // Maximum possible analog volume level
WebRtc_Word32 maxInit; // Initial value of "max"
WebRtc_Word32 minLevel; // Minimum possible volume level
WebRtc_Word32 minOutput; // Minimum output volume level
WebRtc_Word32 zeroCtrlMax; // Remember max gain => don't amp low input
WebRtc_Word16 scale; // Scale factor for internal volume levels
// Only present when MIC_LEVEL_FEEDBACK is defined.
#ifdef MIC_LEVEL_FEEDBACK
WebRtc_Word16 numBlocksMicLvlSat;
WebRtc_UWord8 micLvlSat;
#endif
// Structs for VAD and digital_agc
AgcVad_t vadMic;
DigitalAgc_t digitalAgc;
// Debug-only file handles and counter (AGC_DEBUG builds).
#ifdef AGC_DEBUG
FILE* fpt;
FILE* agcLog;
WebRtc_Word32 fcount;
#endif
WebRtc_Word16 lowLevelSignal;
} Agc_t;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_

View File

@ -1,76 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_
#ifdef AGC_DEBUG
#include <stdio.h>
#endif
#include "typedefs.h"
#include "signal_processing_library.h"
// the 32 most significant bits of A(19) * B(26) >> 13
// B is split at bit 13: the upper part is multiplied by A directly, and the
// lower 13 bits are multiplied by A and then shifted down by 13.
#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))
// C + the 32 most significant bits of A * B
// Same splitting scheme, with B split at bit 16 and C added to the result.
#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 ))
// State of one voice activity detector instance used by the AGC. The Qn
// notes give the fixed-point format of each statistic.
typedef struct
{
WebRtc_Word32 downState[8];
WebRtc_Word16 HPstate;
WebRtc_Word16 counter;
WebRtc_Word16 logRatio; // log( P(active) / P(inactive) ) (Q10)
WebRtc_Word16 meanLongTerm; // Q10
WebRtc_Word32 varianceLongTerm; // Q8
WebRtc_Word16 stdLongTerm; // Q10
WebRtc_Word16 meanShortTerm; // Q10
WebRtc_Word32 varianceShortTerm; // Q8
WebRtc_Word16 stdShortTerm; // Q10
} AgcVad_t; // total = 54 bytes
// State of the digital AGC stage: gain tracking values, a 32-entry gain
// table, and separate VAD state for the near-end and far-end signals.
typedef struct
{
WebRtc_Word32 capacitorSlow;
WebRtc_Word32 capacitorFast;
WebRtc_Word32 gain;
WebRtc_Word32 gainTable[32];
WebRtc_Word16 gatePrevious;
WebRtc_Word16 agcMode;
AgcVad_t vadNearend;
AgcVad_t vadFarend;
// Debug-only log file and frame counter (AGC_DEBUG builds).
#ifdef AGC_DEBUG
FILE* logFile;
int frameCounter;
#endif
} DigitalAgc_t;
// Digital AGC entry points. The implementations are not part of this header;
// the notes below restate only what the signatures show.

// Initializes |digitalAgcInst| for the given |agcMode|.
WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, WebRtc_Word16 agcMode);
// Processes near-end input (|inNear|, |inNear_H|) into |out| / |out_H| at
// sampling rate |FS|.
// NOTE(review): the _H buffers are presumably the upper band - confirm.
WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inNear,
const WebRtc_Word16 *inNear_H, WebRtc_Word16 *out,
WebRtc_Word16 *out_H, WebRtc_UWord32 FS,
WebRtc_Word16 lowLevelSignal);
// Supplies |nrSamples| far-end samples to the digital AGC.
WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inFar,
WebRtc_Word16 nrSamples);
// Initializes a VAD state struct.
void WebRtcAgc_InitVad(AgcVad_t *vadInst);
WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *vadInst, // (i) VAD state
const WebRtc_Word16 *in, // (i) Speech signal
WebRtc_Word16 nrSamples); // (i) number of samples
// Fills |gainTable| (Q16) from the compression gain, target level, limiter
// setting and analog target.
WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
WebRtc_Word16 compressionGaindB, // Q0 (in dB)
WebRtc_Word16 targetLevelDbfs,// Q0 (in dB)
WebRtc_UWord8 limiterEnable, WebRtc_Word16 analogTarget);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_

View File

@ -0,0 +1,275 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
// Number of entries in |kGainMap|.
static const int kGainMapSize = 256;
// Lookup table with |kGainMapSize| integer entries. The values are
// non-decreasing, starting at -56 for index 0 and ending at 64 for index 255;
// the step between neighbouring entries shrinks from 2 at the low end to an
// increment of 1 roughly every fourth entry at the high end.
// NOTE(review): presumably maps a volume level (index) to a gain in dB -
// confirm against the code that indexes this table.
// Uses parameters: si = 2, sf = 0.25, D = 8/256
static const int kGainMap[kGainMapSize] = {
-56,
-54,
-52,
-50,
-48,
-47,
-45,
-43,
-42,
-40,
-38,
-37,
-35,
-34,
-33,
-31,
-30,
-29,
-27,
-26,
-25,
-24,
-23,
-22,
-20,
-19,
-18,
-17,
-16,
-15,
-14,
-14,
-13,
-12,
-11,
-10,
-9,
-8,
-8,
-7,
-6,
-5,
-5,
-4,
-3,
-2,
-2,
-1,
0,
0,
1,
1,
2,
3,
3,
4,
4,
5,
5,
6,
6,
7,
7,
8,
8,
9,
9,
10,
10,
11,
11,
12,
12,
13,
13,
13,
14,
14,
15,
15,
15,
16,
16,
17,
17,
17,
18,
18,
18,
19,
19,
19,
20,
20,
21,
21,
21,
22,
22,
22,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
26,
26,
26,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33,
34,
34,
34,
35,
35,
35,
35,
36,
36,
36,
36,
37,
37,
37,
38,
38,
38,
38,
39,
39,
39,
39,
40,
40,
40,
40,
41,
41,
41,
41,
42,
42,
42,
42,
43,
43,
43,
44,
44,
44,
44,
45,
45,
45,
45,
46,
46,
46,
46,
47,
47,
47,
47,
48,
48,
48,
48,
49,
49,
49,
49,
50,
50,
50,
50,
51,
51,
51,
51,
52,
52,
52,
52,
53,
53,
53,
53,
54,
54,
54,
54,
55,
55,
55,
55,
56,
56,
56,
56,
57,
57,
57,
57,
58,
58,
58,
58,
59,
59,
59,
59,
60,
60,
60,
60,
61,
61,
61,
61,
62,
62,
62,
62,
63,
63,
63,
63,
64
};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_

View File

@ -0,0 +1,228 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include <climits>
#include <cmath>
#include <cstring>
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
// Centers of the histogram bins, expressed as RMS values in the linear domain.
// The centers are uniformly spaced in the log domain (GetBinIndex() relies on
// this): each entry is about 1.1875 times the previous one, and entry 15 is
// exactly 1.0. The number of entries must equal Histogram::kHistSize, which is
// checked by a static_assert in the Histogram constructor.
static const double kHistBinCenters[] = {
7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
3.00339145144454e+04, 3.56647189489147e+04};
// Scale factor of the Q10 fixed-point domain (2^10) in which activity
// probabilities are accumulated.
static const double kProbQDomain = 1024.0;
// Natural logarithm of the smallest bin center, kHistBinCenters[0]. This is a
// loudness of -15 dB (smallest expected loudness) in log domain, with
// loudness_db = 13.4 * log10(rms);
// (The value below and kLogDomainStepSizeInverse are consistent with a factor
// of 13.4, not 13.5: ln(10) * (-15 / 13.4) = -2.5775...)
static const double kLogDomainMinBinCenter = -2.57752062648587;
// Inverse of the natural-log distance between adjacent bin centers. The
// distance is a loudness step of 1 dB in log domain, i.e. ln(10) / 13.4.
static const double kLogDomainStepSizeInverse = 5.81954605750359;
// A run of high-activity updates no longer than this, followed by a
// low-probability update, is treated as a transient and removed again.
static const int kTransientWidthThreshold = 7;
// Activity probabilities at or below this value are replaced by zero before
// they are stored in a sliding histogram.
static const double kLowProbabilityThreshold = 0.2;
// |kLowProbabilityThreshold| in the Q10 domain.
static const int kLowProbThresholdQ10 = static_cast<int>(
kLowProbabilityThreshold * kProbQDomain);
// Builds a non-sliding histogram: all counters start at zero, no circular
// buffers are allocated and the window length is zero.
Histogram::Histogram()
    : num_updates_(0),
      audio_content_q10_(0),
      bin_count_q10_(),
      activity_probability_(),
      hist_bin_index_(),
      buffer_index_(0),
      buffer_is_full_(false),
      len_circular_buffer_(0),
      len_high_activity_(0) {
  // The table of bin centers must provide exactly one center per bin.
  static_assert(
      sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]) == kHistSize,
      "histogram bin centers incorrect size");
}
// Builds a sliding histogram covering the last |window_size| updates. Two
// circular buffers of |window_size| entries remember, for every update in the
// window, the stored probability and the bin it was added to.
Histogram::Histogram(int window_size)
    : num_updates_(0),
      audio_content_q10_(0),
      bin_count_q10_(),
      activity_probability_(new int[window_size]),
      hist_bin_index_(new int[window_size]),
      buffer_index_(0),
      buffer_is_full_(false),
      len_circular_buffer_(window_size),
      len_high_activity_(0) {
}

// The circular buffers are released by their scoped_ptr owners.
Histogram::~Histogram() {
}
// Inserts one observation into the histogram.
//   rms                  - RMS of the observation in the linear domain; it
//                          selects the bin that is incremented.
//   activity_probability - weight of the observation; it is converted to the
//                          Q10 domain (rounded down) before being accumulated.
// For a sliding histogram the oldest observation is dropped first once the
// window is full.
void Histogram::Update(double rms, double activity_probability) {
  // If circular histogram is activated then remove the oldest entry.
  if (len_circular_buffer_ > 0)
    RemoveOldestEntryAndUpdate();

  // Find the corresponding bin.
  const int hist_index = GetBinIndex(rms);
  // To Q10 domain.
  const int prob_q10 =
      static_cast<int16_t>(floor(activity_probability * kProbQDomain));
  InsertNewestEntryAndUpdate(prob_q10, hist_index);
}
// Takes the oldest entry of the sliding window out of the histogram. While the
// circular buffer has not wrapped around yet there is nothing to remove, so
// the call has no effect.
void Histogram::RemoveOldestEntryAndUpdate() {
  assert(len_circular_buffer_ > 0);
  if (buffer_is_full_) {
    // Once the buffer is full, the write position holds the oldest entry.
    const int slot = buffer_index_;
    const int stored_prob = activity_probability_[slot];
    const int stored_bin = hist_bin_index_[slot];
    UpdateHist(-stored_prob, stored_bin);
  }
}
// Undoes the contribution of the most recent |len_high_activity_| entries,
// walking backwards through the circular buffer from the newest entry, and
// zeroes their stored probabilities. |len_high_activity_| is zero afterwards.
void Histogram::RemoveTransient() {
  // Don't expect to be here if high-activity region is longer than
  // |kTransientWidthThreshold| or there has not been any transient.
  assert(len_high_activity_ <= kTransientWidthThreshold);
  int pos = buffer_index_;
  for (; len_high_activity_ > 0; --len_high_activity_) {
    // Step back one slot, wrapping from the first slot to the last one.
    pos = (pos > 0 ? pos : len_circular_buffer_) - 1;
    UpdateHist(-activity_probability_[pos], hist_bin_index_[pos]);
    activity_probability_[pos] = 0;
  }
}
// Adds a new observation with Q10 probability |activity_prob_q10| to bin
// |hist_index|.
//
// For a sliding histogram the observation is also recorded in the circular
// buffers, and transient suppression is applied: a probability at or below
// |kLowProbThresholdQ10| is stored as zero, and if it ends a high-activity run
// of at most |kTransientWidthThreshold| updates, that run is removed from the
// histogram again.
void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
                                           int hist_index) {
  // Update the circular buffer if it is enabled.
  if (len_circular_buffer_ > 0) {
    // Removing transient.
    if (activity_prob_q10 <= kLowProbThresholdQ10) {
      // Lower than threshold probability, set it to zero.
      activity_prob_q10 = 0;
      // Check if this has been a transient.
      if (len_high_activity_ <= kTransientWidthThreshold)
        RemoveTransient();  // Remove this transient.
      len_high_activity_ = 0;
    } else if (len_high_activity_ <= kTransientWidthThreshold) {
      // Stop counting one past the threshold; longer runs are not transients.
      len_high_activity_++;
    }
    // Updating the circular buffer.
    activity_probability_[buffer_index_] = activity_prob_q10;
    hist_bin_index_[buffer_index_] = hist_index;
    // Increment the buffer index and check for wrap-around.
    buffer_index_++;
    if (buffer_index_ >= len_circular_buffer_) {
      buffer_index_ = 0;
      buffer_is_full_ = true;
    }
  }

  // Saturate the update counter at INT_MAX. Incrementing first and testing
  // for a negative value afterwards would rely on signed overflow, which is
  // undefined behavior.
  if (num_updates_ < INT_MAX)
    num_updates_++;

  UpdateHist(activity_prob_q10, hist_index);
}
// Adds |activity_prob_q10| (which may be negative, to remove an entry) both to
// the running total and to the bin |hist_index|.
void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
  audio_content_q10_ += activity_prob_q10;
  bin_count_q10_[hist_index] += activity_prob_q10;
}
// Returns the accumulated activity probability, converted from the Q10 domain
// back to a floating-point value.
double Histogram::AudioContent() const {
  const double content_q10 = static_cast<double>(audio_content_q10_);
  return content_q10 / kProbQDomain;
}
// Allocates a non-sliding histogram. The caller takes ownership of the result.
Histogram* Histogram::Create() {
  Histogram* const histogram = new Histogram();
  return histogram;
}
// Allocates a sliding histogram over the last |window_size| updates. The
// caller takes ownership of the result. A negative |window_size| is rejected
// by returning NULL.
Histogram* Histogram::Create(int window_size) {
  if (window_size >= 0) {
    return new Histogram(window_size);
  }
  return NULL;
}
void Histogram::Reset() {
// Reset the histogram, audio-content and number of updates.
memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
audio_content_q10_ = 0;
num_updates_ = 0;
// Empty the circular buffer.
buffer_index_ = 0;
buffer_is_full_ = false;
len_high_activity_ = 0;
}
// Returns the index of the histogram bin whose center is closest to |rms|.
//
// Values at or below the first bin center map to bin 0 and values at or above
// the last bin center map to the last bin. In between, a candidate bin is
// located in the log domain, where the bins are uniformly spaced, and the
// final choice between the candidate and its upper neighbour is made in the
// linear domain.
int Histogram::GetBinIndex(double rms) {
  // First exclude overload cases. Written as a negated '>' so that a NaN
  // input also ends up in the first bin instead of reaching the conversion to
  // int below.
  if (!(rms > kHistBinCenters[0]))
    return 0;
  if (rms >= kHistBinCenters[kHistSize - 1])
    return kHistSize - 1;

  // The quantizer is uniform in log domain. Alternatively we could do binary
  // search in linear domain.
  const double rms_log = log(rms);
  int index = static_cast<int>(
      floor((rms_log - kLogDomainMinBinCenter) * kLogDomainStepSizeInverse));

  // |rms| lies strictly between the first and the last bin center here, so
  // the lower candidate must be in [0, kHistSize - 2]. Rounding errors in the
  // log-domain computation can push it one step outside that range for inputs
  // right next to the outermost centers; clamp it so that both table reads
  // below stay in bounds.
  if (index < 0)
    index = 0;
  else if (index > kHistSize - 2)
    index = kHistSize - 2;

  // The final decision is in linear domain.
  const double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
  if (rms > b)
    return index + 1;
  return index;
}
// Returns the mean of the histogram: every bin center weighted by the bin's
// share of the total accumulated content. If nothing has been accumulated,
// the center of the first bin is returned.
double Histogram::CurrentRms() const {
  if (!(audio_content_q10_ > 0))
    return kHistBinCenters[0];

  const double inverse_total = 1. / static_cast<double>(audio_content_q10_);
  double weighted_sum = 0;
  for (int bin = 0; bin < kHistSize; ++bin) {
    const double weight =
        static_cast<double>(bin_count_q10_[bin]) * inverse_total;
    weighted_sum += weight * kHistBinCenters[bin];
  }
  return weighted_sum;
}
} // namespace webrtc

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#include <string.h>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class implements the histogram of loudness with circular buffers so that
// the histogram tracks the last T seconds of the loudness.
class Histogram {
 public:
  // Create a non-sliding Histogram. The caller owns the returned object.
  static Histogram* Create();

  // Create a sliding Histogram, i.e. the histogram represents the last
  // |window_size| samples. Returns NULL if |window_size| is negative. The
  // caller owns the returned object.
  static Histogram* Create(int window_size);
  ~Histogram();

  // Insert RMS and the corresponding activity probability.
  void Update(double rms, double activity_probability);

  // Reset the histogram, forget the past.
  void Reset();

  // Current loudness, which is actually the mean of histogram in loudness
  // domain.
  double CurrentRms() const;

  // Sum of the histogram content.
  double AudioContent() const;

  // Number of times the histogram has been updated.
  int num_updates() const { return num_updates_; }

 private:
  Histogram();
  explicit Histogram(int window);

  // Find the histogram bin associated with the given |rms|.
  int GetBinIndex(double rms);

  // Remove the oldest entry of the circular buffer from the histogram. Does
  // nothing until the buffer is full.
  void RemoveOldestEntryAndUpdate();
  // Store a new entry in the circular buffer (if enabled) and add it to the
  // histogram.
  void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
  // Add |activity_prob_q10| to bin |hist_index| and to the audio content.
  void UpdateHist(int activity_prob_q10, int hist_index);
  // Remove the most recent |len_high_activity_| entries from the histogram.
  void RemoveTransient();

  // Number of histogram bins.
  static const int kHistSize = 77;

  // Number of times the histogram is updated
  int num_updates_;
  // Audio content, this should be equal to the sum of the components of
  // |bin_count_q10_|.
  int64_t audio_content_q10_;

  // Histogram of input RMS in Q10 with |kHistSize| bins. In each 'Update(),'
  // we increment the associated histogram-bin with the given probability. The
  // increment is implemented in Q10 to avoid rounding errors.
  int64_t bin_count_q10_[kHistSize];

  // Circular buffer for probabilities
  rtc::scoped_ptr<int[]> activity_probability_;
  // Circular buffer for histogram-indices of probabilities.
  rtc::scoped_ptr<int[]> hist_bin_index_;
  // Current index of circular buffer, where the newest data will be written to,
  // therefore, pointing to the oldest data if buffer is full.
  int buffer_index_;
  // Indicating if buffer is full and we had a wrap around.
  bool buffer_is_full_;
  // Size of circular buffer.
  int len_circular_buffer_;
  // Number of consecutive most recent updates whose activity probability was
  // above the low-probability threshold. The count stops growing once it
  // exceeds the transient width threshold and is cleared by the next
  // low-probability update.
  int len_high_activity_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_

View File

@ -0,0 +1,133 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
//#define MIC_LEVEL_FEEDBACK
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
#include "webrtc/typedefs.h"
/* Analog Automatic Gain Control variables:
* Constant declarations (inner limits inside which no changes are done)
* In the beginning the range is narrower to widen as soon as the measure
* 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
* and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
* go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
* The limits are created by running the AGC with a file having the desired
* signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
* by out=10*log10(in/260537279.7); Set the target level to the average level
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
*/
#define RXX_BUFFER_LEN 10
// Tuning constants of the legacy analog AGC.
// NOTE(review): the two kMsecSpeech* values are presumably the defaults for
// LegacyAgc::msecSpeechInnerChange / msecSpeechOuterChange - confirm.
static const int16_t kMsecSpeechInner = 520;
static const int16_t kMsecSpeechOuter = 340;
static const int16_t kNormalVadThreshold = 400;
// The two alpha values are given as shift amounts; the factor they stand for
// is 2^-shift.
static const int16_t kAlphaShortTerm = 6; // 2^-6 = 0.0156
static const int16_t kAlphaLongTerm = 10; // 2^-10 = 0.000977
// Complete state of one legacy AGC instance: the configuration, the target
// level limits, the analog (microphone level) AGC state, and the embedded VAD
// and digital AGC state.
typedef struct
{
// Configurable parameters/variables
uint32_t fs; // Sampling frequency
int16_t compressionGaindB; // Fixed gain level in dB
int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
uint8_t limiterEnable; // Enabling limiter (on/off (default off))
WebRtcAgcConfig defaultConfig;
WebRtcAgcConfig usedConfig;
// General variables
int16_t initFlag;
int16_t lastError;
// Target level parameters
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
int32_t analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs
int32_t startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs
int32_t startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs
int32_t upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs
int32_t lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs
int32_t upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs
int32_t lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs
uint16_t targetIdx; // Table index for corresponding target level
// Only present when MIC_LEVEL_FEEDBACK is defined.
#ifdef MIC_LEVEL_FEEDBACK
uint16_t targetIdxOffset; // Table index offset for level compensation
#endif
int16_t analogTarget; // Digital reference level in ENV scale
// Analog AGC specific variables
int32_t filterState[8]; // For downsampling wb to nb
int32_t upperLimit; // Upper limit for mic energy
int32_t lowerLimit; // Lower limit for mic energy
int32_t Rxx160w32; // Average energy for one frame
int32_t Rxx16_LPw32; // Low pass filtered subframe energies
int32_t Rxx160_LPw32; // Low pass filtered frame energies
int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe
int32_t Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies
int32_t Rxx16w32_array[2][5];// Energy values of microphone signal
int32_t env[2][10]; // Envelope values of subframes
int16_t Rxx16pos; // Current position in the Rxx16_vectorw32
int16_t envSum; // Filtered scaled envelope in subframes
int16_t vadThreshold; // Threshold for VAD decision
int16_t inActive; // Inactive time in milliseconds
int16_t msTooLow; // Milliseconds of speech at a too low level
int16_t msTooHigh; // Milliseconds of speech at a too high level
int16_t changeToSlowMode; // Change to slow mode after some time at target
int16_t firstCall; // First call to the process-function
int16_t msZero; // Milliseconds of zero input
int16_t msecSpeechOuterChange;// Min ms of speech between volume changes
int16_t msecSpeechInnerChange;// Min ms of speech between volume changes
int16_t activeSpeech; // Milliseconds of active speech
int16_t muteGuardMs; // Counter to prevent mute action
int16_t inQueue; // 10 ms batch indicator
// Microphone level variables
int32_t micRef; // Remember ref. mic level for virtual mic
uint16_t gainTableIdx; // Current position in virtual gain table
int32_t micGainIdx; // Gain index of mic level to increase slowly
int32_t micVol; // Remember volume between frames
int32_t maxLevel; // Max possible vol level, incl dig gain
int32_t maxAnalog; // Maximum possible analog volume level
int32_t maxInit; // Initial value of "max"
int32_t minLevel; // Minimum possible volume level
int32_t minOutput; // Minimum output volume level
int32_t zeroCtrlMax; // Remember max gain => don't amp low input
int32_t lastInMicLevel;
int16_t scale; // Scale factor for internal volume levels
// Only present when MIC_LEVEL_FEEDBACK is defined.
#ifdef MIC_LEVEL_FEEDBACK
int16_t numBlocksMicLvlSat;
uint8_t micLvlSat;
#endif
// Structs for VAD and digital_agc
AgcVad vadMic;
DigitalAgc digitalAgc;
// Debug dump state, only present when WEBRTC_AGC_DEBUG_DUMP is defined.
#ifdef WEBRTC_AGC_DEBUG_DUMP
FILE* fpt;
FILE* agcLog;
int32_t fcount;
#endif
int16_t lowLevelSignal;
} LegacyAgc;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_

View File

@ -12,12 +12,15 @@
*
*/
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
#include <assert.h>
#include <string.h>
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "digital_agc.h"
#include "gain_control.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
// To generate the gaintable, copy&paste the following lines to a Matlab window:
// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
@ -33,7 +36,8 @@
// zoom on;
// Generator table for y=log2(1+e^x) in Q8.
static const WebRtc_UWord16 kGenFuncTable[128] = {
enum { kGenFuncTableSize = 128 };
static const uint16_t kGenFuncTable[kGenFuncTableSize] = {
256, 485, 786, 1126, 1484, 1849, 2217, 2586,
2955, 3324, 3693, 4063, 4432, 4801, 5171, 5540,
5909, 6279, 6648, 7017, 7387, 7756, 8125, 8495,
@ -52,29 +56,29 @@ static const WebRtc_UWord16 kGenFuncTable[128] = {
44320, 44689, 45058, 45428, 45797, 46166, 46536, 46905
};
static const WebRtc_Word16 kAvgDecayTime = 250; // frames; < 3000
static const int16_t kAvgDecayTime = 250; // frames; < 3000
WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
WebRtc_Word16 digCompGaindB, // Q0
WebRtc_Word16 targetLevelDbfs,// Q0
WebRtc_UWord8 limiterEnable,
WebRtc_Word16 analogTarget) // Q0
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t digCompGaindB, // Q0
int16_t targetLevelDbfs,// Q0
uint8_t limiterEnable,
int16_t analogTarget) // Q0
{
// This function generates the compressor gain table used in the fixed digital part.
WebRtc_UWord32 tmpU32no1, tmpU32no2, absInLevel, logApprox;
WebRtc_Word32 inLevel, limiterLvl;
WebRtc_Word32 tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
const WebRtc_UWord16 kLog10 = 54426; // log2(10) in Q14
const WebRtc_UWord16 kLog10_2 = 49321; // 10*log10(2) in Q14
const WebRtc_UWord16 kLogE_1 = 23637; // log2(e) in Q14
WebRtc_UWord16 constMaxGain;
WebRtc_UWord16 tmpU16, intPart, fracPart;
const WebRtc_Word16 kCompRatio = 3;
const WebRtc_Word16 kSoftLimiterLeft = 1;
WebRtc_Word16 limiterOffset = 0; // Limiter offset
WebRtc_Word16 limiterIdx, limiterLvlX;
WebRtc_Word16 constLinApprox, zeroGainLvl, maxGain, diffGain;
WebRtc_Word16 i, tmp16, tmp16no1;
uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox;
int32_t inLevel, limiterLvl;
int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
const uint16_t kLog10 = 54426; // log2(10) in Q14
const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14
const uint16_t kLogE_1 = 23637; // log2(e) in Q14
uint16_t constMaxGain;
uint16_t tmpU16, intPart, fracPart;
const int16_t kCompRatio = 3;
const int16_t kSoftLimiterLeft = 1;
int16_t limiterOffset = 0; // Limiter offset
int16_t limiterIdx, limiterLvlX;
int16_t constLinApprox, zeroGainLvl, maxGain, diffGain;
int16_t i, tmp16, tmp16no1;
int zeros, zerosScale;
// Constants
@ -83,11 +87,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// kLog10_2 = 49321; // 10*log10(2) in Q14
// Calculate maximum digital gain and zero gain level
tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB - analogTarget, kCompRatio - 1);
tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1);
tmp16no1 = analogTarget - targetLevelDbfs;
tmp16no1 += WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs));
tmp32no1 = WEBRTC_SPL_MUL_16_16(maxGain, kCompRatio);
tmp32no1 = maxGain * kCompRatio;
zeroGainLvl = digCompGaindB;
zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1),
kCompRatio - 1);
@ -100,10 +104,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// Calculate the difference between maximum gain and gain at 0dB0v:
// diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio
// = (compRatio-1)*digCompGaindB/compRatio
tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB, kCompRatio - 1);
tmp32no1 = digCompGaindB * (kCompRatio - 1);
diffGain = WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
if (diffGain < 0)
if (diffGain < 0 || diffGain >= kGenFuncTableSize)
{
assert(0);
return -1;
}
@ -111,9 +116,8 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// limiterLvlX = analogTarget - limiterOffset
// limiterLvl = targetLevelDbfs + limiterOffset/compRatio
limiterLvlX = analogTarget - limiterOffset;
limiterIdx = 2
+ WebRtcSpl_DivW32W16ResW16(WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)limiterLvlX, 13),
WEBRTC_SPL_RSHIFT_U16(kLog10_2, 1));
limiterIdx =
2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX << 13, kLog10_2 / 2);
tmp16no1 = WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio);
limiterLvl = targetLevelDbfs + tmp16no1;
@ -134,23 +138,23 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
{
// Calculate scaled input level (compressor):
// inLevel = fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio)
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(kCompRatio - 1, i - 1); // Q0
tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0
tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14
inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14
// Calculate diffGain-inLevel, to map using the genFuncTable
inLevel = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)diffGain, 14) - inLevel; // Q14
inLevel = ((int32_t)diffGain << 14) - inLevel; // Q14
// Make calculations on abs(inLevel) and compensate for the sign afterwards.
absInLevel = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(inLevel); // Q14
absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14
// LUT with interpolation
intPart = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(absInLevel, 14);
fracPart = (WebRtc_UWord16)(absInLevel & 0x00003FFF); // extract the fractional part
intPart = (uint16_t)(absInLevel >> 14);
fracPart = (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part
tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8
tmpU32no1 = WEBRTC_SPL_UMUL_16_16(tmpU16, fracPart); // Q22
tmpU32no1 += WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)kGenFuncTable[intPart], 14); // Q22
logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 8); // Q14
tmpU32no1 = tmpU16 * fracPart; // Q22
tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22
logApprox = tmpU32no1 >> 8; // Q14
// Compensate for negative exponent using the relation:
// log2(1 + 2^-x) = log2(1 + 2^x) - x
if (inLevel < 0)
@ -160,83 +164,89 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
if (zeros < 15)
{
// Not enough space for multiplication
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(absInLevel, 15 - zeros); // Q(zeros-1)
tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1)
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13)
if (zeros < 9)
{
tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 9 - zeros); // Q(zeros+13)
zerosScale = 9 - zeros;
tmpU32no1 >>= zerosScale; // Q(zeros+13)
} else
{
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, zeros - 9); // Q22
tmpU32no2 >>= zeros - 9; // Q22
}
} else
{
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); // Q22
tmpU32no2 >>= 6; // Q22
}
logApprox = 0;
if (tmpU32no2 < tmpU32no1)
{
logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1 - tmpU32no2, 8 - zerosScale); //Q14
logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); //Q14
}
}
numFIX = WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_U16(maxGain, constMaxGain), 6); // Q14
numFIX -= WEBRTC_SPL_MUL_32_16((WebRtc_Word32)logApprox, diffGain); // Q14
numFIX = (maxGain * constMaxGain) << 6; // Q14
numFIX -= (int32_t)logApprox * diffGain; // Q14
// Calculate ratio
// Shift numFIX as much as possible
zeros = WebRtcSpl_NormW32(numFIX);
numFIX = WEBRTC_SPL_LSHIFT_W32(numFIX, zeros); // Q(14+zeros)
// Shift |numFIX| as much as possible.
// Ensure we avoid wrap-around in |den| as well.
if (numFIX > (den >> 8)) // |den| is Q8.
{
zeros = WebRtcSpl_NormW32(numFIX);
} else
{
zeros = WebRtcSpl_NormW32(den) + 8;
}
numFIX <<= zeros; // Q(14+zeros)
// Shift den so we end up in Qy1
tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 8); // Q(zeros)
if (numFIX < 0)
{
numFIX -= WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1);
numFIX -= tmp32no1 / 2;
} else
{
numFIX += WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1);
numFIX += tmp32no1 / 2;
}
y32 = WEBRTC_SPL_DIV(numFIX, tmp32no1); // in Q14
y32 = numFIX / tmp32no1; // in Q14
if (limiterEnable && (i < limiterIdx))
{
tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14
tmp32 -= WEBRTC_SPL_LSHIFT_W32(limiterLvl, 14); // Q14
tmp32 -= limiterLvl << 14; // Q14
y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20);
}
if (y32 > 39000)
{
tmp32 = WEBRTC_SPL_MUL(y32 >> 1, kLog10) + 4096; // in Q27
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 13); // in Q14
tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27
tmp32 >>= 13; // In Q14.
} else
{
tmp32 = WEBRTC_SPL_MUL(y32, kLog10) + 8192; // in Q28
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 14); // in Q14
tmp32 = y32 * kLog10 + 8192; // in Q28
tmp32 >>= 14; // In Q14.
}
tmp32 += WEBRTC_SPL_LSHIFT_W32(16, 14); // in Q14 (Make sure final output is in Q16)
tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16)
// Calculate power
if (tmp32 > 0)
{
intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 14);
fracPart = (WebRtc_UWord16)(tmp32 & 0x00003FFF); // in Q14
if (WEBRTC_SPL_RSHIFT_W32(fracPart, 13))
intPart = (int16_t)(tmp32 >> 14);
fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14
if ((fracPart >> 13) != 0)
{
tmp16 = WEBRTC_SPL_LSHIFT_W16(2, 14) - constLinApprox;
tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - fracPart;
tmp32no2 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16);
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13);
tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - tmp32no2;
tmp16 = (2 << 14) - constLinApprox;
tmp32no2 = (1 << 14) - fracPart;
tmp32no2 *= tmp16;
tmp32no2 >>= 13;
tmp32no2 = (1 << 14) - tmp32no2;
} else
{
tmp16 = constLinApprox - WEBRTC_SPL_LSHIFT_W16(1, 14);
tmp32no2 = WEBRTC_SPL_MUL_32_16(fracPart, tmp16);
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13);
tmp16 = constLinApprox - (1 << 14);
tmp32no2 = (fracPart * tmp16) >> 13;
}
fracPart = (WebRtc_UWord16)tmp32no2;
gainTable[i] = WEBRTC_SPL_LSHIFT_W32(1, intPart)
+ WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
fracPart = (uint16_t)tmp32no2;
gainTable[i] =
(1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
} else
{
gainTable[i] = 0;
@ -246,9 +256,7 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
return 0;
}
WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
{
int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) {
if (agcMode == kAgcModeFixedDigital)
{
// start at minimum to find correct gain faster
@ -256,13 +264,13 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
} else
{
// start out with 0 dB gain
stt->capacitorSlow = 134217728; // (WebRtc_Word32)(0.125f * 32768.0f * 32768.0f);
stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f);
}
stt->capacitorFast = 0;
stt->gain = 65536;
stt->gatePrevious = 0;
stt->agcMode = agcMode;
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
stt->frameCounter = 0;
#endif
@ -273,52 +281,45 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
return 0;
}
WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_far,
WebRtc_Word16 nrSamples)
{
// Check for valid pointer
if (&stt->vadFarend == NULL)
{
return -1;
}
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt,
const int16_t* in_far,
size_t nrSamples) {
assert(stt != NULL);
// VAD for far end
WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples);
return 0;
}
WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_near,
const WebRtc_Word16 *in_near_H, WebRtc_Word16 *out,
WebRtc_Word16 *out_H, WebRtc_UWord32 FS,
WebRtc_Word16 lowlevelSignal)
{
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* stt,
const int16_t* const* in_near,
size_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowlevelSignal) {
// array for gains (one value per ms, incl start & end)
WebRtc_Word32 gains[11];
int32_t gains[11];
WebRtc_Word32 out_tmp, tmp32;
WebRtc_Word32 env[10];
WebRtc_Word32 nrg, max_nrg;
WebRtc_Word32 cur_level;
WebRtc_Word32 gain32, delta;
WebRtc_Word16 logratio;
WebRtc_Word16 lower_thr, upper_thr;
WebRtc_Word16 zeros, zeros_fast, frac;
WebRtc_Word16 decay;
WebRtc_Word16 gate, gain_adj;
WebRtc_Word16 k, n;
WebRtc_Word16 L, L2; // samples/subframe
int32_t out_tmp, tmp32;
int32_t env[10];
int32_t max_nrg;
int32_t cur_level;
int32_t gain32, delta;
int16_t logratio;
int16_t lower_thr, upper_thr;
int16_t zeros = 0, zeros_fast, frac = 0;
int16_t decay;
int16_t gate, gain_adj;
int16_t k;
size_t n, i, L;
int16_t L2; // samples/subframe
// determine number of samples per ms
if (FS == 8000)
{
L = 8;
L2 = 3;
} else if (FS == 16000)
{
L = 16;
L2 = 4;
} else if (FS == 32000)
} else if (FS == 16000 || FS == 32000 || FS == 48000)
{
L = 16;
L2 = 4;
@ -327,27 +328,22 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
return -1;
}
// TODO(andrew): again, we don't need input and output pointers...
if (in_near != out)
for (i = 0; i < num_bands; ++i)
{
// Only needed if they don't already point to the same place.
memcpy(out, in_near, 10 * L * sizeof(WebRtc_Word16));
}
if (FS == 32000)
{
if (in_near_H != out_H)
if (in_near[i] != out[i])
{
memcpy(out_H, in_near_H, 10 * L * sizeof(WebRtc_Word16));
// Only needed if they don't already point to the same place.
memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
}
}
// VAD for near end
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10);
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10);
// Account for far end VAD
if (stt->vadFarend.counter > 10)
{
tmp32 = WEBRTC_SPL_MUL_16_16(3, logratio);
logratio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 - stt->vadFarend.logRatio, 2);
tmp32 = 3 * logratio;
logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2);
}
// Determine decay factor depending on VAD
@ -364,11 +360,11 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
} else
{
// decay = (WebRtc_Word16)(((lower_thr - logratio)
// decay = (int16_t)(((lower_thr - logratio)
// * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10);
// SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65
tmp32 = WEBRTC_SPL_MUL_16_16((lower_thr - logratio), 65);
decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 10);
tmp32 = (lower_thr - logratio) * 65;
decay = (int16_t)(tmp32 >> 10);
}
// adjust decay factor for long silence (detected as low standard deviation)
@ -380,9 +376,9 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
} else if (stt->vadNearend.stdLongTerm < 8096)
{
// decay = (WebRtc_Word16)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12);
tmp32 = WEBRTC_SPL_MUL_16_16((stt->vadNearend.stdLongTerm - 4000), decay);
decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 12);
// decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12);
tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay;
decay = (int16_t)(tmp32 >> 12);
}
if (lowlevelSignal != 0)
@ -390,9 +386,14 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
}
}
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
stt->frameCounter++;
fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100, logratio, decay, stt->vadNearend.stdLongTerm);
fprintf(stt->logFile,
"%5.2f\t%d\t%d\t%d\t",
(float)(stt->frameCounter) / 100,
logratio,
decay,
stt->vadNearend.stdLongTerm);
#endif
// Find max amplitude per sub frame
// iterate over sub frames
@ -402,7 +403,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
max_nrg = 0;
for (n = 0; n < L; n++)
{
nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]);
int32_t nrg = out[0][k * L + n] * out[0][k * L + n];
if (nrg > max_nrg)
{
max_nrg = nrg;
@ -445,34 +446,39 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
}
// Translate signal level into gain, using a piecewise linear approximation
// find number of leading zeros
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)cur_level);
zeros = WebRtcSpl_NormU32((uint32_t)cur_level);
if (cur_level == 0)
{
zeros = 31;
}
tmp32 = (WEBRTC_SPL_LSHIFT_W32(cur_level, zeros) & 0x7FFFFFFF);
frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 19); // Q12
tmp32 = WEBRTC_SPL_MUL((stt->gainTable[zeros-1] - stt->gainTable[zeros]), frac);
gains[k + 1] = stt->gainTable[zeros] + WEBRTC_SPL_RSHIFT_W32(tmp32, 12);
#ifdef AGC_DEBUG
if (k == 0)
{
fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level, stt->capacitorFast, stt->capacitorSlow, zeros);
tmp32 = (cur_level << zeros) & 0x7FFFFFFF;
frac = (int16_t)(tmp32 >> 19); // Q12.
tmp32 = (stt->gainTable[zeros-1] - stt->gainTable[zeros]) * frac;
gains[k + 1] = stt->gainTable[zeros] + (tmp32 >> 12);
#ifdef WEBRTC_AGC_DEBUG_DUMP
if (k == 0) {
fprintf(stt->logFile,
"%d\t%d\t%d\t%d\t%d\n",
env[0],
cur_level,
stt->capacitorFast,
stt->capacitorSlow,
zeros);
}
#endif
}
// Gate processing (lower gain during absence of speech)
zeros = WEBRTC_SPL_LSHIFT_W16(zeros, 9) - WEBRTC_SPL_RSHIFT_W16(frac, 3);
zeros = (zeros << 9) - (frac >> 3);
// find number of leading zeros
zeros_fast = WebRtcSpl_NormU32((WebRtc_UWord32)stt->capacitorFast);
zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast);
if (stt->capacitorFast == 0)
{
zeros_fast = 31;
}
tmp32 = (WEBRTC_SPL_LSHIFT_W32(stt->capacitorFast, zeros_fast) & 0x7FFFFFFF);
zeros_fast = WEBRTC_SPL_LSHIFT_W16(zeros_fast, 9);
zeros_fast -= (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 22);
tmp32 = (stt->capacitorFast << zeros_fast) & 0x7FFFFFFF;
zeros_fast <<= 9;
zeros_fast -= (int16_t)(tmp32 >> 22);
gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm;
@ -481,8 +487,8 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
stt->gatePrevious = 0;
} else
{
tmp32 = WEBRTC_SPL_MUL_16_16(stt->gatePrevious, 7);
gate = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)gate + tmp32, 3);
tmp32 = stt->gatePrevious * 7;
gate = (int16_t)((gate + tmp32) >> 3);
stt->gatePrevious = gate;
}
// gate < 0 -> no gate
@ -491,7 +497,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
{
if (gate < 2500)
{
gain_adj = WEBRTC_SPL_RSHIFT_W16(2500 - gate, 5);
gain_adj = (2500 - gate) >> 5;
} else
{
gain_adj = 0;
@ -501,12 +507,12 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
if ((gains[k + 1] - stt->gainTable[0]) > 8388608)
{
// To prevent wraparound
tmp32 = WEBRTC_SPL_RSHIFT_W32((gains[k+1] - stt->gainTable[0]), 8);
tmp32 = WEBRTC_SPL_MUL(tmp32, (178 + gain_adj));
tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8;
tmp32 *= 178 + gain_adj;
} else
{
tmp32 = WEBRTC_SPL_MUL((gains[k+1] - stt->gainTable[0]), (178 + gain_adj));
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 8);
tmp32 = (gains[k+1] - stt->gainTable[0]) * (178 + gain_adj);
tmp32 >>= 8;
}
gains[k + 1] = stt->gainTable[0] + tmp32;
}
@ -521,23 +527,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
{
zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]);
}
gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1;
gain32 = WEBRTC_SPL_MUL(gain32, gain32);
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
// check for overflow
while (AGC_MUL32(WEBRTC_SPL_RSHIFT_W32(env[k], 12) + 1, gain32)
> WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)32767, 2 * (1 - zeros + 10)))
while (AGC_MUL32((env[k] >> 12) + 1, gain32)
> WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10)))
{
// multiply by 253/256 ==> -0.1 dB
if (gains[k + 1] > 8388607)
{
// Prevent wrap around
gains[k + 1] = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(gains[k+1], 8), 253);
gains[k + 1] = (gains[k+1] / 256) * 253;
} else
{
gains[k + 1] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(gains[k+1], 253), 8);
gains[k + 1] = (gains[k+1] * 253) / 256;
}
gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1;
gain32 = WEBRTC_SPL_MUL(gain32, gain32);
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
}
}
// gain reductions should be done 1 ms earlier than gain increases
@ -553,42 +559,25 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
// Apply gain
// handle first sub frame separately
delta = WEBRTC_SPL_LSHIFT_W32(gains[1] - gains[0], (4 - L2));
gain32 = WEBRTC_SPL_LSHIFT_W32(gains[0], 4);
delta = (gains[1] - gains[0]) << (4 - L2);
gain32 = gains[0] << 4;
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7));
out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
if (out_tmp > 4095)
for (i = 0; i < num_bands; ++i)
{
out[n] = (WebRtc_Word16)32767;
} else if (out_tmp < -4096)
{
out[n] = (WebRtc_Word16)-32768;
} else
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
}
// For higher band
if (FS == 32000)
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n],
WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7));
out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][n] * ((gain32 + 127) >> 7);
out_tmp = tmp32 >> 16;
if (out_tmp > 4095)
{
out_H[n] = (WebRtc_Word16)32767;
out[i][n] = (int16_t)32767;
} else if (out_tmp < -4096)
{
out_H[n] = (WebRtc_Word16)-32768;
out[i][n] = (int16_t)-32768;
} else
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out_H[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][n] * (gain32 >> 4);
out[i][n] = (int16_t)(tmp32 >> 16);
}
}
//
@ -598,21 +587,15 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
// iterate over subframes
for (k = 1; k < 10; k++)
{
delta = WEBRTC_SPL_LSHIFT_W32(gains[k+1] - gains[k], (4 - L2));
gain32 = WEBRTC_SPL_LSHIFT_W32(gains[k], 4);
delta = (gains[k+1] - gains[k]) << (4 - L2);
gain32 = gains[k] << 4;
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[k * L + n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
// For higher band
if (FS == 32000)
for (i = 0; i < num_bands; ++i)
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[k * L + n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out_H[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][k * L + n] * (gain32 >> 4);
out[i][k * L + n] = (int16_t)(tmp32 >> 16);
}
gain32 += delta;
}
@ -621,24 +604,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
return 0;
}
void WebRtcAgc_InitVad(AgcVad_t *state)
{
WebRtc_Word16 k;
void WebRtcAgc_InitVad(AgcVad* state) {
int16_t k;
state->HPstate = 0; // state of high pass filter
state->logRatio = 0; // log( P(active) / P(inactive) )
// average input level (Q10)
state->meanLongTerm = WEBRTC_SPL_LSHIFT_W16(15, 10);
state->meanLongTerm = 15 << 10;
// variance of input level (Q8)
state->varianceLongTerm = WEBRTC_SPL_LSHIFT_W32(500, 8);
state->varianceLongTerm = 500 << 8;
state->stdLongTerm = 0; // standard deviation of input level in dB
// short-term average input level (Q10)
state->meanShortTerm = WEBRTC_SPL_LSHIFT_W16(15, 10);
state->meanShortTerm = 15 << 10;
// short-term variance of input level (Q8)
state->varianceShortTerm = WEBRTC_SPL_LSHIFT_W32(500, 8);
state->varianceShortTerm = 500 << 8;
state->stdShortTerm = 0; // short-term standard deviation of input level in dB
state->counter = 3; // counts updates
@ -649,17 +631,17 @@ void WebRtcAgc_InitVad(AgcVad_t *state)
}
}
WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
const WebRtc_Word16 *in, // (i) Speech signal
WebRtc_Word16 nrSamples) // (i) number of samples
int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state
const int16_t* in, // (i) Speech signal
size_t nrSamples) // (i) number of samples
{
WebRtc_Word32 out, nrg, tmp32, tmp32b;
WebRtc_UWord16 tmpU16;
WebRtc_Word16 k, subfr, tmp16;
WebRtc_Word16 buf1[8];
WebRtc_Word16 buf2[4];
WebRtc_Word16 HPstate;
WebRtc_Word16 zeros, dB;
int32_t out, nrg, tmp32, tmp32b;
uint16_t tmpU16;
int16_t k, subfr, tmp16;
int16_t buf1[8];
int16_t buf2[4];
int16_t HPstate;
int16_t zeros, dB;
// process in 10 sub frames of 1 ms (to save on memory)
nrg = 0;
@ -671,9 +653,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
{
for (k = 0; k < 8; k++)
{
tmp32 = (WebRtc_Word32)in[2 * k] + (WebRtc_Word32)in[2 * k + 1];
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 1);
buf1[k] = (WebRtc_Word16)tmp32;
tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1];
tmp32 >>= 1;
buf1[k] = (int16_t)tmp32;
}
in += 16;
@ -688,10 +670,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
for (k = 0; k < 4; k++)
{
out = buf2[k] + HPstate;
tmp32 = WEBRTC_SPL_MUL(600, out);
HPstate = (WebRtc_Word16)(WEBRTC_SPL_RSHIFT_W32(tmp32, 10) - buf2[k]);
tmp32 = WEBRTC_SPL_MUL(out, out);
nrg += WEBRTC_SPL_RSHIFT_W32(tmp32, 6);
tmp32 = 600 * out;
HPstate = (int16_t)((tmp32 >> 10) - buf2[k]);
nrg += (out * out) >> 6;
}
}
state->HPstate = HPstate;
@ -722,7 +703,7 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
}
// energy level (range {-32..30}) (Q10)
dB = WEBRTC_SPL_LSHIFT_W16(15 - zeros, 11);
dB = (15 - zeros) << 11;
// Update statistics
@ -733,44 +714,49 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
}
// update short-term estimate of mean energy level (Q10)
tmp32 = (WEBRTC_SPL_MUL_16_16(state->meanShortTerm, 15) + (WebRtc_Word32)dB);
state->meanShortTerm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 4);
tmp32 = state->meanShortTerm * 15 + dB;
state->meanShortTerm = (int16_t)(tmp32 >> 4);
// update short-term estimate of variance in energy level (Q8)
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12);
tmp32 += WEBRTC_SPL_MUL(state->varianceShortTerm, 15);
state->varianceShortTerm = WEBRTC_SPL_RSHIFT_W32(tmp32, 4);
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceShortTerm * 15;
state->varianceShortTerm = tmp32 / 16;
// update short-term estimate of standard deviation in energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanShortTerm, state->meanShortTerm);
tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceShortTerm, 12) - tmp32;
state->stdShortTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32);
tmp32 = state->meanShortTerm * state->meanShortTerm;
tmp32 = (state->varianceShortTerm << 12) - tmp32;
state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
// update long-term estimate of mean energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->counter) + (WebRtc_Word32)dB;
state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(tmp32,
WEBRTC_SPL_ADD_SAT_W16(state->counter, 1));
tmp32 = state->meanLongTerm * state->counter + dB;
state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
// update long-term estimate of variance in energy level (Q8)
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12);
tmp32 += WEBRTC_SPL_MUL(state->varianceLongTerm, state->counter);
state->varianceLongTerm = WebRtcSpl_DivW32W16(tmp32,
WEBRTC_SPL_ADD_SAT_W16(state->counter, 1));
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceLongTerm * state->counter;
state->varianceLongTerm = WebRtcSpl_DivW32W16(
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
// update long-term estimate of standard deviation in energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->meanLongTerm);
tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceLongTerm, 12) - tmp32;
state->stdLongTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32);
tmp32 = state->meanLongTerm * state->meanLongTerm;
tmp32 = (state->varianceLongTerm << 12) - tmp32;
state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
// update voice activity measure (Q10)
tmp16 = WEBRTC_SPL_LSHIFT_W16(3, 12);
tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, (dB - state->meanLongTerm));
tmp16 = 3 << 12;
// TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in
// ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16()
// was used, which did an intermediate cast to (int16_t), hence losing
// significant bits. This causes logRatio to max out positive, rather than
// negative. This is a bug, but has very little significance.
tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm);
tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm);
tmpU16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)13, 12);
tmpU16 = (13 << 12);
tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16);
tmp32 += WEBRTC_SPL_RSHIFT_W32(tmp32b, 10);
tmp32 += tmp32b >> 10;
state->logRatio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 6);
state->logRatio = (int16_t)(tmp32 >> 6);
// limit
if (state->logRatio > 2048)

View File

@ -0,0 +1,80 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"
// AGC_MUL32(A, B): the 32 most significant bits of A(19) * B(26) >> 13.
// B is split into its upper part (B >> 13) and its low 13 bits
// (B & 0x1FFF); each part is multiplied by A separately and the low partial
// product is shifted right by 13 before the two are summed.
// NOTE(review): "(19)" and "(26)" presumably denote the expected bit widths of
// A and B - confirm against the callers.
// NOTE: this is a function-like macro in which A and B each appear twice, so
// an argument with side effects would be evaluated more than once.
#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))
// AGC_SCALEDIFF32(A, B, C): C + the 32 most significant bits of A * B.
// B is split into its upper part (B >> 16) and its low 16 bits
// (B & 0xFFFF); each part is multiplied by A separately and the low partial
// product is shifted right by 16 before everything is added to C.
// NOTE: this is a function-like macro in which A and B each appear twice, so
// an argument with side effects would be evaluated more than once.
#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 ))
// State of one voice activity detector (VAD) instance.
// Reset by WebRtcAgc_InitVad() and updated by WebRtcAgc_ProcessVad().
typedef struct
{
// NOTE(review): presumably filter state for the downsampling stage of
// WebRtcAgc_ProcessVad() - confirm against the definition.
int32_t downState[8];
int16_t HPstate; // state of the high pass filter
int16_t counter; // counts updates (WebRtcAgc_InitVad() sets it to 3)
int16_t logRatio; // log( P(active) / P(inactive) ) (Q10)
int16_t meanLongTerm; // long-term average input level, Q10
int32_t varianceLongTerm; // long-term variance of input level, Q8
int16_t stdLongTerm; // long-term standard deviation of input level, Q10
int16_t meanShortTerm; // short-term average input level, Q10
int32_t varianceShortTerm; // short-term variance of input level, Q8
int16_t stdShortTerm; // short-term standard deviation of input level, Q10
} AgcVad; // members total 54 bytes; sizeof(AgcVad) may be larger due to padding
// State of the digital AGC stage; passed as the first argument to
// WebRtcAgc_InitDigital(), WebRtcAgc_ProcessDigital() and
// WebRtcAgc_AddFarendToDigital().
typedef struct
{
// NOTE(review): presumably the slow-tracking counterpart of capacitorFast -
// confirm against WebRtcAgc_ProcessDigital().
int32_t capacitorSlow;
// Value whose leading-zero count feeds the gate computation in
// WebRtcAgc_ProcessDigital().
int32_t capacitorFast;
int32_t gain;
// Gain lookup table. WebRtcAgc_ProcessDigital() indexes it by the number of
// leading zeros of the current level and interpolates between neighbouring
// entries.
int32_t gainTable[32];
// Gate value stored by the previous gate update; WebRtcAgc_ProcessDigital()
// smooths the new gate as (gate + 7 * gatePrevious) >> 3.
int16_t gatePrevious;
// NOTE(review): presumably the mode passed to WebRtcAgc_InitDigital() - confirm.
int16_t agcMode;
// VAD state that WebRtcAgc_ProcessDigital() updates from band 0 of the
// near-end signal.
AgcVad vadNearend;
// Far-end VAD state; when its counter exceeds 10, its logRatio is blended
// into the near-end log ratio in WebRtcAgc_ProcessDigital().
AgcVad vadFarend;
#ifdef WEBRTC_AGC_DEBUG_DUMP
// Debug-dump only: destination of the fprintf() traces and a counter that
// WebRtcAgc_ProcessDigital() increments once per call.
FILE* logFile;
int frameCounter;
#endif
} DigitalAgc;
// Initializes the digital AGC state in |digitalAgcInst| for |agcMode|.
// NOTE(review): the definition is not visible from this header; the accepted
// agcMode values and the meaning of the return value should be confirmed
// against digital_agc.c.
int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode);
// Applies the digital gain to one frame made of 10 sub frames per band.
//
// - digitalAgcInst : digital AGC state (updated).
// - inNear         : input speech, one pointer per band. A band is copied to
//                    out[] first unless inNear[i] == out[i].
// - num_bands      : number of bands in inNear / out.
// - out            : gain-adjusted output, one pointer per band. The near-end
//                    VAD and the per-sub-frame envelope are computed from
//                    band 0 only; the resulting gain is applied to every band.
// - FS             : sample rate. 8000 gives 8 samples per sub frame; 16000,
//                    32000 and 48000 give 16 samples per sub frame.
// - lowLevelSignal : a non-zero value forces the decay factor to 0.
//
// Returns 0 when the frame has been processed.
// NOTE(review): it appears to return -1 for any other FS - confirm against
// the definition.
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* digitalAgcInst,
const int16_t* const* inNear,
size_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowLevelSignal);
// Hands |nrSamples| far-end samples from |inFar| to the digital AGC.
// NOTE(review): presumably this updates digitalAgcInst->vadFarend, whose
// counter and logRatio are read by WebRtcAgc_ProcessDigital() - confirm
// against the definition, including the meaning of the return value.
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst,
const int16_t* inFar,
size_t nrSamples);
// Resets the VAD state in |vadInst|: HPstate and logRatio are cleared, the
// long- and short-term means are set to 15 << 10 (Q10), the variances to
// 500 << 8 (Q8), the standard deviations to 0 and the update counter to 3.
void WebRtcAgc_InitVad(AgcVad* vadInst);
// Updates the short- and long-term level statistics and the voice activity
// measure (logRatio) in |vadInst| from the samples in |in|.
// WebRtcAgc_ProcessDigital() calls this with 10 sub frames of band 0 and uses
// the return value as the near-end log ratio.
// NOTE(review): presumably the return value is vadInst->logRatio (Q10) -
// confirm against the end of the definition.
int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state
const int16_t* in, // (i) Speech signal
size_t nrSamples); // (i) number of samples
// Computes the gain table from the compression and target-level settings.
// NOTE(review): the definition is not visible from this header. Presumably
// |gainTable| is an output array matching DigitalAgc::gainTable (32 entries);
// confirm this, the role of |analogTarget| and the meaning of the return
// value against digital_agc.c.
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t compressionGaindB, // Q0 (in dB)
int16_t targetLevelDbfs,// Q0 (in dB)
uint8_t limiterEnable,
int16_t analogTarget);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
#include "typedefs.h"
#include "webrtc/typedefs.h"
// Errors
#define AGC_UNSPECIFIED_ERROR 18000
@ -39,10 +39,10 @@ enum
typedef struct
{
WebRtc_Word16 targetLevelDbfs; // default 3 (-3 dBOv)
WebRtc_Word16 compressionGaindB; // default 9 dB
WebRtc_UWord8 limiterEnable; // default kAgcTrue (on)
} WebRtcAgc_config_t;
int16_t targetLevelDbfs; // default 3 (-3 dBOv)
int16_t compressionGaindB; // default 9 dB
uint8_t limiterEnable; // default kAgcTrue (on)
} WebRtcAgcConfig;
#if defined(__cplusplus)
extern "C"
@ -50,14 +50,14 @@ extern "C"
#endif
/*
* This function processes a 10/20ms frame of far-end speech to determine
* if there is active speech. Far-end speech length can be either 10ms or
* 20ms. The length of the input speech vector must be given in samples
* (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000).
* This function processes a 10 ms frame of far-end speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inFar : Far-end input speech vector (10 or 20ms)
* - inFar : Far-end input speech vector
* - samples : Number of samples in input vector
*
* Return value:
@ -65,26 +65,23 @@ extern "C"
* : -1 - Error
*/
int WebRtcAgc_AddFarend(void* agcInst,
const WebRtc_Word16* inFar,
WebRtc_Word16 samples);
const int16_t* inFar,
size_t samples);
/*
* This function processes a 10/20ms frame of microphone speech to determine
* if there is active speech. Microphone speech length can be either 10ms or
* 20ms. The length of the input speech vector must be given in samples
* (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000). For very low
* input levels, the input signal is increased in level by multiplying and
* overwriting the samples in inMic[].
* This function processes a 10 ms frame of microphone speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000). For very low input levels, the input signal is increased in level
* by multiplying and overwriting the samples in inMic[].
*
* This function should be called before any further processing of the
* near-end microphone signal.
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector (10 or 20 ms) for
* L band
* - inMic_H : Microphone input speech vector (10 or 20 ms) for
* H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
*
* Return value:
@ -92,24 +89,21 @@ int WebRtcAgc_AddFarend(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_AddMic(void* agcInst,
WebRtc_Word16* inMic,
WebRtc_Word16* inMic_H,
WebRtc_Word16 samples);
int16_t* const* inMic,
size_t num_bands,
size_t samples);
/*
* This function replaces the analog microphone with a virtual one.
* It is a digital gain applied to the input signal and is used in the
* agcAdaptiveDigital mode where no microphone level is adjustable.
* Microphone speech length can be either 10ms or 20ms. The length of the
* input speech vector must be given in samples (80/160 when FS=8000, and
* 160/320 when FS=16000 or FS=32000).
* agcAdaptiveDigital mode where no microphone level is adjustable. The length
* of the input speech vector must be given in samples (80 when FS=8000, and 160
* when FS=16000, FS=32000 or FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector for (10 or 20 ms)
* L band
* - inMic_H : Microphone input speech vector for (10 or 20 ms)
* H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
* - micLevelIn : Input level of microphone (static)
*
@ -123,30 +117,27 @@ int WebRtcAgc_AddMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_VirtualMic(void* agcInst,
WebRtc_Word16* inMic,
WebRtc_Word16* inMic_H,
WebRtc_Word16 samples,
WebRtc_Word32 micLevelIn,
WebRtc_Word32* micLevelOut);
int16_t* const* inMic,
size_t num_bands,
size_t samples,
int32_t micLevelIn,
int32_t* micLevelOut);
/*
* This function processes a 10/20ms frame and adjusts (normalizes) the gain
* both analog and digitally. The gain adjustments are done only during
* active periods of speech. The input speech length can be either 10ms or
* 20ms and the output is of the same length. The length of the speech
* vectors must be given in samples (80/160 when FS=8000, and 160/320 when
* FS=16000 or FS=32000). The echo parameter can be used to ensure the AGC will
* not adjust upward in the presence of echo.
* This function processes a 10 ms frame and adjusts (normalizes) the gain both
* analog and digitally. The gain adjustments are done only during active
* periods of speech. The length of the speech vectors must be given in samples
* (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
* parameter can be used to ensure the AGC will not adjust upward in the
* presence of echo.
*
* This function should be called after processing the near-end microphone
* signal, in any case after any echo cancellation.
*
* Input:
* - agcInst : AGC instance
* - inNear : Near-end input speech vector (10 or 20 ms) for
* L band
* - inNear_H : Near-end input speech vector (10 or 20 ms) for
* H band
* - inNear : Near-end input speech vector for each band
* - num_bands : Number of bands in input/output vector
* - samples : Number of samples in input/output vector
* - inMicLevel : Current microphone volume level
* - echo : Set to 0 if the signal passed to add_mic is
@ -156,9 +147,8 @@ int WebRtcAgc_VirtualMic(void* agcInst,
*
* Output:
* - outMicLevel : Adjusted microphone volume level
* - out : Gain-adjusted near-end speech vector (L band)
* - out : Gain-adjusted near-end speech vector
* : May be the same vector as the input.
* - out_H : Gain-adjusted near-end speech vector (H band)
* - saturationWarning : A returned value of 1 indicates a saturation event
* has occurred and the volume cannot be further
* reduced. Otherwise will be set to 0.
@ -168,15 +158,14 @@ int WebRtcAgc_VirtualMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_Process(void* agcInst,
const WebRtc_Word16* inNear,
const WebRtc_Word16* inNear_H,
WebRtc_Word16 samples,
WebRtc_Word16* out,
WebRtc_Word16* out_H,
WebRtc_Word32 inMicLevel,
WebRtc_Word32* outMicLevel,
WebRtc_Word16 echo,
WebRtc_UWord8* saturationWarning);
const int16_t* const* inNear,
size_t num_bands,
size_t samples,
int16_t* const* out,
int32_t inMicLevel,
int32_t* outMicLevel,
int16_t echo,
uint8_t* saturationWarning);
/*
* This function sets the config parameters (targetLevelDbfs,
@ -192,7 +181,7 @@ int WebRtcAgc_Process(void* agcInst,
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config);
int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config);
/*
* This function returns the config parameters (targetLevelDbfs,
@ -208,27 +197,21 @@ int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config);
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_get_config(void* agcInst, WebRtcAgc_config_t* config);
int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config);
/*
* This function creates an AGC instance, which will contain the state
* information for one (duplex) channel.
*
* Return value : AGC instance if successful
* : 0 (i.e., a NULL pointer) if unsuccessful
* This function creates and returns an AGC instance, which will contain the
* state information for one (duplex) channel.
*/
int WebRtcAgc_Create(void **agcInst);
void* WebRtcAgc_Create();
/*
* This function frees the AGC instance created at the beginning.
*
* Input:
* - agcInst : AGC instance.
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcAgc_Free(void *agcInst);
void WebRtcAgc_Free(void* agcInst);
/*
* This function initializes an AGC instance.
@ -247,27 +230,13 @@ int WebRtcAgc_Free(void *agcInst);
* -1 - Error
*/
int WebRtcAgc_Init(void *agcInst,
WebRtc_Word32 minLevel,
WebRtc_Word32 maxLevel,
WebRtc_Word16 agcMode,
WebRtc_UWord32 fs);
/*
* This function returns a text string containing the version.
*
* Input:
* - length : Length of the char array pointed to by version
* Output:
* - version : Pointer to a char array of to which the version
* : string will be copied.
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcAgc_Version(WebRtc_Word8 *versionStr, WebRtc_Word16 length);
int32_t minLevel,
int32_t maxLevel,
int16_t agcMode,
uint32_t fs);
#if defined(__cplusplus)
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/utility.h"
#include <math.h>
// Approximation of ln(10); dividing by it converts a natural logarithm into
// a base-10 logarithm.
static const double kLog10 = 2.30258509299;
// Multiplier for log(x) that yields 20 * log10(x).
static const double kLinear2DbScale = 20.0 / kLog10;
// Multiplier for log(x) that yields 13.4 * log10(x); used by the loudness
// conversions below.
static const double kLinear2LoudnessScale = 13.4 / kLog10;
// Rescales a loudness value to dB: multiply by the dB scale factor, then
// divide by the loudness scale factor (same evaluation order as before).
double Loudness2Db(double loudness) {
  const double scaled = loudness * kLinear2DbScale;
  return scaled / kLinear2LoudnessScale;
}
// Maps a linear RMS value to loudness as kLinear2LoudnessScale * log(rms).
// log(0) is not finite, so an RMS of exactly zero maps to the fixed value -15.
double Linear2Loudness(double rms) {
  return (rms == 0) ? -15 : kLinear2LoudnessScale * log(rms);
}
// Rescales a dB value to loudness: multiply by the loudness scale factor,
// then divide by the dB scale factor (same evaluation order as before).
double Db2Loudness(double db) {
  const double scaled = db * kLinear2LoudnessScale;
  return scaled / kLinear2DbScale;
}
// Converts a dBFS value to loudness by adding an offset of 90 and passing the
// result to Db2Loudness().
double Dbfs2Loudness(double dbfs) {
  const double db = 90 + dbfs;
  return Db2Loudness(db);
}

View File

@ -0,0 +1,23 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
// TODO(turajs): Add description of function.
// Converts a loudness value to dB by multiplying with the ratio of the dB
// scale (20 / ln 10) to the loudness scale (13.4 / ln 10).
double Loudness2Db(double loudness);
// Converts a linear RMS value to loudness: returns -15 when |rms| is exactly
// 0, otherwise (13.4 / ln 10) * log(rms), with log() the natural logarithm.
double Linear2Loudness(double rms);
// Converts a dB value to loudness by multiplying with the ratio of the
// loudness scale (13.4 / ln 10) to the dB scale (20 / ln 10).
double Db2Loudness(double db);
// Converts a dBFS value to loudness; equivalent to Db2Loudness(90 + dbfs).
double Dbfs2Loudness(double dbfs);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_