Ongoing fixes and improvements, transient suppressor is gone. Also, dropping isac because it doesn't seem to be useful, and is just build system deadweight now. Upstream references: Version: 131.0.6778.200 WebRTC: 79aff54b0fa9238ce3518dd9eaf9610cd6f22e82 Chromium: 2a19506ad24af755f2a215a4c61f775393e0db42
385 lines
15 KiB
C++
385 lines
15 KiB
C++
/*
|
|
* Copyright (c) 2021 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/clipping_predictor.h"
|
|
|
|
#include <algorithm>
|
|
#include <memory>
|
|
|
|
#include "common_audio/include/audio_util.h"
|
|
#include "modules/audio_processing/agc2/clipping_predictor_level_buffer.h"
|
|
#include "modules/audio_processing/agc2/gain_map_internal.h"
|
|
#include "rtc_base/checks.h"
|
|
#include "rtc_base/logging.h"
|
|
#include "rtc_base/numerics/safe_minmax.h"
|
|
|
|
namespace webrtc {
|
|
namespace {
|
|
|
|
constexpr int kClippingPredictorMaxGainChange = 15;
|
|
|
|
// Returns an input volume in the [`min_input_volume`, `max_input_volume`] range
|
|
// that reduces `gain_error_db`, which is a gain error estimated when
|
|
// `input_volume` was applied, according to a fixed gain map.
|
|
int ComputeVolumeUpdate(int gain_error_db,
|
|
int input_volume,
|
|
int min_input_volume,
|
|
int max_input_volume) {
|
|
RTC_DCHECK_GE(input_volume, 0);
|
|
RTC_DCHECK_LE(input_volume, max_input_volume);
|
|
if (gain_error_db == 0) {
|
|
return input_volume;
|
|
}
|
|
int new_volume = input_volume;
|
|
if (gain_error_db > 0) {
|
|
while (kGainMap[new_volume] - kGainMap[input_volume] < gain_error_db &&
|
|
new_volume < max_input_volume) {
|
|
++new_volume;
|
|
}
|
|
} else {
|
|
while (kGainMap[new_volume] - kGainMap[input_volume] > gain_error_db &&
|
|
new_volume > min_input_volume) {
|
|
--new_volume;
|
|
}
|
|
}
|
|
return new_volume;
|
|
}
|
|
|
|
// Returns the crest factor of `level`, i.e., the dBFS value of `level.max`
// minus the dBFS value of the square root of `level.average` (the callers in
// this file store a mean of squared samples in `average`).
float ComputeCrestFactor(const ClippingPredictorLevelBuffer::Level& level) {
  const float peak_dbfs = FloatS16ToDbfs(level.max);
  const float rms_dbfs = FloatS16ToDbfs(std::sqrt(level.average));
  return peak_dbfs - rms_dbfs;
}
|
|
|
|
// Crest factor-based clipping prediction and clipped level step estimation.
|
|
class ClippingEventPredictor : public ClippingPredictor {
|
|
public:
|
|
// ClippingEventPredictor with `num_channels` channels (limited to values
|
|
// higher than zero); window size `window_length` and reference window size
|
|
// `reference_window_length` (both referring to the number of frames in the
|
|
// respective sliding windows and limited to values higher than zero);
|
|
// reference window delay `reference_window_delay` (delay in frames, limited
|
|
// to values zero and higher with an additional requirement of
|
|
// `window_length` < `reference_window_length` + reference_window_delay`);
|
|
// and an estimation peak threshold `clipping_threshold` and a crest factor
|
|
// drop threshold `crest_factor_margin` (both in dB).
|
|
ClippingEventPredictor(int num_channels,
|
|
int window_length,
|
|
int reference_window_length,
|
|
int reference_window_delay,
|
|
float clipping_threshold,
|
|
float crest_factor_margin)
|
|
: window_length_(window_length),
|
|
reference_window_length_(reference_window_length),
|
|
reference_window_delay_(reference_window_delay),
|
|
clipping_threshold_(clipping_threshold),
|
|
crest_factor_margin_(crest_factor_margin) {
|
|
RTC_DCHECK_GT(num_channels, 0);
|
|
RTC_DCHECK_GT(window_length, 0);
|
|
RTC_DCHECK_GT(reference_window_length, 0);
|
|
RTC_DCHECK_GE(reference_window_delay, 0);
|
|
RTC_DCHECK_GT(reference_window_length + reference_window_delay,
|
|
window_length);
|
|
const int buffer_length = GetMinFramesProcessed();
|
|
RTC_DCHECK_GT(buffer_length, 0);
|
|
for (int i = 0; i < num_channels; ++i) {
|
|
ch_buffers_.push_back(
|
|
std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
|
|
}
|
|
}
|
|
|
|
ClippingEventPredictor(const ClippingEventPredictor&) = delete;
|
|
ClippingEventPredictor& operator=(const ClippingEventPredictor&) = delete;
|
|
~ClippingEventPredictor() {}
|
|
|
|
void Reset() {
|
|
const int num_channels = ch_buffers_.size();
|
|
for (int i = 0; i < num_channels; ++i) {
|
|
ch_buffers_[i]->Reset();
|
|
}
|
|
}
|
|
|
|
// Analyzes a frame of audio and stores the framewise metrics in
|
|
// `ch_buffers_`.
|
|
void Analyze(const AudioFrameView<const float>& frame) {
|
|
const int num_channels = frame.num_channels();
|
|
RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
|
|
const int samples_per_channel = frame.samples_per_channel();
|
|
RTC_DCHECK_GT(samples_per_channel, 0);
|
|
for (int channel = 0; channel < num_channels; ++channel) {
|
|
float sum_squares = 0.0f;
|
|
float peak = 0.0f;
|
|
for (const auto& sample : frame.channel(channel)) {
|
|
sum_squares += sample * sample;
|
|
peak = std::max(std::fabs(sample), peak);
|
|
}
|
|
ch_buffers_[channel]->Push(
|
|
{sum_squares / static_cast<float>(samples_per_channel), peak});
|
|
}
|
|
}
|
|
|
|
// Estimates the analog gain adjustment for channel `channel` using a
|
|
// sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
|
|
// estimate for the clipped level step equal to `default_clipped_level_step_`
|
|
// if at least `GetMinFramesProcessed()` frames have been processed since the
|
|
// last reset and a clipping event is predicted. `level`, `min_mic_level`, and
|
|
// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
|
|
std::optional<int> EstimateClippedLevelStep(int channel,
|
|
int level,
|
|
int default_step,
|
|
int min_mic_level,
|
|
int max_mic_level) const {
|
|
RTC_CHECK_GE(channel, 0);
|
|
RTC_CHECK_LT(channel, ch_buffers_.size());
|
|
RTC_DCHECK_GE(level, 0);
|
|
RTC_DCHECK_LE(level, 255);
|
|
RTC_DCHECK_GT(default_step, 0);
|
|
RTC_DCHECK_LE(default_step, 255);
|
|
RTC_DCHECK_GE(min_mic_level, 0);
|
|
RTC_DCHECK_LE(min_mic_level, 255);
|
|
RTC_DCHECK_GE(max_mic_level, 0);
|
|
RTC_DCHECK_LE(max_mic_level, 255);
|
|
if (level <= min_mic_level) {
|
|
return std::nullopt;
|
|
}
|
|
if (PredictClippingEvent(channel)) {
|
|
const int new_level =
|
|
rtc::SafeClamp(level - default_step, min_mic_level, max_mic_level);
|
|
const int step = level - new_level;
|
|
if (step > 0) {
|
|
return step;
|
|
}
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
private:
|
|
int GetMinFramesProcessed() const {
|
|
return reference_window_delay_ + reference_window_length_;
|
|
}
|
|
|
|
// Predicts clipping events based on the processed audio frames. Returns
|
|
// true if a clipping event is likely.
|
|
bool PredictClippingEvent(int channel) const {
|
|
const auto metrics =
|
|
ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
|
|
if (!metrics.has_value() ||
|
|
!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
|
|
return false;
|
|
}
|
|
const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
|
|
reference_window_delay_, reference_window_length_);
|
|
if (!reference_metrics.has_value()) {
|
|
return false;
|
|
}
|
|
const float crest_factor = ComputeCrestFactor(metrics.value());
|
|
const float reference_crest_factor =
|
|
ComputeCrestFactor(reference_metrics.value());
|
|
if (crest_factor < reference_crest_factor - crest_factor_margin_) {
|
|
return true;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
|
|
const int window_length_;
|
|
const int reference_window_length_;
|
|
const int reference_window_delay_;
|
|
const float clipping_threshold_;
|
|
const float crest_factor_margin_;
|
|
};
|
|
|
|
// Performs crest factor-based clipping peak prediction.
|
|
class ClippingPeakPredictor : public ClippingPredictor {
|
|
public:
|
|
// Ctor. ClippingPeakPredictor with `num_channels` channels (limited to values
|
|
// higher than zero); window size `window_length` and reference window size
|
|
// `reference_window_length` (both referring to the number of frames in the
|
|
// respective sliding windows and limited to values higher than zero);
|
|
// reference window delay `reference_window_delay` (delay in frames, limited
|
|
// to values zero and higher with an additional requirement of
|
|
// `window_length` < `reference_window_length` + reference_window_delay`);
|
|
// and a clipping prediction threshold `clipping_threshold` (in dB). Adaptive
|
|
// clipped level step estimation is used if `adaptive_step_estimation` is
|
|
// true.
|
|
explicit ClippingPeakPredictor(int num_channels,
|
|
int window_length,
|
|
int reference_window_length,
|
|
int reference_window_delay,
|
|
int clipping_threshold,
|
|
bool adaptive_step_estimation)
|
|
: window_length_(window_length),
|
|
reference_window_length_(reference_window_length),
|
|
reference_window_delay_(reference_window_delay),
|
|
clipping_threshold_(clipping_threshold),
|
|
adaptive_step_estimation_(adaptive_step_estimation) {
|
|
RTC_DCHECK_GT(num_channels, 0);
|
|
RTC_DCHECK_GT(window_length, 0);
|
|
RTC_DCHECK_GT(reference_window_length, 0);
|
|
RTC_DCHECK_GE(reference_window_delay, 0);
|
|
RTC_DCHECK_GT(reference_window_length + reference_window_delay,
|
|
window_length);
|
|
const int buffer_length = GetMinFramesProcessed();
|
|
RTC_DCHECK_GT(buffer_length, 0);
|
|
for (int i = 0; i < num_channels; ++i) {
|
|
ch_buffers_.push_back(
|
|
std::make_unique<ClippingPredictorLevelBuffer>(buffer_length));
|
|
}
|
|
}
|
|
|
|
ClippingPeakPredictor(const ClippingPeakPredictor&) = delete;
|
|
ClippingPeakPredictor& operator=(const ClippingPeakPredictor&) = delete;
|
|
~ClippingPeakPredictor() {}
|
|
|
|
void Reset() {
|
|
const int num_channels = ch_buffers_.size();
|
|
for (int i = 0; i < num_channels; ++i) {
|
|
ch_buffers_[i]->Reset();
|
|
}
|
|
}
|
|
|
|
// Analyzes a frame of audio and stores the framewise metrics in
|
|
// `ch_buffers_`.
|
|
void Analyze(const AudioFrameView<const float>& frame) {
|
|
const int num_channels = frame.num_channels();
|
|
RTC_DCHECK_EQ(num_channels, ch_buffers_.size());
|
|
const int samples_per_channel = frame.samples_per_channel();
|
|
RTC_DCHECK_GT(samples_per_channel, 0);
|
|
for (int channel = 0; channel < num_channels; ++channel) {
|
|
float sum_squares = 0.0f;
|
|
float peak = 0.0f;
|
|
for (const auto& sample : frame.channel(channel)) {
|
|
sum_squares += sample * sample;
|
|
peak = std::max(std::fabs(sample), peak);
|
|
}
|
|
ch_buffers_[channel]->Push(
|
|
{sum_squares / static_cast<float>(samples_per_channel), peak});
|
|
}
|
|
}
|
|
|
|
// Estimates the analog gain adjustment for channel `channel` using a
|
|
// sliding window over the frame-wise metrics in `ch_buffers_`. Returns an
|
|
// estimate for the clipped level step (equal to
|
|
// `default_clipped_level_step_` if `adaptive_estimation_` is false) if at
|
|
// least `GetMinFramesProcessed()` frames have been processed since the last
|
|
// reset and a clipping event is predicted. `level`, `min_mic_level`, and
|
|
// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
|
|
std::optional<int> EstimateClippedLevelStep(int channel,
|
|
int level,
|
|
int default_step,
|
|
int min_mic_level,
|
|
int max_mic_level) const {
|
|
RTC_DCHECK_GE(channel, 0);
|
|
RTC_DCHECK_LT(channel, ch_buffers_.size());
|
|
RTC_DCHECK_GE(level, 0);
|
|
RTC_DCHECK_LE(level, 255);
|
|
RTC_DCHECK_GT(default_step, 0);
|
|
RTC_DCHECK_LE(default_step, 255);
|
|
RTC_DCHECK_GE(min_mic_level, 0);
|
|
RTC_DCHECK_LE(min_mic_level, 255);
|
|
RTC_DCHECK_GE(max_mic_level, 0);
|
|
RTC_DCHECK_LE(max_mic_level, 255);
|
|
if (level <= min_mic_level) {
|
|
return std::nullopt;
|
|
}
|
|
std::optional<float> estimate_db = EstimatePeakValue(channel);
|
|
if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
|
|
int step = 0;
|
|
if (!adaptive_step_estimation_) {
|
|
step = default_step;
|
|
} else {
|
|
const int estimated_gain_change =
|
|
rtc::SafeClamp(-static_cast<int>(std::ceil(estimate_db.value())),
|
|
-kClippingPredictorMaxGainChange, 0);
|
|
step =
|
|
std::max(level - ComputeVolumeUpdate(estimated_gain_change, level,
|
|
min_mic_level, max_mic_level),
|
|
default_step);
|
|
}
|
|
const int new_level =
|
|
rtc::SafeClamp(level - step, min_mic_level, max_mic_level);
|
|
if (level > new_level) {
|
|
return level - new_level;
|
|
}
|
|
}
|
|
return std::nullopt;
|
|
}
|
|
|
|
private:
|
|
int GetMinFramesProcessed() {
|
|
return reference_window_delay_ + reference_window_length_;
|
|
}
|
|
|
|
// Predicts clipping sample peaks based on the processed audio frames.
|
|
// Returns the estimated peak value if clipping is predicted. Otherwise
|
|
// returns std::nullopt.
|
|
std::optional<float> EstimatePeakValue(int channel) const {
|
|
const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
|
|
reference_window_delay_, reference_window_length_);
|
|
if (!reference_metrics.has_value()) {
|
|
return std::nullopt;
|
|
}
|
|
const auto metrics =
|
|
ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
|
|
if (!metrics.has_value() ||
|
|
!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
|
|
return std::nullopt;
|
|
}
|
|
const float reference_crest_factor =
|
|
ComputeCrestFactor(reference_metrics.value());
|
|
const float& mean_squares = metrics.value().average;
|
|
const float projected_peak =
|
|
reference_crest_factor + FloatS16ToDbfs(std::sqrt(mean_squares));
|
|
return projected_peak;
|
|
}
|
|
|
|
std::vector<std::unique_ptr<ClippingPredictorLevelBuffer>> ch_buffers_;
|
|
const int window_length_;
|
|
const int reference_window_length_;
|
|
const int reference_window_delay_;
|
|
const int clipping_threshold_;
|
|
const bool adaptive_step_estimation_;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
std::unique_ptr<ClippingPredictor> CreateClippingPredictor(
|
|
int num_channels,
|
|
const AudioProcessing::Config::GainController1::AnalogGainController::
|
|
ClippingPredictor& config) {
|
|
if (!config.enabled) {
|
|
RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction disabled.";
|
|
return nullptr;
|
|
}
|
|
RTC_LOG(LS_INFO) << "[AGC2] Clipping prediction enabled.";
|
|
using ClippingPredictorMode = AudioProcessing::Config::GainController1::
|
|
AnalogGainController::ClippingPredictor::Mode;
|
|
switch (config.mode) {
|
|
case ClippingPredictorMode::kClippingEventPrediction:
|
|
return std::make_unique<ClippingEventPredictor>(
|
|
num_channels, config.window_length, config.reference_window_length,
|
|
config.reference_window_delay, config.clipping_threshold,
|
|
config.crest_factor_margin);
|
|
case ClippingPredictorMode::kAdaptiveStepClippingPeakPrediction:
|
|
return std::make_unique<ClippingPeakPredictor>(
|
|
num_channels, config.window_length, config.reference_window_length,
|
|
config.reference_window_delay, config.clipping_threshold,
|
|
/*adaptive_step_estimation=*/true);
|
|
case ClippingPredictorMode::kFixedStepClippingPeakPrediction:
|
|
return std::make_unique<ClippingPeakPredictor>(
|
|
num_channels, config.window_length, config.reference_window_length,
|
|
config.reference_window_delay, config.clipping_threshold,
|
|
/*adaptive_step_estimation=*/false);
|
|
}
|
|
RTC_DCHECK_NOTREACHED();
|
|
}
|
|
|
|
} // namespace webrtc
|