Bump to WebRTC M120 release
Some API deprecation -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it's gone upstream, but maybe we'll want to drop that soon.
This commit is contained in:
@ -17,98 +17,156 @@
|
||||
#include <numeric>
|
||||
|
||||
#include "api/array_view.h"
|
||||
#include "common_audio/include/audio_util.h"
|
||||
#include "modules/audio_processing/logging/apm_data_dumper.h"
|
||||
#include "rtc_base/checks.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
namespace {
|
||||
|
||||
// Number of audio frames per second. `Analyze()` multiplies the number of
// samples per channel in a frame by this value to infer the sample rate, i.e.,
// each frame is expected to span 1/100 of a second.
constexpr int kFramesPerSecond = 100;
|
||||
|
||||
// Returns the energy of the most energetic channel of `audio`, where the
// energy of a channel is the sum of its squared samples. Returns 0 when
// `audio` has no channels.
float FrameEnergy(const AudioFrameView<const float>& audio) {
  float energy = 0.0f;
  for (int k = 0; k < audio.num_channels(); ++k) {
    const auto& channel = audio.channel(k);
    // Sum of squares of the samples of channel `k`.
    const float channel_energy =
        std::accumulate(channel.begin(), channel.end(), 0.0f,
                        [](float a, float b) -> float { return a + b * b; });
    // Keep the maximum across channels.
    energy = std::max(channel_energy, energy);
  }
  return energy;
}
|
||||
|
||||
float EnergyToDbfs(float signal_energy, size_t num_samples) {
|
||||
const float rms = std::sqrt(signal_energy / num_samples);
|
||||
return FloatS16ToDbfs(rms);
|
||||
float EnergyToDbfs(float signal_energy, int num_samples) {
|
||||
RTC_DCHECK_GE(signal_energy, 0.0f);
|
||||
const float rms_square = signal_energy / num_samples;
|
||||
constexpr float kMinDbfs = -90.30899869919436f;
|
||||
if (rms_square <= 1.0f) {
|
||||
return kMinDbfs;
|
||||
}
|
||||
return 10.0f * std::log10(rms_square) + kMinDbfs;
|
||||
}
|
||||
|
||||
// Updates the noise floor with instant decay and slow attack. This tuning is
// specific for AGC2, so that (i) it can promptly increase the gain if the noise
// floor drops (instant decay) and (ii) in case of music or fast speech, due to
// which the noise floor can be overestimated, the gain reduction is slowed
// down.
//
// Returns `new_estimate` unless it is greater than `current_estimate`, in
// which case the two values are blended using the attack coefficient.
float SmoothNoiseFloorEstimate(float current_estimate, float new_estimate) {
  // Weight given to `new_estimate` when the noise floor rises.
  constexpr float kAttack = 0.5f;
  if (current_estimate < new_estimate) {
    // Slow attack: move only part of the way towards the higher estimate.
    return kAttack * new_estimate + (1.0f - kAttack) * current_estimate;
  }
  // Instant decay: adopt the lower (or equal) estimate right away.
  return new_estimate;
}
|
||||
|
||||
class NoiseFloorEstimator : public NoiseLevelEstimator {
|
||||
public:
|
||||
// Update the noise floor every 5 seconds.
|
||||
static constexpr int kUpdatePeriodNumFrames = 500;
|
||||
static_assert(kUpdatePeriodNumFrames >= 200,
|
||||
"A too small value may cause noise level overestimation.");
|
||||
static_assert(kUpdatePeriodNumFrames <= 1500,
|
||||
"A too large value may make AGC2 slow at reacting to increased "
|
||||
"noise levels.");
|
||||
|
||||
NoiseFloorEstimator(ApmDataDumper* data_dumper) : data_dumper_(data_dumper) {
|
||||
RTC_DCHECK(data_dumper_);
|
||||
// Initially assume that 48 kHz will be used. `Analyze()` will detect the
|
||||
// used sample rate and call `Initialize()` again if needed.
|
||||
Initialize(/*sample_rate_hz=*/48000);
|
||||
}
|
||||
NoiseFloorEstimator(const NoiseFloorEstimator&) = delete;
|
||||
NoiseFloorEstimator& operator=(const NoiseFloorEstimator&) = delete;
|
||||
~NoiseFloorEstimator() = default;
|
||||
|
||||
float Analyze(const AudioFrameView<const float>& frame) override {
|
||||
// Detect sample rate changes.
|
||||
const int sample_rate_hz =
|
||||
static_cast<int>(frame.samples_per_channel() * kFramesPerSecond);
|
||||
if (sample_rate_hz != sample_rate_hz_) {
|
||||
Initialize(sample_rate_hz);
|
||||
}
|
||||
|
||||
const float frame_energy = FrameEnergy(frame);
|
||||
if (frame_energy <= min_noise_energy_) {
|
||||
// Ignore frames when muted or below the minimum measurable energy.
|
||||
if (data_dumper_)
|
||||
data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level",
|
||||
noise_energy_);
|
||||
return EnergyToDbfs(noise_energy_,
|
||||
static_cast<int>(frame.samples_per_channel()));
|
||||
}
|
||||
|
||||
if (preliminary_noise_energy_set_) {
|
||||
preliminary_noise_energy_ =
|
||||
std::min(preliminary_noise_energy_, frame_energy);
|
||||
} else {
|
||||
preliminary_noise_energy_ = frame_energy;
|
||||
preliminary_noise_energy_set_ = true;
|
||||
}
|
||||
if (data_dumper_)
|
||||
data_dumper_->DumpRaw("agc2_noise_floor_estimator_preliminary_level",
|
||||
preliminary_noise_energy_);
|
||||
|
||||
if (counter_ == 0) {
|
||||
// Full period observed.
|
||||
first_period_ = false;
|
||||
// Update the estimated noise floor energy with the preliminary
|
||||
// estimation.
|
||||
noise_energy_ = SmoothNoiseFloorEstimate(
|
||||
/*current_estimate=*/noise_energy_,
|
||||
/*new_estimate=*/preliminary_noise_energy_);
|
||||
// Reset for a new observation period.
|
||||
counter_ = kUpdatePeriodNumFrames;
|
||||
preliminary_noise_energy_set_ = false;
|
||||
} else if (first_period_) {
|
||||
// While analyzing the signal during the initial period, continuously
|
||||
// update the estimated noise energy, which is monotonic.
|
||||
noise_energy_ = preliminary_noise_energy_;
|
||||
counter_--;
|
||||
} else {
|
||||
// During the observation period it's only allowed to lower the energy.
|
||||
noise_energy_ = std::min(noise_energy_, preliminary_noise_energy_);
|
||||
counter_--;
|
||||
}
|
||||
|
||||
float noise_rms_dbfs = EnergyToDbfs(
|
||||
noise_energy_, static_cast<int>(frame.samples_per_channel()));
|
||||
if (data_dumper_)
|
||||
data_dumper_->DumpRaw("agc2_noise_rms_dbfs", noise_rms_dbfs);
|
||||
|
||||
return noise_rms_dbfs;
|
||||
}
|
||||
|
||||
private:
|
||||
void Initialize(int sample_rate_hz) {
|
||||
sample_rate_hz_ = sample_rate_hz;
|
||||
first_period_ = true;
|
||||
preliminary_noise_energy_set_ = false;
|
||||
// Initialize the minimum noise energy to -84 dBFS.
|
||||
min_noise_energy_ = sample_rate_hz * 2.0f * 2.0f / kFramesPerSecond;
|
||||
preliminary_noise_energy_ = min_noise_energy_;
|
||||
noise_energy_ = min_noise_energy_;
|
||||
counter_ = kUpdatePeriodNumFrames;
|
||||
}
|
||||
|
||||
ApmDataDumper* const data_dumper_;
|
||||
int sample_rate_hz_;
|
||||
float min_noise_energy_;
|
||||
bool first_period_;
|
||||
bool preliminary_noise_energy_set_;
|
||||
float preliminary_noise_energy_;
|
||||
float noise_energy_;
|
||||
int counter_;
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
// Constructs the estimator: `data_dumper` is forwarded to the signal
// classifier and the state is initialized for a 48 kHz sample rate.
NoiseLevelEstimator::NoiseLevelEstimator(ApmDataDumper* data_dumper)
    : signal_classifier_(data_dumper) {
  constexpr int kInitialSampleRateHz = 48000;
  Initialize(kInitialSampleRateHz);
}
|
||||
|
||||
// Nothing to release explicitly; members clean up after themselves.
NoiseLevelEstimator::~NoiseLevelEstimator() = default;
|
||||
|
||||
// Resets the estimator state for `sample_rate_hz` and re-initializes the
// signal classifier with the same rate.
void NoiseLevelEstimator::Initialize(int sample_rate_hz) {
  // Lower bound applied to the noise energy estimate by `Analyze()`.
  min_noise_energy_ = sample_rate_hz * 2.0f * 2.0f / kFramesPerSecond;
  sample_rate_hz_ = sample_rate_hz;
  // The next frame with positive energy will seed the estimate.
  first_update_ = true;
  noise_energy_ = 1.0f;
  noise_energy_hold_counter_ = 0;
  signal_classifier_.Initialize(sample_rate_hz);
}
|
||||
|
||||
float NoiseLevelEstimator::Analyze(const AudioFrameView<const float>& frame) {
|
||||
const int rate =
|
||||
static_cast<int>(frame.samples_per_channel() * kFramesPerSecond);
|
||||
if (rate != sample_rate_hz_) {
|
||||
Initialize(rate);
|
||||
}
|
||||
const float frame_energy = FrameEnergy(frame);
|
||||
if (frame_energy <= 0.f) {
|
||||
RTC_DCHECK_GE(frame_energy, 0.f);
|
||||
return EnergyToDbfs(noise_energy_, frame.samples_per_channel());
|
||||
}
|
||||
|
||||
if (first_update_) {
|
||||
// Initialize the noise energy to the frame energy.
|
||||
first_update_ = false;
|
||||
return EnergyToDbfs(
|
||||
noise_energy_ = std::max(frame_energy, min_noise_energy_),
|
||||
frame.samples_per_channel());
|
||||
}
|
||||
|
||||
const SignalClassifier::SignalType signal_type =
|
||||
signal_classifier_.Analyze(frame.channel(0));
|
||||
|
||||
// Update the noise estimate in a minimum statistics-type manner.
|
||||
if (signal_type == SignalClassifier::SignalType::kStationary) {
|
||||
if (frame_energy > noise_energy_) {
|
||||
// Leak the estimate upwards towards the frame energy if no recent
|
||||
// downward update.
|
||||
noise_energy_hold_counter_ = std::max(noise_energy_hold_counter_ - 1, 0);
|
||||
|
||||
if (noise_energy_hold_counter_ == 0) {
|
||||
noise_energy_ = std::min(noise_energy_ * 1.01f, frame_energy);
|
||||
}
|
||||
} else {
|
||||
// Update smoothly downwards with a limited maximum update magnitude.
|
||||
noise_energy_ =
|
||||
std::max(noise_energy_ * 0.9f,
|
||||
noise_energy_ + 0.05f * (frame_energy - noise_energy_));
|
||||
noise_energy_hold_counter_ = 1000;
|
||||
}
|
||||
} else {
|
||||
// For a non-stationary signal, leak the estimate downwards in order to
|
||||
// avoid estimate locking due to incorrect signal classification.
|
||||
noise_energy_ = noise_energy_ * 0.99f;
|
||||
}
|
||||
|
||||
// Ensure a minimum of the estimate.
|
||||
return EnergyToDbfs(
|
||||
noise_energy_ = std::max(noise_energy_, min_noise_energy_),
|
||||
frame.samples_per_channel());
|
||||
// Factory function: creates a `NoiseFloorEstimator` that uses `data_dumper`
// and returns it through the `NoiseLevelEstimator` interface.
std::unique_ptr<NoiseLevelEstimator> CreateNoiseFloorEstimator(
    ApmDataDumper* data_dumper) {
  std::unique_ptr<NoiseLevelEstimator> estimator =
      std::make_unique<NoiseFloorEstimator>(data_dumper);
  return estimator;
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
Reference in New Issue
Block a user