Bump to WebRTC M131 release

Ongoing fixes and improvements; the transient suppressor is gone. Also
dropping isac, since it no longer seems useful and is just build-system
deadweight now.

Upstream references:

  Version: 131.0.6778.200
  WebRTC: 79aff54b0fa9238ce3518dd9eaf9610cd6f22e82
  Chromium: 2a19506ad24af755f2a215a4c61f775393e0db42
Author: Arun Raghavan
Date:   2024-12-24 19:32:07 -05:00
Commit: b5c48b97f6 (parent 8bdb53d91c)

263 changed files with 4628 additions and 20416 deletions

modules/audio_processing/agc2/BUILD.gn

@ -23,9 +23,9 @@ rtc_library("speech_level_estimator") {
deps = [
":common",
"..:api",
"..:apm_logging",
"../../../api:array_view",
"../../../api/audio:audio_processing",
"../../../rtc_base:checks",
"../../../rtc_base:logging",
"../../../rtc_base:safe_minmax",
@ -48,9 +48,9 @@ rtc_library("adaptive_digital_gain_controller") {
deps = [
":common",
":gain_applier",
"..:api",
"..:apm_logging",
"..:audio_frame_view",
"../../../api/audio:audio_frame_api",
"../../../api/audio:audio_processing",
"../../../common_audio",
"../../../rtc_base:checks",
"../../../rtc_base:logging",
@ -81,8 +81,6 @@ rtc_library("saturation_protector") {
"../../../rtc_base:safe_compare",
"../../../rtc_base:safe_minmax",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
rtc_library("biquad_filter") {
@ -112,15 +110,13 @@ rtc_library("clipping_predictor") {
deps = [
":gain_map",
"..:api",
"..:audio_frame_view",
"../../../api/audio:audio_processing",
"../../../common_audio",
"../../../rtc_base:checks",
"../../../rtc_base:logging",
"../../../rtc_base:safe_minmax",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
rtc_source_set("common") {
@ -150,6 +146,7 @@ rtc_library("fixed_digital") {
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
"../../../api/audio:audio_frame_api",
"../../../common_audio",
"../../../rtc_base:checks",
"../../../rtc_base:gtest_prod",
@ -157,8 +154,8 @@ rtc_library("fixed_digital") {
"../../../rtc_base:safe_minmax",
"../../../rtc_base:stringutils",
"../../../system_wrappers:metrics",
"//third_party/abseil-cpp/absl/strings:string_view",
]
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
}
rtc_library("gain_applier") {
@ -175,7 +172,7 @@ rtc_library("gain_applier") {
deps = [
":common",
"..:audio_frame_view",
"../../../api:array_view",
"../../../api/audio:audio_frame_api",
"../../../rtc_base:safe_minmax",
]
}
@ -209,10 +206,10 @@ rtc_library("input_volume_controller") {
":clipping_predictor",
":gain_map",
":input_volume_stats_reporter",
"..:api",
"..:audio_buffer",
"..:audio_frame_view",
"../../../api:array_view",
"../../../api/audio:audio_processing",
"../../../rtc_base:checks",
"../../../rtc_base:checks",
"../../../rtc_base:gtest_prod",
@ -222,8 +219,6 @@ rtc_library("input_volume_controller") {
"../../../system_wrappers:field_trial",
"../../../system_wrappers:metrics",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
rtc_library("noise_level_estimator") {
@ -234,8 +229,7 @@ rtc_library("noise_level_estimator") {
deps = [
":biquad_filter",
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
"../../../api/audio:audio_frame_api",
"../../../rtc_base:checks",
"../../../system_wrappers",
]
@ -268,8 +262,7 @@ rtc_library("vad_wrapper") {
deps = [
":common",
":cpu_features",
"..:audio_frame_view",
"../../../api:array_view",
"../../../api/audio:audio_frame_api",
"../../../common_audio",
"../../../rtc_base:checks",
"rnn_vad",
@ -303,8 +296,8 @@ rtc_library("speech_level_estimator_unittest") {
deps = [
":common",
":speech_level_estimator",
"..:api",
"..:apm_logging",
"../../../api/audio:audio_processing",
"../../../rtc_base:gunit_helpers",
"../../../test:test_support",
]
@ -320,9 +313,9 @@ rtc_library("adaptive_digital_gain_controller_unittest") {
":adaptive_digital_gain_controller",
":common",
":test_utils",
"..:api",
"..:apm_logging",
"..:audio_frame_view",
"../../../api/audio:audio_processing",
"../../../common_audio",
"../../../rtc_base:gunit_helpers",
"../../../test:test_support",
@ -337,7 +330,7 @@ rtc_library("gain_applier_unittest") {
deps = [
":gain_applier",
":test_utils",
"..:audio_frame_view",
"../../../api/audio:audio_frame_api",
"../../../rtc_base:gunit_helpers",
"../../../test:test_support",
]
@ -391,6 +384,7 @@ rtc_library("fixed_digital_unittests") {
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
"../../../api/audio:audio_frame_api",
"../../../common_audio",
"../../../rtc_base:checks",
"../../../rtc_base:gunit_helpers",
@ -413,7 +407,6 @@ rtc_library("input_volume_controller_unittests") {
":clipping_predictor",
":gain_map",
":input_volume_controller",
"..:api",
"../../../api:array_view",
"../../../rtc_base:checks",
"../../../rtc_base:random",
@ -426,8 +419,6 @@ rtc_library("input_volume_controller_unittests") {
"../../../test:test_support",
"//testing/gtest",
]
absl_deps = [ "//third_party/abseil-cpp/absl/types:optional" ]
}
rtc_library("noise_estimator_unittests") {
@ -439,9 +430,8 @@ rtc_library("noise_estimator_unittests") {
":noise_level_estimator",
":test_utils",
"..:apm_logging",
"..:audio_frame_view",
"../../../api:array_view",
"../../../api:function_view",
"../../../api/audio:audio_frame_api",
"../../../rtc_base:checks",
"../../../rtc_base:gunit_helpers",
]
@ -453,7 +443,7 @@ rtc_library("vad_wrapper_unittests") {
deps = [
":common",
":vad_wrapper",
"..:audio_frame_view",
"../../../api/audio:audio_frame_api",
"../../../rtc_base:checks",
"../../../rtc_base:gunit_helpers",
"../../../rtc_base:safe_compare",
@ -475,6 +465,7 @@ rtc_library("test_utils") {
]
deps = [
"..:audio_frame_view",
"../../../api/audio:audio_frame_api",
"../../../rtc_base:checks",
"../../../rtc_base:random",
]
@ -491,10 +482,7 @@ rtc_library("input_volume_stats_reporter") {
"../../../rtc_base:safe_minmax",
"../../../rtc_base:stringutils",
"../../../system_wrappers:metrics",
]
absl_deps = [
"//third_party/abseil-cpp/absl/strings",
"//third_party/abseil-cpp/absl/types:optional",
"//third_party/abseil-cpp/absl/strings:string_view",
]
}
@ -506,6 +494,6 @@ rtc_library("input_volume_stats_reporter_unittests") {
"../../../rtc_base:stringutils",
"../../../system_wrappers:metrics",
"../../../test:test_support",
"//third_party/abseil-cpp/absl/strings:string_view",
]
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
}

modules/audio_processing/agc2/adaptive_digital_gain_controller.cc

@ -124,7 +124,7 @@ AdaptiveDigitalGainController::AdaptiveDigitalGainController(
}
void AdaptiveDigitalGainController::Process(const FrameInfo& info,
AudioFrameView<float> frame) {
DeinterleavedView<float> frame) {
RTC_DCHECK_GE(info.speech_level_dbfs, -150.0f);
RTC_DCHECK_GE(frame.num_channels(), 1);
RTC_DCHECK(

modules/audio_processing/agc2/adaptive_digital_gain_controller.h

@ -13,9 +13,9 @@
#include <vector>
#include "api/audio/audio_processing.h"
#include "api/audio/audio_view.h"
#include "modules/audio_processing/agc2/gain_applier.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
@ -46,7 +46,7 @@ class AdaptiveDigitalGainController {
// Analyzes `info`, updates the digital gain and applies it to a 10 ms
// `frame`. Supports any sample rate supported by APM.
void Process(const FrameInfo& info, AudioFrameView<float> frame);
void Process(const FrameInfo& info, DeinterleavedView<float> frame);
private:
ApmDataDumper* const apm_data_dumper_;
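
For callers, this change is mostly a type swap; a minimal sketch, assuming
`controller` and its `FrameInfo` (`info`) are set up as before and the audio
already lives in one deinterleaved buffer:

  constexpr size_t kNumChannels = 2;
  constexpr size_t kSamplesPerChannel = 480;  // 10 ms at 48 kHz.
  std::vector<float> buffer(kNumChannels * kSamplesPerChannel);
  webrtc::DeinterleavedView<float> frame(buffer.data(), kSamplesPerChannel,
                                         kNumChannels);
  controller.Process(info, frame);  // Previously took AudioFrameView<float>.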

modules/audio_processing/agc2/clipping_predictor.cc

@ -131,11 +131,11 @@ class ClippingEventPredictor : public ClippingPredictor {
// if at least `GetMinFramesProcessed()` frames have been processed since the
// last reset and a clipping event is predicted. `level`, `min_mic_level`, and
// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
absl::optional<int> EstimateClippedLevelStep(int channel,
int level,
int default_step,
int min_mic_level,
int max_mic_level) const {
std::optional<int> EstimateClippedLevelStep(int channel,
int level,
int default_step,
int min_mic_level,
int max_mic_level) const {
RTC_CHECK_GE(channel, 0);
RTC_CHECK_LT(channel, ch_buffers_.size());
RTC_DCHECK_GE(level, 0);
@ -147,7 +147,7 @@ class ClippingEventPredictor : public ClippingPredictor {
RTC_DCHECK_GE(max_mic_level, 0);
RTC_DCHECK_LE(max_mic_level, 255);
if (level <= min_mic_level) {
return absl::nullopt;
return std::nullopt;
}
if (PredictClippingEvent(channel)) {
const int new_level =
@ -157,7 +157,7 @@ class ClippingEventPredictor : public ClippingPredictor {
return step;
}
}
return absl::nullopt;
return std::nullopt;
}
private:
@ -271,11 +271,11 @@ class ClippingPeakPredictor : public ClippingPredictor {
// least `GetMinFramesProcessed()` frames have been processed since the last
// reset and a clipping event is predicted. `level`, `min_mic_level`, and
// `max_mic_level` are limited to [0, 255] and `default_step` to [1, 255].
absl::optional<int> EstimateClippedLevelStep(int channel,
int level,
int default_step,
int min_mic_level,
int max_mic_level) const {
std::optional<int> EstimateClippedLevelStep(int channel,
int level,
int default_step,
int min_mic_level,
int max_mic_level) const {
RTC_DCHECK_GE(channel, 0);
RTC_DCHECK_LT(channel, ch_buffers_.size());
RTC_DCHECK_GE(level, 0);
@ -287,9 +287,9 @@ class ClippingPeakPredictor : public ClippingPredictor {
RTC_DCHECK_GE(max_mic_level, 0);
RTC_DCHECK_LE(max_mic_level, 255);
if (level <= min_mic_level) {
return absl::nullopt;
return std::nullopt;
}
absl::optional<float> estimate_db = EstimatePeakValue(channel);
std::optional<float> estimate_db = EstimatePeakValue(channel);
if (estimate_db.has_value() && estimate_db.value() > clipping_threshold_) {
int step = 0;
if (!adaptive_step_estimation_) {
@ -309,7 +309,7 @@ class ClippingPeakPredictor : public ClippingPredictor {
return level - new_level;
}
}
return absl::nullopt;
return std::nullopt;
}
private:
@ -319,18 +319,18 @@ class ClippingPeakPredictor : public ClippingPredictor {
// Predicts clipping sample peaks based on the processed audio frames.
// Returns the estimated peak value if clipping is predicted. Otherwise
// returns absl::nullopt.
absl::optional<float> EstimatePeakValue(int channel) const {
// returns std::nullopt.
std::optional<float> EstimatePeakValue(int channel) const {
const auto reference_metrics = ch_buffers_[channel]->ComputePartialMetrics(
reference_window_delay_, reference_window_length_);
if (!reference_metrics.has_value()) {
return absl::nullopt;
return std::nullopt;
}
const auto metrics =
ch_buffers_[channel]->ComputePartialMetrics(0, window_length_);
if (!metrics.has_value() ||
!(FloatS16ToDbfs(metrics.value().max) > clipping_threshold_)) {
return absl::nullopt;
return std::nullopt;
}
const float reference_crest_factor =
ComputeCrestFactor(reference_metrics.value());
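
The absl::optional -> std::optional switch is mechanical for callers; a hedged
sketch of consuming the recommended step (the `predictor` instance and the
mic-level bookkeeping are assumed):

  std::optional<int> step = predictor->EstimateClippedLevelStep(
      /*channel=*/0, /*level=*/level, /*default_step=*/15,
      /*min_mic_level=*/0, /*max_mic_level=*/255);
  if (step.has_value()) {
    // The returned value is the recommended decrease in analog mic level.
    level = std::max(0, level - *step);
  }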

modules/audio_processing/agc2/clipping_predictor.h

@ -12,11 +12,11 @@
#define MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_H_
#include <memory>
#include <optional>
#include <vector>
#include "absl/types/optional.h"
#include "api/audio/audio_processing.h"
#include "modules/audio_processing/include/audio_frame_view.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
@ -35,12 +35,12 @@ class ClippingPredictor {
// Predicts if clipping is going to occur for the specified `channel` in the
// near-future and, if so, it returns a recommended analog mic level decrease
// step. Returns absl::nullopt if clipping is not predicted.
// step. Returns std::nullopt if clipping is not predicted.
// `level` is the current analog mic level, `default_step` is the amount the
// mic level is lowered by the analog controller with every clipping event and
// `min_mic_level` and `max_mic_level` define the range of allowed analog mic
// levels.
virtual absl::optional<int> EstimateClippedLevelStep(
virtual std::optional<int> EstimateClippedLevelStep(
int channel,
int level,
int default_step,

modules/audio_processing/agc2/clipping_predictor_level_buffer.cc

@ -50,7 +50,7 @@ void ClippingPredictorLevelBuffer::Push(Level level) {
}
// TODO(bugs.webrtc.org/12774): Optimize partial computation for long buffers.
absl::optional<ClippingPredictorLevelBuffer::Level>
std::optional<ClippingPredictorLevelBuffer::Level>
ClippingPredictorLevelBuffer::ComputePartialMetrics(int delay,
int num_items) const {
RTC_DCHECK_GE(delay, 0);
@ -59,7 +59,7 @@ ClippingPredictorLevelBuffer::ComputePartialMetrics(int delay,
RTC_DCHECK_LE(num_items, Capacity());
RTC_DCHECK_LE(delay + num_items, Capacity());
if (delay + num_items > Size()) {
return absl::nullopt;
return std::nullopt;
}
float sum = 0.0f;
float max = 0.0f;
@ -71,7 +71,7 @@ ClippingPredictorLevelBuffer::ComputePartialMetrics(int delay,
sum += data_[idx].average;
max = std::fmax(data_[idx].max, max);
}
return absl::optional<Level>({sum / static_cast<float>(num_items), max});
return std::optional<Level>({sum / static_cast<float>(num_items), max});
}
} // namespace webrtc

modules/audio_processing/agc2/clipping_predictor_level_buffer.h

@ -12,10 +12,9 @@
#define MODULES_AUDIO_PROCESSING_AGC2_CLIPPING_PREDICTOR_LEVEL_BUFFER_H_
#include <memory>
#include <optional>
#include <vector>
#include "absl/types/optional.h"
namespace webrtc {
// A circular buffer to store frame-wise `Level` items for clipping prediction.
@ -58,7 +57,7 @@ class ClippingPredictorLevelBuffer {
// from `delay` to `delay` - `num_items` (a delay equal to zero corresponds
// to the most recently pushed item). The value of `delay` is limited to
// [0, N] and `num_items` to [1, M] where N + M is the capacity of the buffer.
absl::optional<Level> ComputePartialMetrics(int delay, int num_items) const;
std::optional<Level> ComputePartialMetrics(int delay, int num_items) const;
private:
int tail_;
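
To make the (delay, num_items) convention concrete, a small worked example
with hypothetical buffer contents:

  // After pushing A, B, C, D (oldest to newest):
  //   buffer.ComputePartialMetrics(/*delay=*/0, /*num_items=*/2) -> {C, D}
  //   buffer.ComputePartialMetrics(/*delay=*/2, /*num_items=*/2) -> {A, B}
  // Returns std::nullopt when delay + num_items exceeds the current size.
  std::optional<webrtc::ClippingPredictorLevelBuffer::Level> metrics =
      buffer.ComputePartialMetrics(0, 2);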

modules/audio_processing/agc2/fixed_digital_level_estimator.cc

@ -14,6 +14,7 @@
#include <cmath>
#include "api/array_view.h"
#include "api/audio/audio_frame.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
@ -34,14 +35,17 @@ constexpr float kDecayFilterConstant = 0.9971259f;
} // namespace
FixedDigitalLevelEstimator::FixedDigitalLevelEstimator(
int sample_rate_hz,
size_t samples_per_channel,
ApmDataDumper* apm_data_dumper)
: apm_data_dumper_(apm_data_dumper),
filter_state_level_(kInitialFilterStateLevel) {
SetSampleRate(sample_rate_hz);
SetSamplesPerChannel(samples_per_channel);
CheckParameterCombination();
RTC_DCHECK(apm_data_dumper_);
apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate", sample_rate_hz);
// Convert `samples_per_channel` to sample rate for
// `agc2_level_estimator_samplerate`.
apm_data_dumper_->DumpRaw("agc2_level_estimator_samplerate",
samples_per_channel * kDefaultAudioBuffersPerSec);
}
void FixedDigitalLevelEstimator::CheckParameterCombination() {
@ -52,15 +56,15 @@ void FixedDigitalLevelEstimator::CheckParameterCombination() {
}
std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(
const AudioFrameView<const float>& float_frame) {
DeinterleavedView<const float> float_frame) {
RTC_DCHECK_GT(float_frame.num_channels(), 0);
RTC_DCHECK_EQ(float_frame.samples_per_channel(), samples_in_frame_);
// Compute max envelope without smoothing.
std::array<float, kSubFramesInFrame> envelope{};
for (int channel_idx = 0; channel_idx < float_frame.num_channels();
for (size_t channel_idx = 0; channel_idx < float_frame.num_channels();
++channel_idx) {
const auto channel = float_frame.channel(channel_idx);
const auto channel = float_frame[channel_idx];
for (int sub_frame = 0; sub_frame < kSubFramesInFrame; ++sub_frame) {
for (int sample_in_sub_frame = 0;
sample_in_sub_frame < samples_in_sub_frame_; ++sample_in_sub_frame) {
@ -95,7 +99,7 @@ std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(
// Dump data for debug.
RTC_DCHECK(apm_data_dumper_);
const auto channel = float_frame.channel(0);
const auto channel = float_frame[0];
apm_data_dumper_->DumpRaw("agc2_level_estimator_samples",
samples_in_sub_frame_,
&channel[sub_frame * samples_in_sub_frame_]);
@ -106,9 +110,9 @@ std::array<float, kSubFramesInFrame> FixedDigitalLevelEstimator::ComputeLevel(
return envelope;
}
void FixedDigitalLevelEstimator::SetSampleRate(int sample_rate_hz) {
samples_in_frame_ =
rtc::CheckedDivExact(sample_rate_hz * kFrameDurationMs, 1000);
void FixedDigitalLevelEstimator::SetSamplesPerChannel(
size_t samples_per_channel) {
samples_in_frame_ = static_cast<int>(samples_per_channel);
samples_in_sub_frame_ =
rtc::CheckedDivExact(samples_in_frame_, kSubFramesInFrame);
CheckParameterCombination();

modules/audio_processing/agc2/fixed_digital_level_estimator.h

@ -25,12 +25,16 @@ class ApmDataDumper;
// filtering.
class FixedDigitalLevelEstimator {
public:
// Sample rates are allowed if the number of samples in a frame
// (sample_rate_hz * kFrameDurationMs / 1000) is divisible by
// `samples_per_channel` is expected to be derived from this formula:
// sample_rate_hz * kFrameDurationMs / 1000
// or, for a 10ms duration:
// sample_rate_hz / 100
// I.e. the number of samples for 10ms of the given sample rate. The
// expectation is that samples per channel is divisible by
// kSubFramesInFrame. For kFrameDurationMs=10 and
// kSubFramesInFrame=20, this means that sample_rate_hz has to be
// divisible by 2000.
FixedDigitalLevelEstimator(int sample_rate_hz,
// kSubFramesInFrame=20, this means that the original sample rate has to be
// divisible by 2000 and therefore `samples_per_channel` by 20.
FixedDigitalLevelEstimator(size_t samples_per_channel,
ApmDataDumper* apm_data_dumper);
FixedDigitalLevelEstimator(const FixedDigitalLevelEstimator&) = delete;
@ -42,11 +46,11 @@ class FixedDigitalLevelEstimator {
// ms of audio produces a level estimate in the same scale. The
// level estimate contains kSubFramesInFrame values.
std::array<float, kSubFramesInFrame> ComputeLevel(
const AudioFrameView<const float>& float_frame);
DeinterleavedView<const float> float_frame);
// Rate may be changed at any time (but not concurrently) from the
// value passed to the constructor. The class is not thread safe.
void SetSampleRate(int sample_rate_hz);
void SetSamplesPerChannel(size_t samples_per_channel);
// Resets the level estimator internal state.
void Reset();
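
A worked instance of the constraint described above (`apm_data_dumper` is
assumed to exist):

  // 10 ms at 48 kHz: samples_per_channel = 48000 / 100 = 480. Since 480 is
  // divisible by kSubFramesInFrame (20), each sub-frame spans 24 samples;
  // equivalently, 48000 is divisible by 2000.
  webrtc::FixedDigitalLevelEstimator level_estimator(
      /*samples_per_channel=*/480, &apm_data_dumper);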

modules/audio_processing/agc2/gain_applier.cc

@ -10,7 +10,7 @@
#include "modules/audio_processing/agc2/gain_applier.h"
#include "api/array_view.h"
#include "api/audio/audio_view.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "rtc_base/numerics/safe_minmax.h"
@ -24,9 +24,9 @@ bool GainCloseToOne(float gain_factor) {
gain_factor <= 1.f + 1.f / kMaxFloatS16Value;
}
void ClipSignal(AudioFrameView<float> signal) {
for (int k = 0; k < signal.num_channels(); ++k) {
rtc::ArrayView<float> channel_view = signal.channel(k);
void ClipSignal(DeinterleavedView<float> signal) {
for (size_t k = 0; k < signal.num_channels(); ++k) {
MonoView<float> channel_view = signal[k];
for (auto& sample : channel_view) {
sample = rtc::SafeClamp(sample, kMinFloatS16Value, kMaxFloatS16Value);
}
@ -36,7 +36,7 @@ void ClipSignal(AudioFrameView<float> signal) {
void ApplyGainWithRamping(float last_gain_linear,
float gain_at_end_of_frame_linear,
float inverse_samples_per_channel,
AudioFrameView<float> float_frame) {
DeinterleavedView<float> float_frame) {
// Do not modify the signal.
if (last_gain_linear == gain_at_end_of_frame_linear &&
GainCloseToOne(gain_at_end_of_frame_linear)) {
@ -45,8 +45,8 @@ void ApplyGainWithRamping(float last_gain_linear,
// Gain is constant and different from 1.
if (last_gain_linear == gain_at_end_of_frame_linear) {
for (int k = 0; k < float_frame.num_channels(); ++k) {
rtc::ArrayView<float> channel_view = float_frame.channel(k);
for (size_t k = 0; k < float_frame.num_channels(); ++k) {
MonoView<float> channel_view = float_frame[k];
for (auto& sample : channel_view) {
sample *= gain_at_end_of_frame_linear;
}
@ -57,12 +57,12 @@ void ApplyGainWithRamping(float last_gain_linear,
// The gain changes. We have to change slowly to avoid discontinuities.
const float increment = (gain_at_end_of_frame_linear - last_gain_linear) *
inverse_samples_per_channel;
float gain = last_gain_linear;
for (int i = 0; i < float_frame.samples_per_channel(); ++i) {
for (int ch = 0; ch < float_frame.num_channels(); ++ch) {
float_frame.channel(ch)[i] *= gain;
for (size_t ch = 0; ch < float_frame.num_channels(); ++ch) {
float gain = last_gain_linear;
for (float& sample : float_frame[ch]) {
sample *= gain;
gain += increment;
}
gain += increment;
}
}
@ -73,7 +73,7 @@ GainApplier::GainApplier(bool hard_clip_samples, float initial_gain_factor)
last_gain_factor_(initial_gain_factor),
current_gain_factor_(initial_gain_factor) {}
void GainApplier::ApplyGain(AudioFrameView<float> signal) {
void GainApplier::ApplyGain(DeinterleavedView<float> signal) {
if (static_cast<int>(signal.samples_per_channel()) != samples_per_channel_) {
Initialize(signal.samples_per_channel());
}

modules/audio_processing/agc2/gain_applier.h

@ -13,6 +13,7 @@
#include <stddef.h>
#include "api/audio/audio_view.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
@ -20,10 +21,15 @@ class GainApplier {
public:
GainApplier(bool hard_clip_samples, float initial_gain_factor);
void ApplyGain(AudioFrameView<float> signal);
void ApplyGain(DeinterleavedView<float> signal);
void SetGainFactor(float gain_factor);
float GetGainFactor() const { return current_gain_factor_; }
[[deprecated("Use DeinterleavedView<> version")]] void ApplyGain(
AudioFrameView<float> signal) {
ApplyGain(signal.view());
}
private:
void Initialize(int samples_per_channel);
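
A sketch of a migrated call site (`buffer` and its dimensions are assumed);
the deprecated AudioFrameView overload above merely forwards through view():

  webrtc::GainApplier applier(/*hard_clip_samples=*/true,
                              /*initial_gain_factor=*/1.0f);
  applier.SetGainFactor(0.5f);  // Target gain for the end of the next frame.
  webrtc::DeinterleavedView<float> signal(buffer.data(), kSamplesPerChannel,
                                          kNumChannels);
  applier.ApplyGain(signal);  // Ramps from the previous gain towards 0.5.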

modules/audio_processing/agc2/input_volume_controller.cc

@ -173,7 +173,7 @@ void MonoInputVolumeController::Initialize() {
// previous update and the ratio of non-silence frames (i.e., frames with a
// `speech_probability` higher than `speech_probability_threshold_`) is at least
// `speech_ratio_threshold_`.
void MonoInputVolumeController::Process(absl::optional<int> rms_error_db,
void MonoInputVolumeController::Process(std::optional<int> rms_error_db,
float speech_probability) {
if (check_volume_on_next_process_) {
check_volume_on_next_process_ = false;
@ -404,7 +404,7 @@ void InputVolumeController::Initialize() {
clipping_rate_log_ = 0.0f;
clipping_rate_log_counter_ = 0;
applied_input_volume_ = absl::nullopt;
applied_input_volume_ = std::nullopt;
}
void InputVolumeController::AnalyzeInputAudio(int applied_input_volume,
@ -498,13 +498,13 @@ void InputVolumeController::AnalyzeInputAudio(int applied_input_volume,
AggregateChannelLevels();
}
absl::optional<int> InputVolumeController::RecommendInputVolume(
std::optional<int> InputVolumeController::RecommendInputVolume(
float speech_probability,
absl::optional<float> speech_level_dbfs) {
std::optional<float> speech_level_dbfs) {
// Only process if applied input volume is set.
if (!applied_input_volume_.has_value()) {
RTC_LOG(LS_ERROR) << "[AGC2] Applied input volume not set.";
return absl::nullopt;
return std::nullopt;
}
AggregateChannelLevels();
@ -514,7 +514,7 @@ absl::optional<int> InputVolumeController::RecommendInputVolume(
return applied_input_volume_;
}
absl::optional<int> rms_error_db;
std::optional<int> rms_error_db;
if (speech_level_dbfs.has_value()) {
// Compute the error for all frames (both speech and non-speech frames).
rms_error_db = GetSpeechLevelRmsErrorDb(
@ -533,7 +533,7 @@ absl::optional<int> InputVolumeController::RecommendInputVolume(
recommended_input_volume_);
}
applied_input_volume_ = absl::nullopt;
applied_input_volume_ = std::nullopt;
return recommended_input_volume();
}

modules/audio_processing/agc2/input_volume_controller.h

@ -12,13 +12,13 @@
#define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_CONTROLLER_H_
#include <memory>
#include <optional>
#include <vector>
#include "absl/types/optional.h"
#include "api/array_view.h"
#include "api/audio/audio_processing.h"
#include "modules/audio_processing/agc2/clipping_predictor.h"
#include "modules/audio_processing/audio_buffer.h"
#include "modules/audio_processing/include/audio_processing.h"
#include "rtc_base/gtest_prod_util.h"
namespace webrtc {
@ -50,7 +50,7 @@ class InputVolumeController final {
// Limited to values higher than 0.
int clipped_wait_frames = 300;
// Enables clipping prediction functionality.
bool enable_clipping_predictor = false;
bool enable_clipping_predictor = true;
// Speech level target range (dBFS). If the speech level is in the range
// [`target_range_min_dbfs`, `target_range_max_dbfs`], no input volume
// adjustments are done based on the speech level. For speech levels below
@ -95,9 +95,9 @@ class InputVolumeController final {
// suppression are applied. Returns a non-empty input volume recommendation if
// available. If `capture_output_used_` is true, returns the applied input
// volume.
absl::optional<int> RecommendInputVolume(
std::optional<int> RecommendInputVolume(
float speech_probability,
absl::optional<float> speech_level_dbfs);
std::optional<float> speech_level_dbfs);
// Stores whether the capture output will be used or not. Call when the
// capture stream output has been flagged to be used/not-used. If unused, the
@ -155,7 +155,7 @@ class InputVolumeController final {
int recommended_input_volume_ = 0;
// Applied input volume. After `SetAppliedInputVolume()` is called it holds
// the current applied volume.
absl::optional<int> applied_input_volume_;
std::optional<int> applied_input_volume_;
bool capture_output_used_;
@ -213,7 +213,7 @@ class MonoInputVolumeController {
// result of `HandleClipping()` and on `rms_error_dbfs`. Updates are only
// allowed for active speech segments and when `rms_error_dbfs` is not empty.
// Must be called after `HandleClipping()`.
void Process(absl::optional<int> rms_error_dbfs, float speech_probability);
void Process(std::optional<int> rms_error_dbfs, float speech_probability);
// Returns the recommended input volume. Must be called after `Process()`.
int recommended_analog_level() const { return recommended_input_volume_; }
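
A hedged sketch of the per-frame sequence implied by the comments above (the
second AnalyzeInputAudio() argument is assumed to be an AudioBuffer):

  controller.AnalyzeInputAudio(applied_volume, audio_buffer);
  std::optional<int> recommended = controller.RecommendInputVolume(
      /*speech_probability=*/0.9f,
      /*speech_level_dbfs=*/std::optional<float>(-23.0f));
  if (recommended.has_value()) {
    applied_volume = *recommended;  // Feed back to the capture device.
  }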

modules/audio_processing/agc2/input_volume_stats_reporter.h

@ -11,7 +11,8 @@
#ifndef MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_
#define MODULES_AUDIO_PROCESSING_AGC2_INPUT_VOLUME_STATS_REPORTER_H_
#include "absl/types/optional.h"
#include <optional>
#include "rtc_base/gtest_prod_util.h"
#include "system_wrappers/include/metrics.h"
@ -83,7 +84,7 @@ class InputVolumeStatsReporter {
const bool cannot_log_stats_;
int log_volume_update_stats_counter_ = 0;
absl::optional<int> previous_input_volume_ = absl::nullopt;
std::optional<int> previous_input_volume_ = std::nullopt;
};
// Updates the histogram that keeps track of recommended input volume changes

modules/audio_processing/agc2/limiter.cc

@ -46,22 +46,20 @@ void InterpolateFirstSubframe(float last_factor,
void ComputePerSampleSubframeFactors(
const std::array<float, kSubFramesInFrame + 1>& scaling_factors,
int samples_per_channel,
rtc::ArrayView<float> per_sample_scaling_factors) {
const int num_subframes = scaling_factors.size() - 1;
const int subframe_size =
rtc::CheckedDivExact(samples_per_channel, num_subframes);
MonoView<float> per_sample_scaling_factors) {
const size_t num_subframes = scaling_factors.size() - 1;
const int subframe_size = rtc::CheckedDivExact(
SamplesPerChannel(per_sample_scaling_factors), num_subframes);
// Handle first sub-frame differently in case of attack.
const bool is_attack = scaling_factors[0] > scaling_factors[1];
if (is_attack) {
InterpolateFirstSubframe(
scaling_factors[0], scaling_factors[1],
rtc::ArrayView<float>(
per_sample_scaling_factors.subview(0, subframe_size)));
per_sample_scaling_factors.subview(0, subframe_size));
}
for (int i = is_attack ? 1 : 0; i < num_subframes; ++i) {
for (size_t i = is_attack ? 1 : 0; i < num_subframes; ++i) {
const int subframe_start = i * subframe_size;
const float scaling_start = scaling_factors[i];
const float scaling_end = scaling_factors[i + 1];
@ -73,39 +71,36 @@ void ComputePerSampleSubframeFactors(
}
}
void ScaleSamples(rtc::ArrayView<const float> per_sample_scaling_factors,
AudioFrameView<float> signal) {
void ScaleSamples(MonoView<const float> per_sample_scaling_factors,
DeinterleavedView<float> signal) {
const int samples_per_channel = signal.samples_per_channel();
RTC_DCHECK_EQ(samples_per_channel, per_sample_scaling_factors.size());
for (int i = 0; i < signal.num_channels(); ++i) {
rtc::ArrayView<float> channel = signal.channel(i);
RTC_DCHECK_EQ(samples_per_channel,
SamplesPerChannel(per_sample_scaling_factors));
for (size_t i = 0; i < signal.num_channels(); ++i) {
MonoView<float> channel = signal[i];
for (int j = 0; j < samples_per_channel; ++j) {
channel[j] = rtc::SafeClamp(channel[j] * per_sample_scaling_factors[j],
kMinFloatS16Value, kMaxFloatS16Value);
}
}
}
void CheckLimiterSampleRate(int sample_rate_hz) {
// Check that per_sample_scaling_factors_ is large enough.
RTC_DCHECK_LE(sample_rate_hz,
kMaximalNumberOfSamplesPerChannel * 1000 / kFrameDurationMs);
}
} // namespace
Limiter::Limiter(int sample_rate_hz,
ApmDataDumper* apm_data_dumper,
Limiter::Limiter(ApmDataDumper* apm_data_dumper,
size_t samples_per_channel,
absl::string_view histogram_name)
: interp_gain_curve_(apm_data_dumper, histogram_name),
level_estimator_(sample_rate_hz, apm_data_dumper),
level_estimator_(samples_per_channel, apm_data_dumper),
apm_data_dumper_(apm_data_dumper) {
CheckLimiterSampleRate(sample_rate_hz);
RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel);
}
Limiter::~Limiter() = default;
void Limiter::Process(AudioFrameView<float> signal) {
void Limiter::Process(DeinterleavedView<float> signal) {
RTC_DCHECK_LE(signal.samples_per_channel(),
kMaximalNumberOfSamplesPerChannel);
const std::array<float, kSubFramesInFrame> level_estimate =
level_estimator_.ComputeLevel(signal);
@ -116,13 +111,9 @@ void Limiter::Process(AudioFrameView<float> signal) {
return interp_gain_curve_.LookUpGainToApply(x);
});
const int samples_per_channel = signal.samples_per_channel();
RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel);
auto per_sample_scaling_factors = rtc::ArrayView<float>(
&per_sample_scaling_factors_[0], samples_per_channel);
ComputePerSampleSubframeFactors(scaling_factors_, samples_per_channel,
per_sample_scaling_factors);
MonoView<float> per_sample_scaling_factors(&per_sample_scaling_factors_[0],
signal.samples_per_channel());
ComputePerSampleSubframeFactors(scaling_factors_, per_sample_scaling_factors);
ScaleSamples(per_sample_scaling_factors, signal);
last_scaling_factor_ = scaling_factors_.back();
@ -139,9 +130,9 @@ InterpolatedGainCurve::Stats Limiter::GetGainCurveStats() const {
return interp_gain_curve_.get_stats();
}
void Limiter::SetSampleRate(int sample_rate_hz) {
CheckLimiterSampleRate(sample_rate_hz);
level_estimator_.SetSampleRate(sample_rate_hz);
void Limiter::SetSamplesPerChannel(size_t samples_per_channel) {
RTC_DCHECK_LE(samples_per_channel, kMaximalNumberOfSamplesPerChannel);
level_estimator_.SetSamplesPerChannel(samples_per_channel);
}
void Limiter::Reset() {

modules/audio_processing/agc2/limiter.h

@ -14,6 +14,7 @@
#include <vector>
#include "absl/strings/string_view.h"
#include "api/audio/audio_frame.h"
#include "modules/audio_processing/agc2/fixed_digital_level_estimator.h"
#include "modules/audio_processing/agc2/interpolated_gain_curve.h"
#include "modules/audio_processing/include/audio_frame_view.h"
@ -23,23 +24,25 @@ class ApmDataDumper;
class Limiter {
public:
Limiter(int sample_rate_hz,
ApmDataDumper* apm_data_dumper,
// See `SetSamplesPerChannel()` for valid values for `samples_per_channel`.
Limiter(ApmDataDumper* apm_data_dumper,
size_t samples_per_channel,
absl::string_view histogram_name_prefix);
Limiter(const Limiter& limiter) = delete;
Limiter& operator=(const Limiter& limiter) = delete;
~Limiter();
// Applies limiter and hard-clipping to `signal`.
void Process(AudioFrameView<float> signal);
void Process(DeinterleavedView<float> signal);
InterpolatedGainCurve::Stats GetGainCurveStats() const;
// Supported rates must be
// * supported by FixedDigitalLevelEstimator
// * below kMaximalNumberOfSamplesPerChannel*1000/kFrameDurationMs
// so that samples_per_channel fit in the
// per_sample_scaling_factors_ array.
void SetSampleRate(int sample_rate_hz);
// Supported values must be
// * Supported by FixedDigitalLevelEstimator
// * Below or equal to kMaximalNumberOfSamplesPerChannel so that samples
// fit in the per_sample_scaling_factors_ array.
void SetSamplesPerChannel(size_t samples_per_channel);
// Resets the internal state.
void Reset();
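
A minimal sketch of the new constructor order (the data dumper and `signal`,
a DeinterleavedView<float>, are assumed; 480 samples is 10 ms at 48 kHz):

  webrtc::Limiter limiter(&apm_data_dumper, /*samples_per_channel=*/480,
                          /*histogram_name_prefix=*/"Test");
  limiter.Process(signal);
  limiter.SetSamplesPerChannel(160);  // E.g. after switching to 16 kHz frames.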

modules/audio_processing/agc2/noise_level_estimator.cc

@ -16,7 +16,7 @@
#include <cmath>
#include <numeric>
#include "api/array_view.h"
#include "api/audio/audio_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
#include "rtc_base/checks.h"
@ -25,11 +25,12 @@ namespace {
constexpr int kFramesPerSecond = 100;
float FrameEnergy(const AudioFrameView<const float>& audio) {
float FrameEnergy(DeinterleavedView<const float> audio) {
float energy = 0.0f;
for (int k = 0; k < audio.num_channels(); ++k) {
for (size_t k = 0; k < audio.num_channels(); ++k) {
MonoView<const float> ch = audio[k];
float channel_energy =
std::accumulate(audio.channel(k).begin(), audio.channel(k).end(), 0.0f,
std::accumulate(ch.begin(), ch.end(), 0.0f,
[](float a, float b) -> float { return a + b * b; });
energy = std::max(channel_energy, energy);
}
@ -81,7 +82,7 @@ class NoiseFloorEstimator : public NoiseLevelEstimator {
NoiseFloorEstimator& operator=(const NoiseFloorEstimator&) = delete;
~NoiseFloorEstimator() = default;
float Analyze(const AudioFrameView<const float>& frame) override {
float Analyze(DeinterleavedView<const float> frame) override {
// Detect sample rate changes.
const int sample_rate_hz =
static_cast<int>(frame.samples_per_channel() * kFramesPerSecond);

modules/audio_processing/agc2/noise_level_estimator.h

@ -13,7 +13,7 @@
#include <memory>
#include "modules/audio_processing/include/audio_frame_view.h"
#include "api/audio/audio_view.h"
namespace webrtc {
class ApmDataDumper;
@ -24,7 +24,7 @@ class NoiseLevelEstimator {
virtual ~NoiseLevelEstimator() = default;
// Analyzes a 10 ms `frame`, updates the noise level estimation and returns
// the value for the latter in dBFS.
virtual float Analyze(const AudioFrameView<const float>& frame) = 0;
virtual float Analyze(DeinterleavedView<const float> frame) = 0;
};
// Creates a noise level estimator based on noise floor detection.
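
Usage stays interface-driven; a sketch, assuming the factory declared below
this comment is CreateNoiseFloorEstimator(ApmDataDumper*):

  std::unique_ptr<webrtc::NoiseLevelEstimator> estimator =
      webrtc::CreateNoiseFloorEstimator(&apm_data_dumper);
  // `frame` is a 10 ms DeinterleavedView<const float>.
  float noise_dbfs = estimator->Analyze(frame);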

modules/audio_processing/agc2/rnn_vad/BUILD.gn

@ -100,12 +100,12 @@ rtc_source_set("rnn_vad_layers") {
"../../../../api:function_view",
"../../../../rtc_base:checks",
"../../../../rtc_base:safe_conversions",
"//third_party/abseil-cpp/absl/strings:string_view",
"//third_party/rnnoise:rnn_vad",
]
if (current_cpu == "x86" || current_cpu == "x64") {
deps += [ ":vector_math_avx2" ]
}
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
}
rtc_source_set("vector_math") {
@ -229,8 +229,8 @@ if (rtc_include_tests) {
"../../../../rtc_base:safe_compare",
"../../../../test:fileutils",
"../../../../test:test_support",
"//third_party/abseil-cpp/absl/strings:string_view",
]
absl_deps = [ "//third_party/abseil-cpp/absl/strings" ]
}
unittest_resources = [
@ -306,7 +306,6 @@ if (rtc_include_tests) {
if (current_cpu == "x86" || current_cpu == "x64") {
deps += [ ":vector_math_avx2" ]
}
absl_deps = [ "//third_party/abseil-cpp/absl/memory" ]
data = unittest_resources
if (is_ios) {
deps += [ ":unittests_bundle_data" ]

modules/audio_processing/agc2/saturation_protector_buffer.cc

@ -62,9 +62,9 @@ void SaturationProtectorBuffer::PushBack(float v) {
}
}
absl::optional<float> SaturationProtectorBuffer::Front() const {
std::optional<float> SaturationProtectorBuffer::Front() const {
if (size_ == 0) {
return absl::nullopt;
return std::nullopt;
}
RTC_DCHECK_LT(FrontIndex(), buffer_.size());
return buffer_[FrontIndex()];

modules/audio_processing/agc2/saturation_protector_buffer.h

@ -12,8 +12,8 @@
#define MODULES_AUDIO_PROCESSING_AGC2_SATURATION_PROTECTOR_BUFFER_H_
#include <array>
#include <optional>
#include "absl/types/optional.h"
#include "modules/audio_processing/agc2/agc2_common.h"
namespace webrtc {
@ -43,7 +43,7 @@ class SaturationProtectorBuffer {
// Returns the oldest item in the buffer. Returns an empty value if the
// buffer is empty.
absl::optional<float> Front() const;
std::optional<float> Front() const;
private:
int FrontIndex() const;

modules/audio_processing/agc2/speech_level_estimator.h

@ -15,8 +15,8 @@
#include <type_traits>
#include "api/audio/audio_processing.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/include/audio_processing.h"
namespace webrtc {
class ApmDataDumper;

modules/audio_processing/agc2/vad_wrapper.cc

@ -13,7 +13,6 @@
#include <array>
#include <utility>
#include "api/array_view.h"
#include "common_audio/resampler/include/push_resampler.h"
#include "modules/audio_processing/agc2/agc2_common.h"
#include "modules/audio_processing/agc2/rnn_vad/common.h"
@ -36,7 +35,7 @@ class MonoVadImpl : public VoiceActivityDetectorWrapper::MonoVad {
int SampleRateHz() const override { return rnn_vad::kSampleRate24kHz; }
void Reset() override { rnn_vad_.Reset(); }
float Analyze(rtc::ArrayView<const float> frame) override {
float Analyze(MonoView<const float> frame) override {
RTC_DCHECK_EQ(frame.size(), rnn_vad::kFrameSize10ms24kHz);
std::array<float, rnn_vad::kFeatureVectorSize> feature_vector;
const bool is_silence = features_extractor_.CheckSilenceComputeFeatures(
@ -73,39 +72,33 @@ VoiceActivityDetectorWrapper::VoiceActivityDetectorWrapper(
int sample_rate_hz)
: vad_reset_period_frames_(
rtc::CheckedDivExact(vad_reset_period_ms, kFrameDurationMs)),
frame_size_(rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond)),
time_to_vad_reset_(vad_reset_period_frames_),
vad_(std::move(vad)) {
RTC_DCHECK(vad_);
vad_(std::move(vad)),
resampled_buffer_(
rtc::CheckedDivExact(vad_->SampleRateHz(), kNumFramesPerSecond)),
resampler_(frame_size_,
resampled_buffer_.size(),
/*num_channels=*/1) {
RTC_DCHECK_GT(vad_reset_period_frames_, 1);
resampled_buffer_.resize(
rtc::CheckedDivExact(vad_->SampleRateHz(), kNumFramesPerSecond));
Initialize(sample_rate_hz);
vad_->Reset();
}
VoiceActivityDetectorWrapper::~VoiceActivityDetectorWrapper() = default;
void VoiceActivityDetectorWrapper::Initialize(int sample_rate_hz) {
RTC_DCHECK_GT(sample_rate_hz, 0);
frame_size_ = rtc::CheckedDivExact(sample_rate_hz, kNumFramesPerSecond);
int status =
resampler_.InitializeIfNeeded(sample_rate_hz, vad_->SampleRateHz(),
/*num_channels=*/1);
constexpr int kStatusOk = 0;
RTC_DCHECK_EQ(status, kStatusOk);
vad_->Reset();
}
float VoiceActivityDetectorWrapper::Analyze(AudioFrameView<const float> frame) {
float VoiceActivityDetectorWrapper::Analyze(
DeinterleavedView<const float> frame) {
// Periodically reset the VAD.
time_to_vad_reset_--;
if (time_to_vad_reset_ <= 0) {
vad_->Reset();
time_to_vad_reset_ = vad_reset_period_frames_;
}
// Resample the first channel of `frame`.
RTC_DCHECK_EQ(frame.samples_per_channel(), frame_size_);
resampler_.Resample(frame.channel(0).data(), frame_size_,
resampled_buffer_.data(), resampled_buffer_.size());
MonoView<float> dst(resampled_buffer_.data(), resampled_buffer_.size());
resampler_.Resample(frame[0], dst);
return vad_->Analyze(resampled_buffer_);
}

modules/audio_processing/agc2/vad_wrapper.h

@ -14,10 +14,9 @@
#include <memory>
#include <vector>
#include "api/array_view.h"
#include "api/audio/audio_view.h"
#include "common_audio/resampler/include/push_resampler.h"
#include "modules/audio_processing/agc2/cpu_features.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
@ -37,7 +36,7 @@ class VoiceActivityDetectorWrapper {
// Resets the internal state.
virtual void Reset() = 0;
// Analyzes an audio frame and returns the speech probability.
virtual float Analyze(rtc::ArrayView<const float> frame) = 0;
virtual float Analyze(MonoView<const float> frame) = 0;
};
// Ctor. Uses `cpu_features` to instantiate the default VAD.
@ -60,21 +59,18 @@ class VoiceActivityDetectorWrapper {
delete;
~VoiceActivityDetectorWrapper();
// Initializes the VAD wrapper.
void Initialize(int sample_rate_hz);
// Analyzes the first channel of `frame` and returns the speech probability.
// `frame` must be a 10 ms frame with the sample rate specified in the last
// `Initialize()` call.
float Analyze(AudioFrameView<const float> frame);
float Analyze(DeinterleavedView<const float> frame);
private:
const int vad_reset_period_frames_;
int frame_size_;
const int frame_size_;
int time_to_vad_reset_;
PushResampler<float> resampler_;
std::unique_ptr<MonoVad> vad_;
std::vector<float> resampled_buffer_;
PushResampler<float> resampler_;
};
} // namespace webrtc
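
With Initialize() gone and frame_size_ now const, the sample rate is fixed at
construction; changing it means creating a new wrapper. A sketch, assuming the
constructor overload that takes CPU features:

  webrtc::AvailableCpuFeatures cpu_features =
      webrtc::GetAvailableCpuFeatures();
  webrtc::VoiceActivityDetectorWrapper vad(cpu_features,
                                           /*sample_rate_hz=*/48000);
  // `frame` is a 10 ms DeinterleavedView<const float>; only channel 0 is
  // analyzed, after resampling to the internal VAD rate.
  float speech_probability = vad.Analyze(frame);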

modules/audio_processing/agc2/vector_float_frame.cc

@ -12,28 +12,20 @@
namespace webrtc {
namespace {
std::vector<float*> ConstructChannelPointers(
std::vector<std::vector<float>>* x) {
std::vector<float*> channel_ptrs;
for (auto& v : *x) {
channel_ptrs.push_back(v.data());
}
return channel_ptrs;
}
} // namespace
VectorFloatFrame::VectorFloatFrame(int num_channels,
int samples_per_channel,
float start_value)
: channels_(num_channels,
std::vector<float>(samples_per_channel, start_value)),
channel_ptrs_(ConstructChannelPointers(&channels_)),
float_frame_view_(channel_ptrs_.data(),
channels_.size(),
samples_per_channel) {}
: channels_(num_channels * samples_per_channel, start_value),
view_(channels_.data(), samples_per_channel, num_channels) {}
VectorFloatFrame::~VectorFloatFrame() = default;
AudioFrameView<float> VectorFloatFrame::float_frame_view() {
return AudioFrameView<float>(view_);
}
AudioFrameView<const float> VectorFloatFrame::float_frame_view() const {
return AudioFrameView<const float>(view_);
}
} // namespace webrtc

modules/audio_processing/agc2/vector_float_frame.h

@ -13,6 +13,7 @@
#include <vector>
#include "api/audio/audio_view.h"
#include "modules/audio_processing/include/audio_frame_view.h"
namespace webrtc {
@ -24,17 +25,17 @@ class VectorFloatFrame {
VectorFloatFrame(int num_channels,
int samples_per_channel,
float start_value);
const AudioFrameView<float>& float_frame_view() { return float_frame_view_; }
AudioFrameView<const float> float_frame_view() const {
return float_frame_view_;
}
~VectorFloatFrame();
AudioFrameView<float> float_frame_view();
AudioFrameView<const float> float_frame_view() const;
DeinterleavedView<float> view() { return view_; }
DeinterleavedView<const float> view() const { return view_; }
private:
std::vector<std::vector<float>> channels_;
std::vector<float*> channel_ptrs_;
AudioFrameView<float> float_frame_view_;
std::vector<float> channels_;
DeinterleavedView<float> view_;
};
} // namespace webrtc
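
Hypothetical test usage of the reworked helper, which now backs both the
legacy AudioFrameView and the new DeinterleavedView with one flat buffer
(`gain_applier` is assumed):

  webrtc::VectorFloatFrame audio(/*num_channels=*/2,
                                 /*samples_per_channel=*/480,
                                 /*start_value=*/0.0f);
  gain_applier.ApplyGain(audio.view());  // New-style API under test.
  float first_sample = audio.view()[0][0];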