Bump to WebRTC M120 release

Some APIs were deprecated and removed -- ExperimentalAgc and ExperimentalNs are gone.
We're continuing to carry iSAC even though it's gone upstream, but maybe
we'll want to drop that soon.
This commit is contained in:
Arun Raghavan
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions

View File

@ -11,8 +11,6 @@
#include "api/audio/audio_frame.h"
#include <string.h>
#include <algorithm>
#include <utility>
#include "rtc_base/checks.h"
#include "rtc_base/time_utils.h"
@ -24,35 +22,13 @@ AudioFrame::AudioFrame() {
static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
}
// Exchanges the complete state of two AudioFrame objects: every metadata
// member listed below, the sample buffer `data_`, the mute flag and the
// absolute capture timestamp.
void swap(AudioFrame& a, AudioFrame& b) {
// Bring std::swap into scope so the unqualified calls below can resolve to
// it or to a more specific overload for the member's type.
using std::swap;
swap(a.timestamp_, b.timestamp_);
swap(a.elapsed_time_ms_, b.elapsed_time_ms_);
swap(a.ntp_time_ms_, b.ntp_time_ms_);
swap(a.samples_per_channel_, b.samples_per_channel_);
swap(a.sample_rate_hz_, b.sample_rate_hz_);
swap(a.num_channels_, b.num_channels_);
swap(a.channel_layout_, b.channel_layout_);
swap(a.speech_type_, b.speech_type_);
swap(a.vad_activity_, b.vad_activity_);
swap(a.profile_timestamp_ms_, b.profile_timestamp_ms_);
swap(a.packet_infos_, b.packet_infos_);
// Total sample counts (samples per channel * channels). These are computed
// after the two count members were exchanged above, so `length_a` holds what
// was originally b's length and vice versa; only their maximum is used
// below, so the order does not affect the result.
const size_t length_a = a.samples_per_channel_ * a.num_channels_;
const size_t length_b = b.samples_per_channel_ * b.num_channels_;
// Both lengths are checked against AudioFrame::kMaxDataSizeSamples
// (presumably the capacity of `data_` in samples -- confirm in the header).
RTC_DCHECK_LE(length_a, AudioFrame::kMaxDataSizeSamples);
RTC_DCHECK_LE(length_b, AudioFrame::kMaxDataSizeSamples);
// Exchange only as many samples as the longer of the two frames holds,
// rather than the whole buffer.
std::swap_ranges(a.data_, a.data_ + std::max(length_a, length_b), b.data_);
swap(a.muted_, b.muted_);
swap(a.absolute_capture_timestamp_ms_, b.absolute_capture_timestamp_ms_);
}
// Resets the frame by delegating to ResetWithoutMuting() and then marking
// the frame as muted.
void AudioFrame::Reset() {
ResetWithoutMuting();
// Set after ResetWithoutMuting() so the frame always ends up muted.
muted_ = true;
}
void AudioFrame::ResetWithoutMuting() {
// TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
// TODO(wu): Zero is a valid value for `timestamp_`. We should initialize
// to an invalid value, or add a new member to indicate invalidity.
timestamp_ = 0;
elapsed_time_ms_ = -1;

View File

@ -14,11 +14,8 @@
#include <stddef.h>
#include <stdint.h>
#include <utility>
#include "api/audio/channel_layout.h"
#include "api/rtp_packet_infos.h"
#include "rtc_base/constructor_magic.h"
namespace webrtc {
@ -60,7 +57,8 @@ class AudioFrame {
AudioFrame();
friend void swap(AudioFrame& a, AudioFrame& b);
AudioFrame(const AudioFrame&) = delete;
AudioFrame& operator=(const AudioFrame&) = delete;
// Resets all members to their default state.
void Reset();
@ -139,7 +137,7 @@ class AudioFrame {
int64_t profile_timestamp_ms_ = 0;
// Information about packets used to assemble this audio frame. This is needed
// by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's
// by `SourceTracker` when the frame is delivered to the RTCRtpReceiver's
// MediaStreamTrack, in order to implement getContributingSources(). See:
// https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
//
@ -149,7 +147,7 @@ class AudioFrame {
// sync buffer is the small sample-holding buffer located after the audio
// decoder and before where samples are assembled into output frames.
//
// |RtpPacketInfos| may also be empty if the audio samples did not come from
// `RtpPacketInfos` may also be empty if the audio samples did not come from
// RTP packets. E.g. if the audio were locally generated by packet loss
// concealment, comfort noise generation, etc.
RtpPacketInfos packet_infos_;
@ -165,11 +163,9 @@ class AudioFrame {
// Absolute capture timestamp when this audio frame was originally captured.
// This is only valid for audio frames captured on this machine. The absolute
// capture timestamp of a received frame is found in |packet_infos_|.
// capture timestamp of a received frame is found in `packet_infos_`.
// This timestamp MUST be based on the same clock as rtc::TimeMillis().
absl::optional<int64_t> absolute_capture_timestamp_ms_;
RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};
} // namespace webrtc

View File

@ -275,7 +275,7 @@ const char* ChannelLayoutToString(ChannelLayout layout) {
case CHANNEL_LAYOUT_BITSTREAM:
return "BITSTREAM";
}
RTC_NOTREACHED() << "Invalid channel layout provided: " << layout;
RTC_DCHECK_NOTREACHED() << "Invalid channel layout provided: " << layout;
return "";
}

View File

@ -153,6 +153,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);
res = res & Limit(&c->filter.coarse_reset_hangover_blocks, 0, 250000);
res = res & Limit(&c->erle.min, 1.f, 100000.f);
res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
@ -165,6 +166,7 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);
res = res & Limit(&c->ep_strength.nearend_len, -1.0f, 1.0f);
res =
res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
@ -228,6 +230,12 @@ bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
res =
res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);
res = res & Limit(&c->suppressor.last_permanent_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_smoothing_band, 0, 64);
res = res & Limit(&c->suppressor.last_lf_band, 0, 63);
res = res &
Limit(&c->suppressor.first_hf_band, c->suppressor.last_lf_band + 1, 64);
res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
0.f, 1000000.f);
res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,

View File

@ -43,6 +43,7 @@ struct RTC_EXPORT EchoCanceller3Config {
size_t hysteresis_limit_blocks = 1;
size_t fixed_capture_delay_samples = 0;
float delay_estimate_smoothing = 0.7f;
float delay_estimate_smoothing_delay_found = 0.7f;
float delay_candidate_detection_threshold = 0.2f;
struct DelaySelectionThresholds {
int initial;
@ -58,6 +59,7 @@ struct RTC_EXPORT EchoCanceller3Config {
};
AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
bool detect_pre_echo = true;
} delay;
struct Filter {
@ -86,9 +88,11 @@ struct RTC_EXPORT EchoCanceller3Config {
size_t config_change_duration_blocks = 250;
float initial_state_seconds = 2.5f;
int coarse_reset_hangover_blocks = 25;
bool conservative_initial_phase = false;
bool enable_coarse_filter_output_usage = true;
bool use_linear_filter = true;
bool high_pass_filter_echo_reference = false;
bool export_linear_aec_output = false;
} filter;
@ -105,8 +109,11 @@ struct RTC_EXPORT EchoCanceller3Config {
struct EpStrength {
float default_gain = 1.f;
float default_len = 0.83f;
float nearend_len = 0.83f;
bool echo_can_saturate = true;
bool bounded_erl = false;
bool erle_onset_compensation_in_dominant_nearend = false;
bool use_conservative_tail_frequency_response = true;
} ep_strength;
struct EchoAudibility {
@ -190,6 +197,12 @@ struct RTC_EXPORT EchoCanceller3Config {
2.0f,
0.25f);
bool lf_smoothing_during_initial_phase = true;
int last_permanent_lf_smoothing_band = 0;
int last_lf_smoothing_band = 5;
int last_lf_band = 5;
int first_hf_band = 8;
struct DominantNearendDetection {
float enr_threshold = .25f;
float enr_exit_threshold = 10.f;
@ -197,6 +210,7 @@ struct RTC_EXPORT EchoCanceller3Config {
int hold_duration = 50;
int trigger_threshold = 12;
bool use_during_initial_phase = true;
bool use_unbounded_echo_spectrum = true;
} dominant_nearend_detection;
struct SubbandNearendDetection {
@ -221,7 +235,15 @@ struct RTC_EXPORT EchoCanceller3Config {
} high_bands_suppression;
float floor_first_increase = 0.00001f;
bool conservative_hf_suppression = false;
} suppressor;
struct MultiChannel {
bool detect_stereo_content = true;
float stereo_detection_threshold = 0.0f;
int stereo_detection_timeout_threshold_seconds = 300;
float stereo_detection_hysteresis_seconds = 2.0f;
} multi_channel;
};
} // namespace webrtc

View File

@ -48,6 +48,13 @@ class EchoControl {
// Provides an optional external estimate of the audio buffer delay.
virtual void SetAudioBufferDelay(int delay_ms) = 0;
// Specifies whether the capture output will be used. The purpose of this is
// to allow the echo controller to deactivate some of the processing when the
// resulting output is anyway not used, for instance when the endpoint is
// muted.
// TODO(b/177830919): Make pure virtual.
virtual void SetCaptureOutputUsage(bool capture_output_used) {}
// Returns whether the signal is altered.
virtual bool ActiveProcessing() const = 0;