Update to current webrtc library
This is from the upstream library commit id 3326535126e435f1ba647885ce43a8f0f3d317eb, corresponding to Chromium 88.0.4290.1.
webrtc/api/audio/audio_frame.cc (new file, 164 lines)
@@ -0,0 +1,164 @@
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "api/audio/audio_frame.h"

#include <string.h>
#include <algorithm>
#include <utility>

#include "rtc_base/checks.h"
#include "rtc_base/time_utils.h"

namespace webrtc {

AudioFrame::AudioFrame() {
  // Visual Studio doesn't like this in the class definition.
  static_assert(sizeof(data_) == kMaxDataSizeBytes, "kMaxDataSizeBytes");
}

void swap(AudioFrame& a, AudioFrame& b) {
  using std::swap;
  swap(a.timestamp_, b.timestamp_);
  swap(a.elapsed_time_ms_, b.elapsed_time_ms_);
  swap(a.ntp_time_ms_, b.ntp_time_ms_);
  swap(a.samples_per_channel_, b.samples_per_channel_);
  swap(a.sample_rate_hz_, b.sample_rate_hz_);
  swap(a.num_channels_, b.num_channels_);
  swap(a.channel_layout_, b.channel_layout_);
  swap(a.speech_type_, b.speech_type_);
  swap(a.vad_activity_, b.vad_activity_);
  swap(a.profile_timestamp_ms_, b.profile_timestamp_ms_);
  swap(a.packet_infos_, b.packet_infos_);
  const size_t length_a = a.samples_per_channel_ * a.num_channels_;
  const size_t length_b = b.samples_per_channel_ * b.num_channels_;
  RTC_DCHECK_LE(length_a, AudioFrame::kMaxDataSizeSamples);
  RTC_DCHECK_LE(length_b, AudioFrame::kMaxDataSizeSamples);
  std::swap_ranges(a.data_, a.data_ + std::max(length_a, length_b), b.data_);
  swap(a.muted_, b.muted_);
  swap(a.absolute_capture_timestamp_ms_, b.absolute_capture_timestamp_ms_);
}

void AudioFrame::Reset() {
  ResetWithoutMuting();
  muted_ = true;
}

void AudioFrame::ResetWithoutMuting() {
  // TODO(wu): Zero is a valid value for |timestamp_|. We should initialize
  // to an invalid value, or add a new member to indicate invalidity.
  timestamp_ = 0;
  elapsed_time_ms_ = -1;
  ntp_time_ms_ = -1;
  samples_per_channel_ = 0;
  sample_rate_hz_ = 0;
  num_channels_ = 0;
  channel_layout_ = CHANNEL_LAYOUT_NONE;
  speech_type_ = kUndefined;
  vad_activity_ = kVadUnknown;
  profile_timestamp_ms_ = 0;
  packet_infos_ = RtpPacketInfos();
  absolute_capture_timestamp_ms_ = absl::nullopt;
}

void AudioFrame::UpdateFrame(uint32_t timestamp,
                             const int16_t* data,
                             size_t samples_per_channel,
                             int sample_rate_hz,
                             SpeechType speech_type,
                             VADActivity vad_activity,
                             size_t num_channels) {
  timestamp_ = timestamp;
  samples_per_channel_ = samples_per_channel;
  sample_rate_hz_ = sample_rate_hz;
  speech_type_ = speech_type;
  vad_activity_ = vad_activity;
  num_channels_ = num_channels;
  channel_layout_ = GuessChannelLayout(num_channels);
  if (channel_layout_ != CHANNEL_LAYOUT_UNSUPPORTED) {
    RTC_DCHECK_EQ(num_channels, ChannelLayoutToChannelCount(channel_layout_));
  }

  const size_t length = samples_per_channel * num_channels;
  RTC_CHECK_LE(length, kMaxDataSizeSamples);
  if (data != nullptr) {
    memcpy(data_, data, sizeof(int16_t) * length);
    muted_ = false;
  } else {
    muted_ = true;
  }
}

void AudioFrame::CopyFrom(const AudioFrame& src) {
  if (this == &src)
    return;

  timestamp_ = src.timestamp_;
  elapsed_time_ms_ = src.elapsed_time_ms_;
  ntp_time_ms_ = src.ntp_time_ms_;
  packet_infos_ = src.packet_infos_;
  muted_ = src.muted();
  samples_per_channel_ = src.samples_per_channel_;
  sample_rate_hz_ = src.sample_rate_hz_;
  speech_type_ = src.speech_type_;
  vad_activity_ = src.vad_activity_;
  num_channels_ = src.num_channels_;
  channel_layout_ = src.channel_layout_;
  absolute_capture_timestamp_ms_ = src.absolute_capture_timestamp_ms();

  const size_t length = samples_per_channel_ * num_channels_;
  RTC_CHECK_LE(length, kMaxDataSizeSamples);
  if (!src.muted()) {
    memcpy(data_, src.data(), sizeof(int16_t) * length);
    muted_ = false;
  }
}

void AudioFrame::UpdateProfileTimeStamp() {
  profile_timestamp_ms_ = rtc::TimeMillis();
}

int64_t AudioFrame::ElapsedProfileTimeMs() const {
  if (profile_timestamp_ms_ == 0) {
    // Profiling has not been activated.
    return -1;
  }
  return rtc::TimeSince(profile_timestamp_ms_);
}

const int16_t* AudioFrame::data() const {
  return muted_ ? empty_data() : data_;
}

// TODO(henrik.lundin) Can we skip zeroing the buffer?
// See https://bugs.chromium.org/p/webrtc/issues/detail?id=5647.
int16_t* AudioFrame::mutable_data() {
  if (muted_) {
    memset(data_, 0, kMaxDataSizeBytes);
    muted_ = false;
  }
  return data_;
}

void AudioFrame::Mute() {
  muted_ = true;
}

bool AudioFrame::muted() const {
  return muted_;
}

// static
const int16_t* AudioFrame::empty_data() {
  static int16_t* null_data = new int16_t[kMaxDataSizeSamples]();
  return &null_data[0];
}

}  // namespace webrtc
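A brief usage sketch (not part of the upstream files) illustrating the mute semantics implemented above: data() serves a shared zeroed buffer while the frame is muted, and the first mutable_data() call zeroes the frame's own buffer and clears the muted flag. The checks restate that behavior and are illustrative only.

#include "api/audio/audio_frame.h"
#include "rtc_base/checks.h"

void MuteSemanticsSketch() {
  webrtc::AudioFrame frame;           // Frames start out muted.
  RTC_DCHECK(frame.muted());
  RTC_DCHECK_EQ(frame.data()[0], 0);  // Muted frames read from a zeroed buffer.

  int16_t* samples = frame.mutable_data();  // Zeroes data_ and unmutes.
  RTC_DCHECK(!frame.muted());
  samples[0] = 1000;  // Safe to write up to kMaxDataSizeSamples values.

  frame.Mute();                       // Reads go back to the zeroed buffer.
  RTC_DCHECK_EQ(frame.data()[0], 0);
}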
webrtc/api/audio/audio_frame.h (new file, 177 lines)
@@ -0,0 +1,177 @@
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_AUDIO_AUDIO_FRAME_H_
#define API_AUDIO_AUDIO_FRAME_H_

#include <stddef.h>
#include <stdint.h>

#include <utility>

#include "api/audio/channel_layout.h"
#include "api/rtp_packet_infos.h"
#include "rtc_base/constructor_magic.h"

namespace webrtc {

/* This class holds up to 120 ms of super-wideband (32 kHz) stereo audio. It
 * allows for adding and subtracting frames while keeping track of the resulting
 * states.
 *
 * Notes
 * - This is a de-facto api, not designed for external use. The AudioFrame class
 *   is in need of overhaul or even replacement, and anyone depending on it
 *   should be prepared for that.
 * - The total number of samples is samples_per_channel_ * num_channels_.
 * - Stereo data is interleaved starting with the left channel.
 */
class AudioFrame {
 public:
  // Using constexpr here causes linker errors unless the variable also has an
  // out-of-class definition, which is impractical in this header-only class.
  // (This makes no sense because it compiles as an enum value, which we most
  // certainly cannot take the address of, just fine.) C++17 introduces inline
  // variables which should allow us to switch to constexpr and keep this a
  // header-only class.
  enum : size_t {
    // Stereo, 32 kHz, 120 ms (2 * 32 * 120)
    // Stereo, 192 kHz, 20 ms (2 * 192 * 20)
    kMaxDataSizeSamples = 7680,
    kMaxDataSizeBytes = kMaxDataSizeSamples * sizeof(int16_t),
  };

  enum VADActivity { kVadActive = 0, kVadPassive = 1, kVadUnknown = 2 };
  enum SpeechType {
    kNormalSpeech = 0,
    kPLC = 1,
    kCNG = 2,
    kPLCCNG = 3,
    kCodecPLC = 5,
    kUndefined = 4
  };

  AudioFrame();

  friend void swap(AudioFrame& a, AudioFrame& b);

  // Resets all members to their default state.
  void Reset();
  // Same as Reset(), but leaves mute state unchanged. Muting a frame requires
  // the buffer to be zeroed on the next call to mutable_data(). Callers
  // intending to write to the buffer immediately after Reset() can instead use
  // ResetWithoutMuting() to skip this wasteful zeroing.
  void ResetWithoutMuting();

  void UpdateFrame(uint32_t timestamp,
                   const int16_t* data,
                   size_t samples_per_channel,
                   int sample_rate_hz,
                   SpeechType speech_type,
                   VADActivity vad_activity,
                   size_t num_channels = 1);

  void CopyFrom(const AudioFrame& src);

  // Sets a wall-time clock timestamp in milliseconds to be used for profiling
  // of time between two points in the audio chain.
  // Example:
  //   t0: UpdateProfileTimeStamp()
  //   t1: ElapsedProfileTimeMs() => t1 - t0 [msec]
  void UpdateProfileTimeStamp();
  // Returns the time difference between now and when UpdateProfileTimeStamp()
  // was last called. Returns -1 if UpdateProfileTimeStamp() has not yet been
  // called.
  int64_t ElapsedProfileTimeMs() const;

  // data() returns a zeroed static buffer if the frame is muted.
  // mutable_data() always returns a non-static buffer; the first call to
  // mutable_data() zeros the non-static buffer and marks the frame unmuted.
  const int16_t* data() const;
  int16_t* mutable_data();

  // Prefer to mute frames using AudioFrameOperations::Mute.
  void Mute();
  // Frame is muted by default.
  bool muted() const;

  size_t max_16bit_samples() const { return kMaxDataSizeSamples; }
  size_t samples_per_channel() const { return samples_per_channel_; }
  size_t num_channels() const { return num_channels_; }
  ChannelLayout channel_layout() const { return channel_layout_; }
  int sample_rate_hz() const { return sample_rate_hz_; }

  void set_absolute_capture_timestamp_ms(
      int64_t absolute_capture_time_stamp_ms) {
    absolute_capture_timestamp_ms_ = absolute_capture_time_stamp_ms;
  }

  absl::optional<int64_t> absolute_capture_timestamp_ms() const {
    return absolute_capture_timestamp_ms_;
  }

  // RTP timestamp of the first sample in the AudioFrame.
  uint32_t timestamp_ = 0;
  // Time since the first frame in milliseconds.
  // -1 represents an uninitialized value.
  int64_t elapsed_time_ms_ = -1;
  // NTP time of the estimated capture time in local timebase in milliseconds.
  // -1 represents an uninitialized value.
  int64_t ntp_time_ms_ = -1;
  size_t samples_per_channel_ = 0;
  int sample_rate_hz_ = 0;
  size_t num_channels_ = 0;
  ChannelLayout channel_layout_ = CHANNEL_LAYOUT_NONE;
  SpeechType speech_type_ = kUndefined;
  VADActivity vad_activity_ = kVadUnknown;
  // Monotonically increasing timestamp intended for profiling of audio frames.
  // Typically used for measuring elapsed time between two different points in
  // the audio path. No lock is used to save resources and we are thread safe
  // by design.
  // TODO(nisse@webrtc.org): consider using absl::optional.
  int64_t profile_timestamp_ms_ = 0;

  // Information about packets used to assemble this audio frame. This is needed
  // by |SourceTracker| when the frame is delivered to the RTCRtpReceiver's
  // MediaStreamTrack, in order to implement getContributingSources(). See:
  // https://w3c.github.io/webrtc-pc/#dom-rtcrtpreceiver-getcontributingsources
  //
  // TODO(bugs.webrtc.org/10757):
  // Note that this information might not be fully accurate since we currently
  // don't have a proper way to track it across the audio sync buffer. The
  // sync buffer is the small sample-holding buffer located after the audio
  // decoder and before where samples are assembled into output frames.
  //
  // |RtpPacketInfos| may also be empty if the audio samples did not come from
  // RTP packets. E.g. if the audio were locally generated by packet loss
  // concealment, comfort noise generation, etc.
  RtpPacketInfos packet_infos_;

 private:
  // A permanently zeroed out buffer to represent muted frames. This is a
  // header-only class, so the only way to avoid creating a separate empty
  // buffer per translation unit is to wrap a static in an inline function.
  static const int16_t* empty_data();

  int16_t data_[kMaxDataSizeSamples];
  bool muted_ = true;

  // Absolute capture timestamp when this audio frame was originally captured.
  // This is only valid for audio frames captured on this machine. The absolute
  // capture timestamp of a received frame is found in |packet_infos_|.
  // This timestamp MUST be based on the same clock as rtc::TimeMillis().
  absl::optional<int64_t> absolute_capture_timestamp_ms_;

  RTC_DISALLOW_COPY_AND_ASSIGN(AudioFrame);
};

}  // namespace webrtc

#endif  // API_AUDIO_AUDIO_FRAME_H_
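A hedged example (not from the upstream tree) of how this API is typically driven: a 10 ms block of mono 48 kHz PCM is copied into a frame with UpdateFrame(), and CopyFrom() duplicates it, including the muted flag. The buffer sizes are assumptions chosen to satisfy RTC_CHECK_LE(length, kMaxDataSizeSamples).

#include <cstdint>

#include "api/audio/audio_frame.h"

void FillFrameSketch(const int16_t* pcm_10ms, uint32_t rtp_timestamp) {
  constexpr int kSampleRateHz = 48000;
  constexpr size_t kSamplesPer10Ms = kSampleRateHz / 100;  // 480 <= 7680.

  webrtc::AudioFrame frame;
  frame.UpdateFrame(rtp_timestamp, pcm_10ms, kSamplesPer10Ms, kSampleRateHz,
                    webrtc::AudioFrame::kNormalSpeech,
                    webrtc::AudioFrame::kVadActive,
                    /*num_channels=*/1);

  webrtc::AudioFrame copy;
  copy.CopyFrom(frame);  // Copies the metadata and, if unmuted, the samples.
}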
webrtc/api/audio/channel_layout.cc (new file, 282 lines)
@@ -0,0 +1,282 @@
/*
 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "api/audio/channel_layout.h"

#include <stddef.h>

#include "rtc_base/arraysize.h"
#include "rtc_base/checks.h"
#include "rtc_base/logging.h"

namespace webrtc {

static const int kLayoutToChannels[] = {
    0,  // CHANNEL_LAYOUT_NONE
    0,  // CHANNEL_LAYOUT_UNSUPPORTED
    1,  // CHANNEL_LAYOUT_MONO
    2,  // CHANNEL_LAYOUT_STEREO
    3,  // CHANNEL_LAYOUT_2_1
    3,  // CHANNEL_LAYOUT_SURROUND
    4,  // CHANNEL_LAYOUT_4_0
    4,  // CHANNEL_LAYOUT_2_2
    4,  // CHANNEL_LAYOUT_QUAD
    5,  // CHANNEL_LAYOUT_5_0
    6,  // CHANNEL_LAYOUT_5_1
    5,  // CHANNEL_LAYOUT_5_0_BACK
    6,  // CHANNEL_LAYOUT_5_1_BACK
    7,  // CHANNEL_LAYOUT_7_0
    8,  // CHANNEL_LAYOUT_7_1
    8,  // CHANNEL_LAYOUT_7_1_WIDE
    2,  // CHANNEL_LAYOUT_STEREO_DOWNMIX
    3,  // CHANNEL_LAYOUT_2POINT1
    4,  // CHANNEL_LAYOUT_3_1
    5,  // CHANNEL_LAYOUT_4_1
    6,  // CHANNEL_LAYOUT_6_0
    6,  // CHANNEL_LAYOUT_6_0_FRONT
    6,  // CHANNEL_LAYOUT_HEXAGONAL
    7,  // CHANNEL_LAYOUT_6_1
    7,  // CHANNEL_LAYOUT_6_1_BACK
    7,  // CHANNEL_LAYOUT_6_1_FRONT
    7,  // CHANNEL_LAYOUT_7_0_FRONT
    8,  // CHANNEL_LAYOUT_7_1_WIDE_BACK
    8,  // CHANNEL_LAYOUT_OCTAGONAL
    0,  // CHANNEL_LAYOUT_DISCRETE
    3,  // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
    5,  // CHANNEL_LAYOUT_4_1_QUAD_SIDE
    0,  // CHANNEL_LAYOUT_BITSTREAM
};

// The channel orderings for each layout as specified by FFmpeg. Each value
// represents the index of each channel in each layout. Values of -1 mean the
// channel at that index is not used for that layout. For example, the left side
// surround sound channel in FFmpeg's 5.1 layout is in the 5th position (because
// the order is L, R, C, LFE, LS, RS), so
// kChannelOrderings[CHANNEL_LAYOUT_5_1][SIDE_LEFT] = 4;
static const int kChannelOrderings[CHANNEL_LAYOUT_MAX + 1][CHANNELS_MAX + 1] = {
    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR

    // CHANNEL_LAYOUT_NONE
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_UNSUPPORTED
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_MONO
    {-1, -1, 0, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_STEREO
    {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_2_1
    {0, 1, -1, -1, -1, -1, -1, -1, 2, -1, -1},

    // CHANNEL_LAYOUT_SURROUND
    {0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_4_0
    {0, 1, 2, -1, -1, -1, -1, -1, 3, -1, -1},

    // CHANNEL_LAYOUT_2_2
    {0, 1, -1, -1, -1, -1, -1, -1, -1, 2, 3},

    // CHANNEL_LAYOUT_QUAD
    {0, 1, -1, -1, 2, 3, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_5_0
    {0, 1, 2, -1, -1, -1, -1, -1, -1, 3, 4},

    // CHANNEL_LAYOUT_5_1
    {0, 1, 2, 3, -1, -1, -1, -1, -1, 4, 5},

    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR

    // CHANNEL_LAYOUT_5_0_BACK
    {0, 1, 2, -1, 3, 4, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_5_1_BACK
    {0, 1, 2, 3, 4, 5, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_7_0
    {0, 1, 2, -1, 5, 6, -1, -1, -1, 3, 4},

    // CHANNEL_LAYOUT_7_1
    {0, 1, 2, 3, 6, 7, -1, -1, -1, 4, 5},

    // CHANNEL_LAYOUT_7_1_WIDE
    {0, 1, 2, 3, -1, -1, 6, 7, -1, 4, 5},

    // CHANNEL_LAYOUT_STEREO_DOWNMIX
    {0, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_2POINT1
    {0, 1, -1, 2, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_3_1
    {0, 1, 2, 3, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_4_1
    {0, 1, 2, 4, -1, -1, -1, -1, 3, -1, -1},

    // CHANNEL_LAYOUT_6_0
    {0, 1, 2, -1, -1, -1, -1, -1, 5, 3, 4},

    // CHANNEL_LAYOUT_6_0_FRONT
    {0, 1, -1, -1, -1, -1, 4, 5, -1, 2, 3},

    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR

    // CHANNEL_LAYOUT_HEXAGONAL
    {0, 1, 2, -1, 3, 4, -1, -1, 5, -1, -1},

    // CHANNEL_LAYOUT_6_1
    {0, 1, 2, 3, -1, -1, -1, -1, 6, 4, 5},

    // CHANNEL_LAYOUT_6_1_BACK
    {0, 1, 2, 3, 4, 5, -1, -1, 6, -1, -1},

    // CHANNEL_LAYOUT_6_1_FRONT
    {0, 1, -1, 6, -1, -1, 4, 5, -1, 2, 3},

    // CHANNEL_LAYOUT_7_0_FRONT
    {0, 1, 2, -1, -1, -1, 5, 6, -1, 3, 4},

    // CHANNEL_LAYOUT_7_1_WIDE_BACK
    {0, 1, 2, 3, 4, 5, 6, 7, -1, -1, -1},

    // CHANNEL_LAYOUT_OCTAGONAL
    {0, 1, 2, -1, 5, 6, -1, -1, 7, 3, 4},

    // CHANNEL_LAYOUT_DISCRETE
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC
    {0, 1, 2, -1, -1, -1, -1, -1, -1, -1, -1},

    // CHANNEL_LAYOUT_4_1_QUAD_SIDE
    {0, 1, -1, 4, -1, -1, -1, -1, -1, 2, 3},

    // CHANNEL_LAYOUT_BITSTREAM
    {-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1},

    // FL | FR | FC | LFE | BL | BR | FLofC | FRofC | BC | SL | SR
};

int ChannelLayoutToChannelCount(ChannelLayout layout) {
  RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kLayoutToChannels));
  RTC_DCHECK_LE(kLayoutToChannels[layout], kMaxConcurrentChannels);
  return kLayoutToChannels[layout];
}

// Converts a channel count into a channel layout.
ChannelLayout GuessChannelLayout(int channels) {
  switch (channels) {
    case 1:
      return CHANNEL_LAYOUT_MONO;
    case 2:
      return CHANNEL_LAYOUT_STEREO;
    case 3:
      return CHANNEL_LAYOUT_SURROUND;
    case 4:
      return CHANNEL_LAYOUT_QUAD;
    case 5:
      return CHANNEL_LAYOUT_5_0;
    case 6:
      return CHANNEL_LAYOUT_5_1;
    case 7:
      return CHANNEL_LAYOUT_6_1;
    case 8:
      return CHANNEL_LAYOUT_7_1;
    default:
      RTC_DLOG(LS_WARNING) << "Unsupported channel count: " << channels;
  }
  return CHANNEL_LAYOUT_UNSUPPORTED;
}

int ChannelOrder(ChannelLayout layout, Channels channel) {
  RTC_DCHECK_LT(static_cast<size_t>(layout), arraysize(kChannelOrderings));
  RTC_DCHECK_LT(static_cast<size_t>(channel), arraysize(kChannelOrderings[0]));
  return kChannelOrderings[layout][channel];
}

const char* ChannelLayoutToString(ChannelLayout layout) {
  switch (layout) {
    case CHANNEL_LAYOUT_NONE:
      return "NONE";
    case CHANNEL_LAYOUT_UNSUPPORTED:
      return "UNSUPPORTED";
    case CHANNEL_LAYOUT_MONO:
      return "MONO";
    case CHANNEL_LAYOUT_STEREO:
      return "STEREO";
    case CHANNEL_LAYOUT_2_1:
      return "2.1";
    case CHANNEL_LAYOUT_SURROUND:
      return "SURROUND";
    case CHANNEL_LAYOUT_4_0:
      return "4.0";
    case CHANNEL_LAYOUT_2_2:
      return "QUAD_SIDE";
    case CHANNEL_LAYOUT_QUAD:
      return "QUAD";
    case CHANNEL_LAYOUT_5_0:
      return "5.0";
    case CHANNEL_LAYOUT_5_1:
      return "5.1";
    case CHANNEL_LAYOUT_5_0_BACK:
      return "5.0_BACK";
    case CHANNEL_LAYOUT_5_1_BACK:
      return "5.1_BACK";
    case CHANNEL_LAYOUT_7_0:
      return "7.0";
    case CHANNEL_LAYOUT_7_1:
      return "7.1";
    case CHANNEL_LAYOUT_7_1_WIDE:
      return "7.1_WIDE";
    case CHANNEL_LAYOUT_STEREO_DOWNMIX:
      return "STEREO_DOWNMIX";
    case CHANNEL_LAYOUT_2POINT1:
      return "2POINT1";
    case CHANNEL_LAYOUT_3_1:
      return "3.1";
    case CHANNEL_LAYOUT_4_1:
      return "4.1";
    case CHANNEL_LAYOUT_6_0:
      return "6.0";
    case CHANNEL_LAYOUT_6_0_FRONT:
      return "6.0_FRONT";
    case CHANNEL_LAYOUT_HEXAGONAL:
      return "HEXAGONAL";
    case CHANNEL_LAYOUT_6_1:
      return "6.1";
    case CHANNEL_LAYOUT_6_1_BACK:
      return "6.1_BACK";
    case CHANNEL_LAYOUT_6_1_FRONT:
      return "6.1_FRONT";
    case CHANNEL_LAYOUT_7_0_FRONT:
      return "7.0_FRONT";
    case CHANNEL_LAYOUT_7_1_WIDE_BACK:
      return "7.1_WIDE_BACK";
    case CHANNEL_LAYOUT_OCTAGONAL:
      return "OCTAGONAL";
    case CHANNEL_LAYOUT_DISCRETE:
      return "DISCRETE";
    case CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC:
      return "STEREO_AND_KEYBOARD_MIC";
    case CHANNEL_LAYOUT_4_1_QUAD_SIDE:
      return "4.1_QUAD_SIDE";
    case CHANNEL_LAYOUT_BITSTREAM:
      return "BITSTREAM";
  }
  RTC_NOTREACHED() << "Invalid channel layout provided: " << layout;
  return "";
}

}  // namespace webrtc
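A small sketch (not part of the upstream files) of what the tables above encode, using the 5.1 example from the comment on kChannelOrderings; the checks restate documented table values rather than new behavior.

#include "api/audio/channel_layout.h"
#include "rtc_base/checks.h"

void ChannelLayoutLookupSketch() {
  // Six channels are guessed to be 5.1 (L, R, C, LFE, SL, SR).
  RTC_DCHECK_EQ(webrtc::GuessChannelLayout(6), webrtc::CHANNEL_LAYOUT_5_1);
  RTC_DCHECK_EQ(
      webrtc::ChannelLayoutToChannelCount(webrtc::CHANNEL_LAYOUT_5_1), 6);

  // Side-left sits at interleaved index 4 in the FFmpeg 5.1 ordering.
  RTC_DCHECK_EQ(
      webrtc::ChannelOrder(webrtc::CHANNEL_LAYOUT_5_1, webrtc::SIDE_LEFT), 4);
  // The back-left slot is unused in this layout.
  RTC_DCHECK_EQ(
      webrtc::ChannelOrder(webrtc::CHANNEL_LAYOUT_5_1, webrtc::BACK_LEFT), -1);
}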
webrtc/api/audio/channel_layout.h (new file, 165 lines)
@@ -0,0 +1,165 @@
/*
 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_AUDIO_CHANNEL_LAYOUT_H_
#define API_AUDIO_CHANNEL_LAYOUT_H_

namespace webrtc {

// This file is derived from Chromium's base/channel_layout.h.

// Enumerates the various representations of the ordering of audio channels.
// Logged to UMA, so never reuse a value, always add new/greater ones!
enum ChannelLayout {
  CHANNEL_LAYOUT_NONE = 0,
  CHANNEL_LAYOUT_UNSUPPORTED = 1,

  // Front C
  CHANNEL_LAYOUT_MONO = 2,

  // Front L, Front R
  CHANNEL_LAYOUT_STEREO = 3,

  // Front L, Front R, Back C
  CHANNEL_LAYOUT_2_1 = 4,

  // Front L, Front R, Front C
  CHANNEL_LAYOUT_SURROUND = 5,

  // Front L, Front R, Front C, Back C
  CHANNEL_LAYOUT_4_0 = 6,

  // Front L, Front R, Side L, Side R
  CHANNEL_LAYOUT_2_2 = 7,

  // Front L, Front R, Back L, Back R
  CHANNEL_LAYOUT_QUAD = 8,

  // Front L, Front R, Front C, Side L, Side R
  CHANNEL_LAYOUT_5_0 = 9,

  // Front L, Front R, Front C, LFE, Side L, Side R
  CHANNEL_LAYOUT_5_1 = 10,

  // Front L, Front R, Front C, Back L, Back R
  CHANNEL_LAYOUT_5_0_BACK = 11,

  // Front L, Front R, Front C, LFE, Back L, Back R
  CHANNEL_LAYOUT_5_1_BACK = 12,

  // Front L, Front R, Front C, Side L, Side R, Back L, Back R
  CHANNEL_LAYOUT_7_0 = 13,

  // Front L, Front R, Front C, LFE, Side L, Side R, Back L, Back R
  CHANNEL_LAYOUT_7_1 = 14,

  // Front L, Front R, Front C, LFE, Side L, Side R, Front LofC, Front RofC
  CHANNEL_LAYOUT_7_1_WIDE = 15,

  // Stereo L, Stereo R
  CHANNEL_LAYOUT_STEREO_DOWNMIX = 16,

  // Stereo L, Stereo R, LFE
  CHANNEL_LAYOUT_2POINT1 = 17,

  // Stereo L, Stereo R, Front C, LFE
  CHANNEL_LAYOUT_3_1 = 18,

  // Stereo L, Stereo R, Front C, Rear C, LFE
  CHANNEL_LAYOUT_4_1 = 19,

  // Stereo L, Stereo R, Front C, Side L, Side R, Back C
  CHANNEL_LAYOUT_6_0 = 20,

  // Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC
  CHANNEL_LAYOUT_6_0_FRONT = 21,

  // Stereo L, Stereo R, Front C, Rear L, Rear R, Rear C
  CHANNEL_LAYOUT_HEXAGONAL = 22,

  // Stereo L, Stereo R, Front C, LFE, Side L, Side R, Rear Center
  CHANNEL_LAYOUT_6_1 = 23,

  // Stereo L, Stereo R, Front C, LFE, Back L, Back R, Rear Center
  CHANNEL_LAYOUT_6_1_BACK = 24,

  // Stereo L, Stereo R, Side L, Side R, Front LofC, Front RofC, LFE
  CHANNEL_LAYOUT_6_1_FRONT = 25,

  // Front L, Front R, Front C, Side L, Side R, Front LofC, Front RofC
  CHANNEL_LAYOUT_7_0_FRONT = 26,

  // Front L, Front R, Front C, LFE, Back L, Back R, Front LofC, Front RofC
  CHANNEL_LAYOUT_7_1_WIDE_BACK = 27,

  // Front L, Front R, Front C, Side L, Side R, Rear L, Back R, Back C.
  CHANNEL_LAYOUT_OCTAGONAL = 28,

  // Channels are not explicitly mapped to speakers.
  CHANNEL_LAYOUT_DISCRETE = 29,

  // Front L, Front R, Front C. Front C contains the keyboard mic audio. This
  // layout is only intended for input for WebRTC. The Front C channel
  // is stripped away in the WebRTC audio input pipeline and never seen outside
  // of that.
  CHANNEL_LAYOUT_STEREO_AND_KEYBOARD_MIC = 30,

  // Front L, Front R, Side L, Side R, LFE
  CHANNEL_LAYOUT_4_1_QUAD_SIDE = 31,

  // Actual channel layout is specified in the bitstream and the actual channel
  // count is unknown at Chromium media pipeline level (useful for audio
  // pass-through mode).
  CHANNEL_LAYOUT_BITSTREAM = 32,

  // Max value, must always equal the largest entry ever logged.
  CHANNEL_LAYOUT_MAX = CHANNEL_LAYOUT_BITSTREAM
};

// Note: Do not reorder or reassign these values; other code depends on their
// ordering to operate correctly. E.g., CoreAudio channel layout computations.
enum Channels {
  LEFT = 0,
  RIGHT,
  CENTER,
  LFE,
  BACK_LEFT,
  BACK_RIGHT,
  LEFT_OF_CENTER,
  RIGHT_OF_CENTER,
  BACK_CENTER,
  SIDE_LEFT,
  SIDE_RIGHT,
  CHANNELS_MAX =
      SIDE_RIGHT,  // Must always equal the largest value ever logged.
};

// The maximum number of concurrently active channels for all possible layouts.
// ChannelLayoutToChannelCount() will never return a value higher than this.
constexpr int kMaxConcurrentChannels = 8;

// Returns the expected channel position in an interleaved stream. Values of -1
// mean the channel at that index is not used for that layout. Values range
// from 0 to ChannelLayoutToChannelCount(layout) - 1.
int ChannelOrder(ChannelLayout layout, Channels channel);

// Returns the number of channels in a given ChannelLayout.
int ChannelLayoutToChannelCount(ChannelLayout layout);

// Given the number of channels, return the best layout,
// or return CHANNEL_LAYOUT_UNSUPPORTED if there is no good match.
ChannelLayout GuessChannelLayout(int channels);

// Returns a string representation of the channel layout.
const char* ChannelLayoutToString(ChannelLayout layout);

}  // namespace webrtc

#endif  // API_AUDIO_CHANNEL_LAYOUT_H_
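An illustrative sketch (not upstream code) of how ChannelOrder() is meant to be used when interleaving audio: the PlanarToInterleaved helper and its buffer layout are assumptions made for the example, with the planar buffers indexed by the Channels enum.

#include <cstddef>
#include <cstdint>

#include "api/audio/channel_layout.h"

// Hypothetical helper: copies per-channel (planar) buffers, indexed by the
// Channels enum, into an interleaved buffer at the positions defined by
// ChannelOrder(). Entries for channels absent from the layout are skipped.
void PlanarToInterleaved(const int16_t* const* planar,
                         size_t samples_per_channel,
                         webrtc::ChannelLayout layout,
                         int16_t* interleaved) {
  const int num_channels = webrtc::ChannelLayoutToChannelCount(layout);
  for (int ch = 0; ch <= webrtc::CHANNELS_MAX; ++ch) {
    const int pos =
        webrtc::ChannelOrder(layout, static_cast<webrtc::Channels>(ch));
    if (pos < 0)
      continue;  // This semantic channel is not used in the layout.
    for (size_t i = 0; i < samples_per_channel; ++i)
      interleaved[i * num_channels + pos] = planar[ch][i];
  }
}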
webrtc/api/audio/echo_canceller3_config.cc (new file, 270 lines)
@@ -0,0 +1,270 @@
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include "api/audio/echo_canceller3_config.h"

#include <algorithm>
#include <cmath>

#include "rtc_base/checks.h"
#include "rtc_base/numerics/safe_minmax.h"

namespace webrtc {
namespace {
bool Limit(float* value, float min, float max) {
  float clamped = rtc::SafeClamp(*value, min, max);
  clamped = std::isfinite(clamped) ? clamped : min;
  bool res = *value == clamped;
  *value = clamped;
  return res;
}

bool Limit(size_t* value, size_t min, size_t max) {
  size_t clamped = rtc::SafeClamp(*value, min, max);
  bool res = *value == clamped;
  *value = clamped;
  return res;
}

bool Limit(int* value, int min, int max) {
  int clamped = rtc::SafeClamp(*value, min, max);
  bool res = *value == clamped;
  *value = clamped;
  return res;
}

bool FloorLimit(size_t* value, size_t min) {
  size_t clamped = *value >= min ? *value : min;
  bool res = *value == clamped;
  *value = clamped;
  return res;
}

}  // namespace

EchoCanceller3Config::EchoCanceller3Config() = default;
EchoCanceller3Config::EchoCanceller3Config(const EchoCanceller3Config& e) =
    default;
EchoCanceller3Config& EchoCanceller3Config::operator=(
    const EchoCanceller3Config& e) = default;
EchoCanceller3Config::Delay::Delay() = default;
EchoCanceller3Config::Delay::Delay(const EchoCanceller3Config::Delay& e) =
    default;
EchoCanceller3Config::Delay& EchoCanceller3Config::Delay::operator=(
    const Delay& e) = default;

EchoCanceller3Config::EchoModel::EchoModel() = default;
EchoCanceller3Config::EchoModel::EchoModel(
    const EchoCanceller3Config::EchoModel& e) = default;
EchoCanceller3Config::EchoModel& EchoCanceller3Config::EchoModel::operator=(
    const EchoModel& e) = default;

EchoCanceller3Config::Suppressor::Suppressor() = default;
EchoCanceller3Config::Suppressor::Suppressor(
    const EchoCanceller3Config::Suppressor& e) = default;
EchoCanceller3Config::Suppressor& EchoCanceller3Config::Suppressor::operator=(
    const Suppressor& e) = default;

EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
    float enr_transparent,
    float enr_suppress,
    float emr_transparent)
    : enr_transparent(enr_transparent),
      enr_suppress(enr_suppress),
      emr_transparent(emr_transparent) {}
EchoCanceller3Config::Suppressor::MaskingThresholds::MaskingThresholds(
    const EchoCanceller3Config::Suppressor::MaskingThresholds& e) = default;
EchoCanceller3Config::Suppressor::MaskingThresholds&
EchoCanceller3Config::Suppressor::MaskingThresholds::operator=(
    const MaskingThresholds& e) = default;

EchoCanceller3Config::Suppressor::Tuning::Tuning(MaskingThresholds mask_lf,
                                                 MaskingThresholds mask_hf,
                                                 float max_inc_factor,
                                                 float max_dec_factor_lf)
    : mask_lf(mask_lf),
      mask_hf(mask_hf),
      max_inc_factor(max_inc_factor),
      max_dec_factor_lf(max_dec_factor_lf) {}
EchoCanceller3Config::Suppressor::Tuning::Tuning(
    const EchoCanceller3Config::Suppressor::Tuning& e) = default;
EchoCanceller3Config::Suppressor::Tuning&
EchoCanceller3Config::Suppressor::Tuning::operator=(const Tuning& e) = default;

bool EchoCanceller3Config::Validate(EchoCanceller3Config* config) {
  RTC_DCHECK(config);
  EchoCanceller3Config* c = config;
  bool res = true;

  if (c->delay.down_sampling_factor != 4 &&
      c->delay.down_sampling_factor != 8) {
    c->delay.down_sampling_factor = 4;
    res = false;
  }

  res = res & Limit(&c->delay.default_delay, 0, 5000);
  res = res & Limit(&c->delay.num_filters, 0, 5000);
  res = res & Limit(&c->delay.delay_headroom_samples, 0, 5000);
  res = res & Limit(&c->delay.hysteresis_limit_blocks, 0, 5000);
  res = res & Limit(&c->delay.fixed_capture_delay_samples, 0, 5000);
  res = res & Limit(&c->delay.delay_estimate_smoothing, 0.f, 1.f);
  res = res & Limit(&c->delay.delay_candidate_detection_threshold, 0.f, 1.f);
  res = res & Limit(&c->delay.delay_selection_thresholds.initial, 1, 250);
  res = res & Limit(&c->delay.delay_selection_thresholds.converged, 1, 250);

  res = res & FloorLimit(&c->filter.refined.length_blocks, 1);
  res = res & Limit(&c->filter.refined.leakage_converged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined.leakage_diverged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined.error_floor, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined.error_ceil, 0.f, 100000000.f);
  res = res & Limit(&c->filter.refined.noise_gate, 0.f, 100000000.f);

  res = res & FloorLimit(&c->filter.refined_initial.length_blocks, 1);
  res = res & Limit(&c->filter.refined_initial.leakage_converged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined_initial.leakage_diverged, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined_initial.error_floor, 0.f, 1000.f);
  res = res & Limit(&c->filter.refined_initial.error_ceil, 0.f, 100000000.f);
  res = res & Limit(&c->filter.refined_initial.noise_gate, 0.f, 100000000.f);

  if (c->filter.refined.length_blocks <
      c->filter.refined_initial.length_blocks) {
    c->filter.refined_initial.length_blocks = c->filter.refined.length_blocks;
    res = false;
  }

  res = res & FloorLimit(&c->filter.coarse.length_blocks, 1);
  res = res & Limit(&c->filter.coarse.rate, 0.f, 1.f);
  res = res & Limit(&c->filter.coarse.noise_gate, 0.f, 100000000.f);

  res = res & FloorLimit(&c->filter.coarse_initial.length_blocks, 1);
  res = res & Limit(&c->filter.coarse_initial.rate, 0.f, 1.f);
  res = res & Limit(&c->filter.coarse_initial.noise_gate, 0.f, 100000000.f);

  if (c->filter.coarse.length_blocks < c->filter.coarse_initial.length_blocks) {
    c->filter.coarse_initial.length_blocks = c->filter.coarse.length_blocks;
    res = false;
  }

  res = res & Limit(&c->filter.config_change_duration_blocks, 0, 100000);
  res = res & Limit(&c->filter.initial_state_seconds, 0.f, 100.f);

  res = res & Limit(&c->erle.min, 1.f, 100000.f);
  res = res & Limit(&c->erle.max_l, 1.f, 100000.f);
  res = res & Limit(&c->erle.max_h, 1.f, 100000.f);
  if (c->erle.min > c->erle.max_l || c->erle.min > c->erle.max_h) {
    c->erle.min = std::min(c->erle.max_l, c->erle.max_h);
    res = false;
  }
  res = res & Limit(&c->erle.num_sections, 1, c->filter.refined.length_blocks);

  res = res & Limit(&c->ep_strength.default_gain, 0.f, 1000000.f);
  res = res & Limit(&c->ep_strength.default_len, -1.f, 1.f);

  res =
      res & Limit(&c->echo_audibility.low_render_limit, 0.f, 32768.f * 32768.f);
  res = res &
        Limit(&c->echo_audibility.normal_render_limit, 0.f, 32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.floor_power, 0.f, 32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.audibility_threshold_lf, 0.f,
                    32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.audibility_threshold_mf, 0.f,
                    32768.f * 32768.f);
  res = res & Limit(&c->echo_audibility.audibility_threshold_hf, 0.f,
                    32768.f * 32768.f);

  res = res &
        Limit(&c->render_levels.active_render_limit, 0.f, 32768.f * 32768.f);
  res = res & Limit(&c->render_levels.poor_excitation_render_limit, 0.f,
                    32768.f * 32768.f);
  res = res & Limit(&c->render_levels.poor_excitation_render_limit_ds8, 0.f,
                    32768.f * 32768.f);

  res = res & Limit(&c->echo_model.noise_floor_hold, 0, 1000);
  res = res & Limit(&c->echo_model.min_noise_floor_power, 0, 2000000.f);
  res = res & Limit(&c->echo_model.stationary_gate_slope, 0, 1000000.f);
  res = res & Limit(&c->echo_model.noise_gate_power, 0, 1000000.f);
  res = res & Limit(&c->echo_model.noise_gate_slope, 0, 1000000.f);
  res = res & Limit(&c->echo_model.render_pre_window_size, 0, 100);
  res = res & Limit(&c->echo_model.render_post_window_size, 0, 100);

  res = res & Limit(&c->comfort_noise.noise_floor_dbfs, -200.f, 0.f);

  res = res & Limit(&c->suppressor.nearend_average_blocks, 1, 5000);

  res = res &
        Limit(&c->suppressor.normal_tuning.mask_lf.enr_transparent, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_lf.enr_suppress, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_lf.emr_transparent, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_hf.enr_transparent, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_hf.enr_suppress, 0.f, 100.f);
  res = res &
        Limit(&c->suppressor.normal_tuning.mask_hf.emr_transparent, 0.f, 100.f);
  res = res & Limit(&c->suppressor.normal_tuning.max_inc_factor, 0.f, 100.f);
  res = res & Limit(&c->suppressor.normal_tuning.max_dec_factor_lf, 0.f, 100.f);

  res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.enr_transparent, 0.f,
                    100.f);
  res = res &
        Limit(&c->suppressor.nearend_tuning.mask_lf.enr_suppress, 0.f, 100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.mask_lf.emr_transparent, 0.f,
                    100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.enr_transparent, 0.f,
                    100.f);
  res = res &
        Limit(&c->suppressor.nearend_tuning.mask_hf.enr_suppress, 0.f, 100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.mask_hf.emr_transparent, 0.f,
                    100.f);
  res = res & Limit(&c->suppressor.nearend_tuning.max_inc_factor, 0.f, 100.f);
  res =
      res & Limit(&c->suppressor.nearend_tuning.max_dec_factor_lf, 0.f, 100.f);

  res = res & Limit(&c->suppressor.dominant_nearend_detection.enr_threshold,
                    0.f, 1000000.f);
  res = res & Limit(&c->suppressor.dominant_nearend_detection.snr_threshold,
                    0.f, 1000000.f);
  res = res & Limit(&c->suppressor.dominant_nearend_detection.hold_duration, 0,
                    10000);
  res = res & Limit(&c->suppressor.dominant_nearend_detection.trigger_threshold,
                    0, 10000);

  res = res &
        Limit(&c->suppressor.subband_nearend_detection.nearend_average_blocks,
              1, 1024);
  res =
      res & Limit(&c->suppressor.subband_nearend_detection.subband1.low, 0, 65);
  res = res & Limit(&c->suppressor.subband_nearend_detection.subband1.high,
                    c->suppressor.subband_nearend_detection.subband1.low, 65);
  res =
      res & Limit(&c->suppressor.subband_nearend_detection.subband2.low, 0, 65);
  res = res & Limit(&c->suppressor.subband_nearend_detection.subband2.high,
                    c->suppressor.subband_nearend_detection.subband2.low, 65);
  res = res & Limit(&c->suppressor.subband_nearend_detection.nearend_threshold,
                    0.f, 1.e24f);
  res = res & Limit(&c->suppressor.subband_nearend_detection.snr_threshold, 0.f,
                    1.e24f);

  res = res & Limit(&c->suppressor.high_bands_suppression.enr_threshold, 0.f,
                    1000000.f);
  res = res & Limit(&c->suppressor.high_bands_suppression.max_gain_during_echo,
                    0.f, 1.f);
  res = res & Limit(&c->suppressor.high_bands_suppression
                         .anti_howling_activation_threshold,
                    0.f, 32768.f * 32768.f);
  res = res & Limit(&c->suppressor.high_bands_suppression.anti_howling_gain,
                    0.f, 1.f);

  res = res & Limit(&c->suppressor.floor_first_increase, 0.f, 1000000.f);

  return res;
}
}  // namespace webrtc
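A short sketch (not in the upstream files) of the Validate() contract implemented above: out-of-range fields are clamped in place and the function reports whether the config was already valid. The particular field values chosen here are arbitrary assumptions for illustration.

#include "api/audio/echo_canceller3_config.h"
#include "rtc_base/checks.h"

void ValidateSketch() {
  webrtc::EchoCanceller3Config config;  // The defaults are already valid.
  RTC_DCHECK(webrtc::EchoCanceller3Config::Validate(&config));

  config.erle.min = 0.5f;                 // Below the allowed minimum of 1.f.
  config.delay.down_sampling_factor = 3;  // Only 4 or 8 are accepted.
  const bool was_valid = webrtc::EchoCanceller3Config::Validate(&config);
  RTC_DCHECK(!was_valid);  // Reports that clamping was needed...
  RTC_DCHECK_EQ(config.delay.down_sampling_factor, 4u);  // ...and repairs it.
}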
webrtc/api/audio/echo_canceller3_config.h (new file, 228 lines)
@@ -0,0 +1,228 @@
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
#define API_AUDIO_ECHO_CANCELLER3_CONFIG_H_

#include <stddef.h>  // size_t

#include "rtc_base/system/rtc_export.h"

namespace webrtc {

// Configuration struct for EchoCanceller3
struct RTC_EXPORT EchoCanceller3Config {
  // Checks and updates the config parameters to lie within (mostly) reasonable
  // ranges. Returns true if and only if the config did not need to be changed.
  static bool Validate(EchoCanceller3Config* config);

  EchoCanceller3Config();
  EchoCanceller3Config(const EchoCanceller3Config& e);
  EchoCanceller3Config& operator=(const EchoCanceller3Config& other);

  struct Buffering {
    size_t excess_render_detection_interval_blocks = 250;
    size_t max_allowed_excess_render_blocks = 8;
  } buffering;

  struct Delay {
    Delay();
    Delay(const Delay& e);
    Delay& operator=(const Delay& e);
    size_t default_delay = 5;
    size_t down_sampling_factor = 4;
    size_t num_filters = 5;
    size_t delay_headroom_samples = 32;
    size_t hysteresis_limit_blocks = 1;
    size_t fixed_capture_delay_samples = 0;
    float delay_estimate_smoothing = 0.7f;
    float delay_candidate_detection_threshold = 0.2f;
    struct DelaySelectionThresholds {
      int initial;
      int converged;
    } delay_selection_thresholds = {5, 20};
    bool use_external_delay_estimator = false;
    bool log_warning_on_delay_changes = false;
    struct AlignmentMixing {
      bool downmix;
      bool adaptive_selection;
      float activity_power_threshold;
      bool prefer_first_two_channels;
    };
    AlignmentMixing render_alignment_mixing = {false, true, 10000.f, true};
    AlignmentMixing capture_alignment_mixing = {false, true, 10000.f, false};
  } delay;

  struct Filter {
    struct RefinedConfiguration {
      size_t length_blocks;
      float leakage_converged;
      float leakage_diverged;
      float error_floor;
      float error_ceil;
      float noise_gate;
    };

    struct CoarseConfiguration {
      size_t length_blocks;
      float rate;
      float noise_gate;
    };

    RefinedConfiguration refined = {13,     0.00005f, 0.05f,
                                    0.001f, 2.f,      20075344.f};
    CoarseConfiguration coarse = {13, 0.7f, 20075344.f};

    RefinedConfiguration refined_initial = {12,     0.005f, 0.5f,
                                            0.001f, 2.f,    20075344.f};
    CoarseConfiguration coarse_initial = {12, 0.9f, 20075344.f};

    size_t config_change_duration_blocks = 250;
    float initial_state_seconds = 2.5f;
    bool conservative_initial_phase = false;
    bool enable_coarse_filter_output_usage = true;
    bool use_linear_filter = true;
    bool export_linear_aec_output = false;
  } filter;

  struct Erle {
    float min = 1.f;
    float max_l = 4.f;
    float max_h = 1.5f;
    bool onset_detection = true;
    size_t num_sections = 1;
    bool clamp_quality_estimate_to_zero = true;
    bool clamp_quality_estimate_to_one = true;
  } erle;

  struct EpStrength {
    float default_gain = 1.f;
    float default_len = 0.83f;
    bool echo_can_saturate = true;
    bool bounded_erl = false;
  } ep_strength;

  struct EchoAudibility {
    float low_render_limit = 4 * 64.f;
    float normal_render_limit = 64.f;
    float floor_power = 2 * 64.f;
    float audibility_threshold_lf = 10;
    float audibility_threshold_mf = 10;
    float audibility_threshold_hf = 10;
    bool use_stationarity_properties = false;
    bool use_stationarity_properties_at_init = false;
  } echo_audibility;

  struct RenderLevels {
    float active_render_limit = 100.f;
    float poor_excitation_render_limit = 150.f;
    float poor_excitation_render_limit_ds8 = 20.f;
    float render_power_gain_db = 0.f;
  } render_levels;

  struct EchoRemovalControl {
    bool has_clock_drift = false;
    bool linear_and_stable_echo_path = false;
  } echo_removal_control;

  struct EchoModel {
    EchoModel();
    EchoModel(const EchoModel& e);
    EchoModel& operator=(const EchoModel& e);
    size_t noise_floor_hold = 50;
    float min_noise_floor_power = 1638400.f;
    float stationary_gate_slope = 10.f;
    float noise_gate_power = 27509.42f;
    float noise_gate_slope = 0.3f;
    size_t render_pre_window_size = 1;
    size_t render_post_window_size = 1;
    bool model_reverb_in_nonlinear_mode = true;
  } echo_model;

  struct ComfortNoise {
    float noise_floor_dbfs = -96.03406f;
  } comfort_noise;

  struct Suppressor {
    Suppressor();
    Suppressor(const Suppressor& e);
    Suppressor& operator=(const Suppressor& e);

    size_t nearend_average_blocks = 4;

    struct MaskingThresholds {
      MaskingThresholds(float enr_transparent,
                        float enr_suppress,
                        float emr_transparent);
      MaskingThresholds(const MaskingThresholds& e);
      MaskingThresholds& operator=(const MaskingThresholds& e);
      float enr_transparent;
      float enr_suppress;
      float emr_transparent;
    };

    struct Tuning {
      Tuning(MaskingThresholds mask_lf,
             MaskingThresholds mask_hf,
             float max_inc_factor,
             float max_dec_factor_lf);
      Tuning(const Tuning& e);
      Tuning& operator=(const Tuning& e);
      MaskingThresholds mask_lf;
      MaskingThresholds mask_hf;
      float max_inc_factor;
      float max_dec_factor_lf;
    };

    Tuning normal_tuning = Tuning(MaskingThresholds(.3f, .4f, .3f),
                                  MaskingThresholds(.07f, .1f, .3f),
                                  2.0f,
                                  0.25f);
    Tuning nearend_tuning = Tuning(MaskingThresholds(1.09f, 1.1f, .3f),
                                   MaskingThresholds(.1f, .3f, .3f),
                                   2.0f,
                                   0.25f);

    struct DominantNearendDetection {
      float enr_threshold = .25f;
      float enr_exit_threshold = 10.f;
      float snr_threshold = 30.f;
      int hold_duration = 50;
      int trigger_threshold = 12;
      bool use_during_initial_phase = true;
    } dominant_nearend_detection;

    struct SubbandNearendDetection {
      size_t nearend_average_blocks = 1;
      struct SubbandRegion {
        size_t low;
        size_t high;
      };
      SubbandRegion subband1 = {1, 1};
      SubbandRegion subband2 = {1, 1};
      float nearend_threshold = 1.f;
      float snr_threshold = 1.f;
    } subband_nearend_detection;

    bool use_subband_nearend_detection = false;

    struct HighBandsSuppression {
      float enr_threshold = 1.f;
      float max_gain_during_echo = 1.f;
      float anti_howling_activation_threshold = 400.f;
      float anti_howling_gain = 1.f;
    } high_bands_suppression;

    float floor_first_increase = 0.00001f;
  } suppressor;
};
}  // namespace webrtc

#endif  // API_AUDIO_ECHO_CANCELLER3_CONFIG_H_
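A hedged sketch (not upstream code) of adjusting the defaults declared above; the particular field choices are arbitrary examples, and Validate() is run afterwards so anything out of range is clamped before the config is consumed.

#include "api/audio/echo_canceller3_config.h"

webrtc::EchoCanceller3Config MakeTweakedConfigSketch() {
  webrtc::EchoCanceller3Config config;
  // Example tweaks only: export the linear filter output and allow a fixed
  // capture delay; both fields are plain struct members declared above.
  config.filter.export_linear_aec_output = true;
  config.delay.fixed_capture_delay_samples = 160;
  // Clamp anything out of range before the config is used.
  webrtc::EchoCanceller3Config::Validate(&config);
  return config;
}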
webrtc/api/audio/echo_control.h (new file, 68 lines)
@@ -0,0 +1,68 @@
/*
 * Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef API_AUDIO_ECHO_CONTROL_H_
#define API_AUDIO_ECHO_CONTROL_H_

#include <memory>

#include "rtc_base/checks.h"

namespace webrtc {

class AudioBuffer;

// Interface for an acoustic echo cancellation (AEC) submodule.
class EchoControl {
 public:
  // Analysis (not changing) of the render signal.
  virtual void AnalyzeRender(AudioBuffer* render) = 0;

  // Analysis (not changing) of the capture signal.
  virtual void AnalyzeCapture(AudioBuffer* capture) = 0;

  // Processes the capture signal in order to remove the echo.
  virtual void ProcessCapture(AudioBuffer* capture, bool level_change) = 0;

  // As above, but also returns the linear filter output.
  virtual void ProcessCapture(AudioBuffer* capture,
                              AudioBuffer* linear_output,
                              bool level_change) = 0;

  struct Metrics {
    double echo_return_loss;
    double echo_return_loss_enhancement;
    int delay_ms;
  };

  // Collect current metrics from the echo controller.
  virtual Metrics GetMetrics() const = 0;

  // Provides an optional external estimate of the audio buffer delay.
  virtual void SetAudioBufferDelay(int delay_ms) = 0;

  // Returns whether the signal is altered.
  virtual bool ActiveProcessing() const = 0;

  virtual ~EchoControl() {}
};

// Interface for a factory that creates EchoControllers.
class EchoControlFactory {
 public:
  virtual std::unique_ptr<EchoControl> Create(int sample_rate_hz,
                                              int num_render_channels,
                                              int num_capture_channels) = 0;

  virtual ~EchoControlFactory() = default;
};
}  // namespace webrtc

#endif  // API_AUDIO_ECHO_CONTROL_H_
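To make the interface contract concrete, here is a minimal pass-through stub (purely illustrative, with hypothetical class names; not part of the upstream files) showing what an EchoControl/EchoControlFactory pair has to implement.

#include <memory>

#include "api/audio/echo_control.h"

namespace example {

// Hypothetical no-op echo controller: implements every pure-virtual method
// but leaves the capture signal untouched.
class NullEchoControl : public webrtc::EchoControl {
 public:
  void AnalyzeRender(webrtc::AudioBuffer* render) override {}
  void AnalyzeCapture(webrtc::AudioBuffer* capture) override {}
  void ProcessCapture(webrtc::AudioBuffer* capture,
                      bool level_change) override {}
  void ProcessCapture(webrtc::AudioBuffer* capture,
                      webrtc::AudioBuffer* linear_output,
                      bool level_change) override {}
  Metrics GetMetrics() const override { return {0.0, 0.0, 0}; }
  void SetAudioBufferDelay(int delay_ms) override {}
  bool ActiveProcessing() const override { return false; }
};

// Hypothetical factory returning the no-op controller above.
class NullEchoControlFactory : public webrtc::EchoControlFactory {
 public:
  std::unique_ptr<webrtc::EchoControl> Create(
      int sample_rate_hz,
      int num_render_channels,
      int num_capture_channels) override {
    return std::make_unique<NullEchoControl>();
  }
};

}  // namespace example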