Update audio_processing module

Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1

Update notes:

 * Pull in third party license file

 * Replace .gypi files with BUILD.gn to keep track of what changes
   upstream

 * Bunch of new files pulled in as dependencies

 * Won't build yet due to changes needed on top of these
This commit is contained in:
Arun Raghavan
2015-10-13 17:25:22 +05:30
parent 5ae7a5d6cd
commit 753eada3aa
324 changed files with 52533 additions and 16117 deletions

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,173 +8,331 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "audio_buffer.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/common.h"
namespace webrtc {
namespace {
enum {
kSamplesPer8kHzChannel = 80,
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320
};
// Per-channel sample counts associated with the 16, 32 and 48 kHz rates.
// NumBandsFromSamplesPerChannel() compares frame counts against the 32 and
// 48 kHz values and divides by the 16 kHz value.
// NOTE(review): these look like 10 ms of audio at each rate -- confirm.
const size_t kSamplesPer16kHzChannel = 160;
const size_t kSamplesPer32kHzChannel = 320;
const size_t kSamplesPer48kHzChannel = 480;
void StereoToMono(const WebRtc_Word16* left, const WebRtc_Word16* right,
WebRtc_Word16* out, int samples_per_channel) {
WebRtc_Word32 data_int32 = 0;
for (int i = 0; i < samples_per_channel; i++) {
data_int32 = (left[i] + right[i]) >> 1;
if (data_int32 > 32767) {
data_int32 = 32767;
} else if (data_int32 < -32768) {
data_int32 = -32768;
}
out[i] = static_cast<WebRtc_Word16>(data_int32);
// Returns the index at which the keyboard channel is expected in a
// deinterleaved channel array described by |stream_config|: one past the
// regular channels, i.e. num_channels().
// Calling this for a stream without a keyboard channel is a programming
// error: it asserts, and returns -1 when asserts are compiled out.
int KeyboardChannelIndex(const StreamConfig& stream_config) {
  const bool keyboard_present = stream_config.has_keyboard();
  if (keyboard_present) {
    return stream_config.num_channels();
  }
  assert(false);
  return -1;
}
// Maps a per-channel frame count to the number of frequency bands used for
// processing. Only the 32 kHz and 48 kHz frame counts are split; they yield
// num_frames / kSamplesPer16kHzChannel bands. Every other frame count is
// handled as a single band.
size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
  const bool is_split_rate = num_frames == kSamplesPer32kHzChannel ||
                             num_frames == kSamplesPer48kHzChannel;
  if (!is_split_rate) {
    return 1;
  }
  return rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
}
} // namespace
struct AudioChannel {
AudioChannel() {
memset(data, 0, sizeof(data));
}
WebRtc_Word16 data[kSamplesPer32kHzChannel];
};
struct SplitAudioChannel {
SplitAudioChannel() {
memset(low_pass_data, 0, sizeof(low_pass_data));
memset(high_pass_data, 0, sizeof(high_pass_data));
memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1));
memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2));
memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1));
memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2));
}
WebRtc_Word16 low_pass_data[kSamplesPer16kHzChannel];
WebRtc_Word16 high_pass_data[kSamplesPer16kHzChannel];
WebRtc_Word32 analysis_filter_state1[6];
WebRtc_Word32 analysis_filter_state2[6];
WebRtc_Word32 synthesis_filter_state1[6];
WebRtc_Word32 synthesis_filter_state2[6];
};
// TODO(andrew): check range of input parameters?
AudioBuffer::AudioBuffer(int max_num_channels,
int samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
AudioBuffer::AudioBuffer(size_t input_num_frames,
int num_input_channels,
size_t process_num_frames,
int num_process_channels,
size_t output_num_frames)
: input_num_frames_(input_num_frames),
num_input_channels_(num_input_channels),
proc_num_frames_(process_num_frames),
num_proc_channels_(num_process_channels),
output_num_frames_(output_num_frames),
num_channels_(num_process_channels),
num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
mixed_low_pass_valid_(false),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
if (max_num_channels_ > 1) {
channels_ = new AudioChannel[max_num_channels_];
mixed_low_pass_channels_ = new AudioChannel[max_num_channels_];
}
low_pass_reference_channels_ = new AudioChannel[max_num_channels_];
keyboard_data_(NULL),
data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
assert(input_num_frames_ > 0);
assert(proc_num_frames_ > 0);
assert(output_num_frames_ > 0);
assert(num_input_channels_ > 0);
assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
if (samples_per_channel_ == kSamplesPer32kHzChannel) {
split_channels_ = new SplitAudioChannel[max_num_channels_];
samples_per_split_channel_ = kSamplesPer16kHzChannel;
if (input_num_frames_ != proc_num_frames_ ||
output_num_frames_ != proc_num_frames_) {
// Create an intermediate buffer for resampling.
process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
num_proc_channels_));
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_.push_back(
new PushSincResampler(input_num_frames_,
proc_num_frames_));
}
}
if (output_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
output_resamplers_.push_back(
new PushSincResampler(proc_num_frames_,
output_num_frames_));
}
}
}
if (num_bands_ > 1) {
split_data_.reset(new IFChannelBuffer(proc_num_frames_,
num_proc_channels_,
num_bands_));
splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
num_bands_,
proc_num_frames_));
}
}
AudioBuffer::~AudioBuffer() {
if (channels_ != NULL) {
delete [] channels_;
// The destructor body is empty: no explicit cleanup is performed here.
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::CopyFrom(const float* const* data,
const StreamConfig& stream_config) {
assert(stream_config.num_frames() == input_num_frames_);
assert(stream_config.num_channels() == num_input_channels_);
InitForNewData();
// Initialized lazily because there's a different condition in
// DeinterleaveFrom.
const bool need_to_downmix =
num_input_channels_ > 1 && num_proc_channels_ == 1;
if (need_to_downmix && !input_buffer_) {
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
if (mixed_low_pass_channels_ != NULL) {
delete [] mixed_low_pass_channels_;
if (stream_config.has_keyboard()) {
keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
}
if (low_pass_reference_channels_ != NULL) {
delete [] low_pass_reference_channels_;
// Downmix.
const float* const* data_ptr = data;
if (need_to_downmix) {
DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
input_buffer_->fbuf()->channels()[0]);
data_ptr = input_buffer_->fbuf_const()->channels();
}
if (split_channels_ != NULL) {
delete [] split_channels_;
// Resample.
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(data_ptr[i],
input_num_frames_,
process_buffer_->channels()[i],
proc_num_frames_);
}
data_ptr = process_buffer_->channels();
}
// Convert to the S16 range.
for (int i = 0; i < num_proc_channels_; ++i) {
FloatToFloatS16(data_ptr[i],
proc_num_frames_,
data_->fbuf()->channels()[i]);
}
}
WebRtc_Word16* AudioBuffer::data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (data_ != NULL) {
return data_;
void AudioBuffer::CopyTo(const StreamConfig& stream_config,
float* const* data) {
assert(stream_config.num_frames() == output_num_frames_);
assert(stream_config.num_channels() == num_channels_);
// Convert to the float range.
float* const* data_ptr = data;
if (output_num_frames_ != proc_num_frames_) {
// Convert to an intermediate buffer for subsequent resampling.
data_ptr = process_buffer_->channels();
}
for (int i = 0; i < num_channels_; ++i) {
FloatS16ToFloat(data_->fbuf()->channels()[i],
proc_num_frames_,
data_ptr[i]);
}
return channels_[channel].data;
// Resample.
if (output_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_channels_; ++i) {
output_resamplers_[i]->Resample(data_ptr[i],
proc_num_frames_,
data[i],
output_num_frames_);
}
}
}
WebRtc_Word16* AudioBuffer::low_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return data(channel);
// Resets the per-frame bookkeeping. CopyFrom() and DeinterleaveFrom() call
// this before loading new audio.
void AudioBuffer::InitForNewData() {
  // Restore the channel count, which set_num_channels() may have changed.
  num_channels_ = num_proc_channels_;

  // Drop everything derived from, or attached to, the previous frame.
  activity_ = AudioFrame::kVadUnknown;
  reference_copied_ = false;
  mixed_low_pass_valid_ = false;
  keyboard_data_ = NULL;
}
const int16_t* const* AudioBuffer::channels_const() const {
return data_->ibuf_const()->channels();
}
int16_t* const* AudioBuffer::channels() {
mixed_low_pass_valid_ = false;
return data_->ibuf()->channels();
}
// Read-only per-band int16 pointers for |channel|. Uses the split buffer
// when one exists, otherwise falls back to the full-band buffer.
const int16_t* const* AudioBuffer::split_bands_const(int channel) const {
  if (split_data_.get()) {
    return split_data_->ibuf_const()->bands(channel);
  }
  return data_->ibuf_const()->bands(channel);
}
// Mutable per-band int16 pointers for |channel|. Uses the split buffer when
// one exists, otherwise the full-band buffer. Invalidates the cached mixed
// low-pass signal first.
int16_t* const* AudioBuffer::split_bands(int channel) {
  mixed_low_pass_valid_ = false;
  if (split_data_.get()) {
    return split_data_->ibuf()->bands(channel);
  }
  return data_->ibuf()->bands(channel);
}
// Read-only per-channel int16 pointers for a single |band|. Without a split
// buffer only kBand0To8kHz is available (served from the full-band buffer);
// any other band yields nullptr.
const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
  if (split_data_.get()) {
    return split_data_->ibuf_const()->channels(band);
  }
  if (band != kBand0To8kHz) {
    return nullptr;
  }
  return data_->ibuf_const()->channels();
}
// Mutable per-channel int16 pointers for a single |band|. Without a split
// buffer only kBand0To8kHz is available (served from the full-band buffer);
// any other band yields nullptr. Invalidates the cached mixed low-pass
// signal in every case.
int16_t* const* AudioBuffer::split_channels(Band band) {
  mixed_low_pass_valid_ = false;
  if (split_data_.get()) {
    return split_data_->ibuf()->channels(band);
  }
  if (band != kBand0To8kHz) {
    // The full-band buffer is deliberately not touched on this path.
    return nullptr;
  }
  return data_->ibuf()->channels();
}
// Mutable int16 view of the full-band data. Invalidates the cached mixed
// low-pass signal before handing the buffer out.
ChannelBuffer<int16_t>* AudioBuffer::data() {
  mixed_low_pass_valid_ = false;
  ChannelBuffer<int16_t>* int_view = data_->ibuf();
  return int_view;
}
// Read-only int16 view of the full-band data.
const ChannelBuffer<int16_t>* AudioBuffer::data() const {
  const ChannelBuffer<int16_t>* int_view = data_->ibuf_const();
  return int_view;
}
// Mutable int16 view of the band-split data, or of the full-band data when
// no split buffer exists. Invalidates the cached mixed low-pass signal.
ChannelBuffer<int16_t>* AudioBuffer::split_data() {
  mixed_low_pass_valid_ = false;
  if (split_data_.get()) {
    return split_data_->ibuf();
  }
  return data_->ibuf();
}
// Read-only int16 view of the band-split data, or of the full-band data when
// no split buffer exists.
const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
  if (split_data_.get()) {
    return split_data_->ibuf_const();
  }
  return data_->ibuf_const();
}
const float* const* AudioBuffer::channels_const_f() const {
return data_->fbuf_const()->channels();
}
float* const* AudioBuffer::channels_f() {
mixed_low_pass_valid_ = false;
return data_->fbuf()->channels();
}
// Read-only per-band float pointers for |channel|. Uses the split buffer
// when one exists, otherwise falls back to the full-band buffer.
const float* const* AudioBuffer::split_bands_const_f(int channel) const {
  if (split_data_.get()) {
    return split_data_->fbuf_const()->bands(channel);
  }
  return data_->fbuf_const()->bands(channel);
}
// Mutable per-band float pointers for |channel|. Uses the split buffer when
// one exists, otherwise the full-band buffer. Invalidates the cached mixed
// low-pass signal first.
float* const* AudioBuffer::split_bands_f(int channel) {
  mixed_low_pass_valid_ = false;
  if (split_data_.get()) {
    return split_data_->fbuf()->bands(channel);
  }
  return data_->fbuf()->bands(channel);
}
// Read-only per-channel float pointers for a single |band|. Without a split
// buffer only kBand0To8kHz is available (served from the full-band buffer);
// any other band yields nullptr.
const float* const* AudioBuffer::split_channels_const_f(Band band) const {
  if (split_data_.get()) {
    return split_data_->fbuf_const()->channels(band);
  }
  if (band != kBand0To8kHz) {
    return nullptr;
  }
  return data_->fbuf_const()->channels();
}
// Mutable per-channel float pointers for a single |band|. Without a split
// buffer only kBand0To8kHz is available (served from the full-band buffer);
// any other band yields nullptr. Invalidates the cached mixed low-pass
// signal in every case.
float* const* AudioBuffer::split_channels_f(Band band) {
  mixed_low_pass_valid_ = false;
  if (split_data_.get()) {
    return split_data_->fbuf()->channels(band);
  }
  if (band != kBand0To8kHz) {
    // The full-band buffer is deliberately not touched on this path.
    return nullptr;
  }
  return data_->fbuf()->channels();
}
// Mutable float view of the full-band data. Invalidates the cached mixed
// low-pass signal before handing the buffer out.
ChannelBuffer<float>* AudioBuffer::data_f() {
  mixed_low_pass_valid_ = false;
  ChannelBuffer<float>* float_view = data_->fbuf();
  return float_view;
}
// Read-only float view of the full-band data.
const ChannelBuffer<float>* AudioBuffer::data_f() const {
  const ChannelBuffer<float>* float_view = data_->fbuf_const();
  return float_view;
}
// Mutable float view of the band-split data, or of the full-band data when
// no split buffer exists. Invalidates the cached mixed low-pass signal.
ChannelBuffer<float>* AudioBuffer::split_data_f() {
  mixed_low_pass_valid_ = false;
  if (split_data_.get()) {
    return split_data_->fbuf();
  }
  return data_->fbuf();
}
// Read-only float view of the band-split data, or of the full-band data when
// no split buffer exists.
const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
  if (split_data_.get()) {
    return split_data_->fbuf_const();
  }
  return data_->fbuf_const();
}
const int16_t* AudioBuffer::mixed_low_pass_data() {
if (num_proc_channels_ == 1) {
return split_bands_const(0)[kBand0To8kHz];
}
return split_channels_[channel].low_pass_data;
}
if (!mixed_low_pass_valid_) {
if (!mixed_low_pass_channels_.get()) {
mixed_low_pass_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_, 1));
}
WebRtc_Word16* AudioBuffer::high_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return NULL;
DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
num_split_frames_, num_channels_,
mixed_low_pass_channels_->channels()[0]);
mixed_low_pass_valid_ = true;
}
return split_channels_[channel].high_pass_data;
return mixed_low_pass_channels_->channels()[0];
}
// Returns the sample array of mixed low-pass channel |channel|. The index
// must lie in [0, num_mixed_low_pass_channels_).
WebRtc_Word16* AudioBuffer::mixed_low_pass_data(int channel) const {
  assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
  WebRtc_Word16* samples = mixed_low_pass_channels_[channel].data;
  return samples;
}
WebRtc_Word16* AudioBuffer::low_pass_reference(int channel) const {
assert(channel >= 0 && channel < num_channels_);
const int16_t* AudioBuffer::low_pass_reference(int channel) const {
if (!reference_copied_) {
return NULL;
}
return low_pass_reference_channels_[channel].data;
return low_pass_reference_channels_->channels()[channel];
}
// Returns the first analysis filter state array of |channel|. The index must
// lie in [0, num_channels_).
WebRtc_Word32* AudioBuffer::analysis_filter_state1(int channel) const {
  assert(channel >= 0 && channel < num_channels_);
  WebRtc_Word32* state = split_channels_[channel].analysis_filter_state1;
  return state;
}
// Returns the second analysis filter state array of |channel|. The index
// must lie in [0, num_channels_).
WebRtc_Word32* AudioBuffer::analysis_filter_state2(int channel) const {
  assert(channel >= 0 && channel < num_channels_);
  WebRtc_Word32* state = split_channels_[channel].analysis_filter_state2;
  return state;
}
// Returns the first synthesis filter state array of |channel|. The index
// must lie in [0, num_channels_).
WebRtc_Word32* AudioBuffer::synthesis_filter_state1(int channel) const {
  assert(channel >= 0 && channel < num_channels_);
  WebRtc_Word32* state = split_channels_[channel].synthesis_filter_state1;
  return state;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state2;
const float* AudioBuffer::keyboard_data() const {
return keyboard_data_;
}
// Stores the voice-activity flag for the current data; activity() reads it
// back.
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
  this->activity_ = activity;
}
AudioFrame::VADActivity AudioBuffer::activity() {
AudioFrame::VADActivity AudioBuffer::activity() const {
return activity_;
}
@ -182,107 +340,123 @@ int AudioBuffer::num_channels() const {
return num_channels_;
}
int AudioBuffer::samples_per_channel() const {
return samples_per_channel_;
void AudioBuffer::set_num_channels(int num_channels) {
num_channels_ = num_channels;
}
int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_;
size_t AudioBuffer::num_frames() const {
return proc_num_frames_;
}
// TODO(andrew): Do deinterleaving and mixing in one step?
// Number of frames in each frequency band (proc_num_frames_ / num_bands_, as
// computed in the constructor).
size_t AudioBuffer::num_frames_per_band() const {
  const size_t frames_per_band = num_split_frames_;
  return frames_per_band;
}
// Number of frames in the keyboard channel. The keyboard channel is never
// resampled, so this is the input frame count rather than the processing
// frame count.
size_t AudioBuffer::num_keyboard_frames() const {
  const size_t keyboard_frames = input_num_frames_;
  return keyboard_frames;
}
// Number of frequency bands, as derived from the processing frame count in
// the constructor.
size_t AudioBuffer::num_bands() const {
  const size_t band_count = num_bands_;
  return band_count;
}
// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(frame->_audioChannel <= max_num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
assert(frame->num_channels_ == num_input_channels_);
assert(frame->samples_per_channel_ == input_num_frames_);
InitForNewData();
// Initialized lazily because there's a different condition in CopyFrom.
if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
activity_ = frame->vad_activity_;
num_channels_ = frame->_audioChannel;
num_mixed_channels_ = 0;
num_mixed_low_pass_channels_ = 0;
reference_copied_ = false;
activity_ = frame->_vadActivity;
int16_t* const* deinterleaved;
if (input_num_frames_ == proc_num_frames_) {
deinterleaved = data_->ibuf()->channels();
} else {
deinterleaved = input_buffer_->ibuf()->channels();
}
if (num_proc_channels_ == 1) {
// Downmix and deinterleave simultaneously.
DownmixInterleavedToMono(frame->data_, input_num_frames_,
num_input_channels_, deinterleaved[0]);
} else {
assert(num_proc_channels_ == num_input_channels_);
Deinterleave(frame->data_,
input_num_frames_,
num_proc_channels_,
deinterleaved);
}
if (num_channels_ == 1) {
// We can get away with a pointer assignment in this case.
data_ = frame->_payloadData;
// Resample.
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
input_num_frames_,
data_->fbuf()->channels()[i],
proc_num_frames_);
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
frame->vad_activity_ = activity_;
if (!data_changed) {
return;
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
deinterleaved[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels_;
assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
assert(frame->samples_per_channel_ == output_num_frames_);
// Resample if necessary.
IFChannelBuffer* data_ptr = data_.get();
if (proc_num_frames_ != output_num_frames_) {
if (!output_buffer_) {
output_buffer_.reset(
new IFChannelBuffer(output_num_frames_, num_channels_));
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
assert(frame->_audioChannel == num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
frame->_vadActivity = activity_;
if (num_channels_ == 1) {
if (num_mixed_channels_ == 1) {
memcpy(frame->_payloadData,
channels_[0].data,
sizeof(WebRtc_Word16) * samples_per_channel_);
} else {
// These should point to the same buffer in this case.
assert(data_ == frame->_payloadData);
for (int i = 0; i < num_channels_; ++i) {
output_resamplers_[i]->Resample(
data_->fbuf()->channels()[i], proc_num_frames_,
output_buffer_->fbuf()->channels()[i], output_num_frames_);
}
return;
data_ptr = output_buffer_.get();
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
interleaved_idx += num_channels_;
}
if (frame->num_channels_ == num_channels_) {
Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_,
frame->data_);
} else {
UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_,
frame->num_channels_, frame->data_);
}
}
// TODO(andrew): would be good to support the no-mix case with pointer
// assignment.
// TODO(andrew): handle mixing to multiple channels?
void AudioBuffer::Mix(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
StereoToMono(channels_[0].data,
channels_[1].data,
channels_[0].data,
samples_per_channel_);
num_channels_ = num_mixed_channels;
num_mixed_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
StereoToMono(low_pass_split_data(0),
low_pass_split_data(1),
mixed_low_pass_channels_[0].data,
samples_per_split_channel_);
num_mixed_low_pass_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyLowPassToReference() {
reference_copied_ = true;
for (int i = 0; i < num_channels_; i++) {
memcpy(low_pass_reference_channels_[i].data,
low_pass_split_data(i),
sizeof(WebRtc_Word16) * samples_per_split_channel_);
if (!low_pass_reference_channels_.get() ||
low_pass_reference_channels_->num_channels() != num_channels_) {
low_pass_reference_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_,
num_proc_channels_));
}
for (int i = 0; i < num_proc_channels_; i++) {
memcpy(low_pass_reference_channels_->channels()[i],
split_bands_const(i)[kBand0To8kHz],
low_pass_reference_channels_->num_frames_per_band() *
sizeof(split_bands_const(i)[kBand0To8kHz][0]));
}
}
void AudioBuffer::SplitIntoFrequencyBands() {
splitting_filter_->Analysis(data_.get(), split_data_.get());
}
void AudioBuffer::MergeFrequencyBands() {
splitting_filter_->Synthesis(split_data_.get(), data_.get());
}
} // namespace webrtc