Update audio_processing module

Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1

Update notes:

 * Pull in third party license file

 * Replace .gypi files with BUILD.gn to keep track of what changes
   upstream

 * Bunch of new filse pulled in as dependencies

 * Won't build yet due to changes needed on top of these
This commit is contained in:
Arun Raghavan
2015-10-13 17:25:22 +05:30
parent 5ae7a5d6cd
commit 753eada3aa
324 changed files with 52533 additions and 16117 deletions

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#include <cmath>
namespace webrtc {
// Coordinates in meters.
template<typename T>
struct CartesianPoint {
CartesianPoint(T x, T y, T z) {
c[0] = x;
c[1] = y;
c[2] = z;
}
T x() const { return c[0]; }
T y() const { return c[1]; }
T z() const { return c[2]; }
T c[3];
};
using Point = CartesianPoint<float>;
template<typename T>
float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
(a.y() - b.y()) * (a.y() - b.y()) +
(a.z() - b.z()) * (a.z() - b.z()));
}
template <typename T>
struct SphericalPoint {
SphericalPoint(T azimuth, T elevation, T radius) {
s[0] = azimuth;
s[1] = elevation;
s[2] = radius;
}
T azimuth() const { return s[0]; }
T elevation() const { return s[1]; }
T distance() const { return s[2]; }
T s[3];
};
using SphericalPointf = SphericalPoint<float>;
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
namespace webrtc {
template<typename T>
class Beamformer {
public:
virtual ~Beamformer() {}
// Process one time-domain chunk of audio. The audio is expected to be split
// into frequency bands inside the ChannelBuffer. The number of frames and
// channels must correspond to the constructor parameters. The same
// ChannelBuffer can be passed in as |input| and |output|.
virtual void ProcessChunk(const ChannelBuffer<T>& input,
ChannelBuffer<T>* output) = 0;
// Sample rate corresponds to the lower band.
// Needs to be called before the the Beamformer can be used.
virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
// Indicates whether a given point is inside of the beam.
virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }
// Returns true if the current data contains the target signal.
// Which signals are considered "targets" is implementation dependent.
virtual bool is_target_present() = 0;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
#include <complex>
#include "webrtc/base/checks.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"
namespace webrtc {
using std::complex;
// An extension of Matrix for operations that only work on a complex type.
template <typename T>
class ComplexMatrix : public Matrix<complex<T> > {
public:
ComplexMatrix() : Matrix<complex<T> >() {}
ComplexMatrix(int num_rows, int num_columns)
: Matrix<complex<T> >(num_rows, num_columns) {}
ComplexMatrix(const complex<T>* data, int num_rows, int num_columns)
: Matrix<complex<T> >(data, num_rows, num_columns) {}
// Complex Matrix operations.
ComplexMatrix& PointwiseConjugate() {
complex<T>* const data = this->data();
size_t size = this->num_rows() * this->num_columns();
for (size_t i = 0; i < size; ++i) {
data[i] = conj(data[i]);
}
return *this;
}
ComplexMatrix& PointwiseConjugate(const ComplexMatrix& operand) {
this->CopyFrom(operand);
return PointwiseConjugate();
}
ComplexMatrix& ConjugateTranspose() {
this->CopyDataToScratch();
int num_rows = this->num_rows();
this->SetNumRows(this->num_columns());
this->SetNumColumns(num_rows);
this->Resize();
return ConjugateTranspose(this->scratch_elements());
}
ComplexMatrix& ConjugateTranspose(const ComplexMatrix& operand) {
RTC_CHECK_EQ(operand.num_rows(), this->num_columns());
RTC_CHECK_EQ(operand.num_columns(), this->num_rows());
return ConjugateTranspose(operand.elements());
}
ComplexMatrix& ZeroImag() {
complex<T>* const data = this->data();
size_t size = this->num_rows() * this->num_columns();
for (size_t i = 0; i < size; ++i) {
data[i] = complex<T>(data[i].real(), 0);
}
return *this;
}
ComplexMatrix& ZeroImag(const ComplexMatrix& operand) {
this->CopyFrom(operand);
return ZeroImag();
}
private:
ComplexMatrix& ConjugateTranspose(const complex<T>* const* src) {
complex<T>* const* elements = this->elements();
for (int i = 0; i < this->num_rows(); ++i) {
for (int j = 0; j < this->num_columns(); ++j) {
elements[i][j] = conj(src[j][i]);
}
}
return *this;
}
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
#include <cmath>
namespace {
float BesselJ0(float x) {
#if WEBRTC_WIN
return _j0(x);
#else
return j0(x);
#endif
}
} // namespace
namespace webrtc {
void CovarianceMatrixGenerator::UniformCovarianceMatrix(
float wave_number,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
complex<float>* const* mat_els = mat->elements();
for (size_t i = 0; i < geometry.size(); ++i) {
for (size_t j = 0; j < geometry.size(); ++j) {
if (wave_number > 0.f) {
mat_els[i][j] =
BesselJ0(wave_number * Distance(geometry[i], geometry[j]));
} else {
mat_els[i][j] = i == j ? 1.f : 0.f;
}
}
}
}
void CovarianceMatrixGenerator::AngledCovarianceMatrix(
float sound_speed,
float angle,
size_t frequency_bin,
size_t fft_size,
size_t num_freq_bins,
int sample_rate,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
ComplexMatrix<float> interf_cov_vector(1, geometry.size());
ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1);
PhaseAlignmentMasks(frequency_bin,
fft_size,
sample_rate,
sound_speed,
geometry,
angle,
&interf_cov_vector);
interf_cov_vector_transposed.Transpose(interf_cov_vector);
interf_cov_vector.PointwiseConjugate();
mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);
}
void CovarianceMatrixGenerator::PhaseAlignmentMasks(
size_t frequency_bin,
size_t fft_size,
int sample_rate,
float sound_speed,
const std::vector<Point>& geometry,
float angle,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(1, mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
float freq_in_hertz =
(static_cast<float>(frequency_bin) / fft_size) * sample_rate;
complex<float>* const* mat_els = mat->elements();
for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) {
float distance = std::cos(angle) * geometry[c_ix].x() +
std::sin(angle) * geometry[c_ix].y();
float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;
// Euler's formula for mat[0][c_ix] = e^(j * phase_shift).
mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift));
}
}
} // namespace webrtc

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
namespace webrtc {
// Helper class for Beamformer in charge of generating covariance matrices. For
// each function, the passed-in ComplexMatrix is expected to be of size
// |num_input_channels| x |num_input_channels|.
class CovarianceMatrixGenerator {
public:
// A uniform covariance matrix with a gap at the target location. WARNING:
// The target angle is assumed to be 0.
static void UniformCovarianceMatrix(float wave_number,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat);
// The covariance matrix of a source at the given angle.
static void AngledCovarianceMatrix(float sound_speed,
float angle,
size_t frequency_bin,
size_t fft_size,
size_t num_freq_bins,
int sample_rate,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat);
// Calculates phase shifts that, when applied to a multichannel signal and
// added together, cause constructive interferernce for sources located at
// the given angle.
static void PhaseAlignmentMasks(size_t frequency_bin,
size_t fft_size,
int sample_rate,
float sound_speed,
const std::vector<Point>& geometry,
float angle,
ComplexMatrix<float>* mat);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BF_HELPERS_H_

View File

@ -0,0 +1,368 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
#include <algorithm>
#include <cstring>
#include <string>
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/scoped_ptr.h"
namespace {
// Wrappers to get around the compiler warning resulting from the fact that
// there's no std::sqrt overload for ints. We cast all non-complex types to
// a double for the sqrt method.
template <typename T>
T sqrt_wrapper(T x) {
return sqrt(static_cast<double>(x));
}
template <typename S>
std::complex<S> sqrt_wrapper(std::complex<S> x) {
return sqrt(x);
}
} // namespace
namespace webrtc {
// Matrix is a class for doing standard matrix operations on 2 dimensional
// matrices of any size. Results of matrix operations are stored in the
// calling object. Function overloads exist for both in-place (the calling
// object is used as both an operand and the result) and out-of-place (all
// operands are passed in as parameters) operations. If operand dimensions
// mismatch, the program crashes. Out-of-place operations change the size of
// the calling object, if necessary, before operating.
//
// 'In-place' operations that inherently change the size of the matrix (eg.
// Transpose, Multiply on different-sized matrices) must make temporary copies
// (|scratch_elements_| and |scratch_data_|) of existing data to complete the
// operations.
//
// The data is stored contiguously. Data can be accessed internally as a flat
// array, |data_|, or as an array of row pointers, |elements_|, but is
// available to users only as an array of row pointers through |elements()|.
// Memory for storage is allocated when a matrix is resized only if the new
// size overflows capacity. Memory needed temporarily for any operations is
// similarly resized only if the new size overflows capacity.
//
// If you pass in storage through the ctor, that storage is copied into the
// matrix. TODO(claguna): albeit tricky, allow for data to be referenced
// instead of copied, and owned by the user.
template <typename T>
class Matrix {
public:
Matrix() : num_rows_(0), num_columns_(0) {}
// Allocates space for the elements and initializes all values to zero.
Matrix(int num_rows, int num_columns)
: num_rows_(num_rows), num_columns_(num_columns) {
Resize();
scratch_data_.resize(num_rows_ * num_columns_);
scratch_elements_.resize(num_rows_);
}
// Copies |data| into the new Matrix.
Matrix(const T* data, int num_rows, int num_columns)
: num_rows_(0), num_columns_(0) {
CopyFrom(data, num_rows, num_columns);
scratch_data_.resize(num_rows_ * num_columns_);
scratch_elements_.resize(num_rows_);
}
virtual ~Matrix() {}
// Deep copy an existing matrix.
void CopyFrom(const Matrix& other) {
CopyFrom(&other.data_[0], other.num_rows_, other.num_columns_);
}
// Copy |data| into the Matrix. The current data is lost.
void CopyFrom(const T* const data, int num_rows, int num_columns) {
Resize(num_rows, num_columns);
memcpy(&data_[0], data, num_rows_ * num_columns_ * sizeof(data_[0]));
}
Matrix& CopyFromColumn(const T* const* src,
size_t column_index,
int num_rows) {
Resize(1, num_rows);
for (int i = 0; i < num_columns_; ++i) {
data_[i] = src[i][column_index];
}
return *this;
}
void Resize(int num_rows, int num_columns) {
if (num_rows != num_rows_ || num_columns != num_columns_) {
num_rows_ = num_rows;
num_columns_ = num_columns;
Resize();
}
}
// Accessors and mutators.
int num_rows() const { return num_rows_; }
int num_columns() const { return num_columns_; }
T* const* elements() { return &elements_[0]; }
const T* const* elements() const { return &elements_[0]; }
T Trace() {
RTC_CHECK_EQ(num_rows_, num_columns_);
T trace = 0;
for (int i = 0; i < num_rows_; ++i) {
trace += elements_[i][i];
}
return trace;
}
// Matrix Operations. Returns *this to support method chaining.
Matrix& Transpose() {
CopyDataToScratch();
Resize(num_columns_, num_rows_);
return Transpose(scratch_elements());
}
Matrix& Transpose(const Matrix& operand) {
RTC_CHECK_EQ(operand.num_rows_, num_columns_);
RTC_CHECK_EQ(operand.num_columns_, num_rows_);
return Transpose(operand.elements());
}
template <typename S>
Matrix& Scale(const S& scalar) {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= scalar;
}
return *this;
}
template <typename S>
Matrix& Scale(const Matrix& operand, const S& scalar) {
CopyFrom(operand);
return Scale(scalar);
}
Matrix& Add(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] += operand.data_[i];
}
return *this;
}
Matrix& Add(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return Add(rhs);
}
Matrix& Subtract(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] -= operand.data_[i];
}
return *this;
}
Matrix& Subtract(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return Subtract(rhs);
}
Matrix& PointwiseMultiply(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= operand.data_[i];
}
return *this;
}
Matrix& PointwiseMultiply(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return PointwiseMultiply(rhs);
}
Matrix& PointwiseDivide(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] /= operand.data_[i];
}
return *this;
}
Matrix& PointwiseDivide(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return PointwiseDivide(rhs);
}
Matrix& PointwiseSquareRoot() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = sqrt_wrapper(data_[i]);
}
return *this;
}
Matrix& PointwiseSquareRoot(const Matrix& operand) {
CopyFrom(operand);
return PointwiseSquareRoot();
}
Matrix& PointwiseAbsoluteValue() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = abs(data_[i]);
}
return *this;
}
Matrix& PointwiseAbsoluteValue(const Matrix& operand) {
CopyFrom(operand);
return PointwiseAbsoluteValue();
}
Matrix& PointwiseSquare() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= data_[i];
}
return *this;
}
Matrix& PointwiseSquare(const Matrix& operand) {
CopyFrom(operand);
return PointwiseSquare();
}
Matrix& Multiply(const Matrix& lhs, const Matrix& rhs) {
RTC_CHECK_EQ(lhs.num_columns_, rhs.num_rows_);
RTC_CHECK_EQ(num_rows_, lhs.num_rows_);
RTC_CHECK_EQ(num_columns_, rhs.num_columns_);
return Multiply(lhs.elements(), rhs.num_rows_, rhs.elements());
}
Matrix& Multiply(const Matrix& rhs) {
RTC_CHECK_EQ(num_columns_, rhs.num_rows_);
CopyDataToScratch();
Resize(num_rows_, rhs.num_columns_);
return Multiply(scratch_elements(), rhs.num_rows_, rhs.elements());
}
std::string ToString() const {
std::ostringstream ss;
ss << std::endl << "Matrix" << std::endl;
for (int i = 0; i < num_rows_; ++i) {
for (int j = 0; j < num_columns_; ++j) {
ss << elements_[i][j] << " ";
}
ss << std::endl;
}
ss << std::endl;
return ss.str();
}
protected:
void SetNumRows(const int num_rows) { num_rows_ = num_rows; }
void SetNumColumns(const int num_columns) { num_columns_ = num_columns; }
T* data() { return &data_[0]; }
const T* data() const { return &data_[0]; }
const T* const* scratch_elements() const { return &scratch_elements_[0]; }
// Resize the matrix. If an increase in capacity is required, the current
// data is lost.
void Resize() {
size_t size = num_rows_ * num_columns_;
data_.resize(size);
elements_.resize(num_rows_);
for (int i = 0; i < num_rows_; ++i) {
elements_[i] = &data_[i * num_columns_];
}
}
// Copies data_ into scratch_data_ and updates scratch_elements_ accordingly.
void CopyDataToScratch() {
scratch_data_ = data_;
scratch_elements_.resize(num_rows_);
for (int i = 0; i < num_rows_; ++i) {
scratch_elements_[i] = &scratch_data_[i * num_columns_];
}
}
private:
int num_rows_;
int num_columns_;
std::vector<T> data_;
std::vector<T*> elements_;
// Stores temporary copies of |data_| and |elements_| for in-place operations
// where referring to original data is necessary.
std::vector<T> scratch_data_;
std::vector<T*> scratch_elements_;
// Helpers for Transpose and Multiply operations that unify in-place and
// out-of-place solutions.
Matrix& Transpose(const T* const* src) {
for (int i = 0; i < num_rows_; ++i) {
for (int j = 0; j < num_columns_; ++j) {
elements_[i][j] = src[j][i];
}
}
return *this;
}
Matrix& Multiply(const T* const* lhs, int num_rows_rhs, const T* const* rhs) {
for (int row = 0; row < num_rows_; ++row) {
for (int col = 0; col < num_columns_; ++col) {
T cur_element = 0;
for (int i = 0; i < num_rows_rhs; ++i) {
cur_element += lhs[row][i] * rhs[i][col];
}
elements_[row][col] = cur_element;
}
}
return *this;
}
RTC_DISALLOW_COPY_AND_ASSIGN(Matrix);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"
namespace {
const float kTolerance = 0.001f;
}
namespace webrtc {
using std::complex;
// Functions used in both matrix_unittest and complex_matrix_unittest.
class MatrixTestHelpers {
public:
template <typename T>
static void ValidateMatrixEquality(const Matrix<T>& expected,
const Matrix<T>& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const T* const* expected_elements = expected.elements();
const T* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_EQ(expected_elements[i][j], actual_elements[i][j]);
}
}
}
static void ValidateMatrixEqualityFloat(const Matrix<float>& expected,
const Matrix<float>& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const float* const* expected_elements = expected.elements();
const float* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j], actual_elements[i][j], kTolerance);
}
}
}
static void ValidateMatrixEqualityComplexFloat(
const Matrix<complex<float> >& expected,
const Matrix<complex<float> >& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const complex<float>* const* expected_elements = expected.elements();
const complex<float>* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j].real(),
actual_elements[i][j].real(),
kTolerance);
EXPECT_NEAR(expected_elements[i][j].imag(),
actual_elements[i][j].imag(),
kTolerance);
}
}
}
static void ValidateMatrixNearEqualityComplexFloat(
const Matrix<complex<float> >& expected,
const Matrix<complex<float> >& actual,
float tolerance) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const complex<float>* const* expected_elements = expected.elements();
const complex<float>* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j].real(),
actual_elements[i][j].real(),
tolerance);
EXPECT_NEAR(expected_elements[i][j].imag(),
actual_elements[i][j].imag(),
tolerance);
}
}
}
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_

View File

@ -0,0 +1,516 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>
#include "webrtc/base/arraysize.h"
#include "webrtc/common_audio/window_generator.h"
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
namespace webrtc {
namespace {
// Alpha for the Kaiser Bessel Derived window.
const float kKbdAlpha = 1.5f;
// The minimum value a post-processing mask can take.
const float kMaskMinimum = 0.01f;
const float kSpeedOfSoundMeterSeconds = 343;
// For both target and interference angles, PI / 2 is perpendicular to the
// microphone array, facing forwards. The positive direction goes
// counterclockwise.
// The angle at which we amplify sound.
const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
// The angle at which we suppress sound. Suppression is symmetric around PI / 2
// radians, so sound is suppressed at both +|kInterfAngleRadians| and
// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
// suppress sound coming from close angles as well.
const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
// When calculating the interference covariance matrix, this is the weight for
// the weighted average between the uniform covariance matrix and the angled
// covariance matrix.
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
const float kBalance = 0.4f;
const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;
// TODO(claguna): need comment here.
const float kBeamwidthConstant = 0.00002f;
// Alpha coefficients for mask smoothing.
const float kMaskTimeSmoothAlpha = 0.2f;
const float kMaskFrequencySmoothAlpha = 0.6f;
// The average mask is computed from masks in this mid-frequency range. If these
// ranges are changed |kMaskQuantile| might need to be adjusted.
const int kLowMeanStartHz = 200;
const int kLowMeanEndHz = 400;
const int kHighMeanStartHz = 3000;
const int kHighMeanEndHz = 5000;
// Quantile of mask values which is used to estimate target presence.
const float kMaskQuantile = 0.7f;
// Mask threshold over which the data is considered signal and not interference.
const float kMaskTargetThreshold = 0.3f;
// Time in seconds after which the data is considered interference if the mask
// does not pass |kMaskTargetThreshold|.
const float kHoldTargetSeconds = 0.25f;
// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
// used; to accomplish this, we compute both multiplications in the same loop.
// The returned norm is clamped to be non-negative.
float Norm(const ComplexMatrix<float>& mat,
const ComplexMatrix<float>& norm_mat) {
RTC_CHECK_EQ(norm_mat.num_rows(), 1);
RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns());
complex<float> first_product = complex<float>(0.f, 0.f);
complex<float> second_product = complex<float>(0.f, 0.f);
const complex<float>* const* mat_els = mat.elements();
const complex<float>* const* norm_mat_els = norm_mat.elements();
for (int i = 0; i < norm_mat.num_columns(); ++i) {
for (int j = 0; j < norm_mat.num_columns(); ++j) {
first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
}
second_product += first_product * norm_mat_els[0][i];
first_product = 0.f;
}
return std::max(second_product.real(), 0.f);
}
// Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
const ComplexMatrix<float>& rhs) {
RTC_CHECK_EQ(lhs.num_rows(), 1);
RTC_CHECK_EQ(rhs.num_rows(), 1);
RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns());
const complex<float>* const* lhs_elements = lhs.elements();
const complex<float>* const* rhs_elements = rhs.elements();
complex<float> result = complex<float>(0.f, 0.f);
for (int i = 0; i < lhs.num_columns(); ++i) {
result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
}
return result;
}
// Works for positive numbers only.
size_t Round(float x) {
return static_cast<size_t>(std::floor(x + 0.5f));
}
// Calculates the sum of absolute values of a complex matrix.
float SumAbs(const ComplexMatrix<float>& mat) {
float sum_abs = 0.f;
const complex<float>* const* mat_els = mat.elements();
for (int i = 0; i < mat.num_rows(); ++i) {
for (int j = 0; j < mat.num_columns(); ++j) {
sum_abs += std::abs(mat_els[i][j]);
}
}
return sum_abs;
}
// Calculates the sum of squares of a complex matrix.
float SumSquares(const ComplexMatrix<float>& mat) {
float sum_squares = 0.f;
const complex<float>* const* mat_els = mat.elements();
for (int i = 0; i < mat.num_rows(); ++i) {
for (int j = 0; j < mat.num_columns(); ++j) {
float abs_value = std::abs(mat_els[i][j]);
sum_squares += abs_value * abs_value;
}
}
return sum_squares;
}
// Does |out| = |in|.' * conj(|in|) for row vector |in|.
void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
ComplexMatrix<float>* out) {
RTC_CHECK_EQ(in.num_rows(), 1);
RTC_CHECK_EQ(out->num_rows(), in.num_columns());
RTC_CHECK_EQ(out->num_columns(), in.num_columns());
const complex<float>* in_elements = in.elements()[0];
complex<float>* const* out_elements = out->elements();
for (int i = 0; i < out->num_rows(); ++i) {
for (int j = 0; j < out->num_columns(); ++j) {
out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
}
}
}
std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
for (int dim = 0; dim < 3; ++dim) {
float center = 0.f;
for (size_t i = 0; i < array_geometry.size(); ++i) {
center += array_geometry[i].c[dim];
}
center /= array_geometry.size();
for (size_t i = 0; i < array_geometry.size(); ++i) {
array_geometry[i].c[dim] -= center;
}
}
return array_geometry;
}
} // namespace
// static
const size_t NonlinearBeamformer::kNumFreqBins;
NonlinearBeamformer::NonlinearBeamformer(
const std::vector<Point>& array_geometry)
: num_input_channels_(array_geometry.size()),
array_geometry_(GetCenteredArray(array_geometry)) {
WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
}
void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
chunk_length_ =
static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
sample_rate_hz_ = sample_rate_hz;
low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
// These bin indexes determine the regions over which a mean is taken. This
// is applied as a constant value over the adjacent end "frequency correction"
// regions.
//
// low_mean_start_bin_ high_mean_start_bin_
// v v constant
// |----------------|--------|----------------|-------|----------------|
// constant ^ ^
// low_mean_end_bin_ high_mean_end_bin_
//
RTC_DCHECK_GT(low_mean_start_bin_, 0U);
RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
high_pass_postfilter_mask_ = 1.f;
is_target_present_ = false;
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
lapped_transform_.reset(new LappedTransform(num_input_channels_,
1,
chunk_length_,
window_,
kFftSize,
kFftSize / 2,
this));
for (size_t i = 0; i < kNumFreqBins; ++i) {
time_smooth_mask_[i] = 1.f;
final_mask_[i] = 1.f;
float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
kBeamwidthConstant * wave_numbers_[i] *
wave_numbers_[i];
}
// Initialize all nonadaptive values before looping through the frames.
InitDelaySumMasks();
InitTargetCovMats();
InitInterfCovMats();
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
reflected_rpsiws_[i] =
Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
}
}
void NonlinearBeamformer::InitDelaySumMasks() {
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
kFftSize,
sample_rate_hz_,
kSpeedOfSoundMeterSeconds,
array_geometry_,
kTargetAngleRadians,
&delay_sum_masks_[f_ix]);
complex_f norm_factor = sqrt(
ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
normalized_delay_sum_masks_[f_ix]));
}
}
void NonlinearBeamformer::InitTargetCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
complex_f normalization_factor = target_cov_mats_[i].Trace();
target_cov_mats_[i].Scale(1.f / normalization_factor);
}
}
void NonlinearBeamformer::InitInterfCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
array_geometry_,
&uniform_cov_mat);
CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
kInterfAngleRadians,
i,
kFftSize,
kNumFreqBins,
sample_rate_hz_,
array_geometry_,
&angled_cov_mat);
// Normalize matrices before averaging them.
complex_f normalization_factor = uniform_cov_mat.Trace();
uniform_cov_mat.Scale(1.f / normalization_factor);
normalization_factor = angled_cov_mat.Trace();
angled_cov_mat.Scale(1.f / normalization_factor);
// Average matrices.
uniform_cov_mat.Scale(1 - kBalance);
angled_cov_mat.Scale(kBalance);
interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
}
}
void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) {
RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
float old_high_pass_mask = high_pass_postfilter_mask_;
lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
// Ramp up/down for smoothing. 1 mask per 10ms results in audible
// discontinuities.
const float ramp_increment =
(high_pass_postfilter_mask_ - old_high_pass_mask) /
input.num_frames_per_band();
// Apply delay and sum and post-filter in the time domain. WARNING: only works
// because delay-and-sum is not frequency dependent.
for (size_t i = 1; i < input.num_bands(); ++i) {
float smoothed_mask = old_high_pass_mask;
for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
smoothed_mask += ramp_increment;
// Applying the delay and sum (at zero degrees, this is equivalent to
// averaging).
float sum = 0.f;
for (int k = 0; k < input.num_channels(); ++k) {
sum += input.channels(i)[k][j];
}
output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
}
}
}
bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
// If more than half-beamwidth degrees away from the beam's center,
// you are out of the beam.
return fabs(spherical_point.azimuth() - kTargetAngleRadians) <
kHalfBeamWidthRadians;
}
void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
int num_input_channels,
size_t num_freq_bins,
int num_output_channels,
complex_f* const* output) {
RTC_CHECK_EQ(num_freq_bins, kNumFreqBins);
RTC_CHECK_EQ(num_input_channels, num_input_channels_);
RTC_CHECK_EQ(num_output_channels, 1);
// Calculating the post-filter masks. Note that we need two for each
// frequency bin to account for the positive and negative interferer
// angle.
for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
eig_m_.CopyFromColumn(input, i, num_input_channels_);
float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
if (eig_m_norm_factor != 0.f) {
eig_m_.Scale(1.f / eig_m_norm_factor);
}
float rxim = Norm(target_cov_mats_[i], eig_m_);
float ratio_rxiw_rxim = 0.f;
if (rxim > 0.f) {
ratio_rxiw_rxim = rxiws_[i] / rxim;
}
complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
rmw *= rmw;
float rmw_r = rmw.real();
new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
rpsiws_[i],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
reflected_rpsiws_[i],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
}
ApplyMaskTimeSmoothing();
EstimateTargetPresence();
ApplyLowFrequencyCorrection();
ApplyHighFrequencyCorrection();
ApplyMaskFrequencySmoothing();
ApplyMasks(input, output);
}
float NonlinearBeamformer::CalculatePostfilterMask(
const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmw_r,
float mask_threshold) {
float rpsim = Norm(interf_cov_mat, eig_m_);
// Find lambda.
float ratio = 0.f;
if (rpsim > 0.f) {
ratio = rpsiw / rpsim;
}
float numerator = rmw_r - ratio;
float denominator = ratio_rxiw_rxim - ratio;
float mask = 1.f;
if (denominator > mask_threshold) {
float lambda = numerator / denominator;
mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
}
return mask;
}
void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
complex_f* const* output) {
complex_f* output_channel = output[0];
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
output_channel[f_ix] = complex_f(0.f, 0.f);
const complex_f* delay_sum_mask_els =
normalized_delay_sum_masks_[f_ix].elements()[0];
for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
}
output_channel[f_ix] *= final_mask_[f_ix];
}
}
// Smooth new_mask_ into time_smooth_mask_.
void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
(1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
}
}
// Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
// Smooth over frequency in both directions. The "frequency correction"
// regions have constant value, but we enter them to smooth over the jump
// that exists at the boundary. However, this does mean when smoothing "away"
// from the region that we only need to use the last element.
//
// Upward smoothing:
// low_mean_start_bin_
// v
// |------|------------|------|
// ^------------------>^
//
// Downward smoothing:
// high_mean_end_bin_
// v
// |------|------------|------|
// ^<------------------^
std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
(1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
}
for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) {
final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] +
(1 - kMaskFrequencySmoothAlpha) * final_mask_[i];
}
}
// Apply low frequency correction to time_smooth_mask_.
void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
const float low_frequency_mask =
MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
low_frequency_mask);
}
// Apply high frequency correction to time_smooth_mask_. Update
// high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
high_pass_postfilter_mask_ =
MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
}
// Compute mean over the given range of time_smooth_mask_, [first, last).
float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) {
RTC_DCHECK_GT(last, first);
const float sum = std::accumulate(time_smooth_mask_ + first,
time_smooth_mask_ + last, 0.f);
return sum / (last - first);
}
void NonlinearBeamformer::EstimateTargetPresence() {
const size_t quantile = static_cast<size_t>(
(high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
low_mean_start_bin_);
std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
new_mask_ + high_mean_end_bin_ + 1);
if (new_mask_[quantile] > kMaskTargetThreshold) {
is_target_present_ = true;
interference_blocks_count_ = 0;
} else {
is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
}
}
} // namespace webrtc

View File

@ -0,0 +1,177 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#include <vector>
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
namespace webrtc {
// Enhances sound sources coming directly in front of a uniform linear array
// and suppresses sound sources coming from all other directions. Operates on
// multichannel signals and produces single-channel output.
//
// The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear
// Beamforming Postprocessor" by Bastiaan Kleijn.
//
// TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.
class NonlinearBeamformer
: public Beamformer<float>,
public LappedTransform::Callback {
public:
// At the moment it only accepts uniform linear microphone arrays. Using the
// first microphone as a reference position [0, 0, 0] is a natural choice.
explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);
// Sample rate corresponds to the lower band.
// Needs to be called before the NonlinearBeamformer can be used.
void Initialize(int chunk_size_ms, int sample_rate_hz) override;
// Process one time-domain chunk of audio. The audio is expected to be split
// into frequency bands inside the ChannelBuffer. The number of frames and
// channels must correspond to the constructor parameters. The same
// ChannelBuffer can be passed in as |input| and |output|.
void ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) override;
bool IsInBeam(const SphericalPointf& spherical_point) override;
// After processing each block |is_target_present_| is set to true if the
// target signal es present and to false otherwise. This methods can be called
// to know if the data is target signal or interference and process it
// accordingly.
bool is_target_present() override { return is_target_present_; }
protected:
// Process one frequency-domain block of audio. This is where the fun
// happens. Implements LappedTransform::Callback.
void ProcessAudioBlock(const complex<float>* const* input,
int num_input_channels,
size_t num_freq_bins,
int num_output_channels,
complex<float>* const* output) override;
private:
typedef Matrix<float> MatrixF;
typedef ComplexMatrix<float> ComplexMatrixF;
typedef complex<float> complex_f;
void InitDelaySumMasks();
void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle.
void InitInterfCovMats();
// An implementation of equation 18, which calculates postfilter masks that,
// when applied, minimize the mean-square error of our estimation of the
// desired signal. A sub-task is to calculate lambda, which is solved via
// equation 13.
float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmxi_r,
float mask_threshold);
// Prevents the postfilter masks from degenerating too quickly (a cause of
// musical noise).
void ApplyMaskTimeSmoothing();
void ApplyMaskFrequencySmoothing();
// The postfilter masks are unreliable at low frequencies. Calculates a better
// mask by averaging mid-low frequency values.
void ApplyLowFrequencyCorrection();
// Postfilter masks are also unreliable at high frequencies. Average mid-high
// frequency masks to calculate a single mask per block which can be applied
// in the time-domain. Further, we average these block-masks over a chunk,
// resulting in one postfilter mask per audio chunk. This allows us to skip
// both transforming and blocking the high-frequency signal.
void ApplyHighFrequencyCorrection();
// Compute the means needed for the above frequency correction.
float MaskRangeMean(size_t start_bin, size_t end_bin);
// Applies both sets of masks to |input| and store in |output|.
void ApplyMasks(const complex_f* const* input, complex_f* const* output);
void EstimateTargetPresence();
static const size_t kFftSize = 256;
static const size_t kNumFreqBins = kFftSize / 2 + 1;
// Deals with the fft transform and blocking.
size_t chunk_length_;
rtc::scoped_ptr<LappedTransform> lapped_transform_;
float window_[kFftSize];
// Parameters exposed to the user.
const int num_input_channels_;
int sample_rate_hz_;
const std::vector<Point> array_geometry_;
// Calculated based on user-input and constants in the .cc file.
size_t low_mean_start_bin_;
size_t low_mean_end_bin_;
size_t high_mean_start_bin_;
size_t high_mean_end_bin_;
// Quickly varying mask updated every block.
float new_mask_[kNumFreqBins];
// Time smoothed mask.
float time_smooth_mask_[kNumFreqBins];
// Time and frequency smoothed mask.
float final_mask_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
ComplexMatrixF delay_sum_masks_[kNumFreqBins];
ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF target_cov_mats_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF interf_cov_mats_[kNumFreqBins];
ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];
// Of length |kNumFreqBins|.
float mask_thresholds_[kNumFreqBins];
float wave_numbers_[kNumFreqBins];
// Preallocated for ProcessAudioBlock()
// Of length |kNumFreqBins|.
float rxiws_[kNumFreqBins];
float rpsiws_[kNumFreqBins];
float reflected_rpsiws_[kNumFreqBins];
// The microphone normalization factor.
ComplexMatrixF eig_m_;
// For processing the high-frequency input signal.
float high_pass_postfilter_mask_;
// True when the target signal is present.
bool is_target_present_;
// Number of blocks after which the data is considered interference if the
// mask does not pass |kMaskSignalThreshold|.
size_t hold_target_blocks_;
// Number of blocks since the last mask that passed |kMaskSignalThreshold|.
size_t interference_blocks_count_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_