Update audio_processing module
Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1

Update notes:
* Pull in third party license file
* Replace .gypi files with BUILD.gn to keep track of what changes upstream
* Bunch of new files pulled in as dependencies
* Won't build yet due to changes needed on top of these
webrtc/modules/audio_processing/beamformer/array_util.h (new file, 58 lines)
@@ -0,0 +1,58 @@
/*
 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_

#include <cmath>

namespace webrtc {

// Coordinates in meters.
template<typename T>
struct CartesianPoint {
  CartesianPoint(T x, T y, T z) {
    c[0] = x;
    c[1] = y;
    c[2] = z;
  }
  T x() const { return c[0]; }
  T y() const { return c[1]; }
  T z() const { return c[2]; }
  T c[3];
};

using Point = CartesianPoint<float>;

template<typename T>
float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
  return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
                   (a.y() - b.y()) * (a.y() - b.y()) +
                   (a.z() - b.z()) * (a.z() - b.z()));
}

template <typename T>
struct SphericalPoint {
  SphericalPoint(T azimuth, T elevation, T radius) {
    s[0] = azimuth;
    s[1] = elevation;
    s[2] = radius;
  }
  T azimuth() const { return s[0]; }
  T elevation() const { return s[1]; }
  T distance() const { return s[2]; }
  T s[3];
};

using SphericalPointf = SphericalPoint<float>;

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
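Not part of the commit, but for orientation, a minimal sketch of how these types are meant to be used; the function name ArrayUtilExample and the 5 cm spacing are purely illustrative:

#include "webrtc/modules/audio_processing/beamformer/array_util.h"

void ArrayUtilExample() {
  // Two microphones of a hypothetical linear array, 5 cm apart on the x-axis.
  webrtc::Point mic0(0.f, 0.f, 0.f);
  webrtc::Point mic1(0.05f, 0.f, 0.f);

  // Euclidean spacing in meters; 0.05f for this pair.
  float spacing = webrtc::Distance(mic0, mic1);
  (void)spacing;

  // A source at 1 m, azimuth pi / 2 (array broadside), zero elevation.
  webrtc::SphericalPointf source(3.14159265f / 2.f, 0.f, 1.f);
  (void)source;
}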
webrtc/modules/audio_processing/beamformer/beamformer.h (new file, 45 lines)
@@ -0,0 +1,45 @@
/*
 * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_

#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"

namespace webrtc {

template<typename T>
class Beamformer {
 public:
  virtual ~Beamformer() {}

  // Process one time-domain chunk of audio. The audio is expected to be split
  // into frequency bands inside the ChannelBuffer. The number of frames and
  // channels must correspond to the constructor parameters. The same
  // ChannelBuffer can be passed in as |input| and |output|.
  virtual void ProcessChunk(const ChannelBuffer<T>& input,
                            ChannelBuffer<T>* output) = 0;

  // Sample rate corresponds to the lower band.
  // Needs to be called before the Beamformer can be used.
  virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;

  // Indicates whether a given point is inside of the beam.
  virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }

  // Returns true if the current data contains the target signal.
  // Which signals are considered "targets" is implementation dependent.
  virtual bool is_target_present() = 0;
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
webrtc/modules/audio_processing/beamformer/complex_matrix.h (new file, 97 lines)
@@ -0,0 +1,97 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_

#include <complex>

#include "webrtc/base/checks.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"

namespace webrtc {

using std::complex;

// An extension of Matrix for operations that only work on a complex type.
template <typename T>
class ComplexMatrix : public Matrix<complex<T> > {
 public:
  ComplexMatrix() : Matrix<complex<T> >() {}

  ComplexMatrix(int num_rows, int num_columns)
      : Matrix<complex<T> >(num_rows, num_columns) {}

  ComplexMatrix(const complex<T>* data, int num_rows, int num_columns)
      : Matrix<complex<T> >(data, num_rows, num_columns) {}

  // Complex Matrix operations.
  ComplexMatrix& PointwiseConjugate() {
    complex<T>* const data = this->data();
    size_t size = this->num_rows() * this->num_columns();
    for (size_t i = 0; i < size; ++i) {
      data[i] = conj(data[i]);
    }

    return *this;
  }

  ComplexMatrix& PointwiseConjugate(const ComplexMatrix& operand) {
    this->CopyFrom(operand);
    return PointwiseConjugate();
  }

  ComplexMatrix& ConjugateTranspose() {
    this->CopyDataToScratch();
    int num_rows = this->num_rows();
    this->SetNumRows(this->num_columns());
    this->SetNumColumns(num_rows);
    this->Resize();
    return ConjugateTranspose(this->scratch_elements());
  }

  ComplexMatrix& ConjugateTranspose(const ComplexMatrix& operand) {
    RTC_CHECK_EQ(operand.num_rows(), this->num_columns());
    RTC_CHECK_EQ(operand.num_columns(), this->num_rows());
    return ConjugateTranspose(operand.elements());
  }

  ComplexMatrix& ZeroImag() {
    complex<T>* const data = this->data();
    size_t size = this->num_rows() * this->num_columns();
    for (size_t i = 0; i < size; ++i) {
      data[i] = complex<T>(data[i].real(), 0);
    }

    return *this;
  }

  ComplexMatrix& ZeroImag(const ComplexMatrix& operand) {
    this->CopyFrom(operand);
    return ZeroImag();
  }

 private:
  ComplexMatrix& ConjugateTranspose(const complex<T>* const* src) {
    complex<T>* const* elements = this->elements();
    for (int i = 0; i < this->num_rows(); ++i) {
      for (int j = 0; j < this->num_columns(); ++j) {
        elements[i][j] = conj(src[j][i]);
      }
    }

    return *this;
  }
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
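For orientation (not part of the commit), a small usage sketch of the ComplexMatrix operations above; the function name ComplexMatrixExample and the sample values are illustrative only:

#include <complex>
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

void ComplexMatrixExample() {
  using std::complex;
  // A 1 x 2 row vector [1+2i, 3-1i].
  const complex<float> data[] = {complex<float>(1.f, 2.f),
                                 complex<float>(3.f, -1.f)};
  webrtc::ComplexMatrix<float> row(data, 1, 2);

  // Out-of-place conjugate transpose: |column| must already be 2 x 1 and
  // receives the conjugated entries [1-2i, 3+1i].
  webrtc::ComplexMatrix<float> column(2, 1);
  column.ConjugateTranspose(row);

  // In-place variants resize the calling object and return *this, so calls
  // can be chained.
  row.PointwiseConjugate().ConjugateTranspose();
}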
webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc (new file, 102 lines)
@@ -0,0 +1,102 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#define _USE_MATH_DEFINES

#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

#include <cmath>

namespace {

float BesselJ0(float x) {
#if WEBRTC_WIN
  return _j0(x);
#else
  return j0(x);
#endif
}

}  // namespace

namespace webrtc {

void CovarianceMatrixGenerator::UniformCovarianceMatrix(
    float wave_number,
    const std::vector<Point>& geometry,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  complex<float>* const* mat_els = mat->elements();
  for (size_t i = 0; i < geometry.size(); ++i) {
    for (size_t j = 0; j < geometry.size(); ++j) {
      if (wave_number > 0.f) {
        mat_els[i][j] =
            BesselJ0(wave_number * Distance(geometry[i], geometry[j]));
      } else {
        mat_els[i][j] = i == j ? 1.f : 0.f;
      }
    }
  }
}

void CovarianceMatrixGenerator::AngledCovarianceMatrix(
    float sound_speed,
    float angle,
    size_t frequency_bin,
    size_t fft_size,
    size_t num_freq_bins,
    int sample_rate,
    const std::vector<Point>& geometry,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  ComplexMatrix<float> interf_cov_vector(1, geometry.size());
  ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1);
  PhaseAlignmentMasks(frequency_bin,
                      fft_size,
                      sample_rate,
                      sound_speed,
                      geometry,
                      angle,
                      &interf_cov_vector);
  interf_cov_vector_transposed.Transpose(interf_cov_vector);
  interf_cov_vector.PointwiseConjugate();
  mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);
}

void CovarianceMatrixGenerator::PhaseAlignmentMasks(
    size_t frequency_bin,
    size_t fft_size,
    int sample_rate,
    float sound_speed,
    const std::vector<Point>& geometry,
    float angle,
    ComplexMatrix<float>* mat) {
  RTC_CHECK_EQ(1, mat->num_rows());
  RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());

  float freq_in_hertz =
      (static_cast<float>(frequency_bin) / fft_size) * sample_rate;

  complex<float>* const* mat_els = mat->elements();
  for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) {
    float distance = std::cos(angle) * geometry[c_ix].x() +
                     std::sin(angle) * geometry[c_ix].y();
    float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;

    // Euler's formula for mat[0][c_ix] = e^(j * phase_shift).
    mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift));
  }
}

}  // namespace webrtc
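As a hypothetical sanity check (not in the commit), the sketch below drives PhaseAlignmentMasks for one frequency bin; the array geometry, the bin, the sample rate, and the name PhaseAlignmentExample are assumptions chosen for illustration. Since each element is e^(j * phase_shift), every entry should come out with unit magnitude, differing between microphones only in phase:

#include <cmath>
#include <complex>
#include <vector>
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

void PhaseAlignmentExample() {
  // Hypothetical two-mic linear array on the x-axis, 5 cm apart.
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));

  // One steering row vector for bin 32 of a 256-point FFT at 16 kHz, i.e.
  // (32 / 256) * 16000 = 2000 Hz, steered towards pi / 4 radians.
  webrtc::ComplexMatrix<float> mask(1, static_cast<int>(geometry.size()));
  webrtc::CovarianceMatrixGenerator::PhaseAlignmentMasks(
      32, 256, 16000, 343.f, geometry, 3.14159265f / 4.f, &mask);

  for (int i = 0; i < mask.num_columns(); ++i) {
    float magnitude = std::abs(mask.elements()[0][i]);  // ~1.f for every i.
    (void)magnitude;
  }
}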
webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h (new file, 54 lines)
@@ -0,0 +1,54 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_

#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"

namespace webrtc {

// Helper class for Beamformer in charge of generating covariance matrices. For
// each function, the passed-in ComplexMatrix is expected to be of size
// |num_input_channels| x |num_input_channels|.
class CovarianceMatrixGenerator {
 public:
  // A uniform covariance matrix with a gap at the target location. WARNING:
  // The target angle is assumed to be 0.
  static void UniformCovarianceMatrix(float wave_number,
                                      const std::vector<Point>& geometry,
                                      ComplexMatrix<float>* mat);

  // The covariance matrix of a source at the given angle.
  static void AngledCovarianceMatrix(float sound_speed,
                                     float angle,
                                     size_t frequency_bin,
                                     size_t fft_size,
                                     size_t num_freq_bins,
                                     int sample_rate,
                                     const std::vector<Point>& geometry,
                                     ComplexMatrix<float>* mat);

  // Calculates phase shifts that, when applied to a multichannel signal and
  // added together, cause constructive interference for sources located at
  // the given angle.
  static void PhaseAlignmentMasks(size_t frequency_bin,
                                  size_t fft_size,
                                  int sample_rate,
                                  float sound_speed,
                                  const std::vector<Point>& geometry,
                                  float angle,
                                  ComplexMatrix<float>* mat);
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
webrtc/modules/audio_processing/beamformer/matrix.h (new file, 368 lines)
@@ -0,0 +1,368 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_

#include <algorithm>
#include <cstring>
#include <string>
#include <vector>

#include "webrtc/base/checks.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/scoped_ptr.h"

namespace {

// Wrappers to get around the compiler warning resulting from the fact that
// there's no std::sqrt overload for ints. We cast all non-complex types to
// a double for the sqrt method.
template <typename T>
T sqrt_wrapper(T x) {
  return sqrt(static_cast<double>(x));
}

template <typename S>
std::complex<S> sqrt_wrapper(std::complex<S> x) {
  return sqrt(x);
}
}  // namespace

namespace webrtc {

// Matrix is a class for doing standard matrix operations on 2 dimensional
// matrices of any size. Results of matrix operations are stored in the
// calling object. Function overloads exist for both in-place (the calling
// object is used as both an operand and the result) and out-of-place (all
// operands are passed in as parameters) operations. If operand dimensions
// mismatch, the program crashes. Out-of-place operations change the size of
// the calling object, if necessary, before operating.
//
// 'In-place' operations that inherently change the size of the matrix (e.g.
// Transpose, Multiply on different-sized matrices) must make temporary copies
// (|scratch_elements_| and |scratch_data_|) of existing data to complete the
// operations.
//
// The data is stored contiguously. Data can be accessed internally as a flat
// array, |data_|, or as an array of row pointers, |elements_|, but is
// available to users only as an array of row pointers through |elements()|.
// Memory for storage is allocated when a matrix is resized only if the new
// size overflows capacity. Memory needed temporarily for any operations is
// similarly resized only if the new size overflows capacity.
//
// If you pass in storage through the ctor, that storage is copied into the
// matrix. TODO(claguna): albeit tricky, allow for data to be referenced
// instead of copied, and owned by the user.
template <typename T>
class Matrix {
 public:
  Matrix() : num_rows_(0), num_columns_(0) {}

  // Allocates space for the elements and initializes all values to zero.
  Matrix(int num_rows, int num_columns)
      : num_rows_(num_rows), num_columns_(num_columns) {
    Resize();
    scratch_data_.resize(num_rows_ * num_columns_);
    scratch_elements_.resize(num_rows_);
  }

  // Copies |data| into the new Matrix.
  Matrix(const T* data, int num_rows, int num_columns)
      : num_rows_(0), num_columns_(0) {
    CopyFrom(data, num_rows, num_columns);
    scratch_data_.resize(num_rows_ * num_columns_);
    scratch_elements_.resize(num_rows_);
  }

  virtual ~Matrix() {}

  // Deep copy an existing matrix.
  void CopyFrom(const Matrix& other) {
    CopyFrom(&other.data_[0], other.num_rows_, other.num_columns_);
  }

  // Copy |data| into the Matrix. The current data is lost.
  void CopyFrom(const T* const data, int num_rows, int num_columns) {
    Resize(num_rows, num_columns);
    memcpy(&data_[0], data, num_rows_ * num_columns_ * sizeof(data_[0]));
  }

  Matrix& CopyFromColumn(const T* const* src,
                         size_t column_index,
                         int num_rows) {
    Resize(1, num_rows);
    for (int i = 0; i < num_columns_; ++i) {
      data_[i] = src[i][column_index];
    }

    return *this;
  }

  void Resize(int num_rows, int num_columns) {
    if (num_rows != num_rows_ || num_columns != num_columns_) {
      num_rows_ = num_rows;
      num_columns_ = num_columns;
      Resize();
    }
  }

  // Accessors and mutators.
  int num_rows() const { return num_rows_; }
  int num_columns() const { return num_columns_; }
  T* const* elements() { return &elements_[0]; }
  const T* const* elements() const { return &elements_[0]; }

  T Trace() {
    RTC_CHECK_EQ(num_rows_, num_columns_);

    T trace = 0;
    for (int i = 0; i < num_rows_; ++i) {
      trace += elements_[i][i];
    }
    return trace;
  }

  // Matrix Operations. Returns *this to support method chaining.
  Matrix& Transpose() {
    CopyDataToScratch();
    Resize(num_columns_, num_rows_);
    return Transpose(scratch_elements());
  }

  Matrix& Transpose(const Matrix& operand) {
    RTC_CHECK_EQ(operand.num_rows_, num_columns_);
    RTC_CHECK_EQ(operand.num_columns_, num_rows_);

    return Transpose(operand.elements());
  }

  template <typename S>
  Matrix& Scale(const S& scalar) {
    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] *= scalar;
    }

    return *this;
  }

  template <typename S>
  Matrix& Scale(const Matrix& operand, const S& scalar) {
    CopyFrom(operand);
    return Scale(scalar);
  }

  Matrix& Add(const Matrix& operand) {
    RTC_CHECK_EQ(num_rows_, operand.num_rows_);
    RTC_CHECK_EQ(num_columns_, operand.num_columns_);

    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] += operand.data_[i];
    }

    return *this;
  }

  Matrix& Add(const Matrix& lhs, const Matrix& rhs) {
    CopyFrom(lhs);
    return Add(rhs);
  }

  Matrix& Subtract(const Matrix& operand) {
    RTC_CHECK_EQ(num_rows_, operand.num_rows_);
    RTC_CHECK_EQ(num_columns_, operand.num_columns_);

    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] -= operand.data_[i];
    }

    return *this;
  }

  Matrix& Subtract(const Matrix& lhs, const Matrix& rhs) {
    CopyFrom(lhs);
    return Subtract(rhs);
  }

  Matrix& PointwiseMultiply(const Matrix& operand) {
    RTC_CHECK_EQ(num_rows_, operand.num_rows_);
    RTC_CHECK_EQ(num_columns_, operand.num_columns_);

    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] *= operand.data_[i];
    }

    return *this;
  }

  Matrix& PointwiseMultiply(const Matrix& lhs, const Matrix& rhs) {
    CopyFrom(lhs);
    return PointwiseMultiply(rhs);
  }

  Matrix& PointwiseDivide(const Matrix& operand) {
    RTC_CHECK_EQ(num_rows_, operand.num_rows_);
    RTC_CHECK_EQ(num_columns_, operand.num_columns_);

    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] /= operand.data_[i];
    }

    return *this;
  }

  Matrix& PointwiseDivide(const Matrix& lhs, const Matrix& rhs) {
    CopyFrom(lhs);
    return PointwiseDivide(rhs);
  }

  Matrix& PointwiseSquareRoot() {
    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] = sqrt_wrapper(data_[i]);
    }

    return *this;
  }

  Matrix& PointwiseSquareRoot(const Matrix& operand) {
    CopyFrom(operand);
    return PointwiseSquareRoot();
  }

  Matrix& PointwiseAbsoluteValue() {
    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] = abs(data_[i]);
    }

    return *this;
  }

  Matrix& PointwiseAbsoluteValue(const Matrix& operand) {
    CopyFrom(operand);
    return PointwiseAbsoluteValue();
  }

  Matrix& PointwiseSquare() {
    for (size_t i = 0; i < data_.size(); ++i) {
      data_[i] *= data_[i];
    }

    return *this;
  }

  Matrix& PointwiseSquare(const Matrix& operand) {
    CopyFrom(operand);
    return PointwiseSquare();
  }

  Matrix& Multiply(const Matrix& lhs, const Matrix& rhs) {
    RTC_CHECK_EQ(lhs.num_columns_, rhs.num_rows_);
    RTC_CHECK_EQ(num_rows_, lhs.num_rows_);
    RTC_CHECK_EQ(num_columns_, rhs.num_columns_);

    return Multiply(lhs.elements(), rhs.num_rows_, rhs.elements());
  }

  Matrix& Multiply(const Matrix& rhs) {
    RTC_CHECK_EQ(num_columns_, rhs.num_rows_);

    CopyDataToScratch();
    Resize(num_rows_, rhs.num_columns_);
    return Multiply(scratch_elements(), rhs.num_rows_, rhs.elements());
  }

  std::string ToString() const {
    std::ostringstream ss;
    ss << std::endl << "Matrix" << std::endl;

    for (int i = 0; i < num_rows_; ++i) {
      for (int j = 0; j < num_columns_; ++j) {
        ss << elements_[i][j] << " ";
      }
      ss << std::endl;
    }
    ss << std::endl;

    return ss.str();
  }

 protected:
  void SetNumRows(const int num_rows) { num_rows_ = num_rows; }
  void SetNumColumns(const int num_columns) { num_columns_ = num_columns; }
  T* data() { return &data_[0]; }
  const T* data() const { return &data_[0]; }
  const T* const* scratch_elements() const { return &scratch_elements_[0]; }

  // Resize the matrix. If an increase in capacity is required, the current
  // data is lost.
  void Resize() {
    size_t size = num_rows_ * num_columns_;
    data_.resize(size);
    elements_.resize(num_rows_);

    for (int i = 0; i < num_rows_; ++i) {
      elements_[i] = &data_[i * num_columns_];
    }
  }

  // Copies data_ into scratch_data_ and updates scratch_elements_ accordingly.
  void CopyDataToScratch() {
    scratch_data_ = data_;
    scratch_elements_.resize(num_rows_);

    for (int i = 0; i < num_rows_; ++i) {
      scratch_elements_[i] = &scratch_data_[i * num_columns_];
    }
  }

 private:
  int num_rows_;
  int num_columns_;
  std::vector<T> data_;
  std::vector<T*> elements_;

  // Stores temporary copies of |data_| and |elements_| for in-place operations
  // where referring to original data is necessary.
  std::vector<T> scratch_data_;
  std::vector<T*> scratch_elements_;

  // Helpers for Transpose and Multiply operations that unify in-place and
  // out-of-place solutions.
  Matrix& Transpose(const T* const* src) {
    for (int i = 0; i < num_rows_; ++i) {
      for (int j = 0; j < num_columns_; ++j) {
        elements_[i][j] = src[j][i];
      }
    }

    return *this;
  }

  Matrix& Multiply(const T* const* lhs, int num_rows_rhs, const T* const* rhs) {
    for (int row = 0; row < num_rows_; ++row) {
      for (int col = 0; col < num_columns_; ++col) {
        T cur_element = 0;
        for (int i = 0; i < num_rows_rhs; ++i) {
          cur_element += lhs[row][i] * rhs[i][col];
        }

        elements_[row][col] = cur_element;
      }
    }

    return *this;
  }

  RTC_DISALLOW_COPY_AND_ASSIGN(Matrix);
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
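For orientation (not part of the commit), a minimal usage sketch of the Matrix API described in the class comment; the function name MatrixExample and the sample values are illustrative:

#include "webrtc/modules/audio_processing/beamformer/matrix.h"

void MatrixExample() {
  // A 2 x 3 matrix initialized from a flat, row-major array.
  const float data[] = {1.f, 2.f, 3.f,
                        4.f, 5.f, 6.f};
  webrtc::Matrix<float> a(data, 2, 3);

  // Out-of-place transpose into a matrix that already has the transposed
  // shape (3 x 2); the dimensions are RTC_CHECKed.
  webrtc::Matrix<float> b(3, 2);
  b.Transpose(a);

  // Operations return *this, so they can be chained; |c| ends up holding
  // (a * b) scaled by 0.5.
  webrtc::Matrix<float> c(2, 2);
  c.Multiply(a, b).Scale(0.5f);

  // elements() exposes the result as row pointers: c.elements()[0][0], etc.
}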
webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h (new file, 102 lines)
@@ -0,0 +1,102 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_

#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"

namespace {
const float kTolerance = 0.001f;
}

namespace webrtc {

using std::complex;

// Functions used in both matrix_unittest and complex_matrix_unittest.
class MatrixTestHelpers {
 public:
  template <typename T>
  static void ValidateMatrixEquality(const Matrix<T>& expected,
                                     const Matrix<T>& actual) {
    EXPECT_EQ(expected.num_rows(), actual.num_rows());
    EXPECT_EQ(expected.num_columns(), actual.num_columns());

    const T* const* expected_elements = expected.elements();
    const T* const* actual_elements = actual.elements();
    for (int i = 0; i < expected.num_rows(); ++i) {
      for (int j = 0; j < expected.num_columns(); ++j) {
        EXPECT_EQ(expected_elements[i][j], actual_elements[i][j]);
      }
    }
  }

  static void ValidateMatrixEqualityFloat(const Matrix<float>& expected,
                                          const Matrix<float>& actual) {
    EXPECT_EQ(expected.num_rows(), actual.num_rows());
    EXPECT_EQ(expected.num_columns(), actual.num_columns());

    const float* const* expected_elements = expected.elements();
    const float* const* actual_elements = actual.elements();
    for (int i = 0; i < expected.num_rows(); ++i) {
      for (int j = 0; j < expected.num_columns(); ++j) {
        EXPECT_NEAR(expected_elements[i][j], actual_elements[i][j], kTolerance);
      }
    }
  }

  static void ValidateMatrixEqualityComplexFloat(
      const Matrix<complex<float> >& expected,
      const Matrix<complex<float> >& actual) {
    EXPECT_EQ(expected.num_rows(), actual.num_rows());
    EXPECT_EQ(expected.num_columns(), actual.num_columns());

    const complex<float>* const* expected_elements = expected.elements();
    const complex<float>* const* actual_elements = actual.elements();
    for (int i = 0; i < expected.num_rows(); ++i) {
      for (int j = 0; j < expected.num_columns(); ++j) {
        EXPECT_NEAR(expected_elements[i][j].real(),
                    actual_elements[i][j].real(),
                    kTolerance);
        EXPECT_NEAR(expected_elements[i][j].imag(),
                    actual_elements[i][j].imag(),
                    kTolerance);
      }
    }
  }

  static void ValidateMatrixNearEqualityComplexFloat(
      const Matrix<complex<float> >& expected,
      const Matrix<complex<float> >& actual,
      float tolerance) {
    EXPECT_EQ(expected.num_rows(), actual.num_rows());
    EXPECT_EQ(expected.num_columns(), actual.num_columns());

    const complex<float>* const* expected_elements = expected.elements();
    const complex<float>* const* actual_elements = actual.elements();
    for (int i = 0; i < expected.num_rows(); ++i) {
      for (int j = 0; j < expected.num_columns(); ++j) {
        EXPECT_NEAR(expected_elements[i][j].real(),
                    actual_elements[i][j].real(),
                    tolerance);
        EXPECT_NEAR(expected_elements[i][j].imag(),
                    actual_elements[i][j].imag(),
                    tolerance);
      }
    }
  }
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc (new file, 516 lines)
@@ -0,0 +1,516 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#define _USE_MATH_DEFINES

#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>

#include "webrtc/base/arraysize.h"
#include "webrtc/common_audio/window_generator.h"
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

namespace webrtc {
namespace {

// Alpha for the Kaiser Bessel Derived window.
const float kKbdAlpha = 1.5f;

// The minimum value a post-processing mask can take.
const float kMaskMinimum = 0.01f;

const float kSpeedOfSoundMeterSeconds = 343;

// For both target and interference angles, PI / 2 is perpendicular to the
// microphone array, facing forwards. The positive direction goes
// counterclockwise.
// The angle at which we amplify sound.
const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;

// The angle at which we suppress sound. Suppression is symmetric around PI / 2
// radians, so sound is suppressed at both +|kInterfAngleRadians| and
// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
// suppress sound coming from close angles as well.
const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;

// When calculating the interference covariance matrix, this is the weight for
// the weighted average between the uniform covariance matrix and the angled
// covariance matrix.
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
const float kBalance = 0.4f;

const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;

// TODO(claguna): need comment here.
const float kBeamwidthConstant = 0.00002f;

// Alpha coefficients for mask smoothing.
const float kMaskTimeSmoothAlpha = 0.2f;
const float kMaskFrequencySmoothAlpha = 0.6f;

// The average mask is computed from masks in this mid-frequency range. If these
// ranges are changed |kMaskQuantile| might need to be adjusted.
const int kLowMeanStartHz = 200;
const int kLowMeanEndHz = 400;

const int kHighMeanStartHz = 3000;
const int kHighMeanEndHz = 5000;

// Quantile of mask values which is used to estimate target presence.
const float kMaskQuantile = 0.7f;
// Mask threshold over which the data is considered signal and not interference.
const float kMaskTargetThreshold = 0.3f;
// Time in seconds after which the data is considered interference if the mask
// does not pass |kMaskTargetThreshold|.
const float kHoldTargetSeconds = 0.25f;

// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
// used; to accomplish this, we compute both multiplications in the same loop.
// The returned norm is clamped to be non-negative.
float Norm(const ComplexMatrix<float>& mat,
           const ComplexMatrix<float>& norm_mat) {
  RTC_CHECK_EQ(norm_mat.num_rows(), 1);
  RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
  RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns());

  complex<float> first_product = complex<float>(0.f, 0.f);
  complex<float> second_product = complex<float>(0.f, 0.f);

  const complex<float>* const* mat_els = mat.elements();
  const complex<float>* const* norm_mat_els = norm_mat.elements();

  for (int i = 0; i < norm_mat.num_columns(); ++i) {
    for (int j = 0; j < norm_mat.num_columns(); ++j) {
      first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
    }
    second_product += first_product * norm_mat_els[0][i];
    first_product = 0.f;
  }
  return std::max(second_product.real(), 0.f);
}

// Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
                                   const ComplexMatrix<float>& rhs) {
  RTC_CHECK_EQ(lhs.num_rows(), 1);
  RTC_CHECK_EQ(rhs.num_rows(), 1);
  RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns());

  const complex<float>* const* lhs_elements = lhs.elements();
  const complex<float>* const* rhs_elements = rhs.elements();

  complex<float> result = complex<float>(0.f, 0.f);
  for (int i = 0; i < lhs.num_columns(); ++i) {
    result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
  }

  return result;
}

// Works for positive numbers only.
size_t Round(float x) {
  return static_cast<size_t>(std::floor(x + 0.5f));
}

// Calculates the sum of absolute values of a complex matrix.
float SumAbs(const ComplexMatrix<float>& mat) {
  float sum_abs = 0.f;
  const complex<float>* const* mat_els = mat.elements();
  for (int i = 0; i < mat.num_rows(); ++i) {
    for (int j = 0; j < mat.num_columns(); ++j) {
      sum_abs += std::abs(mat_els[i][j]);
    }
  }
  return sum_abs;
}

// Calculates the sum of squares of a complex matrix.
float SumSquares(const ComplexMatrix<float>& mat) {
  float sum_squares = 0.f;
  const complex<float>* const* mat_els = mat.elements();
  for (int i = 0; i < mat.num_rows(); ++i) {
    for (int j = 0; j < mat.num_columns(); ++j) {
      float abs_value = std::abs(mat_els[i][j]);
      sum_squares += abs_value * abs_value;
    }
  }
  return sum_squares;
}

// Does |out| = |in|.' * conj(|in|) for row vector |in|.
void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
                                 ComplexMatrix<float>* out) {
  RTC_CHECK_EQ(in.num_rows(), 1);
  RTC_CHECK_EQ(out->num_rows(), in.num_columns());
  RTC_CHECK_EQ(out->num_columns(), in.num_columns());
  const complex<float>* in_elements = in.elements()[0];
  complex<float>* const* out_elements = out->elements();
  for (int i = 0; i < out->num_rows(); ++i) {
    for (int j = 0; j < out->num_columns(); ++j) {
      out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
    }
  }
}

std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
  for (int dim = 0; dim < 3; ++dim) {
    float center = 0.f;
    for (size_t i = 0; i < array_geometry.size(); ++i) {
      center += array_geometry[i].c[dim];
    }
    center /= array_geometry.size();
    for (size_t i = 0; i < array_geometry.size(); ++i) {
      array_geometry[i].c[dim] -= center;
    }
  }
  return array_geometry;
}

}  // namespace

// static
const size_t NonlinearBeamformer::kNumFreqBins;

NonlinearBeamformer::NonlinearBeamformer(
    const std::vector<Point>& array_geometry)
    : num_input_channels_(array_geometry.size()),
      array_geometry_(GetCenteredArray(array_geometry)) {
  WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
}

void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
  chunk_length_ =
      static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
  sample_rate_hz_ = sample_rate_hz;
  low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
  low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
  high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
  high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
  // These bin indexes determine the regions over which a mean is taken. This
  // is applied as a constant value over the adjacent end "frequency correction"
  // regions.
  //
  //             low_mean_start_bin_     high_mean_start_bin_
  //                   v                         v              constant
  // |----------------|--------|----------------|-------|----------------|
  //   constant               ^                         ^
  //              low_mean_end_bin_       high_mean_end_bin_
  //
  RTC_DCHECK_GT(low_mean_start_bin_, 0U);
  RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
  RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
  RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
  RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);

  high_pass_postfilter_mask_ = 1.f;
  is_target_present_ = false;
  hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
  interference_blocks_count_ = hold_target_blocks_;

  lapped_transform_.reset(new LappedTransform(num_input_channels_,
                                              1,
                                              chunk_length_,
                                              window_,
                                              kFftSize,
                                              kFftSize / 2,
                                              this));
  for (size_t i = 0; i < kNumFreqBins; ++i) {
    time_smooth_mask_[i] = 1.f;
    final_mask_[i] = 1.f;
    float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
    wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
    mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
                          kBeamwidthConstant * wave_numbers_[i] *
                          wave_numbers_[i];
  }

  // Initialize all nonadaptive values before looping through the frames.
  InitDelaySumMasks();
  InitTargetCovMats();
  InitInterfCovMats();

  for (size_t i = 0; i < kNumFreqBins; ++i) {
    rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
    rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
    reflected_rpsiws_[i] =
        Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
  }
}

void NonlinearBeamformer::InitDelaySumMasks() {
  for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
    delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
    CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
                                                   kFftSize,
                                                   sample_rate_hz_,
                                                   kSpeedOfSoundMeterSeconds,
                                                   array_geometry_,
                                                   kTargetAngleRadians,
                                                   &delay_sum_masks_[f_ix]);

    complex_f norm_factor = sqrt(
        ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
    delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
    normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
    normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
        normalized_delay_sum_masks_[f_ix]));
  }
}

void NonlinearBeamformer::InitTargetCovMats() {
  for (size_t i = 0; i < kNumFreqBins; ++i) {
    target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
    TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
    complex_f normalization_factor = target_cov_mats_[i].Trace();
    target_cov_mats_[i].Scale(1.f / normalization_factor);
  }
}

void NonlinearBeamformer::InitInterfCovMats() {
  for (size_t i = 0; i < kNumFreqBins; ++i) {
    interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
    ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
    ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);

    CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
                                                       array_geometry_,
                                                       &uniform_cov_mat);

    CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
                                                      kInterfAngleRadians,
                                                      i,
                                                      kFftSize,
                                                      kNumFreqBins,
                                                      sample_rate_hz_,
                                                      array_geometry_,
                                                      &angled_cov_mat);
    // Normalize matrices before averaging them.
    complex_f normalization_factor = uniform_cov_mat.Trace();
    uniform_cov_mat.Scale(1.f / normalization_factor);
    normalization_factor = angled_cov_mat.Trace();
    angled_cov_mat.Scale(1.f / normalization_factor);

    // Average matrices.
    uniform_cov_mat.Scale(1 - kBalance);
    angled_cov_mat.Scale(kBalance);
    interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
    reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
  }
}

void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
                                       ChannelBuffer<float>* output) {
  RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
  RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);

  float old_high_pass_mask = high_pass_postfilter_mask_;
  lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
  // Ramp up/down for smoothing. 1 mask per 10ms results in audible
  // discontinuities.
  const float ramp_increment =
      (high_pass_postfilter_mask_ - old_high_pass_mask) /
      input.num_frames_per_band();
  // Apply delay and sum and post-filter in the time domain. WARNING: only works
  // because delay-and-sum is not frequency dependent.
  for (size_t i = 1; i < input.num_bands(); ++i) {
    float smoothed_mask = old_high_pass_mask;
    for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
      smoothed_mask += ramp_increment;

      // Applying the delay and sum (at zero degrees, this is equivalent to
      // averaging).
      float sum = 0.f;
      for (int k = 0; k < input.num_channels(); ++k) {
        sum += input.channels(i)[k][j];
      }
      output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
    }
  }
}

bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
  // If more than half-beamwidth degrees away from the beam's center,
  // you are out of the beam.
  return fabs(spherical_point.azimuth() - kTargetAngleRadians) <
         kHalfBeamWidthRadians;
}

void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
                                            int num_input_channels,
                                            size_t num_freq_bins,
                                            int num_output_channels,
                                            complex_f* const* output) {
  RTC_CHECK_EQ(num_freq_bins, kNumFreqBins);
  RTC_CHECK_EQ(num_input_channels, num_input_channels_);
  RTC_CHECK_EQ(num_output_channels, 1);

  // Calculating the post-filter masks. Note that we need two for each
  // frequency bin to account for the positive and negative interferer
  // angle.
  for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
    eig_m_.CopyFromColumn(input, i, num_input_channels_);
    float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
    if (eig_m_norm_factor != 0.f) {
      eig_m_.Scale(1.f / eig_m_norm_factor);
    }

    float rxim = Norm(target_cov_mats_[i], eig_m_);
    float ratio_rxiw_rxim = 0.f;
    if (rxim > 0.f) {
      ratio_rxiw_rxim = rxiws_[i] / rxim;
    }

    complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
    rmw *= rmw;
    float rmw_r = rmw.real();

    new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
                                           rpsiws_[i],
                                           ratio_rxiw_rxim,
                                           rmw_r,
                                           mask_thresholds_[i]);

    new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
                                            reflected_rpsiws_[i],
                                            ratio_rxiw_rxim,
                                            rmw_r,
                                            mask_thresholds_[i]);
  }

  ApplyMaskTimeSmoothing();
  EstimateTargetPresence();
  ApplyLowFrequencyCorrection();
  ApplyHighFrequencyCorrection();
  ApplyMaskFrequencySmoothing();
  ApplyMasks(input, output);
}

float NonlinearBeamformer::CalculatePostfilterMask(
    const ComplexMatrixF& interf_cov_mat,
    float rpsiw,
    float ratio_rxiw_rxim,
    float rmw_r,
    float mask_threshold) {
  float rpsim = Norm(interf_cov_mat, eig_m_);

  // Find lambda.
  float ratio = 0.f;
  if (rpsim > 0.f) {
    ratio = rpsiw / rpsim;
  }
  float numerator = rmw_r - ratio;
  float denominator = ratio_rxiw_rxim - ratio;

  float mask = 1.f;
  if (denominator > mask_threshold) {
    float lambda = numerator / denominator;
    mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
  }
  return mask;
}

void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
                                     complex_f* const* output) {
  complex_f* output_channel = output[0];
  for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
    output_channel[f_ix] = complex_f(0.f, 0.f);

    const complex_f* delay_sum_mask_els =
        normalized_delay_sum_masks_[f_ix].elements()[0];
    for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
      output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
    }

    output_channel[f_ix] *= final_mask_[f_ix];
  }
}

// Smooth new_mask_ into time_smooth_mask_.
void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
  for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
    time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
                           (1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
  }
}

// Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
  // Smooth over frequency in both directions. The "frequency correction"
  // regions have constant value, but we enter them to smooth over the jump
  // that exists at the boundary. However, this does mean when smoothing "away"
  // from the region that we only need to use the last element.
  //
  // Upward smoothing:
  //   low_mean_start_bin_
  //         v
  // |------|------------|------|
  //         ^------------------>^
  //
  // Downward smoothing:
  //         high_mean_end_bin_
  //                    v
  // |------|------------|------|
  // ^<------------------^
  std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
  for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
    final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
                     (1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
  }
  for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) {
    final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] +
                         (1 - kMaskFrequencySmoothAlpha) * final_mask_[i];
  }
}

// Apply low frequency correction to time_smooth_mask_.
void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
  const float low_frequency_mask =
      MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
  std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
            low_frequency_mask);
}

// Apply high frequency correction to time_smooth_mask_. Update
// high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
  high_pass_postfilter_mask_ =
      MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
  std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
            time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
}

// Compute mean over the given range of time_smooth_mask_, [first, last).
float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) {
  RTC_DCHECK_GT(last, first);
  const float sum = std::accumulate(time_smooth_mask_ + first,
                                    time_smooth_mask_ + last, 0.f);
  return sum / (last - first);
}

void NonlinearBeamformer::EstimateTargetPresence() {
  const size_t quantile = static_cast<size_t>(
      (high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
      low_mean_start_bin_);
  std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
                   new_mask_ + high_mean_end_bin_ + 1);
  if (new_mask_[quantile] > kMaskTargetThreshold) {
    is_target_present_ = true;
    interference_blocks_count_ = 0;
  } else {
    is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
  }
}

}  // namespace webrtc
webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h (new file, 177 lines)
@@ -0,0 +1,177 @@
/*
 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

#include <vector>

#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

namespace webrtc {

// Enhances sound sources coming directly in front of a uniform linear array
// and suppresses sound sources coming from all other directions. Operates on
// multichannel signals and produces single-channel output.
//
// The implemented nonlinear postfilter algorithm is taken from "A Robust
// Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn.
//
// TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.
class NonlinearBeamformer
    : public Beamformer<float>,
      public LappedTransform::Callback {
 public:
  // At the moment it only accepts uniform linear microphone arrays. Using the
  // first microphone as a reference position [0, 0, 0] is a natural choice.
  explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);

  // Sample rate corresponds to the lower band.
  // Needs to be called before the NonlinearBeamformer can be used.
  void Initialize(int chunk_size_ms, int sample_rate_hz) override;

  // Process one time-domain chunk of audio. The audio is expected to be split
  // into frequency bands inside the ChannelBuffer. The number of frames and
  // channels must correspond to the constructor parameters. The same
  // ChannelBuffer can be passed in as |input| and |output|.
  void ProcessChunk(const ChannelBuffer<float>& input,
                    ChannelBuffer<float>* output) override;

  bool IsInBeam(const SphericalPointf& spherical_point) override;

  // After processing each block |is_target_present_| is set to true if the
  // target signal is present and to false otherwise. This method can be called
  // to know if the data is target signal or interference and process it
  // accordingly.
  bool is_target_present() override { return is_target_present_; }

 protected:
  // Process one frequency-domain block of audio. This is where the fun
  // happens. Implements LappedTransform::Callback.
  void ProcessAudioBlock(const complex<float>* const* input,
                         int num_input_channels,
                         size_t num_freq_bins,
                         int num_output_channels,
                         complex<float>* const* output) override;

 private:
  typedef Matrix<float> MatrixF;
  typedef ComplexMatrix<float> ComplexMatrixF;
  typedef complex<float> complex_f;

  void InitDelaySumMasks();
  void InitTargetCovMats();  // TODO(aluebs): Make this depend on target angle.
  void InitInterfCovMats();

  // An implementation of equation 18, which calculates postfilter masks that,
  // when applied, minimize the mean-square error of our estimation of the
  // desired signal. A sub-task is to calculate lambda, which is solved via
  // equation 13.
  float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
                                float rpsiw,
                                float ratio_rxiw_rxim,
                                float rmxi_r,
                                float mask_threshold);

  // Prevents the postfilter masks from degenerating too quickly (a cause of
  // musical noise).
  void ApplyMaskTimeSmoothing();
  void ApplyMaskFrequencySmoothing();

  // The postfilter masks are unreliable at low frequencies. Calculates a better
  // mask by averaging mid-low frequency values.
  void ApplyLowFrequencyCorrection();

  // Postfilter masks are also unreliable at high frequencies. Average mid-high
  // frequency masks to calculate a single mask per block which can be applied
  // in the time-domain. Further, we average these block-masks over a chunk,
  // resulting in one postfilter mask per audio chunk. This allows us to skip
  // both transforming and blocking the high-frequency signal.
  void ApplyHighFrequencyCorrection();

  // Compute the means needed for the above frequency correction.
  float MaskRangeMean(size_t start_bin, size_t end_bin);

  // Applies both sets of masks to |input| and stores the result in |output|.
  void ApplyMasks(const complex_f* const* input, complex_f* const* output);

  void EstimateTargetPresence();

  static const size_t kFftSize = 256;
  static const size_t kNumFreqBins = kFftSize / 2 + 1;

  // Deals with the fft transform and blocking.
  size_t chunk_length_;
  rtc::scoped_ptr<LappedTransform> lapped_transform_;
  float window_[kFftSize];

  // Parameters exposed to the user.
  const int num_input_channels_;
  int sample_rate_hz_;

  const std::vector<Point> array_geometry_;

  // Calculated based on user-input and constants in the .cc file.
  size_t low_mean_start_bin_;
  size_t low_mean_end_bin_;
  size_t high_mean_start_bin_;
  size_t high_mean_end_bin_;

  // Quickly varying mask updated every block.
  float new_mask_[kNumFreqBins];
  // Time smoothed mask.
  float time_smooth_mask_[kNumFreqBins];
  // Time and frequency smoothed mask.
  float final_mask_[kNumFreqBins];

  // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
  ComplexMatrixF delay_sum_masks_[kNumFreqBins];
  ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];

  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
  // |num_input_channels_|.
  ComplexMatrixF target_cov_mats_[kNumFreqBins];

  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
  // |num_input_channels_|.
  ComplexMatrixF interf_cov_mats_[kNumFreqBins];
  ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];

  // Of length |kNumFreqBins|.
  float mask_thresholds_[kNumFreqBins];
  float wave_numbers_[kNumFreqBins];

  // Preallocated for ProcessAudioBlock()
  // Of length |kNumFreqBins|.
  float rxiws_[kNumFreqBins];
  float rpsiws_[kNumFreqBins];
  float reflected_rpsiws_[kNumFreqBins];

  // The microphone normalization factor.
  ComplexMatrixF eig_m_;

  // For processing the high-frequency input signal.
  float high_pass_postfilter_mask_;

  // True when the target signal is present.
  bool is_target_present_;
  // Number of blocks after which the data is considered interference if the
  // mask does not pass |kMaskSignalThreshold|.
  size_t hold_target_blocks_;
  // Number of blocks since the last mask that passed |kMaskSignalThreshold|.
  size_t interference_blocks_count_;
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
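Finally, a hedged end-to-end sketch (not part of the commit) of how the beamformer is wired up; the helper name BeamformerExample, the array geometry, and the ChannelBuffer constructor argument order (frames, channels, bands) are assumptions made for illustration:

#include <vector>
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

void BeamformerExample() {
  // Hypothetical two-mic linear array, 5 cm spacing, running at 16 kHz with
  // 10 ms chunks (160 frames per band, one band).
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));

  webrtc::NonlinearBeamformer beamformer(geometry);
  beamformer.Initialize(10, 16000);  // Must precede ProcessChunk().

  webrtc::ChannelBuffer<float> input(160, static_cast<int>(geometry.size()), 1);
  webrtc::ChannelBuffer<float> output(160, 1, 1);
  // Fill |input| with (split-band) multichannel audio, then:
  beamformer.ProcessChunk(input, &output);

  // Per-chunk voice-activity-style flag derived from the postfilter masks.
  bool target = beamformer.is_target_present();
  (void)target;
}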