Update code to current Chromium master

This corresponds to:

Chromium: 6555f9456074c0c0e5f7713564b978588ac04a5d
webrtc: c8b569e0a7ad0b369e15f0197b3a558699ec8efa
This commit is contained in:
Arun Raghavan
2015-11-04 10:07:52 +05:30
parent 9bc60d3e10
commit 34abadd258
108 changed files with 893 additions and 384 deletions

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
#include <algorithm>
#include <limits>
#include "webrtc/base/checks.h"
namespace webrtc {
namespace {
const float kMaxDotProduct = 1e-6f;
} // namespace
float GetMinimumSpacing(const std::vector<Point>& array_geometry) {
RTC_CHECK_GT(array_geometry.size(), 1u);
float mic_spacing = std::numeric_limits<float>::max();
for (size_t i = 0; i < (array_geometry.size() - 1); ++i) {
for (size_t j = i + 1; j < array_geometry.size(); ++j) {
mic_spacing =
std::min(mic_spacing, Distance(array_geometry[i], array_geometry[j]));
}
}
return mic_spacing;
}
Point PairDirection(const Point& a, const Point& b) {
return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()};
}
float DotProduct(const Point& a, const Point& b) {
return a.x() * b.x() + a.y() * b.y() + a.z() * b.z();
}
Point CrossProduct(const Point& a, const Point& b) {
return {a.y() * b.z() - a.z() * b.y(), a.z() * b.x() - a.x() * b.z(),
a.x() * b.y() - a.y() * b.x()};
}
bool AreParallel(const Point& a, const Point& b) {
Point cross_product = CrossProduct(a, b);
return DotProduct(cross_product, cross_product) < kMaxDotProduct;
}
bool ArePerpendicular(const Point& a, const Point& b) {
return std::abs(DotProduct(a, b)) < kMaxDotProduct;
}
rtc::Maybe<Point> GetDirectionIfLinear(
const std::vector<Point>& array_geometry) {
RTC_DCHECK_GT(array_geometry.size(), 1u);
const Point first_pair_direction =
PairDirection(array_geometry[0], array_geometry[1]);
for (size_t i = 2u; i < array_geometry.size(); ++i) {
const Point pair_direction =
PairDirection(array_geometry[i - 1], array_geometry[i]);
if (!AreParallel(first_pair_direction, pair_direction)) {
return rtc::Maybe<Point>();
}
}
return rtc::Maybe<Point>(first_pair_direction);
}
rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry) {
RTC_DCHECK_GT(array_geometry.size(), 1u);
const Point first_pair_direction =
PairDirection(array_geometry[0], array_geometry[1]);
Point pair_direction(0.f, 0.f, 0.f);
size_t i = 2u;
bool is_linear = true;
for (; i < array_geometry.size() && is_linear; ++i) {
pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]);
if (!AreParallel(first_pair_direction, pair_direction)) {
is_linear = false;
}
}
if (is_linear) {
return rtc::Maybe<Point>();
}
const Point normal_direction =
CrossProduct(first_pair_direction, pair_direction);
for (; i < array_geometry.size(); ++i) {
pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]);
if (!ArePerpendicular(normal_direction, pair_direction)) {
return rtc::Maybe<Point>();
}
}
return rtc::Maybe<Point>(normal_direction);
}
rtc::Maybe<Point> GetArrayNormalIfExists(
const std::vector<Point>& array_geometry) {
const rtc::Maybe<Point> direction = GetDirectionIfLinear(array_geometry);
if (direction) {
return rtc::Maybe<Point>(Point(direction->y(), -direction->x(), 0.f));
}
const rtc::Maybe<Point> normal = GetNormalIfPlanar(array_geometry);
if (normal && normal->z() < kMaxDotProduct) {
return normal;
}
return rtc::Maybe<Point>();
}
Point AzimuthToPoint(float azimuth) {
return Point(std::cos(azimuth), std::sin(azimuth), 0.f);
}
} // namespace webrtc

View File

@ -12,12 +12,25 @@
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#include <cmath>
#include <vector>
#include "webrtc/base/maybe.h"
namespace webrtc {
// Coordinates in meters.
// Coordinates in meters. The convention used is:
// x: the horizontal dimension, with positive to the right from the camera's
// perspective.
// y: the depth dimension, with positive forward from the camera's
// perspective.
// z: the vertical dimension, with positive upwards.
template<typename T>
struct CartesianPoint {
CartesianPoint() {
c[0] = 0;
c[1] = 0;
c[2] = 0;
}
CartesianPoint(T x, T y, T z) {
c[0] = x;
c[1] = y;
@ -31,6 +44,35 @@ struct CartesianPoint {
using Point = CartesianPoint<float>;
// Calculates the direction from a to b.
Point PairDirection(const Point& a, const Point& b);
float DotProduct(const Point& a, const Point& b);
Point CrossProduct(const Point& a, const Point& b);
bool AreParallel(const Point& a, const Point& b);
bool ArePerpendicular(const Point& a, const Point& b);
// Returns the minimum distance between any two Points in the given
// |array_geometry|.
float GetMinimumSpacing(const std::vector<Point>& array_geometry);
// If the given array geometry is linear it returns the direction without
// normalizing.
rtc::Maybe<Point> GetDirectionIfLinear(
const std::vector<Point>& array_geometry);
// If the given array geometry is planar it returns the normal without
// normalizing.
rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry);
// Returns the normal of an array if it has one and it is in the xy-plane.
rtc::Maybe<Point> GetArrayNormalIfExists(
const std::vector<Point>& array_geometry);
// The resulting Point will be in the xy-plane.
Point AzimuthToPoint(float azimuth);
template<typename T>
float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
@ -38,6 +80,11 @@ float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
(a.z() - b.z()) * (a.z() - b.z()));
}
// The convention used:
// azimuth: zero is to the right from the camera's perspective, with positive
// angles in radians counter-clockwise.
// elevation: zero is horizontal, with positive angles in radians upwards.
// radius: distance from the camera in meters.
template <typename T>
struct SphericalPoint {
SphericalPoint(T azimuth, T elevation, T radius) {
@ -53,6 +100,17 @@ struct SphericalPoint {
using SphericalPointf = SphericalPoint<float>;
// Helper functions to transform degrees to radians and the inverse.
template <typename T>
T DegreesToRadians(T angle_degrees) {
return M_PI * angle_degrees / 180;
}
template <typename T>
T RadiansToDegrees(T angle_radians) {
return 180 * angle_radians / M_PI;
}
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_

View File

@ -32,6 +32,9 @@ class Beamformer {
// Needs to be called before the the Beamformer can be used.
virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
// Aim the beamformer at a point in space.
virtual void AimAt(const SphericalPointf& spherical_point) = 0;
// Indicates whether a given point is inside of the beam.
virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }

View File

@ -14,6 +14,7 @@
#include <cmath>
namespace webrtc {
namespace {
float BesselJ0(float x) {
@ -24,9 +25,19 @@ float BesselJ0(float x) {
#endif
}
} // namespace
// Calculates the Euclidean norm for a row vector.
float Norm(const ComplexMatrix<float>& x) {
RTC_CHECK_EQ(1, x.num_rows());
const size_t length = x.num_columns();
const complex<float>* elems = x.elements()[0];
float result = 0.f;
for (size_t i = 0u; i < length; ++i) {
result += std::norm(elems[i]);
}
return std::sqrt(result);
}
namespace webrtc {
} // namespace
void CovarianceMatrixGenerator::UniformCovarianceMatrix(
float wave_number,
@ -69,6 +80,7 @@ void CovarianceMatrixGenerator::AngledCovarianceMatrix(
geometry,
angle,
&interf_cov_vector);
interf_cov_vector.Scale(1.f / Norm(interf_cov_vector));
interf_cov_vector_transposed.Transpose(interf_cov_vector);
interf_cov_vector.PointwiseConjugate();
mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);

View File

@ -27,33 +27,21 @@ namespace {
// Alpha for the Kaiser Bessel Derived window.
const float kKbdAlpha = 1.5f;
// The minimum value a post-processing mask can take.
const float kMaskMinimum = 0.01f;
const float kSpeedOfSoundMeterSeconds = 343;
// For both target and interference angles, PI / 2 is perpendicular to the
// microphone array, facing forwards. The positive direction goes
// counterclockwise.
// The angle at which we amplify sound.
const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
// The minimum separation in radians between the target direction and an
// interferer scenario.
const float kMinAwayRadians = 0.2f;
// The angle at which we suppress sound. Suppression is symmetric around PI / 2
// radians, so sound is suppressed at both +|kInterfAngleRadians| and
// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
// suppress sound coming from close angles as well.
const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
// The separation between the target direction and the closest interferer
// scenario is proportional to this constant.
const float kAwaySlope = 0.008f;
// When calculating the interference covariance matrix, this is the weight for
// the weighted average between the uniform covariance matrix and the angled
// covariance matrix.
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
const float kBalance = 0.4f;
const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;
// TODO(claguna): need comment here.
const float kBeamwidthConstant = 0.00002f;
const float kBalance = 0.95f;
// Alpha coefficients for mask smoothing.
const float kMaskTimeSmoothAlpha = 0.2f;
@ -64,17 +52,28 @@ const float kMaskFrequencySmoothAlpha = 0.6f;
const int kLowMeanStartHz = 200;
const int kLowMeanEndHz = 400;
const int kHighMeanStartHz = 3000;
const int kHighMeanEndHz = 5000;
// Range limiter for subtractive terms in the nominator and denominator of the
// postfilter expression. It handles the scenario mismatch between the true and
// model sources (target and interference).
const float kCutOffConstant = 0.9999f;
// Quantile of mask values which is used to estimate target presence.
const float kMaskQuantile = 0.7f;
// Mask threshold over which the data is considered signal and not interference.
const float kMaskTargetThreshold = 0.3f;
// It has to be updated every time the postfilter calculation is changed
// significantly.
// TODO(aluebs): Write a tool to tune the target threshold automatically based
// on files annotated with target and interference ground truth.
const float kMaskTargetThreshold = 0.01f;
// Time in seconds after which the data is considered interference if the mask
// does not pass |kMaskTargetThreshold|.
const float kHoldTargetSeconds = 0.25f;
// To compensate for the attenuation this algorithm introduces to the target
// signal. It was estimated empirically from a low-noise low-reverberation
// recording from broadside.
const float kCompensationGain = 2.f;
// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
// used; to accomplish this, we compute both multiplications in the same loop.
// The returned norm is clamped to be non-negative.
@ -179,13 +178,23 @@ std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
} // namespace
const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
// static
const size_t NonlinearBeamformer::kNumFreqBins;
NonlinearBeamformer::NonlinearBeamformer(
const std::vector<Point>& array_geometry)
: num_input_channels_(array_geometry.size()),
array_geometry_(GetCenteredArray(array_geometry)) {
const std::vector<Point>& array_geometry,
SphericalPointf target_direction)
: num_input_channels_(array_geometry.size()),
array_geometry_(GetCenteredArray(array_geometry)),
array_normal_(GetArrayNormalIfExists(array_geometry)),
min_mic_spacing_(GetMinimumSpacing(array_geometry)),
target_angle_radians_(target_direction.azimuth()),
away_radians_(std::min(
static_cast<float>(M_PI),
std::max(kMinAwayRadians,
kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
}
@ -193,32 +202,12 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
chunk_length_ =
static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
sample_rate_hz_ = sample_rate_hz;
low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
// These bin indexes determine the regions over which a mean is taken. This
// is applied as a constant value over the adjacent end "frequency correction"
// regions.
//
// low_mean_start_bin_ high_mean_start_bin_
// v v constant
// |----------------|--------|----------------|-------|----------------|
// constant ^ ^
// low_mean_end_bin_ high_mean_end_bin_
//
RTC_DCHECK_GT(low_mean_start_bin_, 0U);
RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
high_pass_postfilter_mask_ = 1.f;
is_target_present_ = false;
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
lapped_transform_.reset(new LappedTransform(num_input_channels_,
1,
chunk_length_,
@ -231,34 +220,88 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
final_mask_[i] = 1.f;
float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
kBeamwidthConstant * wave_numbers_[i] *
wave_numbers_[i];
}
// Initialize all nonadaptive values before looping through the frames.
InitDelaySumMasks();
InitTargetCovMats();
InitInterfCovMats();
InitLowFrequencyCorrectionRanges();
InitDiffuseCovMats();
AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
}
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
reflected_rpsiws_[i] =
Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
// These bin indexes determine the regions over which a mean is taken. This is
// applied as a constant value over the adjacent end "frequency correction"
// regions.
//
// low_mean_start_bin_ high_mean_start_bin_
// v v constant
// |----------------|--------|----------------|-------|----------------|
// constant ^ ^
// low_mean_end_bin_ high_mean_end_bin_
//
void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {
low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
RTC_DCHECK_GT(low_mean_start_bin_, 0U);
RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
}
void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
const float kAliasingFreqHz =
kSpeedOfSoundMeterSeconds /
(min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
const float kHighMeanStartHz = std::min(0.5f * kAliasingFreqHz,
sample_rate_hz_ / 2.f);
const float kHighMeanEndHz = std::min(0.75f * kAliasingFreqHz,
sample_rate_hz_ / 2.f);
high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
}
void NonlinearBeamformer::InitInterfAngles() {
interf_angles_radians_.clear();
const Point target_direction = AzimuthToPoint(target_angle_radians_);
const Point clockwise_interf_direction =
AzimuthToPoint(target_angle_radians_ - away_radians_);
if (!array_normal_ ||
DotProduct(*array_normal_, target_direction) *
DotProduct(*array_normal_, clockwise_interf_direction) >=
0.f) {
// The target and clockwise interferer are in the same half-plane defined
// by the array.
interf_angles_radians_.push_back(target_angle_radians_ - away_radians_);
} else {
// Otherwise, the interferer will begin reflecting back at the target.
// Instead rotate it away 180 degrees.
interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ +
M_PI);
}
const Point counterclock_interf_direction =
AzimuthToPoint(target_angle_radians_ + away_radians_);
if (!array_normal_ ||
DotProduct(*array_normal_, target_direction) *
DotProduct(*array_normal_, counterclock_interf_direction) >=
0.f) {
// The target and counter-clockwise interferer are in the same half-plane
// defined by the array.
interf_angles_radians_.push_back(target_angle_radians_ + away_radians_);
} else {
// Otherwise, the interferer will begin reflecting back at the target.
// Instead rotate it away 180 degrees.
interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ -
M_PI);
}
}
void NonlinearBeamformer::InitDelaySumMasks() {
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
kFftSize,
sample_rate_hz_,
kSpeedOfSoundMeterSeconds,
array_geometry_,
kTargetAngleRadians,
&delay_sum_masks_[f_ix]);
CovarianceMatrixGenerator::PhaseAlignmentMasks(
f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);
complex_f norm_factor = sqrt(
ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
@ -273,40 +316,53 @@ void NonlinearBeamformer::InitTargetCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
complex_f normalization_factor = target_cov_mats_[i].Trace();
target_cov_mats_[i].Scale(1.f / normalization_factor);
}
}
void NonlinearBeamformer::InitDiffuseCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);
CovarianceMatrixGenerator::UniformCovarianceMatrix(
wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];
uniform_cov_mat_[i].Scale(1.f / normalization_factor);
uniform_cov_mat_[i].Scale(1 - kBalance);
}
}
void NonlinearBeamformer::InitInterfCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
interf_cov_mats_[i].clear();
for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,
num_input_channels_));
ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
CovarianceMatrixGenerator::AngledCovarianceMatrix(
kSpeedOfSoundMeterSeconds,
interf_angles_radians_[j],
i,
kFftSize,
kNumFreqBins,
sample_rate_hz_,
array_geometry_,
&angled_cov_mat);
// Normalize matrices before averaging them.
complex_f normalization_factor = angled_cov_mat.elements()[0][0];
angled_cov_mat.Scale(1.f / normalization_factor);
// Weighted average of matrices.
angled_cov_mat.Scale(kBalance);
interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);
}
}
}
CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
array_geometry_,
&uniform_cov_mat);
CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
kInterfAngleRadians,
i,
kFftSize,
kNumFreqBins,
sample_rate_hz_,
array_geometry_,
&angled_cov_mat);
// Normalize matrices before averaging them.
complex_f normalization_factor = uniform_cov_mat.Trace();
uniform_cov_mat.Scale(1.f / normalization_factor);
normalization_factor = angled_cov_mat.Trace();
angled_cov_mat.Scale(1.f / normalization_factor);
// Average matrices.
uniform_cov_mat.Scale(1 - kBalance);
angled_cov_mat.Scale(kBalance);
interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
void NonlinearBeamformer::NormalizeCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
rpsiws_[i].clear();
for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
}
}
}
@ -322,28 +378,32 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
const float ramp_increment =
(high_pass_postfilter_mask_ - old_high_pass_mask) /
input.num_frames_per_band();
// Apply delay and sum and post-filter in the time domain. WARNING: only works
// because delay-and-sum is not frequency dependent.
// Apply the smoothed high-pass mask to the first channel of each band.
// This can be done because the effct of the linear beamformer is negligible
// compared to the post-filter.
for (size_t i = 1; i < input.num_bands(); ++i) {
float smoothed_mask = old_high_pass_mask;
for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
smoothed_mask += ramp_increment;
// Applying the delay and sum (at zero degrees, this is equivalent to
// averaging).
float sum = 0.f;
for (int k = 0; k < input.num_channels(); ++k) {
sum += input.channels(i)[k][j];
}
output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
}
}
}
void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
target_angle_radians_ = target_direction.azimuth();
InitHighFrequencyCorrectionRanges();
InitInterfAngles();
InitDelaySumMasks();
InitTargetCovMats();
InitInterfCovMats();
NormalizeCovMats();
}
bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
// If more than half-beamwidth degrees away from the beam's center,
// you are out of the beam.
return fabs(spherical_point.azimuth() - kTargetAngleRadians) <
return fabs(spherical_point.azimuth() - target_angle_radians_) <
kHalfBeamWidthRadians;
}
@ -376,17 +436,19 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
rmw *= rmw;
float rmw_r = rmw.real();
new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
rpsiws_[i],
new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],
rpsiws_[i][0],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
reflected_rpsiws_[i],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
rmw_r);
for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {
float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],
rpsiws_[i][j],
ratio_rxiw_rxim,
rmw_r);
if (tmp_mask < new_mask_[i]) {
new_mask_[i] = tmp_mask;
}
}
}
ApplyMaskTimeSmoothing();
@ -401,24 +463,16 @@ float NonlinearBeamformer::CalculatePostfilterMask(
const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmw_r,
float mask_threshold) {
float rmw_r) {
float rpsim = Norm(interf_cov_mat, eig_m_);
// Find lambda.
float ratio = 0.f;
if (rpsim > 0.f) {
ratio = rpsiw / rpsim;
}
float numerator = rmw_r - ratio;
float denominator = ratio_rxiw_rxim - ratio;
float mask = 1.f;
if (denominator > mask_threshold) {
float lambda = numerator / denominator;
mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
}
return mask;
return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) /
(1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim));
}
void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
@ -433,7 +487,7 @@ void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
}
output_channel[f_ix] *= final_mask_[f_ix];
output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
}
}

View File

@ -11,12 +11,17 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
// MSVC++ requires this to be set before any other includes to get M_PI.
#define _USE_MATH_DEFINES
#include <math.h>
#include <vector>
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/system_wrappers/include/scoped_vector.h"
namespace webrtc {
@ -26,15 +31,16 @@ namespace webrtc {
//
// The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear
// Beamforming Postprocessor" by Bastiaan Kleijn.
//
// TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.
class NonlinearBeamformer
: public Beamformer<float>,
public LappedTransform::Callback {
public:
// At the moment it only accepts uniform linear microphone arrays. Using the
// first microphone as a reference position [0, 0, 0] is a natural choice.
explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);
static const float kHalfBeamWidthRadians;
explicit NonlinearBeamformer(
const std::vector<Point>& array_geometry,
SphericalPointf target_direction =
SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));
// Sample rate corresponds to the lower band.
// Needs to be called before the NonlinearBeamformer can be used.
@ -47,6 +53,8 @@ class NonlinearBeamformer
void ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) override;
void AimAt(const SphericalPointf& target_direction) override;
bool IsInBeam(const SphericalPointf& spherical_point) override;
// After processing each block |is_target_present_| is set to true if the
@ -65,23 +73,30 @@ class NonlinearBeamformer
complex<float>* const* output) override;
private:
#ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest,
InterfAnglesTakeAmbiguityIntoAccount);
#endif
typedef Matrix<float> MatrixF;
typedef ComplexMatrix<float> ComplexMatrixF;
typedef complex<float> complex_f;
void InitLowFrequencyCorrectionRanges();
void InitHighFrequencyCorrectionRanges();
void InitInterfAngles();
void InitDelaySumMasks();
void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle.
void InitTargetCovMats();
void InitDiffuseCovMats();
void InitInterfCovMats();
void NormalizeCovMats();
// An implementation of equation 18, which calculates postfilter masks that,
// when applied, minimize the mean-square error of our estimation of the
// desired signal. A sub-task is to calculate lambda, which is solved via
// equation 13.
// Calculates postfilter masks that minimize the mean squared error of our
// estimation of the desired signal.
float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmxi_r,
float mask_threshold);
float rmxi_r);
// Prevents the postfilter masks from degenerating too quickly (a cause of
// musical noise).
@ -120,6 +135,11 @@ class NonlinearBeamformer
int sample_rate_hz_;
const std::vector<Point> array_geometry_;
// The normal direction of the array if it has one and it is in the xy-plane.
const rtc::Maybe<Point> array_normal_;
// Minimum spacing between microphone pairs.
const float min_mic_spacing_;
// Calculated based on user-input and constants in the .cc file.
size_t low_mean_start_bin_;
@ -134,28 +154,33 @@ class NonlinearBeamformer
// Time and frequency smoothed mask.
float final_mask_[kNumFreqBins];
float target_angle_radians_;
// Angles of the interferer scenarios.
std::vector<float> interf_angles_radians_;
// The angle between the target and the interferer scenarios.
const float away_radians_;
// Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
ComplexMatrixF delay_sum_masks_[kNumFreqBins];
ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF target_cov_mats_[kNumFreqBins];
ComplexMatrixF uniform_cov_mat_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF interf_cov_mats_[kNumFreqBins];
ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];
// |num_input_channels_|. ScopedVector has a size equal to the number of
// interferer scenarios.
ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];
// Of length |kNumFreqBins|.
float mask_thresholds_[kNumFreqBins];
float wave_numbers_[kNumFreqBins];
// Preallocated for ProcessAudioBlock()
// Of length |kNumFreqBins|.
float rxiws_[kNumFreqBins];
float rpsiws_[kNumFreqBins];
float reflected_rpsiws_[kNumFreqBins];
// The vector has a size equal to the number of interferer scenarios.
std::vector<float> rpsiws_[kNumFreqBins];
// The microphone normalization factor.
ComplexMatrixF eig_m_;