Update code to current Chromium master

This corresponds to: Chromium: 6555f9456074c0c0e5f7713564b978588ac04a5d webrtc: c8b569e0a7ad0b369e15f0197b3a558699ec8efa
2015-11-04 10:07:52 +05:30
parent 9bc60d3e10
commit 34abadd258
108 changed files with 893 additions and 384 deletions
--- a/webrtc/modules/audio_processing/beamformer/array_util.cc
+++ b/webrtc/modules/audio_processing/beamformer/array_util.cc
@@ -0,0 +1,118 @@
+/*
+ *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
+ *
+ *  Use of this source code is governed by a BSD-style license
+ *  that can be found in the LICENSE file in the root of the source
+ *  tree. An additional intellectual property rights grant can be found
+ *  in the file PATENTS.  All contributing project authors may
+ *  be found in the AUTHORS file in the root of the source tree.
+ */
+
+#include "webrtc/modules/audio_processing/beamformer/array_util.h"
+
+#include <algorithm>
+#include <limits>
+
+#include "webrtc/base/checks.h"
+
+namespace webrtc {
+namespace {
+
+const float kMaxDotProduct = 1e-6f;
+
+}  // namespace
+
+float GetMinimumSpacing(const std::vector<Point>& array_geometry) {
+  RTC_CHECK_GT(array_geometry.size(), 1u);
+  float mic_spacing = std::numeric_limits<float>::max();
+  for (size_t i = 0; i < (array_geometry.size() - 1); ++i) {
+    for (size_t j = i + 1; j < array_geometry.size(); ++j) {
+      mic_spacing =
+          std::min(mic_spacing, Distance(array_geometry[i], array_geometry[j]));
+    }
+  }
+  return mic_spacing;
+}
+
+Point PairDirection(const Point& a, const Point& b) {
+  return {b.x() - a.x(), b.y() - a.y(), b.z() - a.z()};
+}
+
+float DotProduct(const Point& a, const Point& b) {
+  return a.x() * b.x() + a.y() * b.y() + a.z() * b.z();
+}
+
+Point CrossProduct(const Point& a, const Point& b) {
+  return {a.y() * b.z() - a.z() * b.y(), a.z() * b.x() - a.x() * b.z(),
+          a.x() * b.y() - a.y() * b.x()};
+}
+
+bool AreParallel(const Point& a, const Point& b) {
+  Point cross_product = CrossProduct(a, b);
+  return DotProduct(cross_product, cross_product) < kMaxDotProduct;
+}
+
+bool ArePerpendicular(const Point& a, const Point& b) {
+  return std::abs(DotProduct(a, b)) < kMaxDotProduct;
+}
+
+rtc::Maybe<Point> GetDirectionIfLinear(
+    const std::vector<Point>& array_geometry) {
+  RTC_DCHECK_GT(array_geometry.size(), 1u);
+  const Point first_pair_direction =
+      PairDirection(array_geometry[0], array_geometry[1]);
+  for (size_t i = 2u; i < array_geometry.size(); ++i) {
+    const Point pair_direction =
+        PairDirection(array_geometry[i - 1], array_geometry[i]);
+    if (!AreParallel(first_pair_direction, pair_direction)) {
+      return rtc::Maybe<Point>();
+    }
+  }
+  return rtc::Maybe<Point>(first_pair_direction);
+}
+
+rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry) {
+  RTC_DCHECK_GT(array_geometry.size(), 1u);
+  const Point first_pair_direction =
+      PairDirection(array_geometry[0], array_geometry[1]);
+  Point pair_direction(0.f, 0.f, 0.f);
+  size_t i = 2u;
+  bool is_linear = true;
+  for (; i < array_geometry.size() && is_linear; ++i) {
+    pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]);
+    if (!AreParallel(first_pair_direction, pair_direction)) {
+      is_linear = false;
+    }
+  }
+  if (is_linear) {
+    return rtc::Maybe<Point>();
+  }
+  const Point normal_direction =
+      CrossProduct(first_pair_direction, pair_direction);
+  for (; i < array_geometry.size(); ++i) {
+    pair_direction = PairDirection(array_geometry[i - 1], array_geometry[i]);
+    if (!ArePerpendicular(normal_direction, pair_direction)) {
+      return rtc::Maybe<Point>();
+    }
+  }
+  return rtc::Maybe<Point>(normal_direction);
+}
+
+rtc::Maybe<Point> GetArrayNormalIfExists(
+    const std::vector<Point>& array_geometry) {
+  const rtc::Maybe<Point> direction = GetDirectionIfLinear(array_geometry);
+  if (direction) {
+    return rtc::Maybe<Point>(Point(direction->y(), -direction->x(), 0.f));
+  }
+  const rtc::Maybe<Point> normal = GetNormalIfPlanar(array_geometry);
+  if (normal && normal->z() < kMaxDotProduct) {
+    return normal;
+  }
+  return rtc::Maybe<Point>();
+}
+
+Point AzimuthToPoint(float azimuth) {
+  return Point(std::cos(azimuth), std::sin(azimuth), 0.f);
+}
+
+}  // namespace webrtc
--- a/webrtc/modules/audio_processing/beamformer/array_util.h
+++ b/webrtc/modules/audio_processing/beamformer/array_util.h
@@ -12,12 +12,25 @@
 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_

 #include <cmath>
+#include <vector>
+
+#include "webrtc/base/maybe.h"

 namespace webrtc {

-// Coordinates in meters.
+// Coordinates in meters. The convention used is:
+// x: the horizontal dimension, with positive to the right from the camera's
+//    perspective.
+// y: the depth dimension, with positive forward from the camera's
+//    perspective.
+// z: the vertical dimension, with positive upwards.
 template<typename T>
 struct CartesianPoint {
+  CartesianPoint() {
+    c[0] = 0;
+    c[1] = 0;
+    c[2] = 0;
+  }
  CartesianPoint(T x, T y, T z) {
    c[0] = x;
    c[1] = y;
@@ -31,6 +44,35 @@ struct CartesianPoint {

 using Point = CartesianPoint<float>;

+// Calculates the direction from a to b.
+Point PairDirection(const Point& a, const Point& b);
+
+float DotProduct(const Point& a, const Point& b);
+Point CrossProduct(const Point& a, const Point& b);
+
+bool AreParallel(const Point& a, const Point& b);
+bool ArePerpendicular(const Point& a, const Point& b);
+
+// Returns the minimum distance between any two Points in the given
+// |array_geometry|.
+float GetMinimumSpacing(const std::vector<Point>& array_geometry);
+
+// If the given array geometry is linear it returns the direction without
+// normalizing.
+rtc::Maybe<Point> GetDirectionIfLinear(
+    const std::vector<Point>& array_geometry);
+
+// If the given array geometry is planar it returns the normal without
+// normalizing.
+rtc::Maybe<Point> GetNormalIfPlanar(const std::vector<Point>& array_geometry);
+
+// Returns the normal of an array if it has one and it is in the xy-plane.
+rtc::Maybe<Point> GetArrayNormalIfExists(
+    const std::vector<Point>& array_geometry);
+
+// The resulting Point will be in the xy-plane.
+Point AzimuthToPoint(float azimuth);
+
 template<typename T>
 float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
  return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
@@ -38,6 +80,11 @@ float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
                   (a.z() - b.z()) * (a.z() - b.z()));
 }

+// The convention used:
+// azimuth: zero is to the right from the camera's perspective, with positive
+//          angles in radians counter-clockwise.
+// elevation: zero is horizontal, with positive angles in radians upwards.
+// radius: distance from the camera in meters.
 template <typename T>
 struct SphericalPoint {
  SphericalPoint(T azimuth, T elevation, T radius) {
@@ -53,6 +100,17 @@ struct SphericalPoint {

 using SphericalPointf = SphericalPoint<float>;

+// Helper functions to transform degrees to radians and the inverse.
+template <typename T>
+T DegreesToRadians(T angle_degrees) {
+  return M_PI * angle_degrees / 180;
+}
+
+template <typename T>
+T RadiansToDegrees(T angle_radians) {
+  return 180 * angle_radians / M_PI;
+}
+
 }  // namespace webrtc

 #endif  // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
--- a/webrtc/modules/audio_processing/beamformer/beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/beamformer.h
@@ -32,6 +32,9 @@ class Beamformer {
  // Needs to be called before the the Beamformer can be used.
  virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;

+  // Aim the beamformer at a point in space.
+  virtual void AimAt(const SphericalPointf& spherical_point) = 0;
+
  // Indicates whether a given point is inside of the beam.
  virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }

--- a/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc
+++ b/webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.cc
@@ -14,6 +14,7 @@

 #include <cmath>

+namespace webrtc {
 namespace {

 float BesselJ0(float x) {
@@ -24,9 +25,19 @@ float BesselJ0(float x) {
 #endif
 }

-}  // namespace
+// Calculates the Euclidean norm for a row vector.
+float Norm(const ComplexMatrix<float>& x) {
+  RTC_CHECK_EQ(1, x.num_rows());
+  const size_t length = x.num_columns();
+  const complex<float>* elems = x.elements()[0];
+  float result = 0.f;
+  for (size_t i = 0u; i < length; ++i) {
+    result += std::norm(elems[i]);
+  }
+  return std::sqrt(result);
+}

-namespace webrtc {
+}  // namespace

 void CovarianceMatrixGenerator::UniformCovarianceMatrix(
    float wave_number,
@@ -69,6 +80,7 @@ void CovarianceMatrixGenerator::AngledCovarianceMatrix(
                      geometry,
                      angle,
                      &interf_cov_vector);
+  interf_cov_vector.Scale(1.f / Norm(interf_cov_vector));
  interf_cov_vector_transposed.Transpose(interf_cov_vector);
  interf_cov_vector.PointwiseConjugate();
  mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);
--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.cc
@@ -27,33 +27,21 @@ namespace {
 // Alpha for the Kaiser Bessel Derived window.
 const float kKbdAlpha = 1.5f;

-// The minimum value a post-processing mask can take.
-const float kMaskMinimum = 0.01f;
-
 const float kSpeedOfSoundMeterSeconds = 343;

-// For both target and interference angles, PI / 2 is perpendicular to the
-// microphone array, facing forwards. The positive direction goes
-// counterclockwise.
-// The angle at which we amplify sound.
-const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
+// The minimum separation in radians between the target direction and an
+// interferer scenario.
+const float kMinAwayRadians = 0.2f;

-// The angle at which we suppress sound. Suppression is symmetric around PI / 2
-// radians, so sound is suppressed at both +|kInterfAngleRadians| and
-// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
-// suppress sound coming from close angles as well.
-const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
+// The separation between the target direction and the closest interferer
+// scenario is proportional to this constant.
+const float kAwaySlope = 0.008f;

 // When calculating the interference covariance matrix, this is the weight for
 // the weighted average between the uniform covariance matrix and the angled
 // covariance matrix.
 // Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
-const float kBalance = 0.4f;
-
-const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;
-
-// TODO(claguna): need comment here.
-const float kBeamwidthConstant = 0.00002f;
+const float kBalance = 0.95f;

 // Alpha coefficients for mask smoothing.
 const float kMaskTimeSmoothAlpha = 0.2f;
@@ -64,17 +52,28 @@ const float kMaskFrequencySmoothAlpha = 0.6f;
 const int kLowMeanStartHz = 200;
 const int kLowMeanEndHz = 400;

-const int kHighMeanStartHz = 3000;
-const int kHighMeanEndHz = 5000;
+// Range limiter for subtractive terms in the nominator and denominator of the
+// postfilter expression. It handles the scenario mismatch between the true and
+// model sources (target and interference).
+const float kCutOffConstant = 0.9999f;

 // Quantile of mask values which is used to estimate target presence.
 const float kMaskQuantile = 0.7f;
 // Mask threshold over which the data is considered signal and not interference.
-const float kMaskTargetThreshold = 0.3f;
+// It has to be updated every time the postfilter calculation is changed
+// significantly.
+// TODO(aluebs): Write a tool to tune the target threshold automatically based
+// on files annotated with target and interference ground truth.
+const float kMaskTargetThreshold = 0.01f;
 // Time in seconds after which the data is considered interference if the mask
 // does not pass |kMaskTargetThreshold|.
 const float kHoldTargetSeconds = 0.25f;

+// To compensate for the attenuation this algorithm introduces to the target
+// signal. It was estimated empirically from a low-noise low-reverberation
+// recording from broadside.
+const float kCompensationGain = 2.f;
+
 // Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
 // used; to accomplish this, we compute both multiplications in the same loop.
 // The returned norm is clamped to be non-negative.
@@ -179,13 +178,23 @@ std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {

 }  // namespace

+const float NonlinearBeamformer::kHalfBeamWidthRadians = DegreesToRadians(20.f);
+
 // static
 const size_t NonlinearBeamformer::kNumFreqBins;

 NonlinearBeamformer::NonlinearBeamformer(
-    const std::vector<Point>& array_geometry)
-  : num_input_channels_(array_geometry.size()),
-      array_geometry_(GetCenteredArray(array_geometry)) {
+    const std::vector<Point>& array_geometry,
+    SphericalPointf target_direction)
+    : num_input_channels_(array_geometry.size()),
+      array_geometry_(GetCenteredArray(array_geometry)),
+      array_normal_(GetArrayNormalIfExists(array_geometry)),
+      min_mic_spacing_(GetMinimumSpacing(array_geometry)),
+      target_angle_radians_(target_direction.azimuth()),
+      away_radians_(std::min(
+          static_cast<float>(M_PI),
+          std::max(kMinAwayRadians,
+                   kAwaySlope * static_cast<float>(M_PI) / min_mic_spacing_))) {
  WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
 }

@@ -193,32 +202,12 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
  chunk_length_ =
      static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
  sample_rate_hz_ = sample_rate_hz;
-  low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
-  low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
-  high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
-  high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
-  // These bin indexes determine the regions over which a mean is taken. This
-  // is applied as a constant value over the adjacent end "frequency correction"
-  // regions.
-  //
-  //             low_mean_start_bin_     high_mean_start_bin_
-  //                   v                         v              constant
-  // |----------------|--------|----------------|-------|----------------|
-  //   constant               ^                        ^
-  //             low_mean_end_bin_       high_mean_end_bin_
-  //
-  RTC_DCHECK_GT(low_mean_start_bin_, 0U);
-  RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
-  RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
-  RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
-  RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);

  high_pass_postfilter_mask_ = 1.f;
  is_target_present_ = false;
  hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
  interference_blocks_count_ = hold_target_blocks_;

-
  lapped_transform_.reset(new LappedTransform(num_input_channels_,
                                              1,
                                              chunk_length_,
@@ -231,34 +220,88 @@ void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
    final_mask_[i] = 1.f;
    float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
    wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
-    mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
-                          kBeamwidthConstant * wave_numbers_[i] *
-                          wave_numbers_[i];
  }

-  // Initialize all nonadaptive values before looping through the frames.
-  InitDelaySumMasks();
-  InitTargetCovMats();
-  InitInterfCovMats();
+  InitLowFrequencyCorrectionRanges();
+  InitDiffuseCovMats();
+  AimAt(SphericalPointf(target_angle_radians_, 0.f, 1.f));
+}

-  for (size_t i = 0; i < kNumFreqBins; ++i) {
-    rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
-    rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
-    reflected_rpsiws_[i] =
-        Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
+// These bin indexes determine the regions over which a mean is taken. This is
+// applied as a constant value over the adjacent end "frequency correction"
+// regions.
+//
+//             low_mean_start_bin_     high_mean_start_bin_
+//                   v                         v              constant
+// |----------------|--------|----------------|-------|----------------|
+//   constant               ^                        ^
+//             low_mean_end_bin_       high_mean_end_bin_
+//
+void NonlinearBeamformer::InitLowFrequencyCorrectionRanges() {
+  low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
+  low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
+
+  RTC_DCHECK_GT(low_mean_start_bin_, 0U);
+  RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
+}
+
+void NonlinearBeamformer::InitHighFrequencyCorrectionRanges() {
+  const float kAliasingFreqHz =
+      kSpeedOfSoundMeterSeconds /
+      (min_mic_spacing_ * (1.f + std::abs(std::cos(target_angle_radians_))));
+  const float kHighMeanStartHz = std::min(0.5f *  kAliasingFreqHz,
+                                          sample_rate_hz_ / 2.f);
+  const float kHighMeanEndHz = std::min(0.75f *  kAliasingFreqHz,
+                                        sample_rate_hz_ / 2.f);
+  high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
+  high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
+
+  RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
+  RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
+  RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
+}
+
+void NonlinearBeamformer::InitInterfAngles() {
+  interf_angles_radians_.clear();
+  const Point target_direction = AzimuthToPoint(target_angle_radians_);
+  const Point clockwise_interf_direction =
+      AzimuthToPoint(target_angle_radians_ - away_radians_);
+  if (!array_normal_ ||
+      DotProduct(*array_normal_, target_direction) *
+              DotProduct(*array_normal_, clockwise_interf_direction) >=
+          0.f) {
+    // The target and clockwise interferer are in the same half-plane defined
+    // by the array.
+    interf_angles_radians_.push_back(target_angle_radians_ - away_radians_);
+  } else {
+    // Otherwise, the interferer will begin reflecting back at the target.
+    // Instead rotate it away 180 degrees.
+    interf_angles_radians_.push_back(target_angle_radians_ - away_radians_ +
+                                     M_PI);
+  }
+  const Point counterclock_interf_direction =
+      AzimuthToPoint(target_angle_radians_ + away_radians_);
+  if (!array_normal_ ||
+      DotProduct(*array_normal_, target_direction) *
+              DotProduct(*array_normal_, counterclock_interf_direction) >=
+          0.f) {
+    // The target and counter-clockwise interferer are in the same half-plane
+    // defined by the array.
+    interf_angles_radians_.push_back(target_angle_radians_ + away_radians_);
+  } else {
+    // Otherwise, the interferer will begin reflecting back at the target.
+    // Instead rotate it away 180 degrees.
+    interf_angles_radians_.push_back(target_angle_radians_ + away_radians_ -
+                                     M_PI);
  }
 }

 void NonlinearBeamformer::InitDelaySumMasks() {
  for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
    delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
-    CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
-                                                   kFftSize,
-                                                   sample_rate_hz_,
-                                                   kSpeedOfSoundMeterSeconds,
-                                                   array_geometry_,
-                                                   kTargetAngleRadians,
-                                                   &delay_sum_masks_[f_ix]);
+    CovarianceMatrixGenerator::PhaseAlignmentMasks(
+        f_ix, kFftSize, sample_rate_hz_, kSpeedOfSoundMeterSeconds,
+        array_geometry_, target_angle_radians_, &delay_sum_masks_[f_ix]);

    complex_f norm_factor = sqrt(
        ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
@@ -273,40 +316,53 @@ void NonlinearBeamformer::InitTargetCovMats() {
  for (size_t i = 0; i < kNumFreqBins; ++i) {
    target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
    TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
-    complex_f normalization_factor = target_cov_mats_[i].Trace();
-    target_cov_mats_[i].Scale(1.f / normalization_factor);
+  }
+}
+
+void NonlinearBeamformer::InitDiffuseCovMats() {
+  for (size_t i = 0; i < kNumFreqBins; ++i) {
+    uniform_cov_mat_[i].Resize(num_input_channels_, num_input_channels_);
+    CovarianceMatrixGenerator::UniformCovarianceMatrix(
+        wave_numbers_[i], array_geometry_, &uniform_cov_mat_[i]);
+    complex_f normalization_factor = uniform_cov_mat_[i].elements()[0][0];
+    uniform_cov_mat_[i].Scale(1.f / normalization_factor);
+    uniform_cov_mat_[i].Scale(1 - kBalance);
  }
 }

 void NonlinearBeamformer::InitInterfCovMats() {
  for (size_t i = 0; i < kNumFreqBins; ++i) {
-    interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
-    ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
-    ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
+    interf_cov_mats_[i].clear();
+    for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
+      interf_cov_mats_[i].push_back(new ComplexMatrixF(num_input_channels_,
+                                                       num_input_channels_));
+      ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
+      CovarianceMatrixGenerator::AngledCovarianceMatrix(
+          kSpeedOfSoundMeterSeconds,
+          interf_angles_radians_[j],
+          i,
+          kFftSize,
+          kNumFreqBins,
+          sample_rate_hz_,
+          array_geometry_,
+          &angled_cov_mat);
+      // Normalize matrices before averaging them.
+      complex_f normalization_factor = angled_cov_mat.elements()[0][0];
+      angled_cov_mat.Scale(1.f / normalization_factor);
+      // Weighted average of matrices.
+      angled_cov_mat.Scale(kBalance);
+      interf_cov_mats_[i][j]->Add(uniform_cov_mat_[i], angled_cov_mat);
+    }
+  }
+}

-    CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
-                                                       array_geometry_,
-                                                       &uniform_cov_mat);
-
-    CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
-                                                      kInterfAngleRadians,
-                                                      i,
-                                                      kFftSize,
-                                                      kNumFreqBins,
-                                                      sample_rate_hz_,
-                                                      array_geometry_,
-                                                      &angled_cov_mat);
-    // Normalize matrices before averaging them.
-    complex_f normalization_factor = uniform_cov_mat.Trace();
-    uniform_cov_mat.Scale(1.f / normalization_factor);
-    normalization_factor = angled_cov_mat.Trace();
-    angled_cov_mat.Scale(1.f / normalization_factor);
-
-    // Average matrices.
-    uniform_cov_mat.Scale(1 - kBalance);
-    angled_cov_mat.Scale(kBalance);
-    interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
-    reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
+void NonlinearBeamformer::NormalizeCovMats() {
+  for (size_t i = 0; i < kNumFreqBins; ++i) {
+    rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
+    rpsiws_[i].clear();
+    for (size_t j = 0; j < interf_angles_radians_.size(); ++j) {
+      rpsiws_[i].push_back(Norm(*interf_cov_mats_[i][j], delay_sum_masks_[i]));
+    }
  }
 }

@@ -322,28 +378,32 @@ void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
  const float ramp_increment =
      (high_pass_postfilter_mask_ - old_high_pass_mask) /
      input.num_frames_per_band();
-  // Apply delay and sum and post-filter in the time domain. WARNING: only works
-  // because delay-and-sum is not frequency dependent.
+  // Apply the smoothed high-pass mask to the first channel of each band.
+  // This can be done because the effct of the linear beamformer is negligible
+  // compared to the post-filter.
  for (size_t i = 1; i < input.num_bands(); ++i) {
    float smoothed_mask = old_high_pass_mask;
    for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
      smoothed_mask += ramp_increment;
-
-      // Applying the delay and sum (at zero degrees, this is equivalent to
-      // averaging).
-      float sum = 0.f;
-      for (int k = 0; k < input.num_channels(); ++k) {
-        sum += input.channels(i)[k][j];
-      }
-      output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
+      output->channels(i)[0][j] = input.channels(i)[0][j] * smoothed_mask;
    }
  }
 }

+void NonlinearBeamformer::AimAt(const SphericalPointf& target_direction) {
+  target_angle_radians_ = target_direction.azimuth();
+  InitHighFrequencyCorrectionRanges();
+  InitInterfAngles();
+  InitDelaySumMasks();
+  InitTargetCovMats();
+  InitInterfCovMats();
+  NormalizeCovMats();
+}
+
 bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
  // If more than half-beamwidth degrees away from the beam's center,
  // you are out of the beam.
-  return fabs(spherical_point.azimuth() - kTargetAngleRadians) <
+  return fabs(spherical_point.azimuth() - target_angle_radians_) <
         kHalfBeamWidthRadians;
 }

@@ -376,17 +436,19 @@ void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
    rmw *= rmw;
    float rmw_r = rmw.real();

-    new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
-                                           rpsiws_[i],
+    new_mask_[i] = CalculatePostfilterMask(*interf_cov_mats_[i][0],
+                                           rpsiws_[i][0],
                                           ratio_rxiw_rxim,
-                                           rmw_r,
-                                           mask_thresholds_[i]);
-
-    new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
-                                            reflected_rpsiws_[i],
-                                            ratio_rxiw_rxim,
-                                            rmw_r,
-                                            mask_thresholds_[i]);
+                                           rmw_r);
+    for (size_t j = 1; j < interf_angles_radians_.size(); ++j) {
+      float tmp_mask = CalculatePostfilterMask(*interf_cov_mats_[i][j],
+                                               rpsiws_[i][j],
+                                               ratio_rxiw_rxim,
+                                               rmw_r);
+      if (tmp_mask < new_mask_[i]) {
+        new_mask_[i] = tmp_mask;
+      }
+    }
  }

  ApplyMaskTimeSmoothing();
@@ -401,24 +463,16 @@ float NonlinearBeamformer::CalculatePostfilterMask(
    const ComplexMatrixF& interf_cov_mat,
    float rpsiw,
    float ratio_rxiw_rxim,
-    float rmw_r,
-    float mask_threshold) {
+    float rmw_r) {
  float rpsim = Norm(interf_cov_mat, eig_m_);

-  // Find lambda.
  float ratio = 0.f;
  if (rpsim > 0.f) {
    ratio = rpsiw / rpsim;
  }
-  float numerator = rmw_r - ratio;
-  float denominator = ratio_rxiw_rxim - ratio;

-  float mask = 1.f;
-  if (denominator > mask_threshold) {
-    float lambda = numerator / denominator;
-    mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
-  }
-  return mask;
+  return (1.f - std::min(kCutOffConstant, ratio / rmw_r)) /
+         (1.f - std::min(kCutOffConstant, ratio / ratio_rxiw_rxim));
 }

 void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
@@ -433,7 +487,7 @@ void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
      output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
    }

-    output_channel[f_ix] *= final_mask_[f_ix];
+    output_channel[f_ix] *= kCompensationGain * final_mask_[f_ix];
  }
 }

--- a/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
+++ b/webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h
@@ -11,12 +11,17 @@
 #ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
 #define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_

+// MSVC++ requires this to be set before any other includes to get M_PI.
+#define _USE_MATH_DEFINES
+
+#include <math.h>
 #include <vector>

 #include "webrtc/common_audio/lapped_transform.h"
 #include "webrtc/common_audio/channel_buffer.h"
 #include "webrtc/modules/audio_processing/beamformer/beamformer.h"
 #include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
+#include "webrtc/system_wrappers/include/scoped_vector.h"

 namespace webrtc {

@@ -26,15 +31,16 @@ namespace webrtc {
 //
 // The implemented nonlinear postfilter algorithm taken from "A Robust Nonlinear
 // Beamforming Postprocessor" by Bastiaan Kleijn.
-//
-// TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.
 class NonlinearBeamformer
  : public Beamformer<float>,
    public LappedTransform::Callback {
 public:
-  // At the moment it only accepts uniform linear microphone arrays. Using the
-  // first microphone as a reference position [0, 0, 0] is a natural choice.
-  explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);
+  static const float kHalfBeamWidthRadians;
+
+  explicit NonlinearBeamformer(
+      const std::vector<Point>& array_geometry,
+      SphericalPointf target_direction =
+          SphericalPointf(static_cast<float>(M_PI) / 2.f, 0.f, 1.f));

  // Sample rate corresponds to the lower band.
  // Needs to be called before the NonlinearBeamformer can be used.
@@ -47,6 +53,8 @@ class NonlinearBeamformer
  void ProcessChunk(const ChannelBuffer<float>& input,
                    ChannelBuffer<float>* output) override;

+  void AimAt(const SphericalPointf& target_direction) override;
+
  bool IsInBeam(const SphericalPointf& spherical_point) override;

  // After processing each block |is_target_present_| is set to true if the
@@ -65,23 +73,30 @@ class NonlinearBeamformer
                         complex<float>* const* output) override;

 private:
+#ifndef WEBRTC_AUDIO_PROCESSING_ONLY_BUILD
+  FRIEND_TEST_ALL_PREFIXES(NonlinearBeamformerTest,
+                           InterfAnglesTakeAmbiguityIntoAccount);
+#endif
+
  typedef Matrix<float> MatrixF;
  typedef ComplexMatrix<float> ComplexMatrixF;
  typedef complex<float> complex_f;

+  void InitLowFrequencyCorrectionRanges();
+  void InitHighFrequencyCorrectionRanges();
+  void InitInterfAngles();
  void InitDelaySumMasks();
-  void InitTargetCovMats();  // TODO(aluebs): Make this depend on target angle.
+  void InitTargetCovMats();
+  void InitDiffuseCovMats();
  void InitInterfCovMats();
+  void NormalizeCovMats();

-  // An implementation of equation 18, which calculates postfilter masks that,
-  // when applied, minimize the mean-square error of our estimation of the
-  // desired signal. A sub-task is to calculate lambda, which is solved via
-  // equation 13.
+  // Calculates postfilter masks that minimize the mean squared error of our
+  // estimation of the desired signal.
  float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
                                float rpsiw,
                                float ratio_rxiw_rxim,
-                                float rmxi_r,
-                                float mask_threshold);
+                                float rmxi_r);

  // Prevents the postfilter masks from degenerating too quickly (a cause of
  // musical noise).
@@ -120,6 +135,11 @@ class NonlinearBeamformer
  int sample_rate_hz_;

  const std::vector<Point> array_geometry_;
+  // The normal direction of the array if it has one and it is in the xy-plane.
+  const rtc::Maybe<Point> array_normal_;
+
+  // Minimum spacing between microphone pairs.
+  const float min_mic_spacing_;

  // Calculated based on user-input and constants in the .cc file.
  size_t low_mean_start_bin_;
@@ -134,28 +154,33 @@ class NonlinearBeamformer
  // Time and frequency smoothed mask.
  float final_mask_[kNumFreqBins];

+  float target_angle_radians_;
+  // Angles of the interferer scenarios.
+  std::vector<float> interf_angles_radians_;
+  // The angle between the target and the interferer scenarios.
+  const float away_radians_;
+
  // Array of length |kNumFreqBins|, Matrix of size |1| x |num_channels_|.
  ComplexMatrixF delay_sum_masks_[kNumFreqBins];
  ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];

-  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
+  // Arrays of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
  // |num_input_channels_|.
  ComplexMatrixF target_cov_mats_[kNumFreqBins];
-
+  ComplexMatrixF uniform_cov_mat_[kNumFreqBins];
  // Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
-  // |num_input_channels_|.
-  ComplexMatrixF interf_cov_mats_[kNumFreqBins];
-  ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];
+  // |num_input_channels_|. ScopedVector has a size equal to the number of
+  // interferer scenarios.
+  ScopedVector<ComplexMatrixF> interf_cov_mats_[kNumFreqBins];

  // Of length |kNumFreqBins|.
-  float mask_thresholds_[kNumFreqBins];
  float wave_numbers_[kNumFreqBins];

  // Preallocated for ProcessAudioBlock()
  // Of length |kNumFreqBins|.
  float rxiws_[kNumFreqBins];
-  float rpsiws_[kNumFreqBins];
-  float reflected_rpsiws_[kNumFreqBins];
+  // The vector has a size equal to the number of interferer scenarios.
+  std::vector<float> rpsiws_[kNumFreqBins];

  // The microphone normalization factor.
  ComplexMatrixF eig_m_;