Bump to WebRTC M120 release

Some APIs were removed upstream -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it has been removed upstream, but we may want to drop it soon.
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions
--- a/webrtc/modules/audio_processing/include/aec_dump.cc
+++ b/webrtc/modules/audio_processing/include/aec_dump.cc
@ -17,7 +17,7 @@ InternalAPMConfig::InternalAPMConfig(InternalAPMConfig&&) = default;
 InternalAPMConfig& InternalAPMConfig::operator=(const InternalAPMConfig&) =
    default;

-bool InternalAPMConfig::operator==(const InternalAPMConfig& other) {
+bool InternalAPMConfig::operator==(const InternalAPMConfig& other) const {
  return aec_enabled == other.aec_enabled &&
         aec_delay_agnostic_enabled == other.aec_delay_agnostic_enabled &&
         aec_drift_compensation_enabled ==
--- a/webrtc/modules/audio_processing/include/aec_dump.h
+++ b/webrtc/modules/audio_processing/include/aec_dump.h
@ -15,9 +15,10 @@

 #include <string>

+#include "absl/base/attributes.h"
+#include "absl/types/optional.h"
 #include "modules/audio_processing/include/audio_frame_view.h"
 #include "modules/audio_processing/include/audio_processing.h"
-#include "rtc_base/deprecation.h"

 namespace webrtc {

@ -31,7 +32,7 @@ struct InternalAPMConfig {
  InternalAPMConfig& operator=(const InternalAPMConfig&);
  InternalAPMConfig& operator=(InternalAPMConfig&&) = delete;

-  bool operator==(const InternalAPMConfig& other);
+  bool operator==(const InternalAPMConfig& other) const;

  bool aec_enabled = false;
  bool aec_delay_agnostic_enabled = false;
@ -67,7 +68,7 @@ class AecDump {
  struct AudioProcessingState {
    int delay;
    int drift;
-    int level;
+    absl::optional<int> applied_input_volume;
    bool keypress;
  };

@ -76,7 +77,8 @@ class AecDump {
  // Logs Event::Type INIT message.
  virtual void WriteInitMessage(const ProcessingConfig& api_format,
                                int64_t time_now_ms) = 0;
-  RTC_DEPRECATED void WriteInitMessage(const ProcessingConfig& api_format) {
+  ABSL_DEPRECATED("")
+  void WriteInitMessage(const ProcessingConfig& api_format) {
    WriteInitMessage(api_format, 0);
  }

--- a/webrtc/modules/audio_processing/include/audio_frame_proxies.cc
+++ b/webrtc/modules/audio_processing/include/audio_frame_proxies.cc
@ -20,10 +20,8 @@ int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
    return AudioProcessing::Error::kNullPointerError;
  }

-  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_,
-                            /*has_keyboard=*/false);
-  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
-                             /*has_keyboard=*/false);
+  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
+  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);
  RTC_DCHECK_EQ(frame->samples_per_channel(), input_config.num_frames());

  int result = ap->ProcessStream(frame->data(), input_config, output_config,
@ -57,10 +55,8 @@ int ProcessReverseAudioFrame(AudioProcessing* ap, AudioFrame* frame) {
    return AudioProcessing::Error::kBadNumberChannelsError;
  }

-  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_,
-                            /*has_keyboard=*/false);
-  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_,
-                             /*has_keyboard=*/false);
+  StreamConfig input_config(frame->sample_rate_hz_, frame->num_channels_);
+  StreamConfig output_config(frame->sample_rate_hz_, frame->num_channels_);

  int result = ap->ProcessReverseStream(frame->data(), input_config,
                                        output_config, frame->mutable_data());
--- a/webrtc/modules/audio_processing/include/audio_frame_proxies.h
+++ b/webrtc/modules/audio_processing/include/audio_frame_proxies.h
@ -16,21 +16,21 @@ namespace webrtc {
 class AudioFrame;
 class AudioProcessing;

-// Processes a 10 ms |frame| of the primary audio stream using the provided
+// Processes a 10 ms `frame` of the primary audio stream using the provided
 // AudioProcessing object. On the client-side, this is the near-end (or
-// captured) audio. The |sample_rate_hz_|, |num_channels_|, and
-// |samples_per_channel_| members of |frame| must be valid. If changed from the
+// captured) audio. The `sample_rate_hz_`, `num_channels_`, and
+// `samples_per_channel_` members of `frame` must be valid. If changed from the
 // previous call to this function, it will trigger an initialization of the
 // provided AudioProcessing object.
 // The function returns any error codes passed from the AudioProcessing
 // ProcessStream method.
 int ProcessAudioFrame(AudioProcessing* ap, AudioFrame* frame);

-// Processes a 10 ms |frame| of the reverse direction audio stream using the
+// Processes a 10 ms `frame` of the reverse direction audio stream using the
 // provided AudioProcessing object. The frame may be modified. On the
 // client-side, this is the far-end (or to be rendered) audio. The
-// |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_| members of
-// |frame| must be valid. If changed from the previous call to this function, it
+// `sample_rate_hz_`, `num_channels_`, and `samples_per_channel_` members of
+// `frame` must be valid. If changed from the previous call to this function, it
 // will trigger an initialization of the provided AudioProcessing object.
 // The function returns any error codes passed from the AudioProcessing
 // ProcessReverseStream method.
--- a/webrtc/modules/audio_processing/include/audio_frame_view.h
+++ b/webrtc/modules/audio_processing/include/audio_frame_view.h
@ -19,15 +19,16 @@ namespace webrtc {
 template <class T>
 class AudioFrameView {
 public:
-  // |num_channels| and |channel_size| describe the T**
-  // |audio_samples|. |audio_samples| is assumed to point to a
+  // `num_channels` and `channel_size` describe the T**
+  // `audio_samples`. `audio_samples` is assumed to point to a
  // two-dimensional |num_channels * channel_size| array of floats.
-  AudioFrameView(T* const* audio_samples,
-                 size_t num_channels,
-                 size_t channel_size)
+  AudioFrameView(T* const* audio_samples, int num_channels, int channel_size)
      : audio_samples_(audio_samples),
        num_channels_(num_channels),
-        channel_size_(channel_size) {}
+        channel_size_(channel_size) {
+    RTC_DCHECK_GE(num_channels_, 0);
+    RTC_DCHECK_GE(channel_size_, 0);
+  }

  // Implicit cast to allow converting Frame<float> to
  // Frame<const float>.
@ -39,17 +40,17 @@ class AudioFrameView {

  AudioFrameView() = delete;

-  size_t num_channels() const { return num_channels_; }
+  int num_channels() const { return num_channels_; }

-  size_t samples_per_channel() const { return channel_size_; }
+  int samples_per_channel() const { return channel_size_; }

-  rtc::ArrayView<T> channel(size_t idx) {
+  rtc::ArrayView<T> channel(int idx) {
    RTC_DCHECK_LE(0, idx);
    RTC_DCHECK_LE(idx, num_channels_);
    return rtc::ArrayView<T>(audio_samples_[idx], channel_size_);
  }

-  rtc::ArrayView<const T> channel(size_t idx) const {
+  rtc::ArrayView<const T> channel(int idx) const {
    RTC_DCHECK_LE(0, idx);
    RTC_DCHECK_LE(idx, num_channels_);
    return rtc::ArrayView<const T>(audio_samples_[idx], channel_size_);
@ -59,8 +60,8 @@ class AudioFrameView {

 private:
  T* const* audio_samples_;
-  size_t num_channels_;
-  size_t channel_size_;
+  int num_channels_;
+  int channel_size_;
 };
 }  // namespace webrtc

--- a/webrtc/modules/audio_processing/include/audio_processing.cc
+++ b/webrtc/modules/audio_processing/include/audio_processing.cc
@ -16,6 +16,9 @@
 namespace webrtc {
 namespace {

+using Agc1Config = AudioProcessing::Config::GainController1;
+using Agc2Config = AudioProcessing::Config::GainController2;
+
 std::string NoiseSuppressionLevelToString(
    const AudioProcessing::Config::NoiseSuppression::Level& level) {
  switch (level) {
@ -28,36 +31,19 @@ std::string NoiseSuppressionLevelToString(
    case AudioProcessing::Config::NoiseSuppression::Level::kVeryHigh:
      return "VeryHigh";
  }
+  RTC_CHECK_NOTREACHED();
 }

-std::string GainController1ModeToString(
-    const AudioProcessing::Config::GainController1::Mode& mode) {
+std::string GainController1ModeToString(const Agc1Config::Mode& mode) {
  switch (mode) {
-    case AudioProcessing::Config::GainController1::Mode::kAdaptiveAnalog:
+    case Agc1Config::Mode::kAdaptiveAnalog:
      return "AdaptiveAnalog";
-    case AudioProcessing::Config::GainController1::Mode::kAdaptiveDigital:
+    case Agc1Config::Mode::kAdaptiveDigital:
      return "AdaptiveDigital";
-    case AudioProcessing::Config::GainController1::Mode::kFixedDigital:
+    case Agc1Config::Mode::kFixedDigital:
      return "FixedDigital";
  }
-}
-
-std::string GainController2LevelEstimatorToString(
-    const AudioProcessing::Config::GainController2::LevelEstimator& level) {
-  switch (level) {
-    case AudioProcessing::Config::GainController2::LevelEstimator::kRms:
-      return "Rms";
-    case AudioProcessing::Config::GainController2::LevelEstimator::kPeak:
-      return "Peak";
-  }
-}
-
-int GetDefaultMaxInternalRate() {
-#ifdef WEBRTC_ARCH_ARM_FAMILY
-  return 32000;
-#else
-  return 48000;
-#endif
+  RTC_CHECK_NOTREACHED();
 }

 }  // namespace
@ -67,25 +53,92 @@ constexpr int AudioProcessing::kNativeSampleRatesHz[];
 void CustomProcessing::SetRuntimeSetting(
    AudioProcessing::RuntimeSetting setting) {}

-AudioProcessing::Config::Pipeline::Pipeline()
-    : maximum_internal_processing_rate(GetDefaultMaxInternalRate()) {}
+// Field-by-field equality for the AGC1 configuration. Compares the top-level
+// settings, the nested analog gain controller settings, and every clipping
+// predictor setting. Returns true only when all of them match `rhs`.
+bool Agc1Config::operator==(const Agc1Config& rhs) const {
+  const auto& analog_lhs = analog_gain_controller;
+  const auto& analog_rhs = rhs.analog_gain_controller;
+  return enabled == rhs.enabled && mode == rhs.mode &&
+         target_level_dbfs == rhs.target_level_dbfs &&
+         compression_gain_db == rhs.compression_gain_db &&
+         enable_limiter == rhs.enable_limiter &&
+         analog_lhs.enabled == analog_rhs.enabled &&
+         analog_lhs.startup_min_volume == analog_rhs.startup_min_volume &&
+         analog_lhs.clipped_level_min == analog_rhs.clipped_level_min &&
+         analog_lhs.enable_digital_adaptive ==
+             analog_rhs.enable_digital_adaptive &&
+         analog_lhs.clipped_level_step == analog_rhs.clipped_level_step &&
+         analog_lhs.clipped_ratio_threshold ==
+             analog_rhs.clipped_ratio_threshold &&
+         analog_lhs.clipped_wait_frames == analog_rhs.clipped_wait_frames &&
+         // The predictor's on/off switch has to be compared as well;
+         // without it, two configs that differ only in
+         // `clipping_predictor.enabled` would be reported as equal.
+         analog_lhs.clipping_predictor.enabled ==
+             analog_rhs.clipping_predictor.enabled &&
+         analog_lhs.clipping_predictor.mode ==
+             analog_rhs.clipping_predictor.mode &&
+         analog_lhs.clipping_predictor.window_length ==
+             analog_rhs.clipping_predictor.window_length &&
+         analog_lhs.clipping_predictor.reference_window_length ==
+             analog_rhs.clipping_predictor.reference_window_length &&
+         analog_lhs.clipping_predictor.reference_window_delay ==
+             analog_rhs.clipping_predictor.reference_window_delay &&
+         analog_lhs.clipping_predictor.clipping_threshold ==
+             analog_rhs.clipping_predictor.clipping_threshold &&
+         analog_lhs.clipping_predictor.crest_factor_margin ==
+             analog_rhs.clipping_predictor.crest_factor_margin &&
+         analog_lhs.clipping_predictor.use_predicted_step ==
+             analog_rhs.clipping_predictor.use_predicted_step;
+}
+
+// Returns true when every adaptive-digital setting of this AGC2 config matches
+// the corresponding setting in `rhs`; bails out on the first mismatch.
+bool Agc2Config::AdaptiveDigital::operator==(
+    const Agc2Config::AdaptiveDigital& rhs) const {
+  if (!(enabled == rhs.enabled)) return false;
+  if (!(headroom_db == rhs.headroom_db)) return false;
+  if (!(max_gain_db == rhs.max_gain_db)) return false;
+  if (!(initial_gain_db == rhs.initial_gain_db)) return false;
+  if (!(max_gain_change_db_per_second == rhs.max_gain_change_db_per_second)) {
+    return false;
+  }
+  return max_output_noise_level_dbfs == rhs.max_output_noise_level_dbfs;
+}
+
+// Two input volume controller configs are equal when their `enabled` flags
+// agree; that flag is the only member compared here.
+bool Agc2Config::InputVolumeController::operator==(
+    const Agc2Config::InputVolumeController& rhs) const {
+  const bool same_enabled = (enabled == rhs.enabled);
+  return same_enabled;
+}
+
+// Equality for the AGC2 configuration: the top-level switch, the fixed digital
+// gain, and the two nested sub-configs must all match `rhs`.
+bool Agc2Config::operator==(const Agc2Config& rhs) const {
+  if (!(enabled == rhs.enabled)) return false;
+  if (!(fixed_digital.gain_db == rhs.fixed_digital.gain_db)) return false;
+  if (!(adaptive_digital == rhs.adaptive_digital)) return false;
+  return input_volume_controller == rhs.input_volume_controller;
+}
+
+// Equality for the capture level adjustment settings: the switch, both gain
+// factors, and the nested analog mic gain emulation config must match `rhs`.
+bool AudioProcessing::Config::CaptureLevelAdjustment::operator==(
+    const AudioProcessing::Config::CaptureLevelAdjustment& rhs) const {
+  if (!(enabled == rhs.enabled)) return false;
+  if (!(pre_gain_factor == rhs.pre_gain_factor)) return false;
+  if (!(post_gain_factor == rhs.post_gain_factor)) return false;
+  return analog_mic_gain_emulation == rhs.analog_mic_gain_emulation;
+}
+
+// Equality for the analog mic gain emulation settings: both the switch and the
+// initial level must match `rhs`.
+bool AudioProcessing::Config::CaptureLevelAdjustment::AnalogMicGainEmulation::
+operator==(const AudioProcessing::Config::CaptureLevelAdjustment::
+               AnalogMicGainEmulation& rhs) const {
+  if (!(enabled == rhs.enabled)) return false;
+  return initial_level == rhs.initial_level;
+}

 std::string AudioProcessing::Config::ToString() const {
-  char buf[1024];
+  char buf[2048];
  rtc::SimpleStringBuilder builder(buf);
  builder << "AudioProcessing::Config{ "
-             "pipeline: {"
+             "pipeline: { "
             "maximum_internal_processing_rate: "
          << pipeline.maximum_internal_processing_rate
          << ", multi_channel_render: " << pipeline.multi_channel_render
-          << ", "
-             ", multi_channel_capture: "
-          << pipeline.multi_channel_capture
-          << "}, "
-             "pre_amplifier: { enabled: "
-          << pre_amplifier.enabled
+          << ", multi_channel_capture: " << pipeline.multi_channel_capture
+          << " }, pre_amplifier: { enabled: " << pre_amplifier.enabled
          << ", fixed_gain_factor: " << pre_amplifier.fixed_gain_factor
-          << " }, high_pass_filter: { enabled: " << high_pass_filter.enabled
+          << " },capture_level_adjustment: { enabled: "
+          << capture_level_adjustment.enabled
+          << ", pre_gain_factor: " << capture_level_adjustment.pre_gain_factor
+          << ", post_gain_factor: " << capture_level_adjustment.post_gain_factor
+          << ", analog_mic_gain_emulation: { enabled: "
+          << capture_level_adjustment.analog_mic_gain_emulation.enabled
+          << ", initial_level: "
+          << capture_level_adjustment.analog_mic_gain_emulation.initial_level
+          << " }}, high_pass_filter: { enabled: " << high_pass_filter.enabled
          << " }, echo_canceller: { enabled: " << echo_canceller.enabled
          << ", mobile_mode: " << echo_canceller.mobile_mode
          << ", enforce_high_pass_filtering: "
@ -95,29 +148,62 @@ std::string AudioProcessing::Config::ToString() const {
          << NoiseSuppressionLevelToString(noise_suppression.level)
          << " }, transient_suppression: { enabled: "
          << transient_suppression.enabled
-          << " }, voice_detection: { enabled: " << voice_detection.enabled
          << " }, gain_controller1: { enabled: " << gain_controller1.enabled
          << ", mode: " << GainController1ModeToString(gain_controller1.mode)
          << ", target_level_dbfs: " << gain_controller1.target_level_dbfs
          << ", compression_gain_db: " << gain_controller1.compression_gain_db
          << ", enable_limiter: " << gain_controller1.enable_limiter
-          << ", analog_level_minimum: " << gain_controller1.analog_level_minimum
-          << ", analog_level_maximum: " << gain_controller1.analog_level_maximum
-          << " }, gain_controller2: { enabled: " << gain_controller2.enabled
+          << ", analog_gain_controller { enabled: "
+          << gain_controller1.analog_gain_controller.enabled
+          << ", startup_min_volume: "
+          << gain_controller1.analog_gain_controller.startup_min_volume
+          << ", clipped_level_min: "
+          << gain_controller1.analog_gain_controller.clipped_level_min
+          << ", enable_digital_adaptive: "
+          << gain_controller1.analog_gain_controller.enable_digital_adaptive
+          << ", clipped_level_step: "
+          << gain_controller1.analog_gain_controller.clipped_level_step
+          << ", clipped_ratio_threshold: "
+          << gain_controller1.analog_gain_controller.clipped_ratio_threshold
+          << ", clipped_wait_frames: "
+          << gain_controller1.analog_gain_controller.clipped_wait_frames
+          << ", clipping_predictor:  { enabled: "
+          << gain_controller1.analog_gain_controller.clipping_predictor.enabled
+          << ", mode: "
+          << gain_controller1.analog_gain_controller.clipping_predictor.mode
+          << ", window_length: "
+          << gain_controller1.analog_gain_controller.clipping_predictor
+                 .window_length
+          << ", reference_window_length: "
+          << gain_controller1.analog_gain_controller.clipping_predictor
+                 .reference_window_length
+          << ", reference_window_delay: "
+          << gain_controller1.analog_gain_controller.clipping_predictor
+                 .reference_window_delay
+          << ", clipping_threshold: "
+          << gain_controller1.analog_gain_controller.clipping_predictor
+                 .clipping_threshold
+          << ", crest_factor_margin: "
+          << gain_controller1.analog_gain_controller.clipping_predictor
+                 .crest_factor_margin
+          << ", use_predicted_step: "
+          << gain_controller1.analog_gain_controller.clipping_predictor
+                 .use_predicted_step
+          << " }}}, gain_controller2: { enabled: " << gain_controller2.enabled
          << ", fixed_digital: { gain_db: "
          << gain_controller2.fixed_digital.gain_db
          << " }, adaptive_digital: { enabled: "
-          << gain_controller2.adaptive_digital.enabled << ", level_estimator: "
-          << GainController2LevelEstimatorToString(
-                 gain_controller2.adaptive_digital.level_estimator)
-          << ", use_saturation_protector: "
-          << gain_controller2.adaptive_digital.use_saturation_protector
-          << ", extra_saturation_margin_db: "
-          << gain_controller2.adaptive_digital.extra_saturation_margin_db
-          << " } }, residual_echo_detector: { enabled: "
-          << residual_echo_detector.enabled
-          << " }, level_estimation: { enabled: " << level_estimation.enabled
-          << " } }";
+          << gain_controller2.adaptive_digital.enabled
+          << ", headroom_db: " << gain_controller2.adaptive_digital.headroom_db
+          << ", max_gain_db: " << gain_controller2.adaptive_digital.max_gain_db
+          << ", initial_gain_db: "
+          << gain_controller2.adaptive_digital.initial_gain_db
+          << ", max_gain_change_db_per_second: "
+          << gain_controller2.adaptive_digital.max_gain_change_db_per_second
+          << ", max_output_noise_level_dbfs: "
+          << gain_controller2.adaptive_digital.max_output_noise_level_dbfs
+          << " }, input_volume_control : { enabled "
+          << gain_controller2.input_volume_controller.enabled << "}}";
  return builder.str();
 }

--- a/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/webrtc/modules/audio_processing/include/audio_processing.h
@ -23,16 +23,14 @@

 #include <vector>

+#include "absl/strings/string_view.h"
 #include "absl/types/optional.h"
 #include "api/array_view.h"
 #include "api/audio/echo_canceller3_config.h"
 #include "api/audio/echo_control.h"
 #include "api/scoped_refptr.h"
 #include "modules/audio_processing/include/audio_processing_statistics.h"
-#include "modules/audio_processing/include/config.h"
 #include "rtc_base/arraysize.h"
-#include "rtc_base/constructor_magic.h"
-#include "rtc_base/deprecation.h"
 #include "rtc_base/ref_count.h"
 #include "rtc_base/system/file_wrapper.h"
 #include "rtc_base/system/rtc_export.h"
@ -53,74 +51,13 @@ class EchoDetector;
 class CustomAudioAnalyzer;
 class CustomProcessing;

-// Use to enable experimental gain control (AGC). At startup the experimental
-// AGC moves the microphone volume up to |startup_min_volume| if the current
-// microphone volume is set too low. The value is clamped to its operating range
-// [12, 255]. Here, 255 maps to 100%.
-//
-// Must be provided through AudioProcessingBuilder().Create(config).
-#if defined(WEBRTC_CHROMIUM_BUILD)
-static const int kAgcStartupMinVolume = 85;
-#else
-static const int kAgcStartupMinVolume = 0;
-#endif  // defined(WEBRTC_CHROMIUM_BUILD)
-static constexpr int kClippedLevelMin = 70;
-
-// To be deprecated: Please instead use the flag in the
-// AudioProcessing::Config::AnalogGainController.
-// TODO(webrtc:5298): Remove.
-struct ExperimentalAgc {
-  ExperimentalAgc() = default;
-  explicit ExperimentalAgc(bool enabled) : enabled(enabled) {}
-  ExperimentalAgc(bool enabled,
-                  bool enabled_agc2_level_estimator,
-                  bool digital_adaptive_disabled)
-      : enabled(enabled),
-        enabled_agc2_level_estimator(enabled_agc2_level_estimator),
-        digital_adaptive_disabled(digital_adaptive_disabled) {}
-  // Deprecated constructor: will be removed.
-  ExperimentalAgc(bool enabled,
-                  bool enabled_agc2_level_estimator,
-                  bool digital_adaptive_disabled,
-                  bool analyze_before_aec)
-      : enabled(enabled),
-        enabled_agc2_level_estimator(enabled_agc2_level_estimator),
-        digital_adaptive_disabled(digital_adaptive_disabled) {}
-  ExperimentalAgc(bool enabled, int startup_min_volume)
-      : enabled(enabled), startup_min_volume(startup_min_volume) {}
-  ExperimentalAgc(bool enabled, int startup_min_volume, int clipped_level_min)
-      : enabled(enabled),
-        startup_min_volume(startup_min_volume),
-        clipped_level_min(clipped_level_min) {}
-  static const ConfigOptionID identifier = ConfigOptionID::kExperimentalAgc;
-  bool enabled = true;
-  int startup_min_volume = kAgcStartupMinVolume;
-  // Lowest microphone level that will be applied in response to clipping.
-  int clipped_level_min = kClippedLevelMin;
-  bool enabled_agc2_level_estimator = false;
-  bool digital_adaptive_disabled = false;
-};
-
-// To be deprecated: Please instead use the flag in the
-// AudioProcessing::Config::TransientSuppression.
-//
-// Use to enable experimental noise suppression. It can be set in the
-// constructor.
-// TODO(webrtc:5298): Remove.
-struct ExperimentalNs {
-  ExperimentalNs() : enabled(false) {}
-  explicit ExperimentalNs(bool enabled) : enabled(enabled) {}
-  static const ConfigOptionID identifier = ConfigOptionID::kExperimentalNs;
-  bool enabled;
-};
-
 // The Audio Processing Module (APM) provides a collection of voice processing
 // components designed for real-time communications software.
 //
 // APM operates on two audio streams on a frame-by-frame basis. Frames of the
 // primary stream, on which all processing is applied, are passed to
-// |ProcessStream()|. Frames of the reverse direction stream are passed to
-// |ProcessReverseStream()|. On the client-side, this will typically be the
+// `ProcessStream()`. Frames of the reverse direction stream are passed to
+// `ProcessReverseStream()`. On the client-side, this will typically be the
 // near-end (capture) and far-end (render) streams, respectively. APM should be
 // placed in the signal chain as close to the audio hardware abstraction layer
 // (HAL) as possible.
@ -143,12 +80,13 @@ struct ExperimentalNs {
 //   2. Parameter getters are never called concurrently with the corresponding
 //      setter.
 //
-// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
-// interfaces use interleaved data, while the float interfaces use deinterleaved
-// data.
+// APM accepts only linear PCM audio data in chunks of ~10 ms (see
+// AudioProcessing::GetFrameSize() for details) and sample rates ranging from
+// 8000 Hz to 384000 Hz. The int16 interfaces use interleaved data, while the
+// float interfaces use deinterleaved data.
 //
 // Usage example, omitting error checking:
-// AudioProcessing* apm = AudioProcessingBuilder().Create();
+// rtc::scoped_refptr<AudioProcessing> apm = AudioProcessingBuilder().Create();
 //
 // AudioProcessing::Config config;
 // config.echo_canceller.enabled = true;
@ -164,13 +102,8 @@ struct ExperimentalNs {
 //
 // config.high_pass_filter.enabled = true;
 //
-// config.voice_detection.enabled = true;
-//
 // apm->ApplyConfig(config)
 //
-// apm->noise_reduction()->set_level(kHighSuppression);
-// apm->noise_reduction()->Enable(true);
-//
 // // Start a voice call...
 //
 // // ... Render frame arrives bound for the audio HAL ...
@ -187,12 +120,12 @@ struct ExperimentalNs {
 // analog_level = apm->recommended_stream_analog_level();
 // has_voice = apm->stream_has_voice();
 //
-// // Repeate render and capture processing for the duration of the call...
+// // Repeat render and capture processing for the duration of the call...
 // // Start a new call...
 // apm->Initialize();
 //
 // // Close the application...
-// delete apm;
+// apm.reset();
 //
 class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
 public:
@ -211,30 +144,60 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
  // submodule resets, affecting the audio quality. Use the RuntimeSetting
  // construct for runtime configuration.
  struct RTC_EXPORT Config {
-
    // Sets the properties of the audio processing pipeline.
    struct RTC_EXPORT Pipeline {
-      Pipeline();
+      // Ways to downmix a multi-channel track to mono.
+      enum class DownmixMethod {
+        kAverageChannels,  // Average across channels.
+        kUseFirstChannel   // Use the first channel.
+      };

      // Maximum allowed processing rate used internally. May only be set to
-      // 32000 or 48000 and any differing values will be treated as 48000. The
-      // default rate is currently selected based on the CPU architecture, but
-      // that logic may change.
-      int maximum_internal_processing_rate;
+      // 32000 or 48000 and any differing values will be treated as 48000.
+      int maximum_internal_processing_rate = 48000;
      // Allow multi-channel processing of render audio.
      bool multi_channel_render = false;
      // Allow multi-channel processing of capture audio when AEC3 is active
      // or a custom AEC is injected..
      bool multi_channel_capture = false;
+      // Indicates how to downmix multi-channel capture audio to mono (when
+      // needed).
+      DownmixMethod capture_downmix_method = DownmixMethod::kAverageChannels;
    } pipeline;

    // Enabled the pre-amplifier. It amplifies the capture signal
    // before any other processing is done.
+    // TODO(webrtc:5298): Deprecate and use the pre-gain functionality in
+    // capture_level_adjustment instead.
    struct PreAmplifier {
      bool enabled = false;
-      float fixed_gain_factor = 1.f;
+      float fixed_gain_factor = 1.0f;
    } pre_amplifier;

+    // Functionality for general level adjustment in the capture pipeline. This
+    // should not be used together with the legacy PreAmplifier functionality.
+    struct CaptureLevelAdjustment {
+      bool operator==(const CaptureLevelAdjustment& rhs) const;
+      bool operator!=(const CaptureLevelAdjustment& rhs) const {
+        return !(*this == rhs);
+      }
+      bool enabled = false;
+      // The `pre_gain_factor` scales the signal before any processing is done.
+      float pre_gain_factor = 1.0f;
+      // The `post_gain_factor` scales the signal after all processing is done.
+      float post_gain_factor = 1.0f;
+      struct AnalogMicGainEmulation {
+        bool operator==(const AnalogMicGainEmulation& rhs) const;
+        bool operator!=(const AnalogMicGainEmulation& rhs) const {
+          return !(*this == rhs);
+        }
+        bool enabled = false;
+        // Initial analog gain level to use for the emulated analog gain. Must
+        // be in the range [0...255].
+        int initial_level = 255;
+      } analog_mic_gain_emulation;
+    } capture_level_adjustment;
+
    struct HighPassFilter {
      bool enabled = false;
      bool apply_in_full_band = true;
@ -262,18 +225,18 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
      bool enabled = false;
    } transient_suppression;

-    // Enables reporting of |voice_detected| in webrtc::AudioProcessingStats.
-    struct VoiceDetection {
-      bool enabled = false;
-    } voice_detection;
-
    // Enables automatic gain control (AGC) functionality.
    // The automatic gain control (AGC) component brings the signal to an
    // appropriate range. This is done by applying a digital gain directly and,
    // in the analog mode, prescribing an analog gain to be applied at the audio
    // HAL.
    // Recommended to be enabled on the client-side.
-    struct GainController1 {
+    struct RTC_EXPORT GainController1 {
+      bool operator==(const GainController1& rhs) const;
+      bool operator!=(const GainController1& rhs) const {
+        return !(*this == rhs);
+      }
+
      bool enabled = false;
      enum Mode {
        // Adaptive mode intended for use if an analog volume control is
@ -314,73 +277,112 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
      // target level. Otherwise, the signal will be compressed but not limited
      // above the target level.
      bool enable_limiter = true;
-      // Sets the minimum and maximum analog levels of the audio capture device.
-      // Must be set if an analog mode is used. Limited to [0, 65535].
-      int analog_level_minimum = 0;
-      int analog_level_maximum = 255;

      // Enables the analog gain controller functionality.
      struct AnalogGainController {
        bool enabled = true;
-        int startup_min_volume = kAgcStartupMinVolume;
+        // TODO(bugs.webrtc.org/7494): Deprecated. Stop using and remove.
+        int startup_min_volume = 0;
        // Lowest analog microphone level that will be applied in response to
        // clipping.
-        int clipped_level_min = kClippedLevelMin;
-        bool enable_agc2_level_estimator = false;
+        int clipped_level_min = 70;
+        // If true, an adaptive digital gain is applied.
        bool enable_digital_adaptive = true;
+        // Amount the microphone level is lowered with every clipping event.
+        // Limited to (0, 255].
+        int clipped_level_step = 15;
+        // Proportion of clipped samples required to declare a clipping event.
+        // Limited to (0.f, 1.f).
+        float clipped_ratio_threshold = 0.1f;
+        // Time in frames to wait after a clipping event before checking again.
+        // Limited to values higher than 0.
+        int clipped_wait_frames = 300;
+
+        // Enables clipping prediction functionality.
+        struct ClippingPredictor {
+          bool enabled = false;
+          enum Mode {
+            // Clipping event prediction mode with fixed step estimation.
+            kClippingEventPrediction,
+            // Clipped peak estimation mode with adaptive step estimation.
+            kAdaptiveStepClippingPeakPrediction,
+            // Clipped peak estimation mode with fixed step estimation.
+            kFixedStepClippingPeakPrediction,
+          };
+          Mode mode = kClippingEventPrediction;
+          // Number of frames in the sliding analysis window.
+          int window_length = 5;
+          // Number of frames in the sliding reference window.
+          int reference_window_length = 5;
+          // Reference window delay (unit: number of frames).
+          int reference_window_delay = 5;
+          // Clipping prediction threshold (dBFS).
+          float clipping_threshold = -1.0f;
+          // Crest factor drop threshold (dB).
+          float crest_factor_margin = 3.0f;
+          // If true, the recommended clipped level step is used to modify the
+          // analog gain. Otherwise, the predictor runs without affecting the
+          // analog gain.
+          bool use_predicted_step = true;
+        } clipping_predictor;
      } analog_gain_controller;
    } gain_controller1;

-    // Enables the next generation AGC functionality. This feature replaces the
-    // standard methods of gain control in the previous AGC. Enabling this
-    // submodule enables an adaptive digital AGC followed by a limiter. By
-    // setting |fixed_gain_db|, the limiter can be turned into a compressor that
-    // first applies a fixed gain. The adaptive digital AGC can be turned off by
-    // setting |adaptive_digital_mode=false|.
-    struct GainController2 {
-      enum LevelEstimator { kRms, kPeak };
+    // Parameters for AGC2, an Automatic Gain Control (AGC) sub-module which
+    // replaces the AGC sub-module parametrized by `gain_controller1`.
+    // AGC2 brings the captured audio signal to the desired level by combining
+    // three different controllers (namely, input volume controller, adaptive
+    // digital controller and fixed digital controller) and a limiter.
+    // TODO(bugs.webrtc.org:7494): Name `GainController` when AGC1 removed.
+    struct RTC_EXPORT GainController2 {
+      bool operator==(const GainController2& rhs) const;
+      bool operator!=(const GainController2& rhs) const {
+        return !(*this == rhs);
+      }
+
+      // AGC2 must be created if and only if `enabled` is true.
      bool enabled = false;
-      struct {
-        float gain_db = 0.f;
-      } fixed_digital;
-      struct {
+
+      // Parameters for the input volume controller, which adjusts the input
+      // volume applied when the audio is captured (e.g., microphone volume on
+      // a soundcard, input volume on HAL).
+      struct InputVolumeController {
+        bool operator==(const InputVolumeController& rhs) const;
+        bool operator!=(const InputVolumeController& rhs) const {
+          return !(*this == rhs);
+        }
        bool enabled = false;
-        float vad_probability_attack = 1.f;
-        LevelEstimator level_estimator = kRms;
-        int level_estimator_adjacent_speech_frames_threshold = 1;
-        // TODO(crbug.com/webrtc/7494): Remove `use_saturation_protector`.
-        bool use_saturation_protector = true;
-        float initial_saturation_margin_db = 20.f;
-        float extra_saturation_margin_db = 2.f;
-        int gain_applier_adjacent_speech_frames_threshold = 1;
-        float max_gain_change_db_per_second = 3.f;
-        float max_output_noise_level_dbfs = -50.f;
+      } input_volume_controller;
+
+      // Parameters for the adaptive digital controller, which adjusts and
+      // applies a digital gain after echo cancellation and after noise
+      // suppression.
+      struct RTC_EXPORT AdaptiveDigital {
+        bool operator==(const AdaptiveDigital& rhs) const;
+        bool operator!=(const AdaptiveDigital& rhs) const {
+          return !(*this == rhs);
+        }
+        bool enabled = false;
+        float headroom_db = 6.0f;
+        float max_gain_db = 30.0f;
+        float initial_gain_db = 8.0f;
+        float max_gain_change_db_per_second = 3.0f;
+        float max_output_noise_level_dbfs = -50.0f;
      } adaptive_digital;
+
+      // Parameters for the fixed digital controller, which applies a fixed
+      // digital gain after the adaptive digital controller and before the
+      // limiter.
+      struct FixedDigital {
+        // By setting `gain_db` to a value greater than zero, the limiter can be
+        // turned into a compressor that first applies a fixed gain.
+        float gain_db = 0.0f;
+      } fixed_digital;
    } gain_controller2;

-    struct ResidualEchoDetector {
-      bool enabled = true;
-    } residual_echo_detector;
-
-    // Enables reporting of |output_rms_dbfs| in webrtc::AudioProcessingStats.
-    struct LevelEstimation {
-      bool enabled = false;
-    } level_estimation;
-
    std::string ToString() const;
  };

-  // TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
-  enum ChannelLayout {
-    kMono,
-    // Left, right.
-    kStereo,
-    // Mono, keyboard, and mic.
-    kMonoAndKeyboard,
-    // Left, right, keyboard, and mic.
-    kStereoAndKeyboard
-  };
-
  // Specifies the properties of a setting to be passed to AudioProcessing at
  // runtime.
  class RuntimeSetting {
@ -393,6 +395,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
      kPlayoutVolumeChange,
      kCustomRenderProcessingRuntimeSetting,
      kPlayoutAudioDeviceChange,
+      kCapturePostGain,
      kCaptureOutputUsed
    };

@ -402,14 +405,17 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
      int max_volume;  // Maximum play-out volume.
    };

-    RuntimeSetting() : type_(Type::kNotSpecified), value_(0.f) {}
+    RuntimeSetting() : type_(Type::kNotSpecified), value_(0.0f) {}
    ~RuntimeSetting() = default;

    static RuntimeSetting CreateCapturePreGain(float gain) {
-      RTC_DCHECK_GE(gain, 1.f) << "Attenuation is not allowed.";
      return {Type::kCapturePreGain, gain};
    }

+    static RuntimeSetting CreateCapturePostGain(float gain) {
+      return {Type::kCapturePostGain, gain};
+    }
+
    // Corresponds to Config::GainController1::compression_gain_db, but for
    // runtime configuration.
    static RuntimeSetting CreateCompressionGainDb(int gain_db) {
@ -421,8 +427,8 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
    // Corresponds to Config::GainController2::fixed_digital::gain_db, but for
    // runtime configuration.
    static RuntimeSetting CreateCaptureFixedPostGain(float gain_db) {
-      RTC_DCHECK_GE(gain_db, 0.f);
-      RTC_DCHECK_LE(gain_db, 90.f);
+      RTC_DCHECK_GE(gain_db, 0.0f);
+      RTC_DCHECK_LE(gain_db, 90.0f);
      return {Type::kCaptureFixedPostGain, gain_db};
    }

@ -434,7 +440,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
    }

    // Creates a runtime setting to notify play-out (aka render) volume changes.
-    // |volume| is the unnormalized volume, the maximum of which
+    // `volume` is the unnormalized volume, the maximum of which
    static RuntimeSetting CreatePlayoutVolumeChange(int volume) {
      return {Type::kPlayoutVolumeChange, volume};
    }
@ -443,8 +449,9 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
      return {Type::kCustomRenderProcessingRuntimeSetting, payload};
    }

-    static RuntimeSetting CreateCaptureOutputUsedSetting(bool payload) {
-      return {Type::kCaptureOutputUsed, payload};
+    static RuntimeSetting CreateCaptureOutputUsedSetting(
+        bool capture_output_used) {
+      return {Type::kCaptureOutputUsed, capture_output_used};
    }

    Type type() const { return type_; }
@ -494,32 +501,22 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
  //
  // It is also not necessary to call if the audio parameters (sample
  // rate and number of channels) have changed. Passing updated parameters
-  // directly to |ProcessStream()| and |ProcessReverseStream()| is permissible.
+  // directly to `ProcessStream()` and `ProcessReverseStream()` is permissible.
  // If the parameters are known at init-time though, they may be provided.
  // TODO(webrtc:5298): Change to return void.
  virtual int Initialize() = 0;

  // The int16 interfaces require:
-  //   - only |NativeRate|s be used
+  //   - only `NativeRate`s be used
  //   - that the input, output and reverse rates must match
-  //   - that |processing_config.output_stream()| matches
-  //     |processing_config.input_stream()|.
+  //   - that `processing_config.output_stream()` matches
+  //     `processing_config.input_stream()`.
  //
  // The float interfaces accept arbitrary rates and support differing input and
  // output layouts, but the output must have either one channel or the same
  // number of channels as the input.
  virtual int Initialize(const ProcessingConfig& processing_config) = 0;

-  // Initialize with unpacked parameters. See Initialize() above for details.
-  //
-  // TODO(mgraczyk): Remove once clients are updated to use the new interface.
-  virtual int Initialize(int capture_input_sample_rate_hz,
-                         int capture_output_sample_rate_hz,
-                         int render_sample_rate_hz,
-                         ChannelLayout capture_input_layout,
-                         ChannelLayout capture_output_layout,
-                         ChannelLayout render_input_layout) = 0;
-
  // TODO(peah): This method is a temporary solution used to take control
  // over the parameters in the audio processing module and is likely to change.
  virtual void ApplyConfig(const Config& config) = 0;
@ -536,14 +533,19 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
  // Set to true when the output of AudioProcessing will be muted or in some
  // other way not used. Ideally, the captured audio would still be processed,
  // but some components may change behavior based on this information.
-  // Default false.
+  // Default false. This method takes a lock. To achieve this in a lock-less
+  // manner the PostRuntimeSetting can instead be used.
  virtual void set_output_will_be_muted(bool muted) = 0;

-  // Enqueue a runtime setting.
+  // Enqueues a runtime setting.
  virtual void SetRuntimeSetting(RuntimeSetting setting) = 0;

-  // Accepts and produces a 10 ms frame interleaved 16 bit integer audio as
-  // specified in |input_config| and |output_config|. |src| and |dest| may use
+  // Enqueues a runtime setting. Returns a bool indicating whether the
+  // enqueueing was successful.
+  virtual bool PostRuntimeSetting(RuntimeSetting setting) = 0;
+
+  // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio as
+  // specified in `input_config` and `output_config`. `src` and `dest` may use
  // the same memory, if desired.
  virtual int ProcessStream(const int16_t* const src,
                            const StreamConfig& input_config,
@ -551,58 +553,59 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
                            int16_t* const dest) = 0;

  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
-  // |src| points to a channel buffer, arranged according to |input_stream|. At
-  // output, the channels will be arranged according to |output_stream| in
-  // |dest|.
+  // `src` points to a channel buffer, arranged according to `input_stream`. At
+  // output, the channels will be arranged according to `output_stream` in
+  // `dest`.
  //
-  // The output must have one channel or as many channels as the input. |src|
-  // and |dest| may use the same memory, if desired.
+  // The output must have one channel or as many channels as the input. `src`
+  // and `dest` may use the same memory, if desired.
  virtual int ProcessStream(const float* const* src,
                            const StreamConfig& input_config,
                            const StreamConfig& output_config,
                            float* const* dest) = 0;

-  // Accepts and produces a 10 ms frame of interleaved 16 bit integer audio for
-  // the reverse direction audio stream as specified in |input_config| and
-  // |output_config|. |src| and |dest| may use the same memory, if desired.
+  // Accepts and produces a ~10 ms frame of interleaved 16 bit integer audio for
+  // the reverse direction audio stream as specified in `input_config` and
+  // `output_config`. `src` and `dest` may use the same memory, if desired.
  virtual int ProcessReverseStream(const int16_t* const src,
                                   const StreamConfig& input_config,
                                   const StreamConfig& output_config,
                                   int16_t* const dest) = 0;

  // Accepts deinterleaved float audio with the range [-1, 1]. Each element of
-  // |data| points to a channel buffer, arranged according to |reverse_config|.
+  // `src` points to a channel buffer, arranged according to `input_config`.
  virtual int ProcessReverseStream(const float* const* src,
                                   const StreamConfig& input_config,
                                   const StreamConfig& output_config,
                                   float* const* dest) = 0;

  // Accepts deinterleaved float audio with the range [-1, 1]. Each element
-  // of |data| points to a channel buffer, arranged according to
-  // |reverse_config|.
+  // of `data` points to a channel buffer, arranged according to
+  // `reverse_config`.
  virtual int AnalyzeReverseStream(const float* const* data,
                                   const StreamConfig& reverse_config) = 0;

-  // Returns the most recently produced 10 ms of the linear AEC output at a rate
-  // of 16 kHz. If there is more than one capture channel, a mono representation
-  // of the input is returned. Returns true/false to indicate whether an output
-  // returned.
+  // Returns the most recently produced ~10 ms of the linear AEC output at a
+  // rate of 16 kHz. If there is more than one capture channel, a mono
+  // representation of the input is returned. Returns true/false to indicate
+  // whether an output was returned.
  virtual bool GetLinearAecOutput(
      rtc::ArrayView<std::array<float, 160>> linear_output) const = 0;

  // This must be called prior to ProcessStream() if and only if adaptive analog
  // gain control is enabled, to pass the current analog level from the audio
-  // HAL. Must be within the range provided in Config::GainController1.
+  // HAL. Must be within the range [0, 255].
  virtual void set_stream_analog_level(int level) = 0;

-  // When an analog mode is set, this should be called after ProcessStream()
-  // to obtain the recommended new analog level for the audio HAL. It is the
-  // user's responsibility to apply this level.
+  // When an analog mode is set, this should be called after
+  // `set_stream_analog_level()` and `ProcessStream()` to obtain the recommended
+  // new analog level for the audio HAL. It is the user's responsibility to
+  // apply this level.
  virtual int recommended_stream_analog_level() const = 0;

  // This must be called if and only if echo processing is enabled.
  //
-  // Sets the |delay| in ms between ProcessReverseStream() receiving a far-end
+  // Sets the `delay` in ms between ProcessReverseStream() receiving a far-end
  // frame and ProcessStream() receiving a near-end frame containing the
  // corresponding echo. On the client-side this can be expressed as
  //   delay = (t_render - t_analyze) + (t_process - t_capture)
@ -622,14 +625,14 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {

  // Creates and attaches an webrtc::AecDump for recording debugging
  // information.
-  // The |worker_queue| may not be null and must outlive the created
+  // The `worker_queue` may not be null and must outlive the created
  // AecDump instance. |max_log_size_bytes == -1| means the log size
-  // will be unlimited. |handle| may not be null. The AecDump takes
-  // responsibility for |handle| and closes it in the destructor. A
+  // will be unlimited. `handle` may not be null. The AecDump takes
+  // responsibility for `handle` and closes it in the destructor. A
  // return value of true indicates that the file has been
  // sucessfully opened, while a value of false indicates that
  // opening the file failed.
-  virtual bool CreateAndAttachAecDump(const std::string& file_name,
+  virtual bool CreateAndAttachAecDump(absl::string_view file_name,
                                      int64_t max_log_size_bytes,
                                      rtc::TaskQueue* worker_queue) = 0;
  virtual bool CreateAndAttachAecDump(FILE* handle,
@ -653,7 +656,7 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {

  // Get audio processing statistics.
  virtual AudioProcessingStats GetStatistics() = 0;
-  // TODO(webrtc:5298) Deprecated variant. The |has_remote_tracks| argument
+  // TODO(webrtc:5298) Deprecated variant. The `has_remote_tracks` argument
  // should be set if there are active remote tracks (this would usually be true
  // during a call). If there are no remote tracks some of the stats will not be
  // set by AudioProcessing, because they only make sense if there is at least
@ -703,77 +706,101 @@ class RTC_EXPORT AudioProcessing : public rtc::RefCountInterface {
  static constexpr int kMaxNativeSampleRateHz =
      kNativeSampleRatesHz[kNumNativeSampleRates - 1];

-  static const int kChunkSizeMs = 10;
+  // APM processes audio in chunks of about 10 ms. See GetFrameSize() for
+  // details.
+  static constexpr int kChunkSizeMs = 10;
+
+  // Returns floor(sample_rate_hz/100): the number of samples per channel used
+  // as input and output to the audio processing module in calls to
+  // ProcessStream, ProcessReverseStream, AnalyzeReverseStream, and
+  // GetLinearAecOutput.
+  //
+  // This is exactly 10 ms for sample rates divisible by 100. For example:
+  //  - 48000 Hz (480 samples per channel),
+  //  - 44100 Hz (441 samples per channel),
+  //  - 16000 Hz (160 samples per channel).
+  //
+  // Sample rates not divisible by 100 are received/produced in frames of
+  // approximately 10 ms. For example:
+  //  - 22050 Hz (220 samples per channel, or ~9.98 ms per frame),
+  //  - 11025 Hz (110 samples per channel, or ~9.98 ms per frame).
+  // These nondivisible sample rates yield lower audio quality compared to
+  // multiples of 100. Internal resampling to 10 ms frames causes a simulated
+  // clock drift effect which impacts the performance of (for example) echo
+  // cancellation.
+  static int GetFrameSize(int sample_rate_hz) { return sample_rate_hz / 100; }
 };

 class RTC_EXPORT AudioProcessingBuilder {
 public:
  AudioProcessingBuilder();
+  AudioProcessingBuilder(const AudioProcessingBuilder&) = delete;
+  AudioProcessingBuilder& operator=(const AudioProcessingBuilder&) = delete;
  ~AudioProcessingBuilder();
-  // The AudioProcessingBuilder takes ownership of the echo_control_factory.
+
+  // Sets the APM configuration.
+  AudioProcessingBuilder& SetConfig(const AudioProcessing::Config& config) {
+    config_ = config;
+    return *this;
+  }
+
+  // Sets the echo controller factory to inject when APM is created.
  AudioProcessingBuilder& SetEchoControlFactory(
      std::unique_ptr<EchoControlFactory> echo_control_factory) {
    echo_control_factory_ = std::move(echo_control_factory);
    return *this;
  }
-  // The AudioProcessingBuilder takes ownership of the capture_post_processing.
+
+  // Sets the capture post-processing sub-module to inject when APM is created.
  AudioProcessingBuilder& SetCapturePostProcessing(
      std::unique_ptr<CustomProcessing> capture_post_processing) {
    capture_post_processing_ = std::move(capture_post_processing);
    return *this;
  }
-  // The AudioProcessingBuilder takes ownership of the render_pre_processing.
+
+  // Sets the render pre-processing sub-module to inject when APM is created.
  AudioProcessingBuilder& SetRenderPreProcessing(
      std::unique_ptr<CustomProcessing> render_pre_processing) {
    render_pre_processing_ = std::move(render_pre_processing);
    return *this;
  }
-  // The AudioProcessingBuilder takes ownership of the echo_detector.
+
+  // Sets the echo detector to inject when APM is created.
  AudioProcessingBuilder& SetEchoDetector(
      rtc::scoped_refptr<EchoDetector> echo_detector) {
    echo_detector_ = std::move(echo_detector);
    return *this;
  }
-  // The AudioProcessingBuilder takes ownership of the capture_analyzer.
+
+  // Sets the capture analyzer sub-module to inject when APM is created.
  AudioProcessingBuilder& SetCaptureAnalyzer(
      std::unique_ptr<CustomAudioAnalyzer> capture_analyzer) {
    capture_analyzer_ = std::move(capture_analyzer);
    return *this;
  }
-  // This creates an APM instance using the previously set components. Calling
-  // the Create function resets the AudioProcessingBuilder to its initial state.
-  AudioProcessing* Create();
-  AudioProcessing* Create(const webrtc::Config& config);
+
+  // Creates an APM instance with the specified config or the default one if
+  // unspecified. Injects the specified components transferring the ownership
+  // to the newly created APM instance - i.e., except for the config, the
+  // builder is reset to its initial state.
+  rtc::scoped_refptr<AudioProcessing> Create();

 private:
+  AudioProcessing::Config config_;
  std::unique_ptr<EchoControlFactory> echo_control_factory_;
  std::unique_ptr<CustomProcessing> capture_post_processing_;
  std::unique_ptr<CustomProcessing> render_pre_processing_;
  rtc::scoped_refptr<EchoDetector> echo_detector_;
  std::unique_ptr<CustomAudioAnalyzer> capture_analyzer_;
-  RTC_DISALLOW_COPY_AND_ASSIGN(AudioProcessingBuilder);
 };

 class StreamConfig {
 public:
  // sample_rate_hz: The sampling rate of the stream.
-  //
-  // num_channels: The number of audio channels in the stream, excluding the
-  //               keyboard channel if it is present. When passing a
-  //               StreamConfig with an array of arrays T*[N],
-  //
-  //                N == {num_channels + 1  if  has_keyboard
-  //                     {num_channels      if  !has_keyboard
-  //
-  // has_keyboard: True if the stream has a keyboard channel. When has_keyboard
-  //               is true, the last channel in any corresponding list of
-  //               channels is the keyboard channel.
-  StreamConfig(int sample_rate_hz = 0,
-               size_t num_channels = 0,
-               bool has_keyboard = false)
+  // num_channels: The number of audio channels in the stream.
+  StreamConfig(int sample_rate_hz = 0, size_t num_channels = 0)
      : sample_rate_hz_(sample_rate_hz),
        num_channels_(num_channels),
-        has_keyboard_(has_keyboard),
        num_frames_(calculate_frames(sample_rate_hz)) {}

  void set_sample_rate_hz(int value) {
@ -781,35 +808,29 @@ class StreamConfig {
    num_frames_ = calculate_frames(value);
  }
  void set_num_channels(size_t value) { num_channels_ = value; }
-  void set_has_keyboard(bool value) { has_keyboard_ = value; }

  int sample_rate_hz() const { return sample_rate_hz_; }

-  // The number of channels in the stream, not including the keyboard channel if
-  // present.
+  // The number of channels in the stream.
  size_t num_channels() const { return num_channels_; }

-  bool has_keyboard() const { return has_keyboard_; }
  size_t num_frames() const { return num_frames_; }
  size_t num_samples() const { return num_channels_ * num_frames_; }

  bool operator==(const StreamConfig& other) const {
    return sample_rate_hz_ == other.sample_rate_hz_ &&
-           num_channels_ == other.num_channels_ &&
-           has_keyboard_ == other.has_keyboard_;
+           num_channels_ == other.num_channels_;
  }

  bool operator!=(const StreamConfig& other) const { return !(*this == other); }

 private:
  static size_t calculate_frames(int sample_rate_hz) {
-    return static_cast<size_t>(AudioProcessing::kChunkSizeMs * sample_rate_hz /
-                               1000);
+    return static_cast<size_t>(AudioProcessing::GetFrameSize(sample_rate_hz));
  }

  int sample_rate_hz_;
  size_t num_channels_;
-  bool has_keyboard_;
  size_t num_frames_;
 };

@ -899,17 +920,13 @@ class EchoDetector : public rtc::RefCountInterface {
                          int render_sample_rate_hz,
                          int num_render_channels) = 0;

-  // Analysis (not changing) of the render signal.
+  // Analysis (not changing) of the first channel of the render signal.
  virtual void AnalyzeRenderAudio(rtc::ArrayView<const float> render_audio) = 0;

  // Analysis (not changing) of the capture signal.
  virtual void AnalyzeCaptureAudio(
      rtc::ArrayView<const float> capture_audio) = 0;

-  // Pack an AudioBuffer into a vector<float>.
-  static void PackRenderAudioBuffer(AudioBuffer* audio,
-                                    std::vector<float>* packed_buffer);
-
  struct Metrics {
    absl::optional<double> echo_likelihood;
    absl::optional<double> echo_likelihood_recent_max;
--- a/webrtc/modules/audio_processing/include/audio_processing_statistics.h
+++ b/webrtc/modules/audio_processing/include/audio_processing_statistics.h
@ -24,14 +24,8 @@ struct RTC_EXPORT AudioProcessingStats {
  AudioProcessingStats(const AudioProcessingStats& other);
  ~AudioProcessingStats();

-  // The root mean square (RMS) level in dBFS (decibels from digital
-  // full-scale) of the last capture frame, after processing. It is
-  // constrained to [-127, 0].
-  // The computation follows: https://tools.ietf.org/html/rfc6465
-  // with the intent that it can provide the RTP audio level indication.
-  // Only reported if level estimation is enabled in AudioProcessing::Config.
-  absl::optional<int> output_rms_dbfs;
-
+  // Deprecated.
+  // TODO(bugs.webrtc.org/11226): Remove.
  // True if voice is detected in the last capture frame, after processing.
  // It is conservative in flagging audio as speech, with low likelihood of
  // incorrectly flagging a frame as voice.
@ -50,9 +44,9 @@ struct RTC_EXPORT AudioProcessingStats {
  // The delay metrics consists of the delay median and standard deviation. It
  // also consists of the fraction of delay estimates that can make the echo
  // cancellation perform poorly. The values are aggregated until the first
-  // call to |GetStatistics()| and afterwards aggregated and updated every
+  // call to `GetStatistics()` and afterwards aggregated and updated every
  // second. Note that if there are several clients pulling metrics from
-  // |GetStatistics()| during a session the first call from any of them will
+  // `GetStatistics()` during a session the first call from any of them will
  // change to one second aggregation window for all.
  absl::optional<int32_t> delay_median_ms;
  absl::optional<int32_t> delay_standard_deviation_ms;
@ -64,7 +58,7 @@ struct RTC_EXPORT AudioProcessingStats {

  // The instantaneous delay estimate produced in the AEC. The unit is in
  // milliseconds and the value is the instantaneous value at the time of the
-  // call to |GetStatistics()|.
+  // call to `GetStatistics()`.
  absl::optional<int32_t> delay_ms;
 };