Some API deprecation -- ExperimentalAgc and ExperimentalNs are gone. We're continuing to carry iSAC even though it's gone upstream, but maybe we'll want to drop that soon.
92 lines
2.9 KiB
C++
92 lines
2.9 KiB
C++
/*
|
|
* Copyright (c) 2018 The WebRTC project authors. All Rights Reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "modules/audio_processing/agc2/rnn_vad/rnn.h"
|
|
|
|
#include "rtc_base/checks.h"
|
|
#include "third_party/rnnoise/src/rnn_vad_weights.h"
|
|
|
|
namespace webrtc {
|
|
namespace rnn_vad {
|
|
namespace {
|
|
|
|
using ::rnnoise::kInputLayerInputSize;
|
|
static_assert(kFeatureVectorSize == kInputLayerInputSize, "");
|
|
using ::rnnoise::kInputDenseBias;
|
|
using ::rnnoise::kInputDenseWeights;
|
|
using ::rnnoise::kInputLayerOutputSize;
|
|
static_assert(kInputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
|
|
|
|
using ::rnnoise::kHiddenGruBias;
|
|
using ::rnnoise::kHiddenGruRecurrentWeights;
|
|
using ::rnnoise::kHiddenGruWeights;
|
|
using ::rnnoise::kHiddenLayerOutputSize;
|
|
static_assert(kHiddenLayerOutputSize <= kGruLayerMaxUnits, "");
|
|
|
|
using ::rnnoise::kOutputDenseBias;
|
|
using ::rnnoise::kOutputDenseWeights;
|
|
using ::rnnoise::kOutputLayerOutputSize;
|
|
static_assert(kOutputLayerOutputSize <= kFullyConnectedLayerMaxUnits, "");
|
|
|
|
} // namespace
|
|
|
|
// Builds the three layers of the network (named "FC1", "GRU1" and "FC2") from
// the sizes, biases and weights imported from ::rnnoise.
// `cpu_features` is forwarded to the input and hidden layers only.
RnnVad::RnnVad(const AvailableCpuFeatures& cpu_features)
    : input_(kInputLayerInputSize,
             kInputLayerOutputSize,
             kInputDenseBias,
             kInputDenseWeights,
             ActivationFunction::kTansigApproximated,
             cpu_features,
             /*layer_name=*/"FC1"),
      hidden_(kInputLayerOutputSize,
              kHiddenLayerOutputSize,
              kHiddenGruBias,
              kHiddenGruWeights,
              kHiddenGruRecurrentWeights,
              cpu_features,
              /*layer_name=*/"GRU1"),
      output_(kHiddenLayerOutputSize,
              kOutputLayerOutputSize,
              kOutputDenseBias,
              kOutputDenseWeights,
              ActivationFunction::kSigmoidApproximated,
              // With an output layer of only 24x1, the unoptimized code path
              // is the faster one, so no CPU features are passed here.
              NoAvailableCpuFeatures(),
              /*layer_name=*/"FC2") {
  // Each layer must produce exactly as many values as the next one consumes.
  RTC_DCHECK_EQ(input_.size(), hidden_.input_size())
      << "The input and the hidden layers sizes do not match.";
  RTC_DCHECK_EQ(hidden_.size(), output_.input_size())
      << "The hidden and the output layers sizes do not match.";
}
|
|
|
|
// Compiler-generated destructor, defaulted here in the implementation file.
RnnVad::~RnnVad() = default;
|
|
|
|
// Resets the hidden layer. The input and output layers are left untouched.
void RnnVad::Reset() {
  hidden_.Reset();
}
|
|
|
|
// Returns the value produced by the output layer for `feature_vector`.
// When `is_silence` is true, no inference is run: Reset() is called and 0 is
// returned instead.
float RnnVad::ComputeVadProbability(
    rtc::ArrayView<const float, kFeatureVectorSize> feature_vector,
    bool is_silence) {
  if (!is_silence) {
    // Forward pass: each layer consumes the output of the previous one.
    input_.ComputeOutput(feature_vector);
    hidden_.ComputeOutput(input_);
    output_.ComputeOutput(hidden_);
    // The output layer is expected to hold a single value.
    RTC_DCHECK_EQ(output_.size(), 1);
    return output_.data()[0];
  }
  // Silence: skip the network, reset and report zero.
  Reset();
  return 0.f;
}
|
|
|
|
} // namespace rnn_vad
|
|
} // namespace webrtc
|