Update audio_processing module

Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1

Update notes:

 * Pull in third party license file

 * Replace .gypi files with BUILD.gn to keep track of what changes
   upstream

 * Bunch of new files pulled in as dependencies

 * Won't build yet due to changes needed on top of these
This commit is contained in:
Arun Raghavan
2015-10-13 17:25:22 +05:30
parent 5ae7a5d6cd
commit 753eada3aa
324 changed files with 52533 additions and 16117 deletions

View File

@ -0,0 +1,114 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include <cfloat>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
#include "webrtc/modules/audio_processing/transient/file_utils.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/system_wrappers/interface/file_wrapper.h"
using rtc::scoped_ptr;
using webrtc::FileWrapper;
using webrtc::TransientDetector;
// Application to generate a RTP timing file.
// Opens the PCM file and divides the signal in frames.
// Creates a send times array, one for each step.
// Each block that contains a transient, has an infinite send time.
// The resultant array is written to a DAT file.
// Returns -1 on error or |lost_packets| otherwise.
int main(int argc, char* argv[]) {
  if (argc != 5) {
    printf("\n%s - Application to generate a RTP timing file.\n\n", argv[0]);
    printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
    printf("Opens the PCMfile with sampleRate in Hertz.\n");
    printf("Creates a send times array, one for each chunkSize ");
    printf("milliseconds step.\n");
    printf("Each block that contains a transient, has an infinite send time. ");
    printf("The resultant array is written to a DATfile.\n\n");
    return 0;
  }
  // Input PCM file (closed explicitly below; the wrapper also closes on the
  // early-return paths).
  scoped_ptr<FileWrapper> pcm_file(FileWrapper::Create());
  pcm_file->OpenFile(argv[1], true, false, false);
  if (!pcm_file->Open()) {
    printf("\nThe %s could not be opened.\n\n", argv[1]);
    return -1;
  }
  // Output DAT file that receives the send-times array.
  scoped_ptr<FileWrapper> dat_file(FileWrapper::Create());
  dat_file->OpenFile(argv[2], false, false, false);
  if (!dat_file->Open()) {
    printf("\nThe %s could not be opened.\n\n", argv[2]);
    return -1;
  }
  int chunk_size_ms = atoi(argv[3]);
  if (chunk_size_ms <= 0) {
    printf("\nThe chunkSize must be a positive integer\n\n");
    return -1;
  }
  int sample_rate_hz = atoi(argv[4]);
  if (sample_rate_hz <= 0) {
    printf("\nThe sampleRate must be a positive integer\n\n");
    return -1;
  }
  TransientDetector detector(sample_rate_hz);
  int lost_packets = 0;
  size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000;
  scoped_ptr<float[]> audio_buffer(new float[audio_buffer_length]);
  std::vector<float> send_times;
  // Read first buffer from the PCM test file.
  size_t file_samples_read = ReadInt16FromFileToFloatBuffer(
      pcm_file.get(),
      audio_buffer_length,
      audio_buffer.get());
  for (int time = 0; file_samples_read > 0; time += chunk_size_ms) {
    // Pad the rest of the buffer with zeros.
    for (size_t i = file_samples_read; i < audio_buffer_length; ++i) {
      audio_buffer[i] = 0.0;
    }
    float value =
        detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0);
    if (value < 0.5f) {
      value = time;
    } else {
      // A transient in this chunk: mark the packet as "lost".
      value = FLT_MAX;
      ++lost_packets;
    }
    send_times.push_back(value);
    // Read next buffer from the PCM test file.
    file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(),
                                                       audio_buffer_length,
                                                       audio_buffer.get());
  }
  // An empty PCM file produces no send times; bail out instead of evaluating
  // &send_times[0], which is undefined behavior on an empty vector.
  if (send_times.empty()) {
    printf("\nThe send times could not be written to DAT file\n\n");
    return -1;
  }
  size_t floats_written = WriteFloatBufferToFile(dat_file.get(),
                                                 send_times.size(),
                                                 &send_times[0]);
  if (floats_written == 0) {
    printf("\nThe send times could not be written to DAT file\n\n");
    return -1;
  }
  pcm_file->CloseFile();
  dat_file->CloseFile();
  return lost_packets;
}

View File

@ -0,0 +1,27 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
// Constants shared across the transient-suppression components.
namespace webrtc {
namespace ts {

// Pi, in single precision.
static const float kPi = 3.14159265358979323846f;
// Duration of one processing chunk, in milliseconds.
static const int kChunkSizeMs = 10;
// Sample rates (in Hz) accepted by the transient detector/suppressor.
enum {
  kSampleRate8kHz = 8000,
  kSampleRate16kHz = 16000,
  kSampleRate32kHz = 32000,
  kSampleRate48kHz = 48000
};

}  // namespace ts
}  // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_

View File

@ -0,0 +1,63 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// This header file defines the coefficients of the 16-tap Daubechies 8
// wavelet decomposition filters.
// NOTE(review): the original comment described these as an "FIR based
// approximation of the Meyer Wavelet", which does not match the Daubechies 8
// naming used for everything below — confirm the intended description.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_

// Decomposition coefficients Daubechies 8.
namespace webrtc {

// Number of taps in each of the filters below.
const int kDaubechies8CoefficientsLength = 16;

// High-pass (detail) decomposition filter.
const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength]
    = {
  -5.44158422430816093862e-02f,
  3.12871590914465924627e-01f,
  -6.75630736298012846142e-01f,
  5.85354683654869090148e-01f,
  1.58291052560238926228e-02f,
  -2.84015542962428091389e-01f,
  -4.72484573997972536787e-04f,
  1.28747426620186011803e-01f,
  1.73693010020221083600e-02f,
  -4.40882539310647192377e-02f,
  -1.39810279170155156436e-02f,
  8.74609404701565465445e-03f,
  4.87035299301066034600e-03f,
  -3.91740372995977108837e-04f,
  -6.75449405998556772109e-04f,
  -1.17476784002281916305e-04f
};

// Low-pass (approximation) decomposition filter. The values appear to be the
// high-pass filter reversed with alternating signs (a quadrature-mirror
// pair), as is standard for orthogonal wavelets — verify against the source
// of the coefficients.
const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
  -1.17476784002281916305e-04f,
  6.75449405998556772109e-04f,
  -3.91740372995977108837e-04f,
  -4.87035299301066034600e-03f,
  8.74609404701565465445e-03f,
  1.39810279170155156436e-02f,
  -4.40882539310647192377e-02f,
  -1.73693010020221083600e-02f,
  1.28747426620186011803e-01f,
  4.72484573997972536787e-04f,
  -2.84015542962428091389e-01f,
  -1.58291052560238926228e-02f,
  5.85354683654869090148e-01f,
  6.75630736298012846142e-01f,
  3.12871590914465924627e-01f,
  5.44158422430816093862e-02f
};

}  // namespace webrtc

#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_

View File

@ -0,0 +1,70 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
#include <cstdlib>
#include "webrtc/typedefs.h"
// Provides a set of static methods to perform dyadic decimations.
namespace webrtc {
// Returns the proper length of the output buffer that you should use for the
// given |in_length| and decimation |odd_sequence|.
// There is no error path: the function always returns a valid length. (The
// original comment's "Return -1 on error" was stale — the return type is
// size_t.)
inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
                                           bool odd_sequence) {
  size_t out_length = in_length / 2;
  // Keeping the even-indexed members of an odd-length input yields one extra
  // output sample.
  if (in_length % 2 == 1 && !odd_sequence) {
    ++out_length;
  }
  return out_length;
}

// Performs a dyadic decimation: removes every odd/even member of a sequence
// halving its overall length.
// Arguments:
//    in: array of |in_length|.
//    odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
//                  if true, the even members will be removed (0, 2, 4, ...).
//    out: array of |out_length|. |out_length| must be large enough to
//         hold the decimated output. The necessary length can be provided by
//         GetOutLengthToDyadicDecimate().
//         Must be previously allocated.
// Returns the number of output samples written, or 0 on error (null pointer,
// empty input, or |out_length| too small). The size_t return type cannot
// carry -1, contrary to the original comment.
template<typename T>
static size_t DyadicDecimate(const T* in,
                             size_t in_length,
                             bool odd_sequence,
                             T* out,
                             size_t out_length) {
  const size_t half_length =
      GetOutLengthToDyadicDecimate(in_length, odd_sequence);
  // |in_length| is unsigned, so the original "<= 0" test was just an
  // obfuscated emptiness check.
  if (!in || !out || in_length == 0 || out_length < half_length) {
    return 0;
  }
  const size_t index_adjustment = odd_sequence ? 1 : 0;
  size_t output_samples = 0;
  for (output_samples = 0; output_samples < half_length; ++output_samples) {
    out[output_samples] = in[output_samples * 2 + index_adjustment];
  }
  return output_samples;
}
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_

View File

@ -0,0 +1,257 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/file_utils.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Reassembles a little-endian 4-byte sequence into the float that has the
// same bit pattern. Returns 0 on success, -1 if either pointer is null.
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
  if (!bytes || !out) {
    return -1;
  }
  uint32_t word = 0;
  for (size_t i = 0; i < 4; ++i) {
    word |= static_cast<uint32_t>(bytes[i]) << (8 * i);
  }
  // Reinterpret the assembled bits as a float (equivalent to bit_cast).
  memcpy(out, &word, sizeof(*out));
  return 0;
}
// Reassembles a little-endian 8-byte sequence into the double that has the
// same bit pattern. Returns 0 on success, -1 if either pointer is null.
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
  if (!bytes || !out) {
    return -1;
  }
  uint64_t word = 0;
  for (size_t i = 0; i < 8; ++i) {
    word |= static_cast<uint64_t>(bytes[i]) << (8 * i);
  }
  // Reinterpret the assembled bits as a double (equivalent to bit_cast).
  memcpy(out, &word, sizeof(*out));
  return 0;
}
// Serializes |value| as 4 bytes in little-endian order. Returns 0 on
// success, -1 if |out_bytes| is null.
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
  if (!out_bytes) {
    return -1;
  }
  uint32_t word;
  memcpy(&word, &value, sizeof(word));  // Raw bit pattern of the float.
  for (size_t i = 0; i < 4; ++i) {
    out_bytes[i] = static_cast<uint8_t>(word >> (8 * i));
  }
  return 0;
}
// Serializes |value| as 8 bytes in little-endian order. Returns 0 on
// success, -1 if |out_bytes| is null.
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
  if (!out_bytes) {
    return -1;
  }
  uint64_t word;
  memcpy(&word, &value, sizeof(word));  // Raw bit pattern of the double.
  for (size_t i = 0; i < 8; ++i) {
    out_bytes[i] = static_cast<uint8_t>(word >> (8 * i));
  }
  return 0;
}
// Reads up to |length| little-endian 16-bit integers from |file| into
// |buffer|.
// |file| must be previously opened.
// Returns the number of 16-bit integers read (0 on error or empty request).
size_t ReadInt16BufferFromFile(FileWrapper* file,
                               size_t length,
                               int16_t* buffer) {
  // |length| is unsigned, so "== 0" replaces the original "<= 0".
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }
  // Fixed-size scratch on the stack; the original heap-allocated a 2-byte
  // array on every call for no benefit.
  uint8_t byte_array[2];
  size_t int16s_read = 0;
  while (int16s_read < length) {
    size_t bytes_read = file->Read(byte_array, 2);
    if (bytes_read < 2) {
      break;
    }
    // Assemble the sample from its little-endian byte pair.
    int16_t value = byte_array[1];
    value <<= 8;
    value += byte_array[0];
    buffer[int16s_read] = value;
    ++int16s_read;
  }
  return int16s_read;
}
// Reads up to |length| 16-bit samples from |file| and widens each one into
// the float |buffer|. Returns the number of samples actually read.
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
                                      size_t length,
                                      float* buffer) {
  if (!file || !file->Open() || !buffer || length <= 0) {
    return 0;
  }
  rtc::scoped_ptr<int16_t[]> samples(new int16_t[length]);
  const size_t samples_read =
      ReadInt16BufferFromFile(file, length, samples.get());
  size_t i = 0;
  while (i < samples_read) {
    buffer[i] = samples[i];
    ++i;
  }
  return samples_read;
}
// Reads up to |length| 16-bit samples from |file| and widens each one into
// the double |buffer|. Returns the number of samples actually read.
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
                                       size_t length,
                                       double* buffer) {
  if (!file || !file->Open() || !buffer || length <= 0) {
    return 0;
  }
  rtc::scoped_ptr<int16_t[]> samples(new int16_t[length]);
  const size_t samples_read =
      ReadInt16BufferFromFile(file, length, samples.get());
  size_t i = 0;
  while (i < samples_read) {
    buffer[i] = samples[i];
    ++i;
  }
  return samples_read;
}
// Reads up to |length| binary little-endian floats (4 bytes each) from
// |file| into |buffer|.
// |file| must be previously opened.
// Returns the number of floats read (0 on error or empty request).
size_t ReadFloatBufferFromFile(FileWrapper* file,
                               size_t length,
                               float* buffer) {
  // |length| is unsigned, so "== 0" replaces the original "<= 0".
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }
  // Stack scratch buffer; the original heap-allocated 4 bytes per call.
  uint8_t byte_array[4];
  size_t floats_read = 0;
  while (floats_read < length) {
    size_t bytes_read = file->Read(byte_array, 4);
    if (bytes_read < 4) {
      break;
    }
    ConvertByteArrayToFloat(byte_array, &buffer[floats_read]);
    ++floats_read;
  }
  return floats_read;
}
// Reads up to |length| binary little-endian doubles (8 bytes each) from
// |file| into |buffer|.
// |file| must be previously opened.
// Returns the number of doubles read (0 on error or empty request).
size_t ReadDoubleBufferFromFile(FileWrapper* file,
                                size_t length,
                                double* buffer) {
  // |length| is unsigned, so "== 0" replaces the original "<= 0".
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }
  // Stack scratch buffer; the original heap-allocated 8 bytes per call.
  uint8_t byte_array[8];
  size_t doubles_read = 0;
  while (doubles_read < length) {
    size_t bytes_read = file->Read(byte_array, 8);
    if (bytes_read < 8) {
      break;
    }
    ConvertByteArrayToDouble(byte_array, &buffer[doubles_read]);
    ++doubles_read;
  }
  return doubles_read;
}
// Writes |length| 16-bit integers from |buffer| as little-endian byte pairs
// and flushes |file|.
// |file| must be previously opened.
// Returns the number of 16-bit integers written (0 on error).
size_t WriteInt16BufferToFile(FileWrapper* file,
                              size_t length,
                              const int16_t* buffer) {
  // |length| is unsigned, so "== 0" replaces the original "<= 0".
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }
  // Stack scratch buffer; the original heap-allocated 2 bytes per call.
  uint8_t byte_array[2];
  size_t int16s_written = 0;
  for (int16s_written = 0; int16s_written < length; ++int16s_written) {
    // Get byte representation.
    byte_array[0] = buffer[int16s_written] & 0xFF;
    byte_array[1] = (buffer[int16s_written] >> 8) & 0xFF;
    file->Write(byte_array, 2);
  }
  file->Flush();
  return int16s_written;
}
// Writes |length| floats from |buffer| as little-endian 4-byte values and
// flushes |file|.
// |file| must be previously opened.
// Returns the number of floats written (0 on error).
size_t WriteFloatBufferToFile(FileWrapper* file,
                              size_t length,
                              const float* buffer) {
  // |length| is unsigned, so "== 0" replaces the original "<= 0".
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }
  // Stack scratch buffer; the original heap-allocated 4 bytes per call.
  uint8_t byte_array[4];
  size_t floats_written = 0;
  for (floats_written = 0; floats_written < length; ++floats_written) {
    // Get byte representation.
    ConvertFloatToByteArray(buffer[floats_written], byte_array);
    file->Write(byte_array, 4);
  }
  file->Flush();
  return floats_written;
}
// Writes |length| doubles from |buffer| as little-endian 8-byte values and
// flushes |file|.
// |file| must be previously opened.
// Returns the number of doubles written (0 on error).
size_t WriteDoubleBufferToFile(FileWrapper* file,
                               size_t length,
                               const double* buffer) {
  // |length| is unsigned, so "== 0" replaces the original "<= 0".
  if (!file || !file->Open() || !buffer || length == 0) {
    return 0;
  }
  // Stack scratch buffer; the original heap-allocated 8 bytes per call.
  uint8_t byte_array[8];
  size_t doubles_written = 0;
  for (doubles_written = 0; doubles_written < length; ++doubles_written) {
    // Get byte representation.
    ConvertDoubleToByteArray(buffer[doubles_written], byte_array);
    file->Write(byte_array, 8);
  }
  file->Flush();
  return doubles_written;
}
} // namespace webrtc

View File

@ -0,0 +1,118 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
#include <string.h>
#include "webrtc/system_wrappers/interface/file_wrapper.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This is a copy of the cast included in the Chromium codebase here:
// http://cs.chromium.org/src/third_party/cld/base/casts.h
// Reinterprets the object representation of |source| as a value of type
// Dest. Both types must have the same size.
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // A compile error here means your Dest and Source have different sizes.
  static_assert(sizeof(Dest) == sizeof(Source),
                "Dest and Source have different sizes");
  Dest result;
  memcpy(&result, &source, sizeof(result));
  return result;
}
// Converts the byte array with binary float representation to float.
// Bytes must be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
// Converts the byte array with binary double representation to double.
// Bytes must be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
// Converts a float to a byte array with binary float representation.
// Bytes will be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
// Converts a double to a byte array with binary double representation.
// Bytes will be in little-endian order.
// Returns 0 if correct, -1 on error.
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
// Reads |length| 16-bit integers from |file| to |buffer|.
// |file| must be previously opened.
// Returns the number of 16-bit integers read, or 0 on error.
size_t ReadInt16BufferFromFile(FileWrapper* file,
size_t length,
int16_t* buffer);
// Reads |length| 16-bit integers from |file| and stores those values
// (converting them) in |buffer|.
// |file| must be previously opened.
// Returns the number of 16-bit integers read, or 0 on error.
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
size_t length,
float* buffer);
// Reads |length| 16-bit integers from |file| and stores those values
// (converting them) in |buffer|.
// |file| must be previously opened.
// Returns the number of 16-bit integers read, or 0 on error.
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
size_t length,
double* buffer);
// Reads |length| floats in binary representation (4 bytes) from |file| to
// |buffer|.
// |file| must be previously opened.
// Returns the number of floats read, or 0 on error.
size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer);
// Reads |length| doubles in binary representation (8 bytes) from |file| to
// |buffer|.
// |file| must be previously opened.
// Returns the number of doubles read, or 0 on error.
size_t ReadDoubleBufferFromFile(FileWrapper* file,
size_t length,
double* buffer);
// Writes |length| 16-bit integers from |buffer| in binary representation (2
// bytes) to |file|. It flushes |file|, so after this call there are no
// writings pending.
// |file| must be previously opened.
// Returns the number of 16-bit integers written, or 0 on error.
size_t WriteInt16BufferToFile(FileWrapper* file,
size_t length,
const int16_t* buffer);
// Writes |length| floats from |buffer| in binary representation (4 bytes) to
// |file|. It flushes |file|, so after this call there are no writings pending.
// |file| must be previously opened.
// Returns the number of floats written, or 0 on error.
size_t WriteFloatBufferToFile(FileWrapper* file,
size_t length,
const float* buffer);
// Writes |length| doubles from |buffer| in binary representation (8 bytes) to
// |file|. It flushes |file|, so after this call there are no writings pending.
// |file| must be previously opened.
// Returns the number of doubles written, or 0 on error.
size_t WriteDoubleBufferToFile(FileWrapper* file,
size_t length,
const double* buffer);
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_

View File

@ -0,0 +1,49 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include <math.h>
#include <string.h>
#include "webrtc/base/scoped_ptr.h"
namespace webrtc {
// Seeds the window with |length| zeros so the very first call to
// CalculateMoments already operates on a full window.
MovingMoments::MovingMoments(size_t length)
    : length_(length),
      queue_(),
      sum_(0.0),
      sum_of_squares_(0.0) {
  assert(length > 0);
  size_t zeros_to_push = length;
  while (zeros_to_push-- > 0) {
    queue_.push(0.0);
  }
}

MovingMoments::~MovingMoments() {}
// Computes, for every input sample, the first moment (mean) and second
// moment (mean of squares) over the sliding window, updating the running
// sums incrementally instead of rescanning the window.
void MovingMoments::CalculateMoments(const float* in, size_t in_length,
                                     float* first, float* second) {
  assert(in && in_length > 0 && first && second);
  for (size_t i = 0; i < in_length; ++i) {
    const float new_value = in[i];
    const float old_value = queue_.front();
    queue_.pop();
    queue_.push(new_value);
    // Swap the oldest sample for the newest one in both running sums.
    sum_ += new_value - old_value;
    sum_of_squares_ += new_value * new_value - old_value * old_value;
    first[i] = sum_ / length_;
    second[i] = sum_of_squares_ / length_;
  }
}
} // namespace webrtc

View File

@ -0,0 +1,52 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
#include <queue>
#include "webrtc/base/scoped_ptr.h"
namespace webrtc {
// Calculates the first and second moments for each value of a buffer taking
// into account a given number of previous values.
// It preserves its state, so it can be multiple-called.
// TODO(chadan): Implement a function that takes a buffer of first moments and a
// buffer of second moments; and calculates the variances. When needed.
// TODO(chadan): Add functionality to update with a buffer but only output are
// the last values of the moments. When needed.
class MovingMoments {
 public:
  // Creates a Moving Moments object, that uses the last |length| values
  // (including the new value introduced in every new calculation).
  explicit MovingMoments(size_t length);
  ~MovingMoments();

  // Calculates the new values using |in|. Results will be in the out buffers.
  // |first| and |second| must be allocated with at least |in_length|.
  void CalculateMoments(const float* in, size_t in_length,
                        float* first, float* second);

 private:
  // Length of the sliding window the moments are averaged over.
  size_t length_;
  // A queue holding the |length_| latest input values.
  std::queue<float> queue_;
  // Running sum of the values currently in |queue_|.
  float sum_;
  // Running sum of the squares of the values currently in |queue_|.
  float sum_of_squares_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_

View File

@ -0,0 +1,173 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
#include <assert.h>
#include <float.h>
#include <math.h>
#include <string.h>
#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
namespace webrtc {
// Assumed duration of one transient; sizes the moving-moment windows and the
// history of previous results.
static const int kTransientLengthMs = 30;
static const int kChunksAtStartupLeftToDelete =
    kTransientLengthMs / ts::kChunkSizeMs;
// Detection scores at or above this threshold are clipped to 1 in Detect().
static const float kDetectThreshold = 16.f;
// Builds the WPD tree and one moving-moments tracker per tree leaf for the
// given sample rate. Only the four rates defined in ts:: are supported
// (assert-checked).
TransientDetector::TransientDetector(int sample_rate_hz)
    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
      last_first_moment_(),
      last_second_moment_(),
      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
      reference_energy_(1.f),
      using_reference_(false) {
  assert(sample_rate_hz == ts::kSampleRate8kHz ||
         sample_rate_hz == ts::kSampleRate16kHz ||
         sample_rate_hz == ts::kSampleRate32kHz ||
         sample_rate_hz == ts::kSampleRate48kHz);
  int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
  // Adjustment to avoid data loss while downsampling, making
  // |samples_per_chunk_| and |samples_per_transient| always divisible by
  // |kLeaves|.
  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
  samples_per_transient -= samples_per_transient % kLeaves;
  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
  // The WPD tree splits each chunk across |kLeaves| leaves using the
  // Daubechies 8 filter pair.
  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
                              kDaubechies8HighPassCoefficients,
                              kDaubechies8LowPassCoefficients,
                              kDaubechies8CoefficientsLength,
                              kLevels));
  // One moving-moments tracker per leaf, each spanning a transient's worth
  // of (downsampled) samples.
  for (size_t i = 0; i < kLeaves; ++i) {
    moving_moments_[i].reset(
        new MovingMoments(samples_per_transient / kLeaves));
  }
  first_moments_.reset(new float[tree_leaves_data_length_]);
  second_moments_.reset(new float[tree_leaves_data_length_]);
  // Pre-fill the result history with zeros so Detect() can always pop one
  // entry before pushing the new result.
  for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
    previous_results_.push_back(0.f);
  }
}

TransientDetector::~TransientDetector() {}
// Runs one chunk through the WPD tree, scores how unexpected each leaf's
// samples are relative to their moving moments, then maps the aggregate
// score non-linearly into [0, 1].
float TransientDetector::Detect(const float* data,
                                size_t data_length,
                                const float* reference_data,
                                size_t reference_length) {
  assert(data && data_length == samples_per_chunk_);
  // TODO(aluebs): Check if these errors can logically happen and if not assert
  // on them.
  if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
    return -1.f;
  }
  float result = 0.f;
  for (size_t i = 0; i < kLeaves; ++i) {
    WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
    moving_moments_[i]->CalculateMoments(leaf->data(),
                                         tree_leaves_data_length_,
                                         first_moments_.get(),
                                         second_moments_.get());
    // Add value delayed (Use the last moments from the last call to Detect).
    // FLT_MIN in the denominator guards against division by zero.
    float unbiased_data = leaf->data()[0] - last_first_moment_[i];
    result +=
        unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
    // Add new values.
    for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
      unbiased_data = leaf->data()[j] - first_moments_[j - 1];
      result +=
          unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
    }
    // Remember the trailing moments so the first sample of the next chunk
    // can be scored against them.
    last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
    last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
  }
  result /= tree_leaves_data_length_;
  // Scale the score down by the reference signal, when one is provided.
  result *= ReferenceDetectionValue(reference_data, reference_length);
  // Emit zeros until enough history has accumulated at startup.
  if (chunks_at_startup_left_to_delete_ > 0) {
    chunks_at_startup_left_to_delete_--;
    result = 0.f;
  }
  if (result >= kDetectThreshold) {
    result = 1.f;
  } else {
    // Get proportional value.
    // Proportion achieved with a squared raised cosine function with domain
    // [0, kDetectThreshold) and image [0, 1), it's always increasing.
    const float horizontal_scaling = ts::kPi / kDetectThreshold;
    const float kHorizontalShift = ts::kPi;
    const float kVerticalScaling = 0.5f;
    const float kVerticalShift = 1.f;
    result = (cos(result * horizontal_scaling + kHorizontalShift)
        + kVerticalShift) * kVerticalScaling;
    result *= result;
  }
  previous_results_.pop_front();
  previous_results_.push_back(result);
  // In the current implementation we return the max of the current result and
  // the previous results, so the high results have a width equals to
  // |transient_length|.
  return *std::max_element(previous_results_.begin(), previous_results_.end());
}
// Computes a multiplier in (0, 1] from the reference (e.g. render) signal's
// energy relative to the tracked average energy. This value is multiplied by
// the detection result to avoid false positives. Returns 1 (and clears
// |using_reference_|) when no usable reference is available.
float TransientDetector::ReferenceDetectionValue(const float* data,
                                                 size_t length) {
  if (data == NULL) {
    using_reference_ = false;
    return 1.f;
  }
  static const float kEnergyRatioThreshold = 0.2f;
  static const float kReferenceNonLinearity = 20.f;
  // Forgetting factor for the tracked reference energy.
  static const float kMemory = 0.99f;
  float reference_energy = 0.f;
  // NOTE(review): the sum starts at index 1, so data[0] never contributes to
  // the energy — confirm whether this is intentional or an off-by-one.
  for (size_t i = 1; i < length; ++i) {
    reference_energy += data[i] * data[i];
  }
  if (reference_energy == 0.f) {
    using_reference_ = false;
    return 1.f;
  }
  assert(reference_energy_ != 0);
  // Logistic squashing of the energy ratio: near 0 when the current chunk is
  // much weaker than the tracked average, near 1 when it is comparable.
  float result = 1.f / (1.f + exp(kReferenceNonLinearity *
                                  (kEnergyRatioThreshold -
                                   reference_energy / reference_energy_)));
  // Slowly track the reference energy with an exponential average.
  reference_energy_ =
      kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
  using_reference_ = true;
  return result;
}
} // namespace webrtc

View File

@ -0,0 +1,87 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
#include <deque>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
namespace webrtc {
// This is an implementation of the transient detector described in "Causal
// Wavelet based transient detector".
// Calculates the log-likelihood of a transient to happen on a signal at any
// given time based on the previous samples; it uses a WPD tree to analyze the
// signal. It preserves its state, so it can be multiple-called.
class TransientDetector {
 public:
  // TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
  // of 3 levels. Make an overloaded constructor to allow different wavelets and
  // depths of the tree. When needed.
  // Creates a wavelet based transient detector.
  // |sample_rate_hz| must be 8000, 16000, 32000 or 48000 (asserted in the
  // implementation). Declared explicit so an int cannot implicitly convert
  // into a detector.
  explicit TransientDetector(int sample_rate_hz);
  ~TransientDetector();

  // Calculates the log-likelihood of the existence of a transient in |data|.
  // |data_length| has to be equal to |samples_per_chunk_|.
  // Returns a value between 0 and 1, as a non linear representation of this
  // likelihood.
  // Returns a negative value on error.
  float Detect(const float* data,
               size_t data_length,
               const float* reference_data,
               size_t reference_length);

  // True when the last Detect() call made use of |reference_data|.
  bool using_reference() { return using_reference_; }

 private:
  // Multiplier in (0, 1] derived from the reference signal, used to scale
  // down the detection result and avoid false positives.
  float ReferenceDetectionValue(const float* data, size_t length);

  // Depth of the WPD tree and the resulting number of leaves (2^levels).
  static const size_t kLevels = 3;
  static const size_t kLeaves = 1 << kLevels;

  size_t samples_per_chunk_;

  rtc::scoped_ptr<WPDTree> wpd_tree_;
  size_t tree_leaves_data_length_;

  // A MovingMoments object is needed for each leaf in the WPD tree.
  rtc::scoped_ptr<MovingMoments> moving_moments_[kLeaves];
  rtc::scoped_ptr<float[]> first_moments_;
  rtc::scoped_ptr<float[]> second_moments_;

  // Stores the last calculated moments from the previous detection.
  float last_first_moment_[kLeaves];
  float last_second_moment_[kLeaves];

  // We keep track of the previous results from the previous chunks, so it can
  // be used to effectively give results according to the |transient_length|.
  std::deque<float> previous_results_;

  // Number of chunks that are going to return only zeros at the beginning of
  // the detection. It helps to avoid infs and nans due to the lack of
  // information.
  int chunks_at_startup_left_to_delete_;

  float reference_energy_;
  bool using_reference_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_

View File

@ -0,0 +1,424 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
#include <math.h>
#include <string.h>
#include <cmath>
#include <complex>
#include <deque>
#include <set>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/fft4g.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/transient/common.h"
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
#include "webrtc/modules/audio_processing/ns/windows_private.h"
#include "webrtc/system_wrappers/interface/logging.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// Tuning constants for the suppressor; the code that consumes them lies
// outside this excerpt, so the descriptions below are inferred from the
// names — confirm against the suppression logic.
// Coefficient of the IIR average, presumably for smoothing the detector
// output.
static const float kMeanIIRCoefficient = 0.5f;
// Threshold presumably separating voiced from unvoiced activity.
static const float kVoiceThreshold = 0.02f;
// TODO(aluebs): Check if these values work also for 48kHz.
// Range of FFT bins presumably inspected for voice energy.
static const size_t kMinVoiceBin = 3;
static const size_t kMaxVoiceBin = 60;
namespace {
// Cheap magnitude estimate for the complex value (a + bi): the L1 norm
// |a| + |b| instead of the Euclidean sqrt(a^2 + b^2).
float ComplexMagnitude(float a, float b) {
  const float abs_real = std::fabs(a);
  const float abs_imag = std::fabs(b);
  return abs_real + abs_imag;
}
} // namespace
// Constructs an uninitialized suppressor: all lengths are zero and all
// feature flags are off until Initialize() is called.
TransientSuppressor::TransientSuppressor()
    : data_length_(0),
      detection_length_(0),
      analysis_length_(0),
      buffer_delay_(0),
      complex_analysis_length_(0),
      num_channels_(0),
      window_(NULL),
      detector_smoothed_(0.f),
      keypress_counter_(0),
      chunks_since_keypress_(0),
      detection_enabled_(false),
      suppression_enabled_(false),
      use_hard_restoration_(false),
      chunks_since_voice_change_(0),
      // Arbitrary fixed seed so the random restoration phases are
      // reproducible across runs.
      seed_(182),
      using_reference_(false) {
}

TransientSuppressor::~TransientSuppressor() {}
// Configures the suppressor and resets all state. |sample_rate_hz| and
// |detection_rate_hz| must each be one of the four supported ts:: sample
// rates and |num_channels| must be positive.
// Returns 0 on success and -1 otherwise. Can be called again to reinitialize.
int TransientSuppressor::Initialize(int sample_rate_hz,
                                    int detection_rate_hz,
                                    int num_channels) {
  // The analysis window (and matching FFT size) is tied to the sample rate.
  switch (sample_rate_hz) {
    case ts::kSampleRate8kHz:
      analysis_length_ = 128u;
      window_ = kBlocks80w128;
      break;
    case ts::kSampleRate16kHz:
      analysis_length_ = 256u;
      window_ = kBlocks160w256;
      break;
    case ts::kSampleRate32kHz:
      analysis_length_ = 512u;
      window_ = kBlocks320w512;
      break;
    case ts::kSampleRate48kHz:
      analysis_length_ = 1024u;
      window_ = kBlocks480w1024;
      break;
    default:
      return -1;
  }
  if (detection_rate_hz != ts::kSampleRate8kHz &&
      detection_rate_hz != ts::kSampleRate16kHz &&
      detection_rate_hz != ts::kSampleRate32kHz &&
      detection_rate_hz != ts::kSampleRate48kHz) {
    return -1;
  }
  if (num_channels <= 0) {
    return -1;
  }

  detector_.reset(new TransientDetector(detection_rate_hz));
  data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000;
  if (data_length_ > analysis_length_) {
    assert(false);
    return -1;
  }
  // The chunk is shorter than the analysis window; the remainder acts as
  // look-back delay (see UpdateBuffers()).
  buffer_delay_ = analysis_length_ - data_length_;

  // Number of complex spectrum bins: DC through Nyquist inclusive.
  complex_analysis_length_ = analysis_length_ / 2 + 1;
  assert(complex_analysis_length_ >= kMaxVoiceBin);
  num_channels_ = num_channels;
  // Per-channel buffers are concatenated: channel i occupies
  // [i * analysis_length_, (i + 1) * analysis_length_).
  in_buffer_.reset(new float[analysis_length_ * num_channels_]);
  memset(in_buffer_.get(),
         0,
         analysis_length_ * num_channels_ * sizeof(in_buffer_[0]));
  detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000;
  detection_buffer_.reset(new float[detection_length_]);
  memset(detection_buffer_.get(),
         0,
         detection_length_ * sizeof(detection_buffer_[0]));
  out_buffer_.reset(new float[analysis_length_ * num_channels_]);
  memset(out_buffer_.get(),
         0,
         analysis_length_ * num_channels_ * sizeof(out_buffer_[0]));
  // ip[0] must be zero to trigger initialization using rdft().
  size_t ip_length = 2 + sqrtf(analysis_length_);
  ip_.reset(new size_t[ip_length]());
  memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
  wfft_.reset(new float[complex_analysis_length_ - 1]);
  memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
  spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
  memset(spectral_mean_.get(),
         0,
         complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
  // +2 so the Nyquist component can be unpacked to the end (see Suppress()).
  fft_buffer_.reset(new float[analysis_length_ + 2]);
  memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
  magnitudes_.reset(new float[complex_analysis_length_]);
  memset(magnitudes_.get(),
         0,
         complex_analysis_length_ * sizeof(magnitudes_[0]));
  // Per-bin weighting used by SoftRestoration(): a double sigmoid with a
  // minimum over the voice band [kMinVoiceBin, kMaxVoiceBin].
  mean_factor_.reset(new float[complex_analysis_length_]);
  static const float kFactorHeight = 10.f;
  static const float kLowSlope = 1.f;
  static const float kHighSlope = 0.3f;
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    mean_factor_[i] =
        kFactorHeight /
            (1.f + exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
        kFactorHeight /
            (1.f + exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
  }
  // Reset the detection/suppression state machine.
  detector_smoothed_ = 0.f;
  keypress_counter_ = 0;
  chunks_since_keypress_ = 0;
  detection_enabled_ = false;
  suppression_enabled_ = false;
  use_hard_restoration_ = false;
  chunks_since_voice_change_ = 0;
  seed_ = 182;
  using_reference_ = false;
  return 0;
}
// Processes one audio chunk; see the declaration in transient_suppressor.h
// for the full parameter contract. Returns 0 on success and -1 on invalid
// arguments or if the detector fails.
int TransientSuppressor::Suppress(float* data,
                                  size_t data_length,
                                  int num_channels,
                                  const float* detection_data,
                                  size_t detection_length,
                                  const float* reference_data,
                                  size_t reference_length,
                                  float voice_probability,
                                  bool key_pressed) {
  if (!data || data_length != data_length_ || num_channels != num_channels_ ||
      detection_length != detection_length_ || voice_probability < 0 ||
      voice_probability > 1) {
    return -1;
  }

  // Advance the typing state machine and slide the chunk into the buffers
  // before any detection/suppression decisions are made.
  UpdateKeypress(key_pressed);
  UpdateBuffers(data);

  int result = 0;
  if (detection_enabled_) {
    UpdateRestoration(voice_probability);

    if (!detection_data) {
      // Use the input data of the first channel if special detection data is
      // not supplied.
      detection_data = &in_buffer_[buffer_delay_];
    }

    float detector_result = detector_->Detect(
        detection_data, detection_length, reference_data, reference_length);
    if (detector_result < 0) {
      return -1;
    }

    using_reference_ = detector_->using_reference();

    // |detector_smoothed_| follows the |detector_result| when this last one is
    // increasing, but has an exponential decaying tail to be able to suppress
    // the ringing of keyclicks.
    float smooth_factor = using_reference_ ? 0.6 : 0.1;
    detector_smoothed_ = detector_result >= detector_smoothed_
                             ? detector_result
                             : smooth_factor * detector_smoothed_ +
                                   (1 - smooth_factor) * detector_result;

    // Restore each channel independently in the frequency domain.
    for (int i = 0; i < num_channels_; ++i) {
      Suppress(&in_buffer_[i * analysis_length_],
               &spectral_mean_[i * complex_analysis_length_],
               &out_buffer_[i * analysis_length_]);
    }
  }

  // If the suppression isn't enabled, we use the in buffer to delay the signal
  // appropriately. This also gives time for the out buffer to be refreshed with
  // new data between detection and suppression getting enabled.
  for (int i = 0; i < num_channels_; ++i) {
    memcpy(&data[i * data_length_],
           suppression_enabled_ ? &out_buffer_[i * analysis_length_]
                                : &in_buffer_[i * analysis_length_],
           data_length_ * sizeof(*data));
  }
  return result;
}
// This should only be called when detection is enabled. UpdateBuffers() must
// have been called. At return, |out_buffer_| will be filled with the
// processed output.
// Pipeline: window -> forward FFT -> (optional) spectral restoration ->
// spectral mean update -> inverse FFT -> windowed overlap-add into |out_ptr|.
void TransientSuppressor::Suppress(float* in_ptr,
                                   float* spectral_mean,
                                   float* out_ptr) {
  // Go to frequency domain.
  for (size_t i = 0; i < analysis_length_; ++i) {
    // TODO(aluebs): Rename windows
    fft_buffer_[i] = in_ptr[i] * window_[i];
  }

  WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get());

  // Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end
  // for convenience.
  fft_buffer_[analysis_length_] = fft_buffer_[1];
  fft_buffer_[analysis_length_ + 1] = 0.f;
  fft_buffer_[1] = 0.f;

  // Bins are stored interleaved: real at [2i], imaginary at [2i + 1].
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2],
                                      fft_buffer_[i * 2 + 1]);
  }

  // Restore audio if necessary.
  if (suppression_enabled_) {
    if (use_hard_restoration_) {
      HardRestoration(spectral_mean);
    } else {
      SoftRestoration(spectral_mean);
    }
  }

  // Update the spectral mean.
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] +
                       kMeanIIRCoefficient * magnitudes_[i];
  }

  // Back to time domain.
  // Put R[n/2] back in fft_buffer_[1].
  fft_buffer_[1] = fft_buffer_[analysis_length_];
  WebRtc_rdft(analysis_length_,
              -1,
              fft_buffer_.get(),
              ip_.get(),
              wfft_.get());
  // rdft's inverse is unnormalized; 2/n restores the original scale.
  const float fft_scaling = 2.f / analysis_length_;

  // Overlap-add the windowed inverse transform into |out_ptr|.
  for (size_t i = 0; i < analysis_length_; ++i) {
    out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling;
  }
}
// Runs the typing-detection state machine for one chunk.
// A key press adds a one-second penalty to |keypress_counter_|, which decays
// by one every chunk. Sustained typing enables suppression; four seconds
// without any key press disables detection and suppression again.
void TransientSuppressor::UpdateKeypress(bool key_pressed) {
  const int kKeypressPenalty = 1000 / ts::kChunkSizeMs;
  const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs;
  const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs;  // 4 seconds.

  if (key_pressed) {
    keypress_counter_ += kKeypressPenalty;
    chunks_since_keypress_ = 0;
    detection_enabled_ = true;
  }
  // Decay the penalty counter, clamping at zero.
  if (keypress_counter_ > 0) {
    --keypress_counter_;
  }

  if (keypress_counter_ > kIsTypingThreshold) {
    if (!suppression_enabled_) {
      LOG(LS_INFO) << "[ts] Transient suppression is now enabled.";
    }
    suppression_enabled_ = true;
    keypress_counter_ = 0;
  }

  if (detection_enabled_) {
    ++chunks_since_keypress_;
    if (chunks_since_keypress_ > kChunksUntilNotTyping) {
      if (suppression_enabled_) {
        LOG(LS_INFO) << "[ts] Transient suppression is now disabled.";
      }
      detection_enabled_ = false;
      suppression_enabled_ = false;
      keypress_counter_ = 0;
    }
  }
}
// Chooses between hard (unvoiced) and soft (voiced) restoration based on
// |voice_probability|, with hysteresis: the voicing state must persist for a
// number of chunks before the restoration mode actually flips.
void TransientSuppressor::UpdateRestoration(float voice_probability) {
  const int kHardRestorationOffsetDelay = 3;
  const int kHardRestorationOnsetDelay = 80;

  const bool not_voiced = voice_probability < kVoiceThreshold;
  if (not_voiced == use_hard_restoration_) {
    // Current mode already matches the voicing state; nothing pending.
    chunks_since_voice_change_ = 0;
    return;
  }

  ++chunks_since_voice_change_;
  // Leaving hard restoration is quick; entering it requires a long
  // stretch of unvoiced audio.
  const int required_delay = use_hard_restoration_
                                 ? kHardRestorationOffsetDelay
                                 : kHardRestorationOnsetDelay;
  if (chunks_since_voice_change_ > required_delay) {
    use_hard_restoration_ = not_voiced;
    chunks_since_voice_change_ = 0;
  }
}
// Shift buffers to make way for new data. Must be called after
// |detection_enabled_| is updated by UpdateKeypress().
// |in_buffer_| and |out_buffer_| each hold |analysis_length_| samples per
// channel, concatenated. Every call slides them left by |data_length_|
// samples and writes the new chunk in at offset |buffer_delay_| of each
// channel's block.
void TransientSuppressor::UpdateBuffers(float* data) {
  // TODO(aluebs): Change to ring buffer.
  // One memmove covers all channels: the tail of channel i slides into the
  // head of channel i, leaving |data_length_| free samples per channel.
  memmove(in_buffer_.get(),
          &in_buffer_[data_length_],
          (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
              sizeof(in_buffer_[0]));
  // Copy new chunk to buffer.
  for (int i = 0; i < num_channels_; ++i) {
    memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
           &data[i * data_length_],
           data_length_ * sizeof(*data));
  }
  if (detection_enabled_) {
    // Shift previous chunk in out buffer.
    memmove(out_buffer_.get(),
            &out_buffer_[data_length_],
            (buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
                sizeof(out_buffer_[0]));
    // Initialize new chunk in out buffer.
    for (int i = 0; i < num_channels_; ++i) {
      memset(&out_buffer_[buffer_delay_ + i * analysis_length_],
             0,
             data_length_ * sizeof(out_buffer_[0]));
    }
  }
}
// Restores the unvoiced signal if a click is present.
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
// the spectral mean. The attenuation depends on |detector_smoothed_|.
// If a restoration takes place, the |magnitudes_| are updated to the new value.
void TransientSuppressor::HardRestoration(float* spectral_mean) {
  // Map the smoothed detection into an attenuation factor in [0, 1]; the
  // exponent is larger when a keyboard reference signal is available.
  const float detector_result =
      1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
  // To restore, we get the peaks in the spectrum. If higher than the previous
  // spectral mean we adjust them.
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
      // RandU() generates values on [0, int16::max()]
      const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
                          std::numeric_limits<int16_t>::max();
      // Cross-fade the bin towards the spectral mean, at a random phase.
      const float scaled_mean = detector_result * spectral_mean[i];

      fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
                           scaled_mean * cosf(phase);
      fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
                               scaled_mean * sinf(phase);
      magnitudes_[i] = magnitudes_[i] -
                       detector_result * (magnitudes_[i] - spectral_mean[i]);
    }
  }
}
// Restores the voiced signal if a click is present.
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
// the spectral mean and that is lower than some function of the current block
// frequency mean. The attenuation depends on |detector_smoothed_|.
// If a restoration takes place, the |magnitudes_| are updated to the new value.
void TransientSuppressor::SoftRestoration(float* spectral_mean) {
  // Get the spectral magnitude mean of the current block.
  float block_frequency_mean = 0;
  for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
    block_frequency_mean += magnitudes_[i];
  }
  block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);

  // To restore, we get the peaks in the spectrum. If higher than the
  // previous spectral mean and lower than a factor of the block mean
  // we adjust them. The factor is a double sigmoid that has a minimum in the
  // voice frequency range (300Hz - 3kHz).
  for (size_t i = 0; i < complex_analysis_length_; ++i) {
    if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
        (using_reference_ ||
         magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
      const float new_magnitude =
          magnitudes_[i] -
          detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
      // Scale real and imaginary parts by the same ratio so the bin's phase
      // is preserved.
      const float magnitude_ratio = new_magnitude / magnitudes_[i];

      fft_buffer_[i * 2] *= magnitude_ratio;
      fft_buffer_[i * 2 + 1] *= magnitude_ratio;
      magnitudes_[i] = new_magnitude;
    }
  }
}
} // namespace webrtc

View File

@ -0,0 +1,120 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
#include <deque>
#include <set>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/test/testsupport/gtest_prod_util.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class TransientDetector;
// Detects transients in an audio stream and suppress them using a simple
// restoration algorithm that attenuates unexpected spikes in the spectrum.
class TransientSuppressor {
 public:
  TransientSuppressor();
  ~TransientSuppressor();

  // Configures the suppressor for the given rates and channel count.
  // Returns 0 on success and -1 otherwise.
  int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);

  // Processes a |data| chunk, and returns it with keystrokes suppressed from
  // it. The float format is assumed to be int16 ranged. If there are more than
  // one channel, the chunks are concatenated one after the other in |data|.
  // |data_length| must be equal to |data_length_|.
  // |num_channels| must be equal to |num_channels_|.
  // A sub-band, ideally the higher, can be used as |detection_data|. If it is
  // NULL, |data| is used for the detection too. The |detection_data| is always
  // assumed mono.
  // If a reference signal (e.g. keyboard microphone) is available, it can be
  // passed in as |reference_data|. It is assumed mono and must have the same
  // length as |data|. NULL is accepted if unavailable.
  // This suppressor performs better if voice information is available.
  // |voice_probability| is the probability of voice being present in this chunk
  // of audio. If voice information is not available, |voice_probability| must
  // always be set to 1.
  // |key_pressed| determines if a key was pressed on this audio chunk.
  // Returns 0 on success and -1 otherwise.
  int Suppress(float* data,
               size_t data_length,
               int num_channels,
               const float* detection_data,
               size_t detection_length,
               const float* reference_data,
               size_t reference_length,
               float voice_probability,
               bool key_pressed);

 private:
  FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
                           TypingDetectionLogicWorksAsExpectedForMono);
  // Per-channel frequency-domain processing (restoration + overlap-add).
  void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);

  // Advances the typing-detection state machine from |key_pressed|.
  void UpdateKeypress(bool key_pressed);
  // Switches between hard and soft restoration based on voice probability.
  void UpdateRestoration(float voice_probability);

  // Shifts the in/out buffers and copies the new chunk in.
  void UpdateBuffers(float* data);

  // Spectral restoration for unvoiced (hard) and voiced (soft) chunks.
  void HardRestoration(float* spectral_mean);
  void SoftRestoration(float* spectral_mean);

  rtc::scoped_ptr<TransientDetector> detector_;

  size_t data_length_;
  size_t detection_length_;
  size_t analysis_length_;
  size_t buffer_delay_;
  size_t complex_analysis_length_;
  int num_channels_;
  // Input buffer where the original samples are stored.
  rtc::scoped_ptr<float[]> in_buffer_;
  rtc::scoped_ptr<float[]> detection_buffer_;
  // Output buffer where the restored samples are stored.
  rtc::scoped_ptr<float[]> out_buffer_;

  // Arrays for fft.
  rtc::scoped_ptr<size_t[]> ip_;
  rtc::scoped_ptr<float[]> wfft_;

  // Running per-bin spectral mean, one block per channel.
  rtc::scoped_ptr<float[]> spectral_mean_;

  // Stores the data for the fft.
  rtc::scoped_ptr<float[]> fft_buffer_;

  // Per-bin magnitudes of the current analysis block.
  rtc::scoped_ptr<float[]> magnitudes_;

  // Analysis/synthesis window; points at a static table (not owned).
  const float* window_;

  // Per-bin double-sigmoid weighting used by SoftRestoration().
  rtc::scoped_ptr<float[]> mean_factor_;

  // Smoothed detector output with an exponentially decaying tail.
  float detector_smoothed_;

  // Typing-detection state (see UpdateKeypress()).
  int keypress_counter_;
  int chunks_since_keypress_;
  bool detection_enabled_;
  bool suppression_enabled_;

  // Restoration-mode hysteresis state (see UpdateRestoration()).
  bool use_hard_restoration_;
  int chunks_since_voice_change_;

  // Seed for the random phases used in HardRestoration().
  uint32_t seed_;

  bool using_reference_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

View File

@ -0,0 +1,71 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/wpd_node.h"
#include <assert.h>
#include <math.h>
#include <string.h>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/fir_filter.h"
#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
namespace webrtc {
// Creates a node holding |length| samples. |data_| is sized 2 * length + 1 so
// it can hold the parent's data while it is being filtered, before being
// decimated down to |length| samples by Update().
WPDNode::WPDNode(size_t length,
                 const float* coefficients,
                 size_t coefficients_length)
    : // The data buffer has parent data length to be able to contain and
      // filter it.
      data_(new float[2 * length + 1]),
      length_(length),
      filter_(FIRFilter::Create(coefficients,
                                coefficients_length,
                                2 * length + 1)) {
  assert(length > 0 && coefficients && coefficients_length > 0);
  // memset's fill value is an int; the previous code passed the float
  // literal 0.f, relying on an implicit float-to-int conversion.
  memset(data_.get(), 0, (2 * length + 1) * sizeof(data_[0]));
}

WPDNode::~WPDNode() {}
// Filters |parent_data| with this node's FIR filter, dyadically decimates the
// result (keeping the odd-indexed samples), and stores the absolute values.
// |parent_data_length| must be exactly 2 * |length_|.
// Returns 0 if correct, and -1 otherwise.
int WPDNode::Update(const float* parent_data, size_t parent_data_length) {
  if (!parent_data || (parent_data_length / 2) != length_) {
    return -1;
  }

  // Filter data.
  filter_->Filter(parent_data, parent_data_length, data_.get());

  // Decimate data.
  const bool kOddSequence = true;
  size_t output_samples = DyadicDecimate(
      data_.get(), parent_data_length, kOddSequence, data_.get(), length_);
  if (output_samples != length_) {
    return -1;
  }

  // Get abs to all values.
  for (size_t i = 0; i < length_; ++i) {
    // fabsf avoids the float->double->float round trip of fabs().
    data_[i] = fabsf(data_[i]);
  }

  return 0;
}
// Overwrites the node's samples with |new_data|, which must hold exactly
// |length_| values. Returns 0 if correct, and -1 otherwise.
int WPDNode::set_data(const float* new_data, size_t length) {
  if (!new_data) {
    return -1;
  }
  if (length != length_) {
    return -1;
  }
  memcpy(data_.get(), new_data, length_ * sizeof(data_[0]));
  return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,46 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class FIRFilter;
// A single node of a Wavelet Packet Decomposition (WPD) tree.
// A single node of a Wavelet Packet Decomposition (WPD) tree.
// Holds |length_| samples: the parent's data filtered by an FIR filter,
// dyadically decimated, and stored as absolute values.
class WPDNode {
 public:
  // Creates a WPDNode. The data vector will contain zeros. The filter will have
  // the coefficients provided.
  WPDNode(size_t length, const float* coefficients, size_t coefficients_length);
  ~WPDNode();

  // Updates the node data. |parent_data_length| / 2 must be equal to
  // |length_|.
  // Returns 0 if correct, and -1 otherwise.
  int Update(const float* parent_data, size_t parent_data_length);

  // Read-only access to the node's |length_| samples.
  const float* data() const { return data_.get(); }
  // Overwrites the node data. Returns 0 if correct, and -1 otherwise.
  int set_data(const float* new_data, size_t length);
  size_t length() const { return length_; }

 private:
  rtc::scoped_ptr<float[]> data_;  // Sized 2 * |length_| + 1 (see .cc file).
  size_t length_;
  rtc::scoped_ptr<FIRFilter> filter_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_

View File

@ -0,0 +1,119 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
#include <assert.h>
#include <math.h>
#include <string.h>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
#include "webrtc/modules/audio_processing/transient/wpd_node.h"
namespace webrtc {
// Builds the complete binary tree: the root holds the raw data, and each
// node's children filter it with the low-pass (left, approximation) or
// high-pass (right, detail) coefficients at half the parent's length.
WPDTree::WPDTree(size_t data_length, const float* high_pass_coefficients,
                 const float* low_pass_coefficients, size_t coefficients_length,
                 int levels)
    : data_length_(data_length),
      levels_(levels),
      // Complete binary tree: 2^(levels + 1) - 1 nodes including the root.
      num_nodes_((1 << (levels + 1)) - 1) {
  assert(data_length > (static_cast<size_t>(1) << levels) &&
         high_pass_coefficients &&
         low_pass_coefficients &&
         levels > 0);
  // Size is 1 more, so we can use the array as 1-based. nodes_[0] is never
  // allocated.
  nodes_.reset(new rtc::scoped_ptr<WPDNode>[num_nodes_ + 1]);

  // Create the first node
  const float kRootCoefficient = 1.f;  // Identity Coefficient.
  nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1));
  // Variables used to create the rest of the nodes.
  size_t index = 1;
  size_t index_left_child = 0;
  size_t index_right_child = 0;
  int num_nodes_at_curr_level = 0;

  // Branching each node in each level to create its children. The last level is
  // not branched (all the nodes of that level are leaves).
  for (int current_level = 0; current_level < levels; ++current_level) {
    num_nodes_at_curr_level = 1 << current_level;
    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
      // Node (level, i) lives at 1-based array index 2^level + i.
      index = (1 << current_level) + i;
      // Obtain the index of the current node children.
      index_left_child = index * 2;
      index_right_child = index_left_child + 1;
      nodes_[index_left_child].reset(new WPDNode(nodes_[index]->length() / 2,
                                                 low_pass_coefficients,
                                                 coefficients_length));
      nodes_[index_right_child].reset(new WPDNode(nodes_[index]->length() / 2,
                                                  high_pass_coefficients,
                                                  coefficients_length));
    }
  }
}

WPDTree::~WPDTree() {}
// Returns the node at |level| (root is level 0) and |index| within that
// level, or NULL if either argument is out of bounds.
WPDNode* WPDTree::NodeAt(int level, int index) {
  // Validate |level| before shifting: `1 << level` is undefined behavior for
  // negative shift counts, so the bounds check must come first.
  if (level < 0 || level > levels_) {
    return NULL;
  }
  const int kNumNodesAtLevel = 1 << level;
  if (index < 0 || index >= kNumNodesAtLevel) {
    return NULL;
  }

  return nodes_[(1 << level) + index].get();
}
// Updates all the nodes of the tree with the new data. |data_length| must be
// the same that was used for the creation of the tree.
// Returns 0 if correct, and -1 otherwise.
int WPDTree::Update(const float* data, size_t data_length) {
  if (!data || data_length != data_length_) {
    return -1;
  }

  // Update the root node.
  int update_result = nodes_[1]->set_data(data, data_length);
  if (update_result != 0) {
    return -1;
  }

  // Variables used to update the rest of the nodes.
  size_t index = 1;
  size_t index_left_child = 0;
  size_t index_right_child = 0;
  int num_nodes_at_curr_level = 0;

  // Propagate level by level: each child filters and decimates its parent's
  // data, so the leaves are reached after |levels_| passes.
  for (int current_level = 0; current_level < levels_; ++current_level) {
    num_nodes_at_curr_level = 1 << current_level;
    for (int i = 0; i < num_nodes_at_curr_level; ++i) {
      index = (1 << current_level) + i;
      // Obtain the index of the current node children.
      index_left_child = index * 2;
      index_right_child = index_left_child + 1;
      update_result = nodes_[index_left_child]->Update(
          nodes_[index]->data(), nodes_[index]->length());
      if (update_result != 0) {
        return -1;
      }
      update_result = nodes_[index_right_child]->Update(
          nodes_[index]->data(), nodes_[index]->length());
      if (update_result != 0) {
        return -1;
      }
    }
  }

  return 0;
}
} // namespace webrtc

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/transient/wpd_node.h"
namespace webrtc {
// Tree of a Wavelet Packet Decomposition (WPD).
//
// The root node contains all the data provided; for each node in the tree, the
// left child contains the approximation coefficients extracted from the node,
// and the right child contains the detail coefficients.
// It preserves its state, so it can be multiple-called.
//
// The number of nodes in the tree will be 2 ^ levels - 1.
//
// Implementation details: Since the tree always will be a complete binary tree,
// it is implemented using a single linear array instead of managing the
// relationships in each node. For convenience it is better to use an array that
// starts in 1 (instead of 0). Taking that into account, the following formulas
// apply:
// Root node index: 1.
// Node(Level, Index in that level): 2 ^ Level + (Index in that level).
// Left Child: Current node index * 2.
// Right Child: Current node index * 2 + 1.
// Parent: Current Node Index / 2 (Integer division).
class WPDTree {
 public:
  // Creates a WPD tree using the data length and coefficients provided.
  WPDTree(size_t data_length,
          const float* high_pass_coefficients,
          const float* low_pass_coefficients,
          size_t coefficients_length,
          int levels);
  ~WPDTree();

  // Returns the number of nodes at any given level.
  static int NumberOfNodesAtLevel(int level) {
    return 1 << level;
  }

  // Returns a pointer to the node at the given level and index(of that level).
  // Level goes from 0 to levels().
  // Index goes from 0 to the number of NumberOfNodesAtLevel(level) - 1.
  //
  // You can use the following formulas to get any node within the tree:
  // Notation: (Level, Index of node in that level).
  // Root node: (0/0).
  // Left Child: (Current node level + 1, Current node index * 2).
  // Right Child: (Current node level + 1, Current node index * 2 + 1).
  // Parent: (Current node level - 1, Current node index / 2) (Integer division)
  //
  // If level or index are out of bounds the function will return NULL.
  WPDNode* NodeAt(int level, int index);

  // Updates all the nodes of the tree with the new data. |data_length| must be
  // the same that was used for the creation of the tree.
  // Returns 0 if correct, and -1 otherwise.
  int Update(const float* data, size_t data_length);

  // Returns the total number of levels below the root. Root is considered
  // level 0.
  int levels() const { return levels_; }

  // Returns the total number of nodes.
  int num_nodes() const { return num_nodes_; }

  // Returns the total number of leaves.
  int num_leaves() const { return 1 << levels_; }

 private:
  size_t data_length_;
  int levels_;
  int num_nodes_;
  // 1-based array of nodes; nodes_[0] is never allocated.
  rtc::scoped_ptr<rtc::scoped_ptr<WPDNode>[]> nodes_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_