Update audio_processing module
Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1 Update notes: * Pull in third party license file * Replace .gypi files with BUILD.gn to keep track of what changes upstream * Bunch of new files pulled in as dependencies * Won't build yet due to changes needed on top of these
This commit is contained in:
114
webrtc/modules/audio_processing/transient/click_annotate.cc
Normal file
114
webrtc/modules/audio_processing/transient/click_annotate.cc
Normal file
@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include <cfloat>
|
||||
#include <cstdio>
|
||||
#include <cstdlib>
|
||||
#include <vector>
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
|
||||
#include "webrtc/modules/audio_processing/transient/file_utils.h"
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
|
||||
using rtc::scoped_ptr;
|
||||
using webrtc::FileWrapper;
|
||||
using webrtc::TransientDetector;
|
||||
|
||||
// Application to generate a RTP timing file.
|
||||
// Opens the PCM file and divides the signal in frames.
|
||||
// Creates a send times array, one for each step.
|
||||
// Each block that contains a transient, has an infinite send time.
|
||||
// The resultant array is written to a DAT file
|
||||
// Returns -1 on error or |lost_packets| otherwise.
|
||||
int main(int argc, char* argv[]) {
|
||||
if (argc != 5) {
|
||||
printf("\n%s - Application to generate a RTP timing file.\n\n", argv[0]);
|
||||
printf("%s PCMfile DATfile chunkSize sampleRate\n\n", argv[0]);
|
||||
printf("Opens the PCMfile with sampleRate in Hertz.\n");
|
||||
printf("Creates a send times array, one for each chunkSize ");
|
||||
printf("milliseconds step.\n");
|
||||
printf("Each block that contains a transient, has an infinite send time. ");
|
||||
printf("The resultant array is written to a DATfile.\n\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
scoped_ptr<FileWrapper> pcm_file(FileWrapper::Create());
|
||||
pcm_file->OpenFile(argv[1], true, false, false);
|
||||
if (!pcm_file->Open()) {
|
||||
printf("\nThe %s could not be opened.\n\n", argv[1]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
scoped_ptr<FileWrapper> dat_file(FileWrapper::Create());
|
||||
dat_file->OpenFile(argv[2], false, false, false);
|
||||
if (!dat_file->Open()) {
|
||||
printf("\nThe %s could not be opened.\n\n", argv[2]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int chunk_size_ms = atoi(argv[3]);
|
||||
if (chunk_size_ms <= 0) {
|
||||
printf("\nThe chunkSize must be a positive integer\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
int sample_rate_hz = atoi(argv[4]);
|
||||
if (sample_rate_hz <= 0) {
|
||||
printf("\nThe sampleRate must be a positive integer\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
TransientDetector detector(sample_rate_hz);
|
||||
int lost_packets = 0;
|
||||
size_t audio_buffer_length = chunk_size_ms * sample_rate_hz / 1000;
|
||||
scoped_ptr<float[]> audio_buffer(new float[audio_buffer_length]);
|
||||
std::vector<float> send_times;
|
||||
|
||||
// Read first buffer from the PCM test file.
|
||||
size_t file_samples_read = ReadInt16FromFileToFloatBuffer(
|
||||
pcm_file.get(),
|
||||
audio_buffer_length,
|
||||
audio_buffer.get());
|
||||
for (int time = 0; file_samples_read > 0; time += chunk_size_ms) {
|
||||
// Pad the rest of the buffer with zeros.
|
||||
for (size_t i = file_samples_read; i < audio_buffer_length; ++i) {
|
||||
audio_buffer[i] = 0.0;
|
||||
}
|
||||
float value =
|
||||
detector.Detect(audio_buffer.get(), audio_buffer_length, NULL, 0);
|
||||
if (value < 0.5f) {
|
||||
value = time;
|
||||
} else {
|
||||
value = FLT_MAX;
|
||||
++lost_packets;
|
||||
}
|
||||
send_times.push_back(value);
|
||||
|
||||
// Read next buffer from the PCM test file.
|
||||
file_samples_read = ReadInt16FromFileToFloatBuffer(pcm_file.get(),
|
||||
audio_buffer_length,
|
||||
audio_buffer.get());
|
||||
}
|
||||
|
||||
size_t floats_written = WriteFloatBufferToFile(dat_file.get(),
|
||||
send_times.size(),
|
||||
&send_times[0]);
|
||||
|
||||
if (floats_written == 0) {
|
||||
printf("\nThe send times could not be written to DAT file\n\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
pcm_file->CloseFile();
|
||||
dat_file->CloseFile();
|
||||
|
||||
return lost_packets;
|
||||
}
|
27
webrtc/modules/audio_processing/transient/common.h
Normal file
27
webrtc/modules/audio_processing/transient/common.h
Normal file
@ -0,0 +1,27 @@
|
||||
/*
|
||||
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
|
||||
namespace webrtc {
namespace ts {

// Pi as a single-precision constant.
const float kPi = 3.14159265358979323846f;

// Length of one chunk, in milliseconds.
const int kChunkSizeMs = 10;

// Named sample rates, in Hz.
enum {
  kSampleRate8kHz = 8000,
  kSampleRate16kHz = 16000,
  kSampleRate32kHz = 32000,
  kSampleRate48kHz = 48000
};

}  // namespace ts
}  // namespace webrtc
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_COMMON_H_
|
@ -0,0 +1,63 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
// This header file defines the coefficients of the FIR based approximation of
|
||||
// the Daubechies 8 wavelet.
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
|
||||
|
||||
// Decomposition coefficients Daubechies 8.
|
||||
|
||||
namespace webrtc {

// Number of taps in each of the two decomposition filters below.
const int kDaubechies8CoefficientsLength = 16;

// High-pass decomposition filter.
const float kDaubechies8HighPassCoefficients[kDaubechies8CoefficientsLength] =
    {
      -5.44158422430816093862e-02f,
      3.12871590914465924627e-01f,
      -6.75630736298012846142e-01f,
      5.85354683654869090148e-01f,
      1.58291052560238926228e-02f,
      -2.84015542962428091389e-01f,
      -4.72484573997972536787e-04f,
      1.28747426620186011803e-01f,
      1.73693010020221083600e-02f,
      -4.40882539310647192377e-02f,
      -1.39810279170155156436e-02f,
      8.74609404701565465445e-03f,
      4.87035299301066034600e-03f,
      -3.91740372995977108837e-04f,
      -6.75449405998556772109e-04f,
      -1.17476784002281916305e-04f
    };

// Low-pass decomposition filter. Its taps are the high-pass taps in reverse
// order with the sign of every other tap flipped.
const float kDaubechies8LowPassCoefficients[kDaubechies8CoefficientsLength] = {
  -1.17476784002281916305e-04f,
  6.75449405998556772109e-04f,
  -3.91740372995977108837e-04f,
  -4.87035299301066034600e-03f,
  8.74609404701565465445e-03f,
  1.39810279170155156436e-02f,
  -4.40882539310647192377e-02f,
  -1.73693010020221083600e-02f,
  1.28747426620186011803e-01f,
  4.72484573997972536787e-04f,
  -2.84015542962428091389e-01f,
  -1.58291052560238926228e-02f,
  5.85354683654869090148e-01f,
  6.75630736298012846142e-01f,
  3.12871590914465924627e-01f,
  5.44158422430816093862e-02f
};

}  // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DAUBECHIES_8_WAVELET_COEFFS_H_
|
70
webrtc/modules/audio_processing/transient/dyadic_decimator.h
Normal file
70
webrtc/modules/audio_processing/transient/dyadic_decimator.h
Normal file
@ -0,0 +1,70 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
|
||||
|
||||
#include <cstdlib>
|
||||
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
// Provides a set of static methods to perform dyadic decimations.
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Returns the proper length of the output buffer that you should use for the
|
||||
// given |in_length| and decimation |odd_sequence|.
|
||||
// This function cannot fail.
|
||||
inline size_t GetOutLengthToDyadicDecimate(size_t in_length,
                                           bool odd_sequence) {
  // When |in_length| is odd, the even positions (0, 2, 4, ...) hold one more
  // member than the odd positions; that extra member survives only when the
  // odd members are the ones being removed (|odd_sequence| is false).
  const bool keeps_extra_member = !odd_sequence && (in_length % 2 == 1);
  return in_length / 2 + (keeps_extra_member ? 1 : 0);
}
|
||||
|
||||
// Performs a dyadic decimation: removes every odd/even member of a sequence
|
||||
// halving its overall length.
|
||||
// Arguments:
|
||||
// in: array of |in_length|.
|
||||
// odd_sequence: If false, the odd members will be removed (1, 3, 5, ...);
|
||||
// if true, the even members will be removed (0, 2, 4, ...).
|
||||
// out: array of |out_length|. |out_length| must be large enough to
|
||||
// hold the decimated output. The necessary length can be provided by
|
||||
// GetOutLengthToDyadicDecimate().
|
||||
// Must be previously allocated.
|
||||
// Returns the number of output samples, 0 on error.
|
||||
template<typename T>
|
||||
static size_t DyadicDecimate(const T* in,
|
||||
size_t in_length,
|
||||
bool odd_sequence,
|
||||
T* out,
|
||||
size_t out_length) {
|
||||
size_t half_length = GetOutLengthToDyadicDecimate(in_length, odd_sequence);
|
||||
|
||||
if (!in || !out || in_length <= 0 || out_length < half_length) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t output_samples = 0;
|
||||
size_t index_adjustment = odd_sequence ? 1 : 0;
|
||||
for (output_samples = 0; output_samples < half_length; ++output_samples) {
|
||||
out[output_samples] = in[output_samples * 2 + index_adjustment];
|
||||
}
|
||||
|
||||
return output_samples;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_DYADIC_DECIMATOR_H_
|
257
webrtc/modules/audio_processing/transient/file_utils.cc
Normal file
257
webrtc/modules/audio_processing/transient/file_utils.cc
Normal file
@ -0,0 +1,257 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/file_utils.h"
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Rebuilds a float from the 4 bytes of its binary representation, given in
// little-endian order. Returns 0 on success, -1 if either pointer is null.
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out) {
  if (!(bytes && out)) {
    return -1;
  }

  // Walk from the last (most significant) byte down to the first one, shifting
  // the bytes gathered so far up to make room for the next.
  uint32_t bits = 0;
  for (int index = 3; index >= 0; --index) {
    bits = (bits << 8) + bytes[index];
  }

  *out = bit_cast<float>(bits);
  return 0;
}
|
||||
|
||||
// Rebuilds a double from the 8 bytes of its binary representation, given in
// little-endian order. Returns 0 on success, -1 if either pointer is null.
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out) {
  if (!(bytes && out)) {
    return -1;
  }

  // Walk from the last (most significant) byte down to the first one, shifting
  // the bytes gathered so far up to make room for the next.
  uint64_t bits = 0;
  for (int index = 7; index >= 0; --index) {
    bits = (bits << 8) + bytes[index];
  }

  *out = bit_cast<double>(bits);
  return 0;
}
|
||||
|
||||
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]) {
|
||||
if (!out_bytes) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint32_t binary_value = bit_cast<uint32_t>(value);
|
||||
for (size_t i = 0; i < 4; ++i) {
|
||||
out_bytes[i] = binary_value;
|
||||
binary_value >>= 8;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]) {
|
||||
if (!out_bytes) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint64_t binary_value = bit_cast<uint64_t>(value);
|
||||
for (size_t i = 0; i < 8; ++i) {
|
||||
out_bytes[i] = binary_value;
|
||||
binary_value >>= 8;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Reads up to |length| 16-bit integers, stored least significant byte first,
// from |file| into |buffer|. Stops early as soon as fewer than 2 bytes can be
// read. Returns the number of integers stored, or 0 when an argument is
// invalid or |file| is not open.
size_t ReadInt16BufferFromFile(FileWrapper* file,
                               size_t length,
                               int16_t* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  rtc::scoped_ptr<uint8_t[]> raw(new uint8_t[2]);

  size_t count = 0;
  for (; count < length; ++count) {
    const size_t bytes_read = file->Read(raw.get(), 2);
    if (bytes_read < 2) {
      break;
    }
    // High byte first, then shift it up and add the low byte.
    int16_t sample = raw[1];
    sample <<= 8;
    sample += raw[0];
    buffer[count] = sample;
  }

  return count;
}
|
||||
|
||||
// Reads up to |length| 16-bit integers from |file| and stores each of them,
// converted to float, in |buffer|. Returns the number of values stored, or 0
// when an argument is invalid or |file| is not open.
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
                                      size_t length,
                                      float* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  // Read into a temporary 16-bit buffer, then widen element by element.
  rtc::scoped_ptr<int16_t[]> samples(new int16_t[length]);
  const size_t count = ReadInt16BufferFromFile(file, length, samples.get());

  size_t index = 0;
  while (index < count) {
    buffer[index] = samples[index];
    ++index;
  }

  return count;
}
|
||||
|
||||
// Reads up to |length| 16-bit integers from |file| and stores each of them,
// converted to double, in |buffer|. Returns the number of values stored, or 0
// when an argument is invalid or |file| is not open.
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
                                       size_t length,
                                       double* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  // Read into a temporary 16-bit buffer, then widen element by element.
  rtc::scoped_ptr<int16_t[]> samples(new int16_t[length]);
  const size_t count = ReadInt16BufferFromFile(file, length, samples.get());

  size_t index = 0;
  while (index < count) {
    buffer[index] = samples[index];
    ++index;
  }

  return count;
}
|
||||
|
||||
// Reads up to |length| floats, each stored as 4 little-endian bytes, from
// |file| into |buffer|. Stops early as soon as fewer than 4 bytes can be read.
// Returns the number of floats stored, or 0 when an argument is invalid or
// |file| is not open.
size_t ReadFloatBufferFromFile(FileWrapper* file,
                               size_t length,
                               float* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  rtc::scoped_ptr<uint8_t[]> raw(new uint8_t[4]);

  size_t count = 0;
  for (; count < length; ++count) {
    const size_t bytes_read = file->Read(raw.get(), 4);
    if (bytes_read < 4) {
      break;
    }
    ConvertByteArrayToFloat(raw.get(), &buffer[count]);
  }

  return count;
}
|
||||
|
||||
// Reads up to |length| doubles, each stored as 8 little-endian bytes, from
// |file| into |buffer|. Stops early as soon as fewer than 8 bytes can be read.
// Returns the number of doubles stored, or 0 when an argument is invalid or
// |file| is not open.
size_t ReadDoubleBufferFromFile(FileWrapper* file,
                                size_t length,
                                double* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  rtc::scoped_ptr<uint8_t[]> raw(new uint8_t[8]);

  size_t count = 0;
  for (; count < length; ++count) {
    const size_t bytes_read = file->Read(raw.get(), 8);
    if (bytes_read < 8) {
      break;
    }
    ConvertByteArrayToDouble(raw.get(), &buffer[count]);
  }

  return count;
}
|
||||
|
||||
// Writes |length| 16-bit integers from |buffer| to |file|, least significant
// byte first, and then flushes |file|. Whatever Write() returns is ignored, so
// the value returned is |length| whenever the arguments are valid, and 0 when
// an argument is invalid or |file| is not open.
size_t WriteInt16BufferToFile(FileWrapper* file,
                              size_t length,
                              const int16_t* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  rtc::scoped_ptr<uint8_t[]> raw(new uint8_t[2]);

  size_t written = 0;
  while (written < length) {
    // Split the sample into its low and high bytes.
    raw[0] = buffer[written] & 0xFF;
    raw[1] = (buffer[written] >> 8) & 0xFF;

    file->Write(raw.get(), 2);
    ++written;
  }

  file->Flush();

  return written;
}
|
||||
|
||||
// Writes |length| floats from |buffer| to |file|, each as 4 little-endian
// bytes, and then flushes |file|. Whatever Write() returns is ignored, so the
// value returned is |length| whenever the arguments are valid, and 0 when an
// argument is invalid or |file| is not open.
size_t WriteFloatBufferToFile(FileWrapper* file,
                              size_t length,
                              const float* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  rtc::scoped_ptr<uint8_t[]> raw(new uint8_t[4]);

  size_t written = 0;
  while (written < length) {
    // Serialize the value, then hand its bytes to the file.
    ConvertFloatToByteArray(buffer[written], raw.get());

    file->Write(raw.get(), 4);
    ++written;
  }

  file->Flush();

  return written;
}
|
||||
|
||||
// Writes |length| doubles from |buffer| to |file|, each as 8 little-endian
// bytes, and then flushes |file|. Whatever Write() returns is ignored, so the
// value returned is |length| whenever the arguments are valid, and 0 when an
// argument is invalid or |file| is not open.
size_t WriteDoubleBufferToFile(FileWrapper* file,
                               size_t length,
                               const double* buffer) {
  const bool usable = file && file->Open() && buffer && length > 0;
  if (!usable) {
    return 0;
  }

  rtc::scoped_ptr<uint8_t[]> raw(new uint8_t[8]);

  size_t written = 0;
  while (written < length) {
    // Serialize the value, then hand its bytes to the file.
    ConvertDoubleToByteArray(buffer[written], raw.get());

    file->Write(raw.get(), 8);
    ++written;
  }

  file->Flush();

  return written;
}
|
||||
|
||||
} // namespace webrtc
|
118
webrtc/modules/audio_processing/transient/file_utils.h
Normal file
118
webrtc/modules/audio_processing/transient/file_utils.h
Normal file
@ -0,0 +1,118 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
|
||||
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/system_wrappers/interface/file_wrapper.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This is a copy of the cast included in the Chromium codebase here:
|
||||
// http://cs.chromium.org/src/third_party/cld/base/casts.h
|
||||
template <class Dest, class Source>
inline Dest bit_cast(const Source& source) {
  // Reinterpreting bytes only makes sense between types of identical size; a
  // mismatch is rejected at compile time.
  static_assert(sizeof(Dest) == sizeof(Source),
                "Dest and Source have different sizes");

  // Copy the object representation byte for byte into a fresh Dest.
  Dest result;
  memcpy(&result, &source, sizeof(result));
  return result;
}
|
||||
|
||||
// Converts the byte array with binary float representation to float.
|
||||
// Bytes must be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertByteArrayToFloat(const uint8_t bytes[4], float* out);
|
||||
|
||||
// Converts the byte array with binary double representation to double.
|
||||
// Bytes must be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertByteArrayToDouble(const uint8_t bytes[8], double* out);
|
||||
|
||||
// Converts a float to a byte array with binary float representation.
|
||||
// Bytes will be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertFloatToByteArray(float value, uint8_t out_bytes[4]);
|
||||
|
||||
// Converts a double to a byte array with binary double representation.
|
||||
// Bytes will be in little-endian order.
|
||||
// Returns 0 if correct, -1 on error.
|
||||
int ConvertDoubleToByteArray(double value, uint8_t out_bytes[8]);
|
||||
|
||||
// Reads |length| 16-bit integers from |file| to |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers read, or 0 on error.
|
||||
size_t ReadInt16BufferFromFile(FileWrapper* file,
|
||||
size_t length,
|
||||
int16_t* buffer);
|
||||
|
||||
// Reads |length| 16-bit integers from |file| and stores those values
|
||||
// (converting them) in |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers read, or 0 on error.
|
||||
size_t ReadInt16FromFileToFloatBuffer(FileWrapper* file,
|
||||
size_t length,
|
||||
float* buffer);
|
||||
|
||||
// Reads |length| 16-bit integers from |file| and stores those values
|
||||
// (converting them) in |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers read, or 0 on error.
|
||||
size_t ReadInt16FromFileToDoubleBuffer(FileWrapper* file,
|
||||
size_t length,
|
||||
double* buffer);
|
||||
|
||||
// Reads |length| floats in binary representation (4 bytes) from |file| to
|
||||
// |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of floats read, or 0 on error.
|
||||
size_t ReadFloatBufferFromFile(FileWrapper* file, size_t length, float* buffer);
|
||||
|
||||
// Reads |length| doubles in binary representation (8 bytes) from |file| to
|
||||
// |buffer|.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of doubles read, or 0 on error.
|
||||
size_t ReadDoubleBufferFromFile(FileWrapper* file,
|
||||
size_t length,
|
||||
double* buffer);
|
||||
|
||||
// Writes |length| 16-bit integers from |buffer| in binary representation (2
|
||||
// bytes) to |file|. It flushes |file|, so after this call there are no
|
||||
// writings pending.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of 16-bit integers written, or 0 on error.
|
||||
size_t WriteInt16BufferToFile(FileWrapper* file,
|
||||
size_t length,
|
||||
const int16_t* buffer);
|
||||
|
||||
// Writes |length| floats from |buffer| in binary representation (4 bytes) to
|
||||
// |file|. It flushes |file|, so after this call there are no writings pending.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of floats written, or 0 on error.
|
||||
size_t WriteFloatBufferToFile(FileWrapper* file,
|
||||
size_t length,
|
||||
const float* buffer);
|
||||
|
||||
// Writes |length| doubles from |buffer| in binary representation (8 bytes) to
|
||||
// |file|. It flushes |file|, so after this call there are no writings pending.
|
||||
// |file| must be previously opened.
|
||||
// Returns the number of doubles written, or 0 on error.
|
||||
size_t WriteDoubleBufferToFile(FileWrapper* file,
|
||||
size_t length,
|
||||
const double* buffer);
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_FILE_UTILS_H_
|
49
webrtc/modules/audio_processing/transient/moving_moments.cc
Normal file
49
webrtc/modules/audio_processing/transient/moving_moments.cc
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Builds a window of |length| samples, all zero, so that both running sums
// start at zero as well.
MovingMoments::MovingMoments(size_t length)
    : length_(length),
      queue_(),
      sum_(0.0),
      sum_of_squares_(0.0) {
  assert(length > 0);
  for (size_t remaining = length; remaining > 0; --remaining) {
    queue_.push(0.0);
  }
}
|
||||
|
||||
// Nothing to release by hand; the members clean up after themselves.
MovingMoments::~MovingMoments() = default;
|
||||
|
||||
void MovingMoments::CalculateMoments(const float* in, size_t in_length,
|
||||
float* first, float* second) {
|
||||
assert(in && in_length > 0 && first && second);
|
||||
|
||||
for (size_t i = 0; i < in_length; ++i) {
|
||||
const float old_value = queue_.front();
|
||||
queue_.pop();
|
||||
queue_.push(in[i]);
|
||||
|
||||
sum_ += in[i] - old_value;
|
||||
sum_of_squares_ += in[i] * in[i] - old_value * old_value;
|
||||
first[i] = sum_ / length_;
|
||||
second[i] = sum_of_squares_ / length_;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
52
webrtc/modules/audio_processing/transient/moving_moments.h
Normal file
52
webrtc/modules/audio_processing/transient/moving_moments.h
Normal file
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
|
||||
|
||||
#include <queue>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Calculates the first and second moments for each value of a buffer taking
|
||||
// into account a given number of previous values.
|
||||
// It preserves its state, so it can be multiple-called.
|
||||
// TODO(chadan): Implement a function that takes a buffer of first moments and a
|
||||
// buffer of second moments; and calculates the variances. When needed.
|
||||
// TODO(chadan): Add functionality to update with a buffer but only output are
|
||||
// the last values of the moments. When needed.
|
||||
class MovingMoments {
 public:
  // Builds an object whose moments always cover the |length| most recent
  // values, the newest one included.
  explicit MovingMoments(size_t length);
  ~MovingMoments();

  // Feeds the |in_length| values of |in| one at a time. After each value the
  // current first moment is stored in |first| and the current second moment
  // in |second|, so both output buffers need room for |in_length| floats.
  void CalculateMoments(const float* in, size_t in_length,
                        float* first, float* second);

 private:
  // Number of values covered by the moments.
  size_t length_;
  // The |length_| most recent input values, oldest at the front.
  std::queue<float> queue_;
  // Running sum of the values held in |queue_|.
  float sum_;
  // Running sum of the squared values held in |queue_|.
  float sum_of_squares_;
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_MOVING_MOMENTS_H_
|
173
webrtc/modules/audio_processing/transient/transient_detector.cc
Normal file
173
webrtc/modules/audio_processing/transient/transient_detector.cc
Normal file
@ -0,0 +1,173 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/common.h"
|
||||
#include "webrtc/modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
|
||||
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
|
||||
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
static const int kTransientLengthMs = 30;
|
||||
static const int kChunksAtStartupLeftToDelete =
|
||||
kTransientLengthMs / ts::kChunkSizeMs;
|
||||
static const float kDetectThreshold = 16.f;
|
||||
|
||||
// Sets up the wavelet packet tree, one moving-moments tracker per tree leaf,
// the scratch buffers for the moments and the history of previous results.
// |sample_rate_hz| must be 8000, 16000, 32000 or 48000.
TransientDetector::TransientDetector(int sample_rate_hz)
    : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
      last_first_moment_(),
      last_second_moment_(),
      chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
      reference_energy_(1.f),
      using_reference_(false) {
  assert(sample_rate_hz == ts::kSampleRate8kHz ||
         sample_rate_hz == ts::kSampleRate16kHz ||
         sample_rate_hz == ts::kSampleRate32kHz ||
         sample_rate_hz == ts::kSampleRate48kHz);
  int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;

  // Round both lengths down to a multiple of |kLeaves| so that no data is
  // lost while downsampling.
  samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
  samples_per_transient -= samples_per_transient % kLeaves;

  tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
  wpd_tree_.reset(new WPDTree(samples_per_chunk_,
                              kDaubechies8HighPassCoefficients,
                              kDaubechies8LowPassCoefficients,
                              kDaubechies8CoefficientsLength,
                              kLevels));

  // Every leaf gets its own tracker covering one transient worth of
  // leaf-rate samples.
  const size_t moments_length = samples_per_transient / kLeaves;
  for (size_t leaf = 0; leaf < kLeaves; ++leaf) {
    moving_moments_[leaf].reset(new MovingMoments(moments_length));
  }

  first_moments_.reset(new float[tree_leaves_data_length_]);
  second_moments_.reset(new float[tree_leaves_data_length_]);

  // Start the result history with zeros.
  for (int remaining = kChunksAtStartupLeftToDelete; remaining > 0;
       --remaining) {
    previous_results_.push_back(0.f);
  }
}
|
||||
|
||||
// Nothing to release by hand; the members clean up after themselves.
TransientDetector::~TransientDetector() = default;
|
||||
|
||||
float TransientDetector::Detect(const float* data,
|
||||
size_t data_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length) {
|
||||
assert(data && data_length == samples_per_chunk_);
|
||||
|
||||
// TODO(aluebs): Check if these errors can logically happen and if not assert
|
||||
// on them.
|
||||
if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
|
||||
return -1.f;
|
||||
}
|
||||
|
||||
float result = 0.f;
|
||||
|
||||
for (size_t i = 0; i < kLeaves; ++i) {
|
||||
WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
|
||||
|
||||
moving_moments_[i]->CalculateMoments(leaf->data(),
|
||||
tree_leaves_data_length_,
|
||||
first_moments_.get(),
|
||||
second_moments_.get());
|
||||
|
||||
// Add value delayed (Use the last moments from the last call to Detect).
|
||||
float unbiased_data = leaf->data()[0] - last_first_moment_[i];
|
||||
result +=
|
||||
unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
|
||||
|
||||
// Add new values.
|
||||
for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
|
||||
unbiased_data = leaf->data()[j] - first_moments_[j - 1];
|
||||
result +=
|
||||
unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
|
||||
}
|
||||
|
||||
last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
|
||||
last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
|
||||
}
|
||||
|
||||
result /= tree_leaves_data_length_;
|
||||
|
||||
result *= ReferenceDetectionValue(reference_data, reference_length);
|
||||
|
||||
if (chunks_at_startup_left_to_delete_ > 0) {
|
||||
chunks_at_startup_left_to_delete_--;
|
||||
result = 0.f;
|
||||
}
|
||||
|
||||
if (result >= kDetectThreshold) {
|
||||
result = 1.f;
|
||||
} else {
|
||||
// Get proportional value.
|
||||
// Proportion achieved with a squared raised cosine function with domain
|
||||
// [0, kDetectThreshold) and image [0, 1), it's always increasing.
|
||||
const float horizontal_scaling = ts::kPi / kDetectThreshold;
|
||||
const float kHorizontalShift = ts::kPi;
|
||||
const float kVerticalScaling = 0.5f;
|
||||
const float kVerticalShift = 1.f;
|
||||
|
||||
result = (cos(result * horizontal_scaling + kHorizontalShift)
|
||||
+ kVerticalShift) * kVerticalScaling;
|
||||
result *= result;
|
||||
}
|
||||
|
||||
previous_results_.pop_front();
|
||||
previous_results_.push_back(result);
|
||||
|
||||
// In the current implementation we return the max of the current result and
|
||||
// the previous results, so the high results have a width equals to
|
||||
// |transient_length|.
|
||||
return *std::max_element(previous_results_.begin(), previous_results_.end());
|
||||
}
|
||||
|
||||
// Looks for the highest slope and compares it with the previous ones.
|
||||
// An exponential transformation takes this to the [0, 1] range. This value is
|
||||
// multiplied by the detection result to avoid false positives.
|
||||
float TransientDetector::ReferenceDetectionValue(const float* data,
|
||||
size_t length) {
|
||||
if (data == NULL) {
|
||||
using_reference_ = false;
|
||||
return 1.f;
|
||||
}
|
||||
static const float kEnergyRatioThreshold = 0.2f;
|
||||
static const float kReferenceNonLinearity = 20.f;
|
||||
static const float kMemory = 0.99f;
|
||||
float reference_energy = 0.f;
|
||||
for (size_t i = 1; i < length; ++i) {
|
||||
reference_energy += data[i] * data[i];
|
||||
}
|
||||
if (reference_energy == 0.f) {
|
||||
using_reference_ = false;
|
||||
return 1.f;
|
||||
}
|
||||
assert(reference_energy_ != 0);
|
||||
float result = 1.f / (1.f + exp(kReferenceNonLinearity *
|
||||
(kEnergyRatioThreshold -
|
||||
reference_energy / reference_energy_)));
|
||||
reference_energy_ =
|
||||
kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
|
||||
|
||||
using_reference_ = true;
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
@ -0,0 +1,87 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
|
||||
|
||||
#include <deque>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/transient/moving_moments.h"
|
||||
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// This is an implementation of the transient detector described in "Causal
|
||||
// Wavelet based transient detector".
|
||||
// Calculates the log-likelihood of a transient to happen on a signal at any
|
||||
// given time based on the previous samples; it uses a WPD tree to analyze the
|
||||
// signal. It preserves its state, so it can be multiple-called.
|
||||
class TransientDetector {
|
||||
public:
|
||||
// TODO(chadan): The only supported wavelet is Daubechies 8 using a WPD tree
|
||||
// of 3 levels. Make an overloaded constructor to allow different wavelets and
|
||||
// depths of the tree. When needed.
|
||||
|
||||
// Creates a wavelet based transient detector.
|
||||
TransientDetector(int sample_rate_hz);
|
||||
|
||||
~TransientDetector();
|
||||
|
||||
// Calculates the log-likelihood of the existence of a transient in |data|.
|
||||
// |data_length| has to be equal to |samples_per_chunk_|.
|
||||
// Returns a value between 0 and 1, as a non linear representation of this
|
||||
// likelihood.
|
||||
// Returns a negative value on error.
|
||||
float Detect(const float* data,
|
||||
size_t data_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length);
|
||||
|
||||
bool using_reference() { return using_reference_; }
|
||||
|
||||
private:
|
||||
float ReferenceDetectionValue(const float* data, size_t length);
|
||||
|
||||
static const size_t kLevels = 3;
|
||||
static const size_t kLeaves = 1 << kLevels;
|
||||
|
||||
size_t samples_per_chunk_;
|
||||
|
||||
rtc::scoped_ptr<WPDTree> wpd_tree_;
|
||||
size_t tree_leaves_data_length_;
|
||||
|
||||
// A MovingMoments object is needed for each leaf in the WPD tree.
|
||||
rtc::scoped_ptr<MovingMoments> moving_moments_[kLeaves];
|
||||
|
||||
rtc::scoped_ptr<float[]> first_moments_;
|
||||
rtc::scoped_ptr<float[]> second_moments_;
|
||||
|
||||
// Stores the last calculated moments from the previous detection.
|
||||
float last_first_moment_[kLeaves];
|
||||
float last_second_moment_[kLeaves];
|
||||
|
||||
// We keep track of the previous results from the previous chunks, so it can
|
||||
// be used to effectively give results according to the |transient_length|.
|
||||
std::deque<float> previous_results_;
|
||||
|
||||
// Number of chunks that are going to return only zeros at the beginning of
|
||||
// the detection. It helps to avoid infs and nans due to the lack of
|
||||
// information.
|
||||
int chunks_at_startup_left_to_delete_;
|
||||
|
||||
float reference_energy_;
|
||||
|
||||
bool using_reference_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_DETECTOR_H_
|
@ -0,0 +1,424 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/transient_suppressor.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/common_audio/fft4g.h"
|
||||
#include "webrtc/common_audio/include/audio_util.h"
|
||||
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
|
||||
#include "webrtc/modules/audio_processing/transient/common.h"
|
||||
#include "webrtc/modules/audio_processing/transient/transient_detector.h"
|
||||
#include "webrtc/modules/audio_processing/ns/windows_private.h"
|
||||
#include "webrtc/system_wrappers/interface/logging.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Weight of the current magnitudes when the running spectral mean is updated;
// the previous mean is weighted by (1 - kMeanIIRCoefficient).
static const float kMeanIIRCoefficient = 0.5f;
|
||||
// A chunk whose voice probability is below this value is treated as unvoiced
// by UpdateRestoration().
static const float kVoiceThreshold = 0.02f;
|
||||
|
||||
// TODO(aluebs): Check if these values work also for 48kHz.
|
||||
// First FFT bin of the band averaged by SoftRestoration(); also the centre of
// the low-frequency sigmoid of |mean_factor_|.
static const size_t kMinVoiceBin = 3;
|
||||
// One past the last FFT bin of the band averaged by SoftRestoration(); also
// the centre of the high-frequency sigmoid of |mean_factor_|.
static const size_t kMaxVoiceBin = 60;
|
||||
|
||||
namespace {
|
||||
|
||||
// Cheap magnitude estimate for the complex value a + bi: the sum of the
// absolute values of both parts (not the Euclidean norm).
float ComplexMagnitude(float a, float b) {
  const float real_abs = std::abs(a);
  const float imag_abs = std::abs(b);
  return real_abs + imag_abs;
}
|
||||
|
||||
} // namespace
|
||||
|
||||
// Builds an unconfigured suppressor: every length is zero, no window is
// selected and no buffer is allocated. Initialize() must succeed before
// Suppress() can accept any data.
TransientSuppressor::TransientSuppressor()
    : data_length_(0),
      detection_length_(0),
      analysis_length_(0),
      buffer_delay_(0),
      complex_analysis_length_(0),
      num_channels_(0),
      window_(NULL),
      detector_smoothed_(0.f),
      keypress_counter_(0),
      chunks_since_keypress_(0),
      detection_enabled_(false),
      suppression_enabled_(false),
      use_hard_restoration_(false),
      chunks_since_voice_change_(0),
      seed_(182),
      using_reference_(false) {}
|
||||
|
||||
// Defined out of line: the header only forward-declares TransientDetector, so
// the scoped_ptr<TransientDetector> member has to be destroyed here, where the
// complete type is visible.
TransientSuppressor::~TransientSuppressor() {}
|
||||
|
||||
int TransientSuppressor::Initialize(int sample_rate_hz,
|
||||
int detection_rate_hz,
|
||||
int num_channels) {
|
||||
switch (sample_rate_hz) {
|
||||
case ts::kSampleRate8kHz:
|
||||
analysis_length_ = 128u;
|
||||
window_ = kBlocks80w128;
|
||||
break;
|
||||
case ts::kSampleRate16kHz:
|
||||
analysis_length_ = 256u;
|
||||
window_ = kBlocks160w256;
|
||||
break;
|
||||
case ts::kSampleRate32kHz:
|
||||
analysis_length_ = 512u;
|
||||
window_ = kBlocks320w512;
|
||||
break;
|
||||
case ts::kSampleRate48kHz:
|
||||
analysis_length_ = 1024u;
|
||||
window_ = kBlocks480w1024;
|
||||
break;
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
if (detection_rate_hz != ts::kSampleRate8kHz &&
|
||||
detection_rate_hz != ts::kSampleRate16kHz &&
|
||||
detection_rate_hz != ts::kSampleRate32kHz &&
|
||||
detection_rate_hz != ts::kSampleRate48kHz) {
|
||||
return -1;
|
||||
}
|
||||
if (num_channels <= 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
detector_.reset(new TransientDetector(detection_rate_hz));
|
||||
data_length_ = sample_rate_hz * ts::kChunkSizeMs / 1000;
|
||||
if (data_length_ > analysis_length_) {
|
||||
assert(false);
|
||||
return -1;
|
||||
}
|
||||
buffer_delay_ = analysis_length_ - data_length_;
|
||||
|
||||
complex_analysis_length_ = analysis_length_ / 2 + 1;
|
||||
assert(complex_analysis_length_ >= kMaxVoiceBin);
|
||||
num_channels_ = num_channels;
|
||||
in_buffer_.reset(new float[analysis_length_ * num_channels_]);
|
||||
memset(in_buffer_.get(),
|
||||
0,
|
||||
analysis_length_ * num_channels_ * sizeof(in_buffer_[0]));
|
||||
detection_length_ = detection_rate_hz * ts::kChunkSizeMs / 1000;
|
||||
detection_buffer_.reset(new float[detection_length_]);
|
||||
memset(detection_buffer_.get(),
|
||||
0,
|
||||
detection_length_ * sizeof(detection_buffer_[0]));
|
||||
out_buffer_.reset(new float[analysis_length_ * num_channels_]);
|
||||
memset(out_buffer_.get(),
|
||||
0,
|
||||
analysis_length_ * num_channels_ * sizeof(out_buffer_[0]));
|
||||
// ip[0] must be zero to trigger initialization using rdft().
|
||||
size_t ip_length = 2 + sqrtf(analysis_length_);
|
||||
ip_.reset(new size_t[ip_length]());
|
||||
memset(ip_.get(), 0, ip_length * sizeof(ip_[0]));
|
||||
wfft_.reset(new float[complex_analysis_length_ - 1]);
|
||||
memset(wfft_.get(), 0, (complex_analysis_length_ - 1) * sizeof(wfft_[0]));
|
||||
spectral_mean_.reset(new float[complex_analysis_length_ * num_channels_]);
|
||||
memset(spectral_mean_.get(),
|
||||
0,
|
||||
complex_analysis_length_ * num_channels_ * sizeof(spectral_mean_[0]));
|
||||
fft_buffer_.reset(new float[analysis_length_ + 2]);
|
||||
memset(fft_buffer_.get(), 0, (analysis_length_ + 2) * sizeof(fft_buffer_[0]));
|
||||
magnitudes_.reset(new float[complex_analysis_length_]);
|
||||
memset(magnitudes_.get(),
|
||||
0,
|
||||
complex_analysis_length_ * sizeof(magnitudes_[0]));
|
||||
mean_factor_.reset(new float[complex_analysis_length_]);
|
||||
|
||||
static const float kFactorHeight = 10.f;
|
||||
static const float kLowSlope = 1.f;
|
||||
static const float kHighSlope = 0.3f;
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
mean_factor_[i] =
|
||||
kFactorHeight /
|
||||
(1.f + exp(kLowSlope * static_cast<int>(i - kMinVoiceBin))) +
|
||||
kFactorHeight /
|
||||
(1.f + exp(kHighSlope * static_cast<int>(kMaxVoiceBin - i)));
|
||||
}
|
||||
detector_smoothed_ = 0.f;
|
||||
keypress_counter_ = 0;
|
||||
chunks_since_keypress_ = 0;
|
||||
detection_enabled_ = false;
|
||||
suppression_enabled_ = false;
|
||||
use_hard_restoration_ = false;
|
||||
chunks_since_voice_change_ = 0;
|
||||
seed_ = 182;
|
||||
using_reference_ = false;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int TransientSuppressor::Suppress(float* data,
|
||||
size_t data_length,
|
||||
int num_channels,
|
||||
const float* detection_data,
|
||||
size_t detection_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length,
|
||||
float voice_probability,
|
||||
bool key_pressed) {
|
||||
if (!data || data_length != data_length_ || num_channels != num_channels_ ||
|
||||
detection_length != detection_length_ || voice_probability < 0 ||
|
||||
voice_probability > 1) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
UpdateKeypress(key_pressed);
|
||||
UpdateBuffers(data);
|
||||
|
||||
int result = 0;
|
||||
if (detection_enabled_) {
|
||||
UpdateRestoration(voice_probability);
|
||||
|
||||
if (!detection_data) {
|
||||
// Use the input data of the first channel if special detection data is
|
||||
// not supplied.
|
||||
detection_data = &in_buffer_[buffer_delay_];
|
||||
}
|
||||
|
||||
float detector_result = detector_->Detect(
|
||||
detection_data, detection_length, reference_data, reference_length);
|
||||
if (detector_result < 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
using_reference_ = detector_->using_reference();
|
||||
|
||||
// |detector_smoothed_| follows the |detector_result| when this last one is
|
||||
// increasing, but has an exponential decaying tail to be able to suppress
|
||||
// the ringing of keyclicks.
|
||||
float smooth_factor = using_reference_ ? 0.6 : 0.1;
|
||||
detector_smoothed_ = detector_result >= detector_smoothed_
|
||||
? detector_result
|
||||
: smooth_factor * detector_smoothed_ +
|
||||
(1 - smooth_factor) * detector_result;
|
||||
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
Suppress(&in_buffer_[i * analysis_length_],
|
||||
&spectral_mean_[i * complex_analysis_length_],
|
||||
&out_buffer_[i * analysis_length_]);
|
||||
}
|
||||
}
|
||||
|
||||
// If the suppression isn't enabled, we use the in buffer to delay the signal
|
||||
// appropriately. This also gives time for the out buffer to be refreshed with
|
||||
// new data between detection and suppression getting enabled.
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
memcpy(&data[i * data_length_],
|
||||
suppression_enabled_ ? &out_buffer_[i * analysis_length_]
|
||||
: &in_buffer_[i * analysis_length_],
|
||||
data_length_ * sizeof(*data));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// This should only be called when detection is enabled. UpdateBuffers() must
|
||||
// have been called. At return, |out_buffer_| will be filled with the
|
||||
// processed output.
|
||||
void TransientSuppressor::Suppress(float* in_ptr,
|
||||
float* spectral_mean,
|
||||
float* out_ptr) {
|
||||
// Go to frequency domain.
|
||||
for (size_t i = 0; i < analysis_length_; ++i) {
|
||||
// TODO(aluebs): Rename windows
|
||||
fft_buffer_[i] = in_ptr[i] * window_[i];
|
||||
}
|
||||
|
||||
WebRtc_rdft(analysis_length_, 1, fft_buffer_.get(), ip_.get(), wfft_.get());
|
||||
|
||||
// Since WebRtc_rdft puts R[n/2] in fft_buffer_[1], we move it to the end
|
||||
// for convenience.
|
||||
fft_buffer_[analysis_length_] = fft_buffer_[1];
|
||||
fft_buffer_[analysis_length_ + 1] = 0.f;
|
||||
fft_buffer_[1] = 0.f;
|
||||
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
magnitudes_[i] = ComplexMagnitude(fft_buffer_[i * 2],
|
||||
fft_buffer_[i * 2 + 1]);
|
||||
}
|
||||
// Restore audio if necessary.
|
||||
if (suppression_enabled_) {
|
||||
if (use_hard_restoration_) {
|
||||
HardRestoration(spectral_mean);
|
||||
} else {
|
||||
SoftRestoration(spectral_mean);
|
||||
}
|
||||
}
|
||||
|
||||
// Update the spectral mean.
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
spectral_mean[i] = (1 - kMeanIIRCoefficient) * spectral_mean[i] +
|
||||
kMeanIIRCoefficient * magnitudes_[i];
|
||||
}
|
||||
|
||||
// Back to time domain.
|
||||
// Put R[n/2] back in fft_buffer_[1].
|
||||
fft_buffer_[1] = fft_buffer_[analysis_length_];
|
||||
|
||||
WebRtc_rdft(analysis_length_,
|
||||
-1,
|
||||
fft_buffer_.get(),
|
||||
ip_.get(),
|
||||
wfft_.get());
|
||||
const float fft_scaling = 2.f / analysis_length_;
|
||||
|
||||
for (size_t i = 0; i < analysis_length_; ++i) {
|
||||
out_ptr[i] += fft_buffer_[i] * window_[i] * fft_scaling;
|
||||
}
|
||||
}
|
||||
|
||||
void TransientSuppressor::UpdateKeypress(bool key_pressed) {
|
||||
const int kKeypressPenalty = 1000 / ts::kChunkSizeMs;
|
||||
const int kIsTypingThreshold = 1000 / ts::kChunkSizeMs;
|
||||
const int kChunksUntilNotTyping = 4000 / ts::kChunkSizeMs; // 4 seconds.
|
||||
|
||||
if (key_pressed) {
|
||||
keypress_counter_ += kKeypressPenalty;
|
||||
chunks_since_keypress_ = 0;
|
||||
detection_enabled_ = true;
|
||||
}
|
||||
keypress_counter_ = std::max(0, keypress_counter_ - 1);
|
||||
|
||||
if (keypress_counter_ > kIsTypingThreshold) {
|
||||
if (!suppression_enabled_) {
|
||||
LOG(LS_INFO) << "[ts] Transient suppression is now enabled.";
|
||||
}
|
||||
suppression_enabled_ = true;
|
||||
keypress_counter_ = 0;
|
||||
}
|
||||
|
||||
if (detection_enabled_ &&
|
||||
++chunks_since_keypress_ > kChunksUntilNotTyping) {
|
||||
if (suppression_enabled_) {
|
||||
LOG(LS_INFO) << "[ts] Transient suppression is now disabled.";
|
||||
}
|
||||
detection_enabled_ = false;
|
||||
suppression_enabled_ = false;
|
||||
keypress_counter_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
void TransientSuppressor::UpdateRestoration(float voice_probability) {
|
||||
const int kHardRestorationOffsetDelay = 3;
|
||||
const int kHardRestorationOnsetDelay = 80;
|
||||
|
||||
bool not_voiced = voice_probability < kVoiceThreshold;
|
||||
|
||||
if (not_voiced == use_hard_restoration_) {
|
||||
chunks_since_voice_change_ = 0;
|
||||
} else {
|
||||
++chunks_since_voice_change_;
|
||||
|
||||
if ((use_hard_restoration_ &&
|
||||
chunks_since_voice_change_ > kHardRestorationOffsetDelay) ||
|
||||
(!use_hard_restoration_ &&
|
||||
chunks_since_voice_change_ > kHardRestorationOnsetDelay)) {
|
||||
use_hard_restoration_ = not_voiced;
|
||||
chunks_since_voice_change_ = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Shift buffers to make way for new data. Must be called after
|
||||
// |detection_enabled_| is updated by UpdateKeypress().
|
||||
void TransientSuppressor::UpdateBuffers(float* data) {
|
||||
// TODO(aluebs): Change to ring buffer.
|
||||
memmove(in_buffer_.get(),
|
||||
&in_buffer_[data_length_],
|
||||
(buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
|
||||
sizeof(in_buffer_[0]));
|
||||
// Copy new chunk to buffer.
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
memcpy(&in_buffer_[buffer_delay_ + i * analysis_length_],
|
||||
&data[i * data_length_],
|
||||
data_length_ * sizeof(*data));
|
||||
}
|
||||
if (detection_enabled_) {
|
||||
// Shift previous chunk in out buffer.
|
||||
memmove(out_buffer_.get(),
|
||||
&out_buffer_[data_length_],
|
||||
(buffer_delay_ + (num_channels_ - 1) * analysis_length_) *
|
||||
sizeof(out_buffer_[0]));
|
||||
// Initialize new chunk in out buffer.
|
||||
for (int i = 0; i < num_channels_; ++i) {
|
||||
memset(&out_buffer_[buffer_delay_ + i * analysis_length_],
|
||||
0,
|
||||
data_length_ * sizeof(out_buffer_[0]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restores the unvoiced signal if a click is present.
|
||||
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
|
||||
// the spectral mean. The attenuation depends on |detector_smoothed_|.
|
||||
// If a restoration takes place, the |magnitudes_| are updated to the new value.
|
||||
void TransientSuppressor::HardRestoration(float* spectral_mean) {
|
||||
const float detector_result =
|
||||
1.f - pow(1.f - detector_smoothed_, using_reference_ ? 200.f : 50.f);
|
||||
// To restore, we get the peaks in the spectrum. If higher than the previous
|
||||
// spectral mean we adjust them.
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0) {
|
||||
// RandU() generates values on [0, int16::max()]
|
||||
const float phase = 2 * ts::kPi * WebRtcSpl_RandU(&seed_) /
|
||||
std::numeric_limits<int16_t>::max();
|
||||
const float scaled_mean = detector_result * spectral_mean[i];
|
||||
|
||||
fft_buffer_[i * 2] = (1 - detector_result) * fft_buffer_[i * 2] +
|
||||
scaled_mean * cosf(phase);
|
||||
fft_buffer_[i * 2 + 1] = (1 - detector_result) * fft_buffer_[i * 2 + 1] +
|
||||
scaled_mean * sinf(phase);
|
||||
magnitudes_[i] = magnitudes_[i] -
|
||||
detector_result * (magnitudes_[i] - spectral_mean[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Restores the voiced signal if a click is present.
|
||||
// Attenuates by a certain factor every peak in the |fft_buffer_| that exceeds
|
||||
// the spectral mean and that is lower than some function of the current block
|
||||
// frequency mean. The attenuation depends on |detector_smoothed_|.
|
||||
// If a restoration takes place, the |magnitudes_| are updated to the new value.
|
||||
void TransientSuppressor::SoftRestoration(float* spectral_mean) {
|
||||
// Get the spectral magnitude mean of the current block.
|
||||
float block_frequency_mean = 0;
|
||||
for (size_t i = kMinVoiceBin; i < kMaxVoiceBin; ++i) {
|
||||
block_frequency_mean += magnitudes_[i];
|
||||
}
|
||||
block_frequency_mean /= (kMaxVoiceBin - kMinVoiceBin);
|
||||
|
||||
// To restore, we get the peaks in the spectrum. If higher than the
|
||||
// previous spectral mean and lower than a factor of the block mean
|
||||
// we adjust them. The factor is a double sigmoid that has a minimum in the
|
||||
// voice frequency range (300Hz - 3kHz).
|
||||
for (size_t i = 0; i < complex_analysis_length_; ++i) {
|
||||
if (magnitudes_[i] > spectral_mean[i] && magnitudes_[i] > 0 &&
|
||||
(using_reference_ ||
|
||||
magnitudes_[i] < block_frequency_mean * mean_factor_[i])) {
|
||||
const float new_magnitude =
|
||||
magnitudes_[i] -
|
||||
detector_smoothed_ * (magnitudes_[i] - spectral_mean[i]);
|
||||
const float magnitude_ratio = new_magnitude / magnitudes_[i];
|
||||
|
||||
fft_buffer_[i * 2] *= magnitude_ratio;
|
||||
fft_buffer_[i * 2 + 1] *= magnitude_ratio;
|
||||
magnitudes_[i] = new_magnitude;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
120
webrtc/modules/audio_processing/transient/transient_suppressor.h
Normal file
120
webrtc/modules/audio_processing/transient/transient_suppressor.h
Normal file
@ -0,0 +1,120 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
||||
|
||||
#include <deque>
|
||||
#include <set>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/test/testsupport/gtest_prod_util.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class TransientDetector;
|
||||
|
||||
// Detects transients in an audio stream and suppress them using a simple
|
||||
// restoration algorithm that attenuates unexpected spikes in the spectrum.
|
||||
class TransientSuppressor {
|
||||
public:
|
||||
TransientSuppressor();
|
||||
~TransientSuppressor();
|
||||
|
||||
int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
|
||||
|
||||
// Processes a |data| chunk, and returns it with keystrokes suppressed from
|
||||
// it. The float format is assumed to be int16 ranged. If there are more than
|
||||
// one channel, the chunks are concatenated one after the other in |data|.
|
||||
// |data_length| must be equal to |data_length_|.
|
||||
// |num_channels| must be equal to |num_channels_|.
|
||||
// A sub-band, ideally the higher, can be used as |detection_data|. If it is
|
||||
// NULL, |data| is used for the detection too. The |detection_data| is always
|
||||
// assumed mono.
|
||||
// If a reference signal (e.g. keyboard microphone) is available, it can be
|
||||
// passed in as |reference_data|. It is assumed mono and must have the same
|
||||
// length as |data|. NULL is accepted if unavailable.
|
||||
// This suppressor performs better if voice information is available.
|
||||
// |voice_probability| is the probability of voice being present in this chunk
|
||||
// of audio. If voice information is not available, |voice_probability| must
|
||||
// always be set to 1.
|
||||
// |key_pressed| determines if a key was pressed on this audio chunk.
|
||||
// Returns 0 on success and -1 otherwise.
|
||||
int Suppress(float* data,
|
||||
size_t data_length,
|
||||
int num_channels,
|
||||
const float* detection_data,
|
||||
size_t detection_length,
|
||||
const float* reference_data,
|
||||
size_t reference_length,
|
||||
float voice_probability,
|
||||
bool key_pressed);
|
||||
|
||||
private:
|
||||
FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
|
||||
TypingDetectionLogicWorksAsExpectedForMono);
|
||||
void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
|
||||
|
||||
void UpdateKeypress(bool key_pressed);
|
||||
void UpdateRestoration(float voice_probability);
|
||||
|
||||
void UpdateBuffers(float* data);
|
||||
|
||||
void HardRestoration(float* spectral_mean);
|
||||
void SoftRestoration(float* spectral_mean);
|
||||
|
||||
rtc::scoped_ptr<TransientDetector> detector_;
|
||||
|
||||
size_t data_length_;
|
||||
size_t detection_length_;
|
||||
size_t analysis_length_;
|
||||
size_t buffer_delay_;
|
||||
size_t complex_analysis_length_;
|
||||
int num_channels_;
|
||||
// Input buffer where the original samples are stored.
|
||||
rtc::scoped_ptr<float[]> in_buffer_;
|
||||
rtc::scoped_ptr<float[]> detection_buffer_;
|
||||
// Output buffer where the restored samples are stored.
|
||||
rtc::scoped_ptr<float[]> out_buffer_;
|
||||
|
||||
// Arrays for fft.
|
||||
rtc::scoped_ptr<size_t[]> ip_;
|
||||
rtc::scoped_ptr<float[]> wfft_;
|
||||
|
||||
rtc::scoped_ptr<float[]> spectral_mean_;
|
||||
|
||||
// Stores the data for the fft.
|
||||
rtc::scoped_ptr<float[]> fft_buffer_;
|
||||
|
||||
rtc::scoped_ptr<float[]> magnitudes_;
|
||||
|
||||
const float* window_;
|
||||
|
||||
rtc::scoped_ptr<float[]> mean_factor_;
|
||||
|
||||
float detector_smoothed_;
|
||||
|
||||
int keypress_counter_;
|
||||
int chunks_since_keypress_;
|
||||
bool detection_enabled_;
|
||||
bool suppression_enabled_;
|
||||
|
||||
bool use_hard_restoration_;
|
||||
int chunks_since_voice_change_;
|
||||
|
||||
uint32_t seed_;
|
||||
|
||||
bool using_reference_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
|
71
webrtc/modules/audio_processing/transient/wpd_node.cc
Normal file
71
webrtc/modules/audio_processing/transient/wpd_node.cc
Normal file
@ -0,0 +1,71 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/wpd_node.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/common_audio/fir_filter.h"
|
||||
#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Creates a node that outputs |length| samples per update.
// |coefficients| (|coefficients_length| entries) configure the node's FIR
// filter. The data buffer is allocated with 2 * |length| + 1 entries and
// starts out zeroed.
WPDNode::WPDNode(size_t length,
                 const float* coefficients,
                 size_t coefficients_length)
    : // The data buffer has parent data length to be able to contain and filter
      // it.
      data_(new float[2 * length + 1]),
      length_(length),
      filter_(FIRFilter::Create(coefficients,
                                coefficients_length,
                                2 * length + 1)) {
  assert(length > 0 && coefficients && coefficients_length > 0);
  // memset() takes its fill value as an int; pass 0 rather than the float
  // literal 0.f, which was only correct through an implicit conversion.
  memset(data_.get(), 0, (2 * length + 1) * sizeof(data_[0]));
}
|
||||
|
||||
// Defined out of line: the header only forward-declares FIRFilter, so the
// scoped_ptr<FIRFilter> member has to be destroyed here, where the complete
// type is visible.
WPDNode::~WPDNode() {}
|
||||
|
||||
int WPDNode::Update(const float* parent_data, size_t parent_data_length) {
|
||||
if (!parent_data || (parent_data_length / 2) != length_) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Filter data.
|
||||
filter_->Filter(parent_data, parent_data_length, data_.get());
|
||||
|
||||
// Decimate data.
|
||||
const bool kOddSequence = true;
|
||||
size_t output_samples = DyadicDecimate(
|
||||
data_.get(), parent_data_length, kOddSequence, data_.get(), length_);
|
||||
if (output_samples != length_) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Get abs to all values.
|
||||
for (size_t i = 0; i < length_; ++i) {
|
||||
data_[i] = fabs(data_[i]);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int WPDNode::set_data(const float* new_data, size_t length) {
|
||||
if (!new_data || length != length_) {
|
||||
return -1;
|
||||
}
|
||||
memcpy(data_.get(), new_data, length * sizeof(data_[0]));
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
46
webrtc/modules/audio_processing/transient/wpd_node.h
Normal file
46
webrtc/modules/audio_processing/transient/wpd_node.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/typedefs.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
class FIRFilter;
|
||||
|
||||
// A single node of a Wavelet Packet Decomposition (WPD) tree.
|
||||
class WPDNode {
|
||||
public:
|
||||
// Creates a WPDNode. The data vector will contain zeros. The filter will have
|
||||
// the coefficients provided.
|
||||
WPDNode(size_t length, const float* coefficients, size_t coefficients_length);
|
||||
~WPDNode();
|
||||
|
||||
// Updates the node data. |parent_data| / 2 must be equals to |length_|.
|
||||
// Returns 0 if correct, and -1 otherwise.
|
||||
int Update(const float* parent_data, size_t parent_data_length);
|
||||
|
||||
const float* data() const { return data_.get(); }
|
||||
// Returns 0 if correct, and -1 otherwise.
|
||||
int set_data(const float* new_data, size_t length);
|
||||
size_t length() const { return length_; }
|
||||
|
||||
private:
|
||||
rtc::scoped_ptr<float[]> data_;
|
||||
size_t length_;
|
||||
rtc::scoped_ptr<FIRFilter> filter_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_NODE_H_
|
119
webrtc/modules/audio_processing/transient/wpd_tree.cc
Normal file
119
webrtc/modules/audio_processing/transient/wpd_tree.cc
Normal file
@ -0,0 +1,119 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#include "webrtc/modules/audio_processing/transient/wpd_tree.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/transient/dyadic_decimator.h"
|
||||
#include "webrtc/modules/audio_processing/transient/wpd_node.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Builds the complete tree: a root of |data_length| samples plus |levels|
// further levels. Each left child is created with the low-pass coefficients,
// each right child with the high-pass ones, and every child is half as long as
// its parent.
WPDTree::WPDTree(size_t data_length, const float* high_pass_coefficients,
                 const float* low_pass_coefficients, size_t coefficients_length,
                 int levels)
    : data_length_(data_length),
      levels_(levels),
      num_nodes_((1 << (levels + 1)) - 1) {
  assert(data_length > (static_cast<size_t>(1) << levels) &&
         high_pass_coefficients &&
         low_pass_coefficients &&
         levels > 0);
  // One extra slot is allocated so the array can be addressed 1-based;
  // nodes_[0] is never populated.
  nodes_.reset(new rtc::scoped_ptr<WPDNode>[num_nodes_ + 1]);

  // The root gets a single coefficient of 1 (identity).
  const float kRootCoefficient = 1.f;
  nodes_[1].reset(new WPDNode(data_length, &kRootCoefficient, 1));

  // Branch every node of every level except the last one, whose nodes are the
  // leaves. Level L occupies the array slots [2^L, 2^(L+1)).
  for (int level = 0; level < levels; ++level) {
    const size_t first_in_level = static_cast<size_t>(1) << level;
    const size_t end_of_level = first_in_level * 2;
    for (size_t parent = first_in_level; parent < end_of_level; ++parent) {
      const size_t left_child = parent * 2;
      const size_t right_child = left_child + 1;
      const size_t child_length = nodes_[parent]->length() / 2;
      nodes_[left_child].reset(new WPDNode(child_length,
                                           low_pass_coefficients,
                                           coefficients_length));
      nodes_[right_child].reset(new WPDNode(child_length,
                                            high_pass_coefficients,
                                            coefficients_length));
    }
  }
}
|
||||
|
||||
// No explicit cleanup here; the only members are the rtc::scoped_ptr-held node
// array and plain integer values.
WPDTree::~WPDTree() {}
|
||||
|
||||
// Returns the node at position |index| inside |level| (the root is level 0,
// index 0). Returns NULL when |level| is outside [0, levels_] or |index| is
// outside [0, number of nodes at that level).
WPDNode* WPDTree::NodeAt(int level, int index) {
  // |level| has to be validated before it is used as a shift count: shifting
  // by a negative or oversized amount is undefined behavior.
  if (level < 0 || level > levels_ || index < 0) {
    return NULL;
  }
  const int num_nodes_at_level = 1 << level;
  if (index >= num_nodes_at_level) {
    return NULL;
  }
  // In the 1-based array, level L starts at slot 2^L.
  return nodes_[num_nodes_at_level + index].get();
}
|
||||
|
||||
int WPDTree::Update(const float* data, size_t data_length) {
|
||||
if (!data || data_length != data_length_) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Update the root node.
|
||||
int update_result = nodes_[1]->set_data(data, data_length);
|
||||
if (update_result != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Variables used to update the rest of the nodes.
|
||||
size_t index = 1;
|
||||
size_t index_left_child = 0;
|
||||
size_t index_right_child = 0;
|
||||
|
||||
int num_nodes_at_curr_level = 0;
|
||||
|
||||
for (int current_level = 0; current_level < levels_; ++current_level) {
|
||||
num_nodes_at_curr_level = 1 << current_level;
|
||||
for (int i = 0; i < num_nodes_at_curr_level; ++i) {
|
||||
index = (1 << current_level) + i;
|
||||
// Obtain the index of the current node children.
|
||||
index_left_child = index * 2;
|
||||
index_right_child = index_left_child + 1;
|
||||
|
||||
update_result = nodes_[index_left_child]->Update(
|
||||
nodes_[index]->data(), nodes_[index]->length());
|
||||
if (update_result != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
update_result = nodes_[index_right_child]->Update(
|
||||
nodes_[index]->data(), nodes_[index]->length());
|
||||
if (update_result != 0) {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
} // namespace webrtc
|
91
webrtc/modules/audio_processing/transient/wpd_tree.h
Normal file
91
webrtc/modules/audio_processing/transient/wpd_tree.h
Normal file
@ -0,0 +1,91 @@
|
||||
/*
|
||||
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
|
||||
*
|
||||
* Use of this source code is governed by a BSD-style license
|
||||
* that can be found in the LICENSE file in the root of the source
|
||||
* tree. An additional intellectual property rights grant can be found
|
||||
* in the file PATENTS. All contributing project authors may
|
||||
* be found in the AUTHORS file in the root of the source tree.
|
||||
*/
|
||||
|
||||
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
|
||||
#define WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
|
||||
|
||||
#include "webrtc/base/scoped_ptr.h"
|
||||
#include "webrtc/modules/audio_processing/transient/wpd_node.h"
|
||||
|
||||
namespace webrtc {
|
||||
|
||||
// Tree of a Wavelet Packet Decomposition (WPD).
|
||||
//
|
||||
// The root node contains all the data provided; for each node in the tree, the
|
||||
// left child contains the approximation coefficients extracted from the node,
|
||||
// and the right child contains the detail coefficients.
|
||||
// It preserves its state, so it can be multiple-called.
|
||||
//
|
||||
// The number of nodes in the tree will be 2 ^ (levels + 1) - 1.
|
||||
//
|
||||
// Implementation details: Since the tree always will be a complete binary tree,
|
||||
// it is implemented using a single linear array instead of managing the
|
||||
// relationships in each node. For convenience, it is better to use an array that
|
||||
// starts at 1 (instead of 0). Taking that into account, the following formulas
|
||||
// apply:
|
||||
// Root node index: 1.
|
||||
// Node(Level, Index in that level): 2 ^ Level + (Index in that level).
|
||||
// Left Child: Current node index * 2.
|
||||
// Right Child: Current node index * 2 + 1.
|
||||
// Parent: Current Node Index / 2 (Integer division).
|
||||
class WPDTree {
|
||||
public:
|
||||
// Creates a WPD tree using the data length and coefficients provided.
|
||||
WPDTree(size_t data_length,
|
||||
const float* high_pass_coefficients,
|
||||
const float* low_pass_coefficients,
|
||||
size_t coefficients_length,
|
||||
int levels);
|
||||
~WPDTree();
|
||||
|
||||
// Returns the number of nodes at any given level.
|
||||
static int NumberOfNodesAtLevel(int level) {
|
||||
return 1 << level;
|
||||
}
|
||||
|
||||
// Returns a pointer to the node at the given level and index(of that level).
|
||||
// Level goes from 0 to levels().
|
||||
// Index goes from 0 to the number of NumberOfNodesAtLevel(level) - 1.
|
||||
//
|
||||
// You can use the following formulas to get any node within the tree:
|
||||
// Notation: (Level, Index of node in that level).
|
||||
// Root node: (0/0).
|
||||
// Left Child: (Current node level + 1, Current node index * 2).
|
||||
// Right Child: (Current node level + 1, Current node index * 2 + 1).
|
||||
// Parent: (Current node level - 1, Current node index / 2) (Integer division)
|
||||
//
|
||||
// If level or index are out of bounds the function will return NULL.
|
||||
WPDNode* NodeAt(int level, int index);
|
||||
|
||||
// Updates all the nodes of the tree with the new data. |data_length| must be
|
||||
// teh same that was used for the creation of the tree.
|
||||
// Returns 0 if correct, and -1 otherwise.
|
||||
int Update(const float* data, size_t data_length);
|
||||
|
||||
// Returns the total number of levels below the root. Root is cosidered level
|
||||
// 0.
|
||||
int levels() const { return levels_; }
|
||||
|
||||
// Returns the total number of nodes.
|
||||
int num_nodes() const { return num_nodes_; }
|
||||
|
||||
// Returns the total number of leaves.
|
||||
int num_leaves() const { return 1 << levels_; }
|
||||
|
||||
private:
|
||||
size_t data_length_;
|
||||
int levels_;
|
||||
int num_nodes_;
|
||||
rtc::scoped_ptr<rtc::scoped_ptr<WPDNode>[]> nodes_;
|
||||
};
|
||||
|
||||
} // namespace webrtc
|
||||
|
||||
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_TRANSIENT_WPD_TREE_H_
|
Reference in New Issue
Block a user