Update audio_processing module

Corresponds to upstream commit 524e9b043e7e86fd72353b987c9d5f6a1ebf83e1

Update notes:

 * Pull in third party license file

 * Replace .gypi files with BUILD.gn to keep track of what changes
   upstream

 * Bunch of new files pulled in as dependencies

 * Won't build yet due to changes needed on top of these

Author: Arun Raghavan
Date:   2015-10-13 17:25:22 +05:30
Parent: 5ae7a5d6cd
Commit: 753eada3aa

324 changed files with 52533 additions and 16117 deletions


@@ -0,0 +1,284 @@
# Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
import("//build/config/arm.gni")
import("//third_party/protobuf/proto_library.gni")
import("../../build/webrtc.gni")
declare_args() {
# Outputs some low-level debug files.
aec_debug_dump = false
# Disables the usual mode where we trust the reported system delay
# values the AEC receives. The corresponding define is set appropriately
# in the code, but it can be force-enabled here for testing.
aec_untrusted_delay_for_testing = false
}
source_set("audio_processing") {
sources = [
"aec/aec_core.c",
"aec/aec_core.h",
"aec/aec_core_internal.h",
"aec/aec_rdft.c",
"aec/aec_rdft.h",
"aec/aec_resampler.c",
"aec/aec_resampler.h",
"aec/echo_cancellation.c",
"aec/echo_cancellation_internal.h",
"aec/include/echo_cancellation.h",
"aecm/aecm_core.c",
"aecm/aecm_core.h",
"aecm/echo_control_mobile.c",
"aecm/include/echo_control_mobile.h",
"agc/agc.cc",
"agc/agc.h",
"agc/agc_manager_direct.cc",
"agc/agc_manager_direct.h",
"agc/gain_map_internal.h",
"agc/histogram.cc",
"agc/histogram.h",
"agc/legacy/analog_agc.c",
"agc/legacy/analog_agc.h",
"agc/legacy/digital_agc.c",
"agc/legacy/digital_agc.h",
"agc/legacy/gain_control.h",
"agc/utility.cc",
"agc/utility.h",
"audio_buffer.cc",
"audio_buffer.h",
"audio_processing_impl.cc",
"audio_processing_impl.h",
"beamformer/beamformer.h",
"beamformer/complex_matrix.h",
"beamformer/covariance_matrix_generator.cc",
"beamformer/covariance_matrix_generator.h",
"beamformer/matrix.h",
"beamformer/nonlinear_beamformer.cc",
"beamformer/nonlinear_beamformer.h",
"common.h",
"echo_cancellation_impl.cc",
"echo_cancellation_impl.h",
"echo_control_mobile_impl.cc",
"echo_control_mobile_impl.h",
"gain_control_impl.cc",
"gain_control_impl.h",
"high_pass_filter_impl.cc",
"high_pass_filter_impl.h",
"include/audio_processing.h",
"intelligibility/intelligibility_enhancer.cc",
"intelligibility/intelligibility_enhancer.h",
"intelligibility/intelligibility_utils.cc",
"intelligibility/intelligibility_utils.h",
"level_estimator_impl.cc",
"level_estimator_impl.h",
"logging/aec_logging.h",
"logging/aec_logging_file_handling.cc",
"logging/aec_logging_file_handling.h",
"noise_suppression_impl.cc",
"noise_suppression_impl.h",
"processing_component.cc",
"processing_component.h",
"rms_level.cc",
"rms_level.h",
"splitting_filter.cc",
"splitting_filter.h",
"three_band_filter_bank.cc",
"three_band_filter_bank.h",
"transient/common.h",
"transient/daubechies_8_wavelet_coeffs.h",
"transient/dyadic_decimator.h",
"transient/moving_moments.cc",
"transient/moving_moments.h",
"transient/transient_detector.cc",
"transient/transient_detector.h",
"transient/transient_suppressor.cc",
"transient/transient_suppressor.h",
"transient/wpd_node.cc",
"transient/wpd_node.h",
"transient/wpd_tree.cc",
"transient/wpd_tree.h",
"typing_detection.cc",
"typing_detection.h",
"utility/delay_estimator.c",
"utility/delay_estimator.h",
"utility/delay_estimator_internal.h",
"utility/delay_estimator_wrapper.c",
"utility/delay_estimator_wrapper.h",
"vad/common.h",
"vad/gmm.cc",
"vad/gmm.h",
"vad/noise_gmm_tables.h",
"vad/pitch_based_vad.cc",
"vad/pitch_based_vad.h",
"vad/pitch_internal.cc",
"vad/pitch_internal.h",
"vad/pole_zero_filter.cc",
"vad/pole_zero_filter.h",
"vad/standalone_vad.cc",
"vad/standalone_vad.h",
"vad/vad_audio_proc.cc",
"vad/vad_audio_proc.h",
"vad/vad_audio_proc_internal.h",
"vad/vad_circular_buffer.cc",
"vad/vad_circular_buffer.h",
"vad/voice_activity_detector.cc",
"vad/voice_activity_detector.h",
"vad/voice_gmm_tables.h",
"voice_detection_impl.cc",
"voice_detection_impl.h",
]
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
defines = []
deps = [
"../..:webrtc_common",
"../audio_coding:isac",
]
if (aec_debug_dump) {
defines += [ "WEBRTC_AEC_DEBUG_DUMP" ]
}
if (aec_untrusted_delay_for_testing) {
defines += [ "WEBRTC_UNTRUSTED_DELAY" ]
}
if (rtc_enable_protobuf) {
defines += [ "WEBRTC_AUDIOPROC_DEBUG_DUMP" ]
deps += [ ":audioproc_debug_proto" ]
}
if (rtc_prefer_fixed_point) {
defines += [ "WEBRTC_NS_FIXED" ]
sources += [
"ns/include/noise_suppression_x.h",
"ns/noise_suppression_x.c",
"ns/nsx_core.c",
"ns/nsx_core.h",
"ns/nsx_defines.h",
]
if (current_cpu == "mipsel") {
sources += [ "ns/nsx_core_mips.c" ]
} else {
sources += [ "ns/nsx_core_c.c" ]
}
} else {
defines += [ "WEBRTC_NS_FLOAT" ]
sources += [
"ns/defines.h",
"ns/include/noise_suppression.h",
"ns/noise_suppression.c",
"ns/ns_core.c",
"ns/ns_core.h",
"ns/windows_private.h",
]
}
if (current_cpu == "x86" || current_cpu == "x64") {
deps += [ ":audio_processing_sse2" ]
}
if (rtc_build_with_neon) {
deps += [ ":audio_processing_neon" ]
}
if (current_cpu == "mipsel") {
sources += [ "aecm/aecm_core_mips.c" ]
if (mips_float_abi == "hard") {
sources += [
"aec/aec_core_mips.c",
"aec/aec_rdft_mips.c",
]
}
} else {
sources += [ "aecm/aecm_core_c.c" ]
}
if (is_win) {
cflags = [
# TODO(jschuh): Bug 1348: fix this warning.
"/wd4267", # size_t to int truncations
]
}
if (is_clang) {
# Suppress warnings from Chrome's Clang plugins.
# See http://code.google.com/p/webrtc/issues/detail?id=163 for details.
configs -= [ "//build/config/clang:find_bad_constructs" ]
}
deps += [
"../../base:rtc_base_approved",
"../../common_audio",
"../../system_wrappers",
]
}
if (rtc_enable_protobuf) {
proto_library("audioproc_debug_proto") {
sources = [
"debug.proto",
]
proto_out_dir = "webrtc/audio_processing"
}
}
if (current_cpu == "x86" || current_cpu == "x64") {
source_set("audio_processing_sse2") {
sources = [
"aec/aec_core_sse2.c",
"aec/aec_rdft_sse2.c",
]
if (is_posix) {
cflags = [ "-msse2" ]
}
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
}
}
if (rtc_build_with_neon) {
source_set("audio_processing_neon") {
sources = [
"aec/aec_core_neon.c",
"aec/aec_rdft_neon.c",
"aecm/aecm_core_neon.c",
"ns/nsx_core_neon.c",
]
if (current_cpu != "arm64") {
# Enable compilation for the NEON instruction set. This is needed
# since //build/config/arm.gni only enables NEON for iOS, not Android.
# This provides the same functionality as webrtc/build/arm_neon.gypi.
configs -= [ "//build/config/compiler:compiler_arm_fpu" ]
cflags = [ "-mfpu=neon" ]
}
# Disable LTO on NEON targets due to compiler bug.
# TODO(fdegans): Enable this. See crbug.com/408997.
if (rtc_use_lto) {
cflags -= [
"-flto",
"-ffat-lto-objects",
]
}
configs += [ "../..:common_config" ]
public_configs = [ "../..:common_inherited_config" ]
deps = [
"../../common_audio",
]
}
}


@@ -1,26 +1,104 @@
SUBDIRS = utility ns aec aecm agc
lib_LTLIBRARIES = libwebrtc_audio_processing.la
if NS_FIXED
COMMON_CXXFLAGS += -DWEBRTC_NS_FIXED=1
NS_LIB = libns_fix
else
COMMON_CXXFLAGS += -DWEBRTC_NS_FLOAT=1
NS_LIB = libns
endif
webrtcincludedir = $(includedir)/webrtc_audio_processing
webrtcinclude_HEADERS = $(top_srcdir)/src/typedefs.h \
$(top_srcdir)/src/modules/interface/module.h \
interface/audio_processing.h \
$(top_srcdir)/src/common_types.h \
$(top_srcdir)/src/modules/interface/module_common_types.h
webrtcinclude_HEADERS = $(top_srcdir)/webrtc/base/arraysize.h \
$(top_srcdir)/webrtc/base/platform_file.h \
$(top_srcdir)/webrtc/common.h \
$(top_srcdir)/webrtc/typedefs.h \
$(top_srcdir)/webrtc/modules/audio_processing/beamformer/array_util.h \
include/audio_processing.h
libwebrtc_audio_processing_la_SOURCES = interface/audio_processing.h \
libwebrtc_audio_processing_la_SOURCES = include/audio_processing.h \
aec/include/echo_cancellation.h \
aec/aec_common.h \
aec/aec_core.c \
aec/aec_core.h \
aec/aec_core_internal.h \
aec/aec_core_sse2.c \
aec/aec_rdft.c \
aec/aec_rdft.h \
aec/aec_rdft_sse2.c \
aec/aec_resampler.c \
aec/aec_resampler.h \
aec/echo_cancellation.c \
aec/echo_cancellation_internal.h \
aecm/include/echo_control_mobile.h \
aecm/echo_control_mobile.c \
aecm/aecm_core.c \
aecm/aecm_core.h \
aecm/aecm_core_c.c \
agc/legacy/analog_agc.c \
agc/legacy/analog_agc.h \
agc/legacy/gain_control.h \
agc/legacy/digital_agc.c \
agc/legacy/digital_agc.h \
agc/agc.cc \
agc/agc.h \
agc/agc_manager_direct.cc \
agc/agc_manager_direct.h \
agc/gain_map_internal.h \
agc/histogram.cc \
agc/histogram.h \
agc/utility.cc \
agc/utility.h \
beamformer/array_util.h \
beamformer/beamformer.h \
beamformer/complex_matrix.h \
beamformer/covariance_matrix_generator.h \
beamformer/matrix.h \
beamformer/matrix_test_helpers.h \
beamformer/nonlinear_beamformer.h \
beamformer/covariance_matrix_generator.cc \
beamformer/nonlinear_beamformer.cc \
logging/aec_logging.h \
logging/aec_logging_file_handling.h \
logging/aec_logging_file_handling.cc \
transient/common.h \
transient/daubechies_8_wavelet_coeffs.h \
transient/dyadic_decimator.h \
transient/file_utils.h \
transient/moving_moments.h \
transient/transient_detector.h \
transient/transient_suppressor.h \
transient/wpd_node.h \
transient/wpd_tree.h \
transient/click_annotate.cc \
transient/file_utils.cc \
transient/moving_moments.cc \
transient/transient_detector.cc \
transient/transient_suppressor.cc \
transient/wpd_node.cc \
transient/wpd_tree.cc \
utility/delay_estimator.c \
utility/delay_estimator.h \
utility/delay_estimator_internal.h \
utility/delay_estimator_wrapper.c \
utility/delay_estimator_wrapper.h \
vad/common.h \
vad/gmm.h \
vad/noise_gmm_tables.h \
vad/pitch_based_vad.h \
vad/pitch_internal.h \
vad/pole_zero_filter.h \
vad/standalone_vad.h \
vad/vad_audio_proc.h \
vad/vad_audio_proc_internal.h \
vad/vad_circular_buffer.h \
vad/voice_activity_detector.h \
vad/voice_gmm_tables.h \
vad/gmm.cc \
vad/pitch_based_vad.cc \
vad/pitch_internal.cc \
vad/pole_zero_filter.cc \
vad/standalone_vad.cc \
vad/vad_audio_proc.cc \
vad/vad_circular_buffer.cc \
vad/voice_activity_detector.cc \
audio_buffer.cc \
audio_buffer.h \
audio_processing_impl.cc \
audio_processing_impl.h \
common.h \
echo_cancellation_impl.cc \
echo_cancellation_impl.h \
echo_control_mobile_impl.cc \
@@ -33,27 +111,56 @@ libwebrtc_audio_processing_la_SOURCES = interface/audio_processing.h \
level_estimator_impl.h \
noise_suppression_impl.cc \
noise_suppression_impl.h \
rms_level.cc \
rms_level.h \
splitting_filter.cc \
splitting_filter.h \
processing_component.cc \
processing_component.h \
three_band_filter_bank.cc \
three_band_filter_bank.h \
typing_detection.cc \
typing_detection.h \
voice_detection_impl.cc \
voice_detection_impl.h
libwebrtc_audio_processing_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/common_audio/vad/main/interface \
-I$(top_srcdir)/src/system_wrappers/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility \
-I$(top_srcdir)/src/modules/audio_processing/ns/interface \
-I$(top_srcdir)/src/modules/audio_processing/aec/interface \
-I$(top_srcdir)/src/modules/audio_processing/aecm/interface \
-I$(top_srcdir)/src/modules/audio_processing/agc/interface
libwebrtc_audio_processing_la_LIBADD = $(top_builddir)/src/system_wrappers/libsystem_wrappers.la \
$(top_builddir)/src/common_audio/signal_processing_library/libspl.la \
$(top_builddir)/src/common_audio/vad/libvad.la \
$(top_builddir)/src/modules/audio_processing/utility/libapm_util.la \
$(top_builddir)/src/modules/audio_processing/ns/$(NS_LIB).la \
$(top_builddir)/src/modules/audio_processing/aec/libaec.la \
$(top_builddir)/src/modules/audio_processing/aecm/libaecm.la \
$(top_builddir)/src/modules/audio_processing/agc/libagc.la
if NS_FIXED
COMMON_CXXFLAGS += -DWEBRTC_NS_FIXED=1
libwebrtc_audio_processing_la_SOURCES += \
ns/include/noise_suppression_x.h \
ns/noise_suppression_x.c \
ns/nsx_defines.h \
ns/nsx_core.c \
ns/nsx_core.h \
ns/nsx_core_c.c
else
COMMON_CXXFLAGS += -DWEBRTC_NS_FLOAT=1
libwebrtc_audio_processing_la_SOURCES += \
ns/include/noise_suppression.h \
ns/noise_suppression.c \
ns/defines.h \
ns/ns_core.c \
ns/ns_core.h \
ns/windows_private.h
endif
libwebrtc_audio_processing_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS)
libwebrtc_audio_processing_la_CXXFLAGS = $(AM_CXXFLAGS) $(COMMON_CXXFLAGS)
libwebrtc_audio_processing_la_LIBADD = $(top_builddir)/webrtc/base/libbase.la \
$(top_builddir)/webrtc/system_wrappers/libsystem_wrappers.la \
$(top_builddir)/webrtc/common_audio/libcommon_audio.la \
$(top_builddir)/webrtc/modules/audio_coding/libaudio_coding.la
libwebrtc_audio_processing_la_LDFLAGS = $(AM_LDFLAGS) -version-info $(LIBWEBRTC_AUDIO_PROCESSING_VERSION_INFO)
# FIXME:
# x86: aec/aec_core_sse2.c
# aec/aec_rdft_sse2.c
# NEON: aec/aec_core_neon.c
# aec/aec_rdft_neon.c
# aecm/aecm_core_neon.c
# ns/nsx_core_neon.c
# MIPS: aec/aec_core_mips.c
# aec/aec_rdft_mips.c
# aecm/aecm_core_mips.c
# ns/nsx_core_mips.c


@@ -1,2 +0,0 @@
andrew@webrtc.org
bjornv@webrtc.org


@@ -1,16 +0,0 @@
noinst_LTLIBRARIES = libaec.la
libaec_la_SOURCES = interface/echo_cancellation.h \
echo_cancellation.c \
aec_core.h \
aec_core.c \
aec_core_sse2.c \
aec_rdft.h \
aec_rdft.c \
aec_rdft_sse2.c \
resampler.h \
resampler.c
libaec_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/system_wrappers/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility


@@ -1,40 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'aec',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'apm_util'
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/echo_cancellation.h',
'echo_cancellation.c',
'aec_core.h',
'aec_core.c',
'aec_core_sse2.c',
'aec_rdft.h',
'aec_rdft.c',
'aec_rdft_sse2.c',
'resampler.h',
'resampler.c',
],
},
],
}


@@ -0,0 +1,32 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
#include "webrtc/typedefs.h"
#ifdef _MSC_VER /* visual c++ */
#define ALIGN16_BEG __declspec(align(16))
#define ALIGN16_END
#else /* gcc or icc */
#define ALIGN16_BEG
#define ALIGN16_END __attribute__((aligned(16)))
#endif
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65];
extern ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65];
extern const float WebRtcAec_kExtendedSmoothingCoefficients[2][2];
extern const float WebRtcAec_kNormalSmoothingCoefficients[2][2];
extern const float WebRtcAec_kMinFarendPSD;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_COMMON_H_
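
The two-macro split above exists because MSVC's alignment attribute must
precede a declaration while GCC's must follow it. A hypothetical aligned
table declared with these macros would look like:

    /* 16-byte alignment permits aligned SIMD loads (e.g. _mm_load_ps or
       vld1q_f32); kTable is an illustrative name, not part of this header. */
    ALIGN16_BEG const float ALIGN16_END kTable[65] = {0.0f};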

File diff suppressed because it is too large


@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@@ -12,29 +12,18 @@
* Specifies the interface for the AEC core.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
#include <stdio.h>
#include <stddef.h>
#include "signal_processing_library.h"
#include "typedefs.h"
//#define AEC_DEBUG // for recording files
#include "webrtc/typedefs.h"
#define FRAME_LEN 80
#define PART_LEN 64 // Length of partition
#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
#define NR_PART 12 // Number of partitions
#define FILT_LEN (PART_LEN * NR_PART) // Filter length
#define FILT_LEN2 (FILT_LEN * 2) // Double filter length
#define FAR_BUF_LEN (FILT_LEN2 * 2)
#define PREF_BAND_SIZE 24
#define BLOCKL_MAX FRAME_LEN
// Maximum delay in fixed point delay estimator, used for logging
enum {kMaxDelay = 100};
#define PART_LEN 64 // Length of partition
#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
#define PART_LEN2 (PART_LEN * 2) // Length of partition * 2
#define NUM_HIGH_BANDS_MAX 2 // Max number of high bands
typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
@@ -46,136 +35,95 @@ typedef float complex_t[2];
// compile time.
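// As an illustration of that layout: a length-N complex buffer is stored as
// float buf[2][N], with buf[0] holding all real parts and buf[1] all
// imaginary parts, so a per-bin complex multiply becomes
//   re = aRe[i] * bRe[i] - aIm[i] * bIm[i];
//   im = aRe[i] * bIm[i] + aIm[i] * bRe[i];
// with no interleaving, which maps directly onto 4-wide SSE2/NEON operations.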
// Metrics
enum {offsetLevel = -100};
enum {
kOffsetLevel = -100
};
typedef struct {
float sfrsum;
int sfrcounter;
float framelevel;
float frsum;
int frcounter;
float minlevel;
float averagelevel;
} power_level_t;
typedef struct Stats {
float instant;
float average;
float min;
float max;
float sum;
float hisum;
float himean;
int counter;
int hicounter;
} Stats;
typedef struct {
float instant;
float average;
float min;
float max;
float sum;
float hisum;
float himean;
int counter;
int hicounter;
} stats_t;
typedef struct AecCore AecCore;
typedef struct {
int farBufWritePos, farBufReadPos;
int knownDelay;
int inSamples, outSamples;
int delayEstCtr;
void *farFrBuf, *nearFrBuf, *outFrBuf;
void *nearFrBufH;
void *outFrBufH;
float xBuf[PART_LEN2]; // farend
float dBuf[PART_LEN2]; // nearend
float eBuf[PART_LEN2]; // error
float dBufH[PART_LEN2]; // nearend
float xPow[PART_LEN1];
float dPow[PART_LEN1];
float dMinPow[PART_LEN1];
float dInitMinPow[PART_LEN1];
float *noisePow;
float xfBuf[2][NR_PART * PART_LEN1]; // farend fft buffer
float wfBuf[2][NR_PART * PART_LEN1]; // filter fft
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
complex_t xfwBuf[NR_PART * PART_LEN1]; // farend windowed fft buffer
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near and error psd
float hNs[PART_LEN1];
float hNlFbMin, hNlFbLocalMin;
float hNlXdAvgMin;
int hNlNewMin, hNlMinCtr;
float overDrive, overDriveSm;
float targetSupp, minOverDrive;
float outBuf[PART_LEN];
int delayIdx;
short stNearState, echoState;
short divergeState;
int xfBufBlockPos;
short farBuf[FILT_LEN2 * 2];
short mult; // sampling frequency multiple
int sampFreq;
WebRtc_UWord32 seed;
float mu; // stepsize
float errThresh; // error threshold
int noiseEstCtr;
power_level_t farlevel;
power_level_t nearlevel;
power_level_t linoutlevel;
power_level_t nlpoutlevel;
int metricsMode;
int stateCounter;
stats_t erl;
stats_t erle;
stats_t aNlp;
stats_t rerl;
// Quantities to control H band scaling for SWB input
int freq_avg_ic; //initial bin for averaging nlp gain
int flag_Hband_cn; //for comfort noise
float cn_scale_Hband; //scale for comfort noise in H band
int delay_histogram[kMaxDelay];
int delay_logging_enabled;
void* delay_estimator;
#ifdef AEC_DEBUG
FILE *farFile;
FILE *nearFile;
FILE *outFile;
FILE *outLpFile;
#endif
} aec_t;
typedef void (*WebRtcAec_FilterFar_t)(aec_t *aec, float yf[2][PART_LEN1]);
extern WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
typedef void (*WebRtcAec_ScaleErrorSignal_t)(aec_t *aec, float ef[2][PART_LEN1]);
extern WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
typedef void (*WebRtcAec_FilterAdaptation_t)
(aec_t *aec, float *fft, float ef[2][PART_LEN1]);
extern WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
typedef void (*WebRtcAec_OverdriveAndSuppress_t)
(aec_t *aec, float hNl[PART_LEN1], const float hNlFb, float efw[2][PART_LEN1]);
extern WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
int WebRtcAec_CreateAec(aec_t **aec);
int WebRtcAec_FreeAec(aec_t *aec);
int WebRtcAec_InitAec(aec_t *aec, int sampFreq);
AecCore* WebRtcAec_CreateAec(); // Returns NULL on error.
void WebRtcAec_FreeAec(AecCore* aec);
int WebRtcAec_InitAec(AecCore* aec, int sampFreq);
void WebRtcAec_InitAec_SSE2(void);
#if defined(MIPS_FPU_LE)
void WebRtcAec_InitAec_mips(void);
#endif
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void WebRtcAec_InitAec_neon(void);
#endif
void WebRtcAec_InitMetrics(aec_t *aec);
void WebRtcAec_ProcessFrame(aec_t *aec, const short *farend,
const short *nearend, const short *nearendH,
short *out, short *outH,
int knownDelay);
void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend);
void WebRtcAec_ProcessFrames(AecCore* aec,
const float* const* nearend,
size_t num_bands,
size_t num_samples,
int knownDelay,
float* const* out);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_CORE_H_
// A helper function to call WebRtc_MoveReadPtr() for all far-end buffers.
// Returns the number of elements moved, and adjusts |system_delay| by the
// corresponding amount in ms.
int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements);
// Calculates the median, standard deviation and amount of poor values among the
// delay estimates aggregated up to the first call to the function. After that
// first call the metrics are aggregated and updated every second. With poor
// values we mean values that most likely will cause the AEC to perform poorly.
// TODO(bjornv): Consider changing tests and tools to handle a constant
// aggregation window throughout the session instead.
int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std,
float* fraction_poor_delays);
// Returns the echo state (1: echo, 0: no echo).
int WebRtcAec_echo_state(AecCore* self);
// Gets statistics of the echo metrics ERL, ERLE, A_NLP.
void WebRtcAec_GetEchoStats(AecCore* self,
Stats* erl,
Stats* erle,
Stats* a_nlp);
#ifdef WEBRTC_AEC_DEBUG_DUMP
void* WebRtcAec_far_time_buf(AecCore* self);
#endif
// Sets local configuration modes.
void WebRtcAec_SetConfigCore(AecCore* self,
int nlp_mode,
int metrics_mode,
int delay_logging);
// Non-zero enables, zero disables.
void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable);
// Returns non-zero if delay agnostic (i.e., signal based delay estimation) is
// enabled and zero if disabled.
int WebRtcAec_delay_agnostic_enabled(AecCore* self);
// Enables or disables extended filter mode. Non-zero enables, zero disables.
void WebRtcAec_enable_extended_filter(AecCore* self, int enable);
// Returns non-zero if extended filter mode is enabled and zero if disabled.
int WebRtcAec_extended_filter_enabled(AecCore* self);
// Returns the current |system_delay|, i.e., the buffered difference between
// far-end and near-end.
int WebRtcAec_system_delay(AecCore* self);
// Sets the |system_delay| to |value|. Note that if the value is changed
// improperly, there can be a performance regression. So it should be used with
// care.
void WebRtcAec_SetSystemDelay(AecCore* self, int delay);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_H_
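
Taken together, the rewritten header replaces the old aec_t-based API with an
opaque AecCore handle and float-based processing. A minimal usage sketch based
only on the declarations above (far-end buffering and error handling are
simplified; the real caller is the echo_cancellation.c wrapper):

    #include "webrtc/modules/audio_processing/aec/aec_core.h"

    static int RunAecSketch(const float* const* nearend, float* const* out) {
      AecCore* aec = WebRtcAec_CreateAec();  /* Returns NULL on error. */
      if (aec == NULL) return -1;
      if (WebRtcAec_InitAec(aec, 16000) != 0) {  /* 16 kHz, one band. */
        WebRtcAec_FreeAec(aec);
        return -1;
      }
      /* Far-end audio must be fed in first, one PART_LEN-sample partition
         at a time, via WebRtcAec_BufferFarendPartition(aec, far_block). */
      WebRtcAec_ProcessFrames(aec, nearend, 1 /* num_bands */,
                              FRAME_LEN /* num_samples */,
                              0 /* knownDelay */, out);
      WebRtcAec_FreeAec(aec);
      return 0;
    }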


@@ -0,0 +1,202 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core.h"
#include "webrtc/typedefs.h"
// Number of partitions for the extended filter mode. The first one is an enum
// to be used in array declarations, as it represents the maximum filter length.
enum {
kExtendedNumPartitions = 32
};
static const int kNormalNumPartitions = 12;
// Delay estimator constants, used for logging and delay compensation if
// reported delays are disabled.
enum {
kLookaheadBlocks = 15
};
enum {
// 500 ms for 16 kHz (125 blocks * 64 samples / 16000 Hz = 0.5 s), which
// matches the limit of reported delays.
kHistorySizeBlocks = 125
};
// Extended filter adaptation parameters.
// TODO(ajm): No narrowband tuning yet.
static const float kExtendedMu = 0.4f;
static const float kExtendedErrorThreshold = 1.0e-6f;
typedef struct PowerLevel {
float sfrsum;
int sfrcounter;
float framelevel;
float frsum;
int frcounter;
float minlevel;
float averagelevel;
} PowerLevel;
struct AecCore {
int farBufWritePos, farBufReadPos;
int knownDelay;
int inSamples, outSamples;
int delayEstCtr;
RingBuffer* nearFrBuf;
RingBuffer* outFrBuf;
RingBuffer* nearFrBufH[NUM_HIGH_BANDS_MAX];
RingBuffer* outFrBufH[NUM_HIGH_BANDS_MAX];
float dBuf[PART_LEN2]; // nearend
float eBuf[PART_LEN2]; // error
float dBufH[NUM_HIGH_BANDS_MAX][PART_LEN2]; // nearend
float xPow[PART_LEN1];
float dPow[PART_LEN1];
float dMinPow[PART_LEN1];
float dInitMinPow[PART_LEN1];
float* noisePow;
float xfBuf[2][kExtendedNumPartitions * PART_LEN1]; // farend fft buffer
float wfBuf[2][kExtendedNumPartitions * PART_LEN1]; // filter fft
complex_t sde[PART_LEN1]; // cross-psd of nearend and error
complex_t sxd[PART_LEN1]; // cross-psd of farend and nearend
// Farend windowed fft buffer.
complex_t xfwBuf[kExtendedNumPartitions * PART_LEN1];
float sx[PART_LEN1], sd[PART_LEN1], se[PART_LEN1]; // far, near, error psd
float hNs[PART_LEN1];
float hNlFbMin, hNlFbLocalMin;
float hNlXdAvgMin;
int hNlNewMin, hNlMinCtr;
float overDrive, overDriveSm;
int nlp_mode;
float outBuf[PART_LEN];
int delayIdx;
short stNearState, echoState;
short divergeState;
int xfBufBlockPos;
RingBuffer* far_buf;
RingBuffer* far_buf_windowed;
int system_delay; // Current system delay buffered in AEC.
int mult; // sampling frequency multiple
int sampFreq;
size_t num_bands;
uint32_t seed;
float normal_mu; // stepsize
float normal_error_threshold; // error threshold
int noiseEstCtr;
PowerLevel farlevel;
PowerLevel nearlevel;
PowerLevel linoutlevel;
PowerLevel nlpoutlevel;
int metricsMode;
int stateCounter;
Stats erl;
Stats erle;
Stats aNlp;
Stats rerl;
// Quantities to control H band scaling for SWB input
int freq_avg_ic; // initial bin for averaging nlp gain
int flag_Hband_cn; // for comfort noise
float cn_scale_Hband; // scale for comfort noise in H band
int delay_metrics_delivered;
int delay_histogram[kHistorySizeBlocks];
int num_delay_values;
int delay_median;
int delay_std;
float fraction_poor_delays;
int delay_logging_enabled;
void* delay_estimator_farend;
void* delay_estimator;
// Variables associated with delay correction through signal based delay
// estimation feedback.
int signal_delay_correction;
int previous_delay;
int delay_correction_count;
int shift_offset;
float delay_quality_threshold;
int frame_count;
// 0 = delay agnostic mode (signal based delay correction) disabled.
// Otherwise enabled.
int delay_agnostic_enabled;
// 1 = extended filter mode enabled, 0 = disabled.
int extended_filter_enabled;
// Runtime selection of number of filter partitions.
int num_partitions;
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Sequence number of this AEC instance, so that different instances can
// choose different dump file names.
int instance_index;
// Number of times we've restarted dumping; used to pick new dump file names
// each time.
int debug_dump_count;
RingBuffer* far_time_buf;
rtc_WavWriter* farFile;
rtc_WavWriter* nearFile;
rtc_WavWriter* outFile;
rtc_WavWriter* outLinearFile;
FILE* e_fft_file;
#endif
};
typedef void (*WebRtcAecFilterFar)(AecCore* aec, float yf[2][PART_LEN1]);
extern WebRtcAecFilterFar WebRtcAec_FilterFar;
typedef void (*WebRtcAecScaleErrorSignal)(AecCore* aec, float ef[2][PART_LEN1]);
extern WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal;
typedef void (*WebRtcAecFilterAdaptation)(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]);
extern WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation;
typedef void (*WebRtcAecOverdriveAndSuppress)(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]);
extern WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress;
typedef void (*WebRtcAecComfortNoise)(AecCore* aec,
float efw[2][PART_LEN1],
complex_t* comfortNoiseHband,
const float* noisePow,
const float* lambda);
extern WebRtcAecComfortNoise WebRtcAec_ComfortNoise;
typedef void (*WebRtcAecSubBandCoherence)(AecCore* aec,
float efw[2][PART_LEN1],
float xfw[2][PART_LEN1],
float* fft,
float* cohde,
float* cohxd);
extern WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_CORE_INTERNAL_H_
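
The typedefs and extern declarations above form a runtime dispatch layer: each
speed-critical kernel is reached through a global function pointer, which the
generic code points at the portable C implementation and which the
per-architecture init functions rebind (exactly as WebRtcAec_InitAec_mips()
does in the MIPS file below). A sketch of the pattern, with a hypothetical
generic kernel name; the actual wiring lives in aec_core.c and the per-arch
files:

    /* In aec_core.c: define the pointer once, installing the portable
       kernel (FilterFarGeneric is an illustrative stand-in). */
    WebRtcAecFilterFar WebRtcAec_FilterFar = FilterFarGeneric;
    /* In a per-arch init function: rebind to the optimized kernel. */
    WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
    /* At every call site: dispatch through the pointer. */
    WebRtcAec_FilterFar(aec, yf);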


@@ -0,0 +1,774 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The core AEC algorithm, which is presented with time-aligned signals.
*/
#include "webrtc/modules/audio_processing/aec/aec_core.h"
#include <math.h>
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
static const int flagHbandCn = 1; // flag for adding comfort noise in H band
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];
void WebRtcAec_ComfortNoise_mips(AecCore* aec,
float efw[2][PART_LEN1],
complex_t* comfortNoiseHband,
const float* noisePow,
const float* lambda) {
int i, num;
float rand[PART_LEN];
float noise, noiseAvg, tmp, tmpAvg;
int16_t randW16[PART_LEN];
complex_t u[PART_LEN1];
const float pi2 = 6.28318530717959f;
const float pi2t = pi2 / 32768;
// Generate a uniform random array on [0 1]
WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
int16_t* randWptr = randW16;
float randTemp, randTemp2, randTemp3, randTemp4;
int32_t tmp1s, tmp2s, tmp3s, tmp4s;
for (i = 0; i < PART_LEN; i+=4) {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lh %[tmp1s], 0(%[randWptr]) \n\t"
"lh %[tmp2s], 2(%[randWptr]) \n\t"
"lh %[tmp3s], 4(%[randWptr]) \n\t"
"lh %[tmp4s], 6(%[randWptr]) \n\t"
"mtc1 %[tmp1s], %[randTemp] \n\t"
"mtc1 %[tmp2s], %[randTemp2] \n\t"
"mtc1 %[tmp3s], %[randTemp3] \n\t"
"mtc1 %[tmp4s], %[randTemp4] \n\t"
"cvt.s.w %[randTemp], %[randTemp] \n\t"
"cvt.s.w %[randTemp2], %[randTemp2] \n\t"
"cvt.s.w %[randTemp3], %[randTemp3] \n\t"
"cvt.s.w %[randTemp4], %[randTemp4] \n\t"
"addiu %[randWptr], %[randWptr], 8 \n\t"
"mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
"mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
"mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
"mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
".set pop \n\t"
: [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
[randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
[randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
[tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
[tmp4s] "=&r" (tmp4s)
: [pi2t] "f" (pi2t)
: "memory"
);
u[i+1][0] = cosf(randTemp);
u[i+1][1] = sinf(randTemp);
u[i+2][0] = cosf(randTemp2);
u[i+2][1] = sinf(randTemp2);
u[i+3][0] = cosf(randTemp3);
u[i+3][1] = sinf(randTemp3);
u[i+4][0] = cosf(randTemp4);
u[i+4][1] = sinf(randTemp4);
}
// Reject LF noise
float* u_ptr = &u[1][0];
float noise2, noise3, noise4;
float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;
u[0][0] = 0;
u[0][1] = 0;
for (i = 1; i < PART_LEN1; i+=4) {
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lwc1 %[noise], 4(%[noisePow]) \n\t"
"lwc1 %[noise2], 8(%[noisePow]) \n\t"
"lwc1 %[noise3], 12(%[noisePow]) \n\t"
"lwc1 %[noise4], 16(%[noisePow]) \n\t"
"sqrt.s %[noise], %[noise] \n\t"
"sqrt.s %[noise2], %[noise2] \n\t"
"sqrt.s %[noise3], %[noise3] \n\t"
"sqrt.s %[noise4], %[noise4] \n\t"
"lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
"lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
"lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
"lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
"lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
"lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
"addiu %[noisePow], %[noisePow], 16 \n\t"
"mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
"mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
"mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
"mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
"mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
"mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
"swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
"swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
"mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
"mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
"neg.s %[tmp2f] \n\t"
"neg.s %[tmp4f] \n\t"
"neg.s %[tmp6f] \n\t"
"neg.s %[tmp8f] \n\t"
"swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
"swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
"swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
"swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
"swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
"swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
"addiu %[u_ptr], %[u_ptr], 32 \n\t"
".set pop \n\t"
: [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
[noise] "=&f" (noise), [noise2] "=&f" (noise2),
[noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
[tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
[tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
[tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
:
: "memory"
);
}
u[PART_LEN][1] = 0;
noisePow -= PART_LEN;
u_ptr = &u[0][0];
float* u_ptr_end = &u[PART_LEN][0];
float* efw_ptr_0 = &efw[0][0];
float* efw_ptr_1 = &efw[1][0];
float tmp9f, tmp10f;
const float tmp1c = 1.0;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[tmp1f], 0(%[lambda]) \n\t"
"lwc1 %[tmp6f], 4(%[lambda]) \n\t"
"addiu %[lambda], %[lambda], 8 \n\t"
"c.lt.s %[tmp1f], %[tmp1c] \n\t"
"bc1f 4f \n\t"
" nop \n\t"
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
"bc1f 3f \n\t"
" nop \n\t"
"2: \n\t"
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"b 5f \n\t"
" swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"3: \n\t"
"mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
"sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
"sqrt.s %[tmp1f], %[tmp1f] \n\t"
"lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
"lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
"add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
"add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
"madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
"b 5f \n\t"
" swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
"4: \n\t"
"c.lt.s %[tmp6f], %[tmp1c] \n\t"
"bc1f 5f \n\t"
" nop \n\t"
"mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
"sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
"sqrt.s %[tmp6f], %[tmp6f] \n\t"
"lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
"lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
"add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
"mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
"add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
"madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
"swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
"5: \n\t"
"addiu %[u_ptr], %[u_ptr], 16 \n\t"
"addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
"bne %[u_ptr], %[u_ptr_end], 1b \n\t"
" addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
".set pop \n\t"
: [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
[efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
[tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
[tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
[tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
[tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
: [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
: "memory"
);
lambda -= PART_LEN;
tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
//tmp = 1 - lambda[i];
efw[0][PART_LEN] += tmp * u[PART_LEN][0];
efw[1][PART_LEN] += tmp * u[PART_LEN][1];
// For H band comfort noise
// TODO: don't compute noise and "tmp" twice. Use the previous results.
noiseAvg = 0.0;
tmpAvg = 0.0;
num = 0;
if ((aec->sampFreq == 32000 || aec->sampFreq == 48000) && flagHbandCn == 1) {
for (i = 0; i < PART_LEN; i++) {
rand[i] = ((float)randW16[i]) / 32768;
}
// average noise scale
// average over second half of freq spectrum (i.e., 4->8khz)
// TODO: we shouldn't need num. We know how many elements we're summing.
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
num++;
noiseAvg += sqrtf(noisePow[i]);
}
noiseAvg /= (float)num;
// average nlp scale
// average over second half of freq spectrum (i.e., 4->8khz)
// TODO: we shouldn't need num. We know how many elements we're summing.
num = 0;
for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
num++;
tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
}
tmpAvg /= (float)num;
// Use average noise for H band
// TODO: we should probably have a new random vector here.
// Reject LF noise
u[0][0] = 0;
u[0][1] = 0;
for (i = 1; i < PART_LEN1; i++) {
tmp = pi2 * rand[i - 1];
// Use average noise for H band
u[i][0] = noiseAvg * (float)cos(tmp);
u[i][1] = -noiseAvg * (float)sin(tmp);
}
u[PART_LEN][1] = 0;
for (i = 0; i < PART_LEN1; i++) {
// Use average NLP weight for H band
comfortNoiseHband[i][0] = tmpAvg * u[i][0];
comfortNoiseHband[i][1] = tmpAvg * u[i][1];
}
}
}
void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
int i;
for (i = 0; i < aec->num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= aec->num_partitions) {
xPos -= aec->num_partitions * (PART_LEN1);
}
float* yf0 = yf[0];
float* yf1 = yf[1];
float* aRe = aec->xfBuf[0] + xPos;
float* aIm = aec->xfBuf[1] + xPos;
float* bRe = aec->wfBuf[0] + pos;
float* bIm = aec->wfBuf[1] + pos;
float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
int len = PART_LEN1 >> 1;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 4(%[bRe]) \n\t"
"lwc1 %[f6], 4(%[bIm]) \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
"mul.s %[f0], %[f0], %[f2] \n\t"
"mul.s %[f9], %[f4], %[f5] \n\t"
"mul.s %[f4], %[f4], %[f6] \n\t"
"lwc1 %[f7], 4(%[aIm]) \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f12], %[f2], %[f3] \n\t"
"mul.s %[f1], %[f3], %[f1] \n\t"
"mul.s %[f11], %[f6], %[f7] \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"sub.s %[f8], %[f8], %[f12] \n\t"
"mul.s %[f12], %[f7], %[f5] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"add.s %[f1], %[f0], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
"sub.s %[f9], %[f9], %[f11] \n\t"
"lwc1 %[f6], 4(%[yf0]) \n\t"
"add.s %[f4], %[f4], %[f12] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"addiu %[aRe], %[aRe], 8 \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
"nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t"
"lwc1 %[f6], 4(%[yf0]) \n\t"
"madd.s %[f4], %[f4], %[f7], %[f5] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"lwc1 %[f5], 4(%[yf1]) \n\t"
"add.s %[f2], %[f2], %[f8] \n\t"
"addiu %[bRe], %[bRe], 8 \n\t"
"addiu %[bIm], %[bIm], 8 \n\t"
"add.s %[f3], %[f3], %[f1] \n\t"
"add.s %[f6], %[f6], %[f9] \n\t"
"add.s %[f5], %[f5], %[f4] \n\t"
"swc1 %[f2], 0(%[yf0]) \n\t"
"swc1 %[f3], 0(%[yf1]) \n\t"
"swc1 %[f6], 4(%[yf0]) \n\t"
"swc1 %[f5], 4(%[yf1]) \n\t"
"addiu %[yf0], %[yf0], 8 \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[yf1], %[yf1], 8 \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
"mul.s %[f0], %[f0], %[f2] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f12], %[f2], %[f3] \n\t"
"mul.s %[f1], %[f3], %[f1] \n\t"
"sub.s %[f8], %[f8], %[f12] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"add.s %[f1], %[f0], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
#else // #if !defined(MIPS32_R2_LE)
"nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
"lwc1 %[f2], 0(%[yf0]) \n\t"
"madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
"lwc1 %[f3], 0(%[yf1]) \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"add.s %[f2], %[f2], %[f8] \n\t"
"add.s %[f3], %[f3], %[f1] \n\t"
"swc1 %[f2], 0(%[yf0]) \n\t"
"swc1 %[f3], 0(%[yf1]) \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
[f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
[f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
[aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
[yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
:
: "memory"
);
}
}
void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]) {
int i;
for (i = 0; i < aec->num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
int pos;
// Check for wrap
if (i + aec->xfBufBlockPos >= aec->num_partitions) {
xPos -= aec->num_partitions * PART_LEN1;
}
pos = i * PART_LEN1;
float* aRe = aec->xfBuf[0] + xPos;
float* aIm = aec->xfBuf[1] + xPos;
float* bRe = ef[0];
float* bIm = ef[1];
float* fft_tmp;
float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
int len = PART_LEN >> 1;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[fft_tmp], %[fft], 0 \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 4(%[bRe]) \n\t"
"lwc1 %[f6], 4(%[bIm]) \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"addiu %[bRe], %[bRe], 8 \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
"mul.s %[f0], %[f0], %[f2] \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"mul.s %[f9], %[f4], %[f5] \n\t"
"lwc1 %[f7], 4(%[aIm]) \n\t"
"mul.s %[f4], %[f4], %[f6] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f10], %[f3], %[f2] \n\t"
"mul.s %[f1], %[f3], %[f1] \n\t"
"mul.s %[f11], %[f7], %[f6] \n\t"
"mul.s %[f5], %[f7], %[f5] \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[bIm], %[bIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"add.s %[f8], %[f8], %[f10] \n\t"
"sub.s %[f1], %[f0], %[f1] \n\t"
"add.s %[f9], %[f9], %[f11] \n\t"
"sub.s %[f5], %[f4], %[f5] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"addiu %[aIm], %[aIm], 8 \n\t"
"addiu %[bIm], %[bIm], 8 \n\t"
"addiu %[len], %[len], -1 \n\t"
"madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
"nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t"
"madd.s %[f9], %[f9], %[f7], %[f6] \n\t"
"nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[f8], 0(%[fft_tmp]) \n\t"
"swc1 %[f1], 4(%[fft_tmp]) \n\t"
"swc1 %[f9], 8(%[fft_tmp]) \n\t"
"swc1 %[f5], 12(%[fft_tmp]) \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[bRe]) \n\t"
"lwc1 %[f2], 0(%[bIm]) \n\t"
"lwc1 %[f3], 0(%[aIm]) \n\t"
"mul.s %[f8], %[f0], %[f1] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[f10], %[f3], %[f2] \n\t"
"add.s %[f8], %[f8], %[f10] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[f8], 4(%[fft]) \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
[f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
[f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
[bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
[len] "+r" (len)
: [fft] "r" (fft)
: "memory"
);
aec_rdft_inverse_128(fft);
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
// fft scaling
{
float scale = 2.0f / PART_LEN2;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[fft_tmp], %[fft], 0 \n\t"
"addiu %[len], $zero, 8 \n\t"
"1: \n\t"
"addiu %[len], %[len], -1 \n\t"
"lwc1 %[f0], 0(%[fft_tmp]) \n\t"
"lwc1 %[f1], 4(%[fft_tmp]) \n\t"
"lwc1 %[f2], 8(%[fft_tmp]) \n\t"
"lwc1 %[f3], 12(%[fft_tmp]) \n\t"
"mul.s %[f0], %[f0], %[scale] \n\t"
"mul.s %[f1], %[f1], %[scale] \n\t"
"mul.s %[f2], %[f2], %[scale] \n\t"
"mul.s %[f3], %[f3], %[scale] \n\t"
"lwc1 %[f4], 16(%[fft_tmp]) \n\t"
"lwc1 %[f5], 20(%[fft_tmp]) \n\t"
"lwc1 %[f6], 24(%[fft_tmp]) \n\t"
"lwc1 %[f7], 28(%[fft_tmp]) \n\t"
"mul.s %[f4], %[f4], %[scale] \n\t"
"mul.s %[f5], %[f5], %[scale] \n\t"
"mul.s %[f6], %[f6], %[scale] \n\t"
"mul.s %[f7], %[f7], %[scale] \n\t"
"swc1 %[f0], 0(%[fft_tmp]) \n\t"
"swc1 %[f1], 4(%[fft_tmp]) \n\t"
"swc1 %[f2], 8(%[fft_tmp]) \n\t"
"swc1 %[f3], 12(%[fft_tmp]) \n\t"
"swc1 %[f4], 16(%[fft_tmp]) \n\t"
"swc1 %[f5], 20(%[fft_tmp]) \n\t"
"swc1 %[f6], 24(%[fft_tmp]) \n\t"
"swc1 %[f7], 28(%[fft_tmp]) \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[fft_tmp], %[fft_tmp], 32 \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
[fft_tmp] "=&r" (fft_tmp)
: [scale] "f" (scale), [fft] "r" (fft)
: "memory"
);
}
aec_rdft_forward_128(fft);
aRe = aec->wfBuf[0] + pos;
aIm = aec->wfBuf[1] + pos;
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"addiu %[fft_tmp], %[fft], 0 \n\t"
"addiu %[len], $zero, 31 \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[fft_tmp]) \n\t"
"lwc1 %[f2], 256(%[aRe]) \n\t"
"lwc1 %[f3], 4(%[fft_tmp]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 8(%[fft_tmp]) \n\t"
"lwc1 %[f6], 4(%[aIm]) \n\t"
"lwc1 %[f7], 12(%[fft_tmp]) \n\t"
"add.s %[f0], %[f0], %[f1] \n\t"
"add.s %[f2], %[f2], %[f3] \n\t"
"add.s %[f4], %[f4], %[f5] \n\t"
"add.s %[f6], %[f6], %[f7] \n\t"
"addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
"swc1 %[f0], 0(%[aRe]) \n\t"
"swc1 %[f2], 256(%[aRe]) \n\t"
"swc1 %[f4], 4(%[aRe]) \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"swc1 %[f6], 4(%[aIm]) \n\t"
"addiu %[aIm], %[aIm], 8 \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[aRe]) \n\t"
"lwc1 %[f1], 0(%[fft_tmp]) \n\t"
"lwc1 %[f2], 0(%[aIm]) \n\t"
"lwc1 %[f3], 4(%[fft_tmp]) \n\t"
"lwc1 %[f4], 4(%[aRe]) \n\t"
"lwc1 %[f5], 8(%[fft_tmp]) \n\t"
"lwc1 %[f6], 4(%[aIm]) \n\t"
"lwc1 %[f7], 12(%[fft_tmp]) \n\t"
"add.s %[f0], %[f0], %[f1] \n\t"
"add.s %[f2], %[f2], %[f3] \n\t"
"add.s %[f4], %[f4], %[f5] \n\t"
"add.s %[f6], %[f6], %[f7] \n\t"
"addiu %[len], %[len], -1 \n\t"
"addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
"swc1 %[f0], 0(%[aRe]) \n\t"
"swc1 %[f2], 0(%[aIm]) \n\t"
"swc1 %[f4], 4(%[aRe]) \n\t"
"addiu %[aRe], %[aRe], 8 \n\t"
"swc1 %[f6], 4(%[aIm]) \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[aIm], %[aIm], 8 \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
[f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
[f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
[fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
: [fft] "r" (fft)
: "memory"
);
}
}
void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
const float one = 1.0;
float* p_hNl;
float* p_efw0;
float* p_efw1;
float* p_WebRtcAec_wC;
float temp1, temp2, temp3, temp4;
p_hNl = &hNl[0];
p_efw0 = &efw[0][0];
p_efw1 = &efw[1][0];
p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];
for (i = 0; i < PART_LEN1; i++) {
// Weight subbands
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"lwc1 %[temp1], 0(%[p_hNl]) \n\t"
"lwc1 %[temp2], 0(%[p_wC]) \n\t"
"c.lt.s %[hNlFb], %[temp1] \n\t"
"bc1f 1f \n\t"
" mul.s %[temp3], %[temp2], %[hNlFb] \n\t"
"sub.s %[temp4], %[one], %[temp2] \n\t"
#if !defined(MIPS32_R2_LE)
"mul.s %[temp1], %[temp1], %[temp4] \n\t"
"add.s %[temp1], %[temp3], %[temp1] \n\t"
#else // #if !defined(MIPS32_R2_LE)
"madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
"swc1 %[temp1], 0(%[p_hNl]) \n\t"
"1: \n\t"
"addiu %[p_wC], %[p_wC], 4 \n\t"
".set pop \n\t"
: [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
[temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
: [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
: "memory"
);
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
__asm __volatile (
"lwc1 %[temp1], 0(%[p_hNl]) \n\t"
"lwc1 %[temp3], 0(%[p_efw1]) \n\t"
"lwc1 %[temp2], 0(%[p_efw0]) \n\t"
"addiu %[p_hNl], %[p_hNl], 4 \n\t"
"mul.s %[temp3], %[temp3], %[temp1] \n\t"
"mul.s %[temp2], %[temp2], %[temp1] \n\t"
"addiu %[p_efw0], %[p_efw0], 4 \n\t"
"addiu %[p_efw1], %[p_efw1], 4 \n\t"
"neg.s %[temp4], %[temp3] \n\t"
"swc1 %[temp2], -4(%[p_efw0]) \n\t"
"swc1 %[temp4], -4(%[p_efw1]) \n\t"
: [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
[temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
[p_hNl] "+r" (p_hNl)
:
: "memory"
);
}
}
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled
? kExtendedErrorThreshold
: aec->normal_error_threshold;
int len = (PART_LEN1);
float* ef0 = ef[0];
float* ef1 = ef[1];
float* xPow = aec->xPow;
float fac1 = 1e-10f;
float err_th2 = error_threshold * error_threshold;
float f0, f1, f2;
#if !defined(MIPS32_R2_LE)
float f3;
#endif
__asm __volatile (
".set push \n\t"
".set noreorder \n\t"
"1: \n\t"
"lwc1 %[f0], 0(%[xPow]) \n\t"
"lwc1 %[f1], 0(%[ef0]) \n\t"
"lwc1 %[f2], 0(%[ef1]) \n\t"
"add.s %[f0], %[f0], %[fac1] \n\t"
"div.s %[f1], %[f1], %[f0] \n\t"
"div.s %[f2], %[f2], %[f0] \n\t"
"mul.s %[f0], %[f1], %[f1] \n\t"
#if defined(MIPS32_R2_LE)
"madd.s %[f0], %[f0], %[f2], %[f2] \n\t"
#else
"mul.s %[f3], %[f2], %[f2] \n\t"
"add.s %[f0], %[f0], %[f3] \n\t"
#endif
"c.le.s %[f0], %[err_th2] \n\t"
"nop \n\t"
"bc1t 2f \n\t"
" nop \n\t"
"sqrt.s %[f0], %[f0] \n\t"
"add.s %[f0], %[f0], %[fac1] \n\t"
"div.s %[f0], %[err_th], %[f0] \n\t"
"mul.s %[f1], %[f1], %[f0] \n\t"
"mul.s %[f2], %[f2], %[f0] \n\t"
"2: \n\t"
"mul.s %[f1], %[f1], %[mu] \n\t"
"mul.s %[f2], %[f2], %[mu] \n\t"
"swc1 %[f1], 0(%[ef0]) \n\t"
"swc1 %[f2], 0(%[ef1]) \n\t"
"addiu %[len], %[len], -1 \n\t"
"addiu %[xPow], %[xPow], 4 \n\t"
"addiu %[ef0], %[ef0], 4 \n\t"
"bgtz %[len], 1b \n\t"
" addiu %[ef1], %[ef1], 4 \n\t"
".set pop \n\t"
: [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
#if !defined(MIPS32_R2_LE)
[f3] "=&f" (f3),
#endif
[xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
[len] "+r" (len)
: [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
[err_th] "f" (error_threshold)
: "memory"
);
}
void WebRtcAec_InitAec_mips(void) {
WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
}
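
For readers of the assembly, the per-bin computation carried out by
WebRtcAec_OverdriveAndSuppress_mips above is equivalent to the following
scalar C (a sketch reconstructed from the asm; the canonical portable version
lives in aec_core.c):

    for (i = 0; i < PART_LEN1; i++) {
      /* Weight subbands: blend hNl toward the feedback value hNlFb. */
      if (hNl[i] > hNlFb) {
        hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
                 (1 - WebRtcAec_weightCurve[i]) * hNl[i];
      }
      hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
      /* Suppress the error signal; the imaginary part is negated
         (the neg.s in the asm). */
      efw[0][i] *= hNl[i];
      efw[1][i] *= hNl[i];
      efw[1][i] = -efw[1][i];
    }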


@@ -0,0 +1,736 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The core AEC algorithm, neon version of speed-critical functions.
*
* Based on aec_core_sse2.c.
*/
#include <arm_neon.h>
#include <math.h>
#include <string.h> // memset
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
enum { kShiftExponentIntoTopMantissa = 8 };
enum { kFloatExponentShift = 23 };
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
return aRe * bRe - aIm * bIm;
}
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
return aRe * bIm + aIm * bRe;
}
static void FilterFarNEON(AecCore* aec, float yf[2][PART_LEN1]) {
int i;
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int j;
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * PART_LEN1;
}
// vectorized code (four at once)
for (j = 0; j + 3 < PART_LEN1; j += 4) {
const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
const float32x4_t wfBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
const float32x4_t wfBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
const float32x4_t yf_re = vld1q_f32(&yf[0][j]);
const float32x4_t yf_im = vld1q_f32(&yf[1][j]);
const float32x4_t a = vmulq_f32(xfBuf_re, wfBuf_re);
const float32x4_t e = vmlsq_f32(a, xfBuf_im, wfBuf_im);
const float32x4_t c = vmulq_f32(xfBuf_re, wfBuf_im);
const float32x4_t f = vmlaq_f32(c, xfBuf_im, wfBuf_re);
const float32x4_t g = vaddq_f32(yf_re, e);
const float32x4_t h = vaddq_f32(yf_im, f);
vst1q_f32(&yf[0][j], g);
vst1q_f32(&yf[1][j], h);
}
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
}
}
}
// ARM64's arm_neon.h has already defined vdivq_f32 and vsqrtq_f32.
#if !defined (WEBRTC_ARCH_ARM64)
static float32x4_t vdivq_f32(float32x4_t a, float32x4_t b) {
int i;
float32x4_t x = vrecpeq_f32(b);
// from arm documentation
// The Newton-Raphson iteration:
// x[n+1] = x[n] * (2 - d * x[n])
// converges to (1/d) if x0 is the result of VRECPE applied to d.
//
// Note: The precision did not improve after 2 iterations.
for (i = 0; i < 2; i++) {
x = vmulq_f32(vrecpsq_f32(b, x), x);
}
// a/b = a*(1/b)
return vmulq_f32(a, x);
}
static float32x4_t vsqrtq_f32(float32x4_t s) {
int i;
float32x4_t x = vrsqrteq_f32(s);
// Code to handle sqrt(0).
// If the input to sqrtf() is zero, a zero will be returned.
// If the input to vrsqrteq_f32() is zero, positive infinity is returned.
const uint32x4_t vec_p_inf = vdupq_n_u32(0x7F800000);
// check for divide by zero
const uint32x4_t div_by_zero = vceqq_u32(vec_p_inf, vreinterpretq_u32_f32(x));
// zero out the positive infinity results
x = vreinterpretq_f32_u32(vandq_u32(vmvnq_u32(div_by_zero),
vreinterpretq_u32_f32(x)));
// from arm documentation
// The Newton-Raphson iteration:
// x[n+1] = x[n] * (3 - d * (x[n] * x[n])) / 2)
// converges to (1/√d) if x0 is the result of VRSQRTE applied to d.
//
// Note: The precision did not improve after 2 iterations.
for (i = 0; i < 2; i++) {
x = vmulq_f32(vrsqrtsq_f32(vmulq_f32(x, x), s), x);
}
// sqrt(s) = s * 1/sqrt(s)
return vmulq_f32(s, x);
}
#endif // WEBRTC_ARCH_ARM64
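// Similarly, a scalar sketch of the sqrt(s) = s * rsqrt(s) strategy used by
// vsqrtq_f32 above, with an explicit zero check in place of the infinity
// masking (illustration only, not part of the upstream file):
//
// #include <stdio.h>
//
// static float sqrt_nr(float s, float seed, int steps) {
//   float x = seed;  /* rough 1/sqrt(s) estimate, as vrsqrteq_f32 provides */
//   int i;
//   if (s == 0.0f) {
//     return 0.0f;  /* vrsqrteq_f32(0) yields +infinity, hence the masking */
//   }
//   for (i = 0; i < steps; i++) {
//     /* x[n+1] = x[n] * (3 - s * x[n] * x[n]) / 2, as vrsqrtsq_f32 computes */
//     x = x * (3.0f - s * x * x) * 0.5f;
//   }
//   return s * x;  /* sqrt(s) = s * (1/sqrt(s)) */
// }
//
// int main(void) {
//   printf("%.6f\n", sqrt_nr(2.0f, 0.7f, 4));  /* ~1.414214 */
//   return 0;
// }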
static void ScaleErrorSignalNEON(AecCore* aec, float ef[2][PART_LEN1]) {
const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled ?
kExtendedErrorThreshold : aec->normal_error_threshold;
const float32x4_t k1e_10f = vdupq_n_f32(1e-10f);
const float32x4_t kMu = vmovq_n_f32(mu);
const float32x4_t kThresh = vmovq_n_f32(error_threshold);
int i;
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const float32x4_t xPow = vld1q_f32(&aec->xPow[i]);
const float32x4_t ef_re_base = vld1q_f32(&ef[0][i]);
const float32x4_t ef_im_base = vld1q_f32(&ef[1][i]);
const float32x4_t xPowPlus = vaddq_f32(xPow, k1e_10f);
float32x4_t ef_re = vdivq_f32(ef_re_base, xPowPlus);
float32x4_t ef_im = vdivq_f32(ef_im_base, xPowPlus);
const float32x4_t ef_re2 = vmulq_f32(ef_re, ef_re);
const float32x4_t ef_sum2 = vmlaq_f32(ef_re2, ef_im, ef_im);
const float32x4_t absEf = vsqrtq_f32(ef_sum2);
const uint32x4_t bigger = vcgtq_f32(absEf, kThresh);
const float32x4_t absEfPlus = vaddq_f32(absEf, k1e_10f);
const float32x4_t absEfInv = vdivq_f32(kThresh, absEfPlus);
uint32x4_t ef_re_if = vreinterpretq_u32_f32(vmulq_f32(ef_re, absEfInv));
uint32x4_t ef_im_if = vreinterpretq_u32_f32(vmulq_f32(ef_im, absEfInv));
uint32x4_t ef_re_u32 = vandq_u32(vmvnq_u32(bigger),
vreinterpretq_u32_f32(ef_re));
uint32x4_t ef_im_u32 = vandq_u32(vmvnq_u32(bigger),
vreinterpretq_u32_f32(ef_im));
ef_re_if = vandq_u32(bigger, ef_re_if);
ef_im_if = vandq_u32(bigger, ef_im_if);
ef_re_u32 = vorrq_u32(ef_re_u32, ef_re_if);
ef_im_u32 = vorrq_u32(ef_im_u32, ef_im_if);
ef_re = vmulq_f32(vreinterpretq_f32_u32(ef_re_u32), kMu);
ef_im = vmulq_f32(vreinterpretq_f32_u32(ef_im_u32), kMu);
vst1q_f32(&ef[0][i], ef_re);
vst1q_f32(&ef[1][i], ef_im);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
float abs_ef;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
if (abs_ef > error_threshold) {
abs_ef = error_threshold / (abs_ef + 1e-10f);
ef[0][i] *= abs_ef;
ef[1][i] *= abs_ef;
}
// Stepsize factor
ef[0][i] *= mu;
ef[1][i] *= mu;
}
}
static void FilterAdaptationNEON(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]) {
int i;
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
int j;
// Check for wrap
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * PART_LEN1;
}
// Process the whole array...
for (j = 0; j < PART_LEN; j += 4) {
// Load xfBuf and ef.
const float32x4_t xfBuf_re = vld1q_f32(&aec->xfBuf[0][xPos + j]);
const float32x4_t xfBuf_im = vld1q_f32(&aec->xfBuf[1][xPos + j]);
const float32x4_t ef_re = vld1q_f32(&ef[0][j]);
const float32x4_t ef_im = vld1q_f32(&ef[1][j]);
// Calculate the product of conjugate(xfBuf) by ef.
// re(conjugate(a) * b) = aRe * bRe + aIm * bIm
// im(conjugate(a) * b) = aRe * bIm - aIm * bRe
const float32x4_t a = vmulq_f32(xfBuf_re, ef_re);
const float32x4_t e = vmlaq_f32(a, xfBuf_im, ef_im);
const float32x4_t c = vmulq_f32(xfBuf_re, ef_im);
const float32x4_t f = vmlsq_f32(c, xfBuf_im, ef_re);
// Interleave real and imaginary parts.
const float32x4x2_t g_n_h = vzipq_f32(e, f);
// Store
vst1q_f32(&fft[2 * j + 0], g_n_h.val[0]);
vst1q_f32(&fft[2 * j + 4], g_n_h.val[1]);
}
// ... and fixup the first imaginary entry.
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-aec->xfBuf[1][xPos + PART_LEN],
ef[0][PART_LEN],
ef[1][PART_LEN]);
aec_rdft_inverse_128(fft);
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
// fft scaling
{
const float scale = 2.0f / PART_LEN2;
const float32x4_t scale_ps = vmovq_n_f32(scale);
for (j = 0; j < PART_LEN; j += 4) {
const float32x4_t fft_ps = vld1q_f32(&fft[j]);
const float32x4_t fft_scale = vmulq_f32(fft_ps, scale_ps);
vst1q_f32(&fft[j], fft_scale);
}
}
aec_rdft_forward_128(fft);
{
const float wt1 = aec->wfBuf[1][pos];
aec->wfBuf[0][pos + PART_LEN] += fft[1];
for (j = 0; j < PART_LEN; j += 4) {
float32x4_t wtBuf_re = vld1q_f32(&aec->wfBuf[0][pos + j]);
float32x4_t wtBuf_im = vld1q_f32(&aec->wfBuf[1][pos + j]);
const float32x4_t fft0 = vld1q_f32(&fft[2 * j + 0]);
const float32x4_t fft4 = vld1q_f32(&fft[2 * j + 4]);
const float32x4x2_t fft_re_im = vuzpq_f32(fft0, fft4);
wtBuf_re = vaddq_f32(wtBuf_re, fft_re_im.val[0]);
wtBuf_im = vaddq_f32(wtBuf_im, fft_re_im.val[1]);
vst1q_f32(&aec->wfBuf[0][pos + j], wtBuf_re);
vst1q_f32(&aec->wfBuf[1][pos + j], wtBuf_im);
}
aec->wfBuf[1][pos] = wt1;
}
}
}
static float32x4_t vpowq_f32(float32x4_t a, float32x4_t b) {
// a^b = exp2(b * log2(a))
// exp2(x) and log2(x) are calculated using polynomial approximations.
float32x4_t log2_a, b_log2_a, a_exp_b;
// Calculate log2(x), x = a.
{
// To calculate log2(x), we decompose x like this:
// x = y * 2^n
// n is an integer
// y is in the [1.0, 2.0) range
//
// log2(x) = log2(y) + n
// n can be evaluated by playing with float representation.
// log2(y) in a small range can be approximated, this code uses an order
// five polynomial approximation. The coefficients have been
// estimated with the Remez algorithm and the resulting
// polynomial has a maximum relative error of 0.00086%.
// Compute n.
// This is done by masking the exponent, shifting it into the top bit of
// the mantissa, putting eight into the biased exponent (to compensate
// for the fact that the exponent has been shifted into the top/
// fractional part), and finally getting rid of the implicit leading one
// from the mantissa by subtracting it out.
const uint32x4_t vec_float_exponent_mask = vdupq_n_u32(0x7F800000);
const uint32x4_t vec_eight_biased_exponent = vdupq_n_u32(0x43800000);
const uint32x4_t vec_implicit_leading_one = vdupq_n_u32(0x43BF8000);
const uint32x4_t two_n = vandq_u32(vreinterpretq_u32_f32(a),
vec_float_exponent_mask);
const uint32x4_t n_1 = vshrq_n_u32(two_n, kShiftExponentIntoTopMantissa);
const uint32x4_t n_0 = vorrq_u32(n_1, vec_eight_biased_exponent);
const float32x4_t n =
vsubq_f32(vreinterpretq_f32_u32(n_0),
vreinterpretq_f32_u32(vec_implicit_leading_one));
// Compute y.
const uint32x4_t vec_mantissa_mask = vdupq_n_u32(0x007FFFFF);
const uint32x4_t vec_zero_biased_exponent_is_one = vdupq_n_u32(0x3F800000);
const uint32x4_t mantissa = vandq_u32(vreinterpretq_u32_f32(a),
vec_mantissa_mask);
const float32x4_t y =
vreinterpretq_f32_u32(vorrq_u32(mantissa,
vec_zero_biased_exponent_is_one));
// Approximate log2(y) ~= (y - 1) * pol5(y).
// pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
const float32x4_t C5 = vdupq_n_f32(-3.4436006e-2f);
const float32x4_t C4 = vdupq_n_f32(3.1821337e-1f);
const float32x4_t C3 = vdupq_n_f32(-1.2315303f);
const float32x4_t C2 = vdupq_n_f32(2.5988452f);
const float32x4_t C1 = vdupq_n_f32(-3.3241990f);
const float32x4_t C0 = vdupq_n_f32(3.1157899f);
float32x4_t pol5_y = C5;
pol5_y = vmlaq_f32(C4, y, pol5_y);
pol5_y = vmlaq_f32(C3, y, pol5_y);
pol5_y = vmlaq_f32(C2, y, pol5_y);
pol5_y = vmlaq_f32(C1, y, pol5_y);
pol5_y = vmlaq_f32(C0, y, pol5_y);
const float32x4_t y_minus_one =
vsubq_f32(y, vreinterpretq_f32_u32(vec_zero_biased_exponent_is_one));
const float32x4_t log2_y = vmulq_f32(y_minus_one, pol5_y);
// Combine parts.
log2_a = vaddq_f32(n, log2_y);
}
// b * log2(a)
b_log2_a = vmulq_f32(b, log2_a);
// Calculate exp2(x), x = b * log2(a).
{
// To calculate 2^x, we decompose x like this:
// x = n + y
// n is an integer, the value of x - 0.5 rounded down, therefore
// y is in the [0.5, 1.5) range
//
// 2^x = 2^n * 2^y
// 2^n can be evaluated by playing with float representation.
// 2^y in a small range can be approximated, this code uses an order two
// polynomial approximation. The coefficients have been estimated
// with the Remez algorithm and the resulting polynomial has a
// maximum relative error of 0.17%.
// To avoid over/underflow, we reduce the range of input to ]-127, 129].
const float32x4_t max_input = vdupq_n_f32(129.f);
const float32x4_t min_input = vdupq_n_f32(-126.99999f);
const float32x4_t x_min = vminq_f32(b_log2_a, max_input);
const float32x4_t x_max = vmaxq_f32(x_min, min_input);
// Compute n.
const float32x4_t half = vdupq_n_f32(0.5f);
const float32x4_t x_minus_half = vsubq_f32(x_max, half);
const int32x4_t x_minus_half_floor = vcvtq_s32_f32(x_minus_half);
// Compute 2^n.
const int32x4_t float_exponent_bias = vdupq_n_s32(127);
const int32x4_t two_n_exponent =
vaddq_s32(x_minus_half_floor, float_exponent_bias);
const float32x4_t two_n =
vreinterpretq_f32_s32(vshlq_n_s32(two_n_exponent, kFloatExponentShift));
// Compute y.
const float32x4_t y = vsubq_f32(x_max, vcvtq_f32_s32(x_minus_half_floor));
// Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
const float32x4_t C2 = vdupq_n_f32(3.3718944e-1f);
const float32x4_t C1 = vdupq_n_f32(6.5763628e-1f);
const float32x4_t C0 = vdupq_n_f32(1.0017247f);
float32x4_t exp2_y = C2;
exp2_y = vmlaq_f32(C1, y, exp2_y);
exp2_y = vmlaq_f32(C0, y, exp2_y);
// Combine parts.
a_exp_b = vmulq_f32(exp2_y, two_n);
}
return a_exp_b;
}
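// For reference, the same decomposition in scalar C with the identical Remez
// coefficients (a sketch for illustration only; it truncates rather than
// floors when computing the exponent split, exactly as vcvtq_s32_f32 does,
// and omits the ]-127, 129] range clamp):
//
// #include <stdio.h>
// #include <string.h>
//
// static float pow_approx(float a, float b) {
//   /* log2(a): pull n from the exponent bits and y in [1.0, 2.0) from the
//    * mantissa, so that a = y * 2^n and log2(a) = n + log2(y). */
//   unsigned int bits;
//   memcpy(&bits, &a, sizeof(bits));
//   const int n = (int)((bits >> 23) & 0xFF) - 127;  /* unbiased exponent */
//   bits = (bits & 0x007FFFFF) | 0x3F800000;         /* force exponent to 0 */
//   float y;
//   memcpy(&y, &bits, sizeof(y));
//   /* log2(y) ~= (y - 1) * pol5(y), Horner form. */
//   float p = -3.4436006e-2f;
//   p = p * y + 3.1821337e-1f;
//   p = p * y - 1.2315303f;
//   p = p * y + 2.5988452f;
//   p = p * y - 3.3241990f;
//   p = p * y + 3.1157899f;
//   const float log2_a = (float)n + (y - 1.0f) * p;
//   /* exp2(x): split x = m + z, build 2^m from the exponent bits, and
//    * approximate 2^z with the order-two polynomial. */
//   const float x = b * log2_a;
//   const int m = (int)(x - 0.5f);  /* truncation, like vcvtq_s32_f32 */
//   const float z = x - (float)m;
//   float q = 3.3718944e-1f;
//   q = q * z + 6.5763628e-1f;
//   q = q * z + 1.0017247f;
//   unsigned int ebits = (unsigned int)(m + 127) << 23;  /* 2^m */
//   float two_m;
//   memcpy(&two_m, &ebits, sizeof(two_m));
//   return q * two_m;
// }
//
// int main(void) {
//   printf("%.2f\n", pow_approx(2.0f, 10.0f));  /* ~1024 within ~0.2% */
//   return 0;
// }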
static void OverdriveAndSuppressNEON(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
const float32x4_t vec_hNlFb = vmovq_n_f32(hNlFb);
const float32x4_t vec_one = vdupq_n_f32(1.0f);
const float32x4_t vec_minus_one = vdupq_n_f32(-1.0f);
const float32x4_t vec_overDriveSm = vmovq_n_f32(aec->overDriveSm);
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i += 4) {
// Weight subbands
float32x4_t vec_hNl = vld1q_f32(&hNl[i]);
const float32x4_t vec_weightCurve = vld1q_f32(&WebRtcAec_weightCurve[i]);
const uint32x4_t bigger = vcgtq_f32(vec_hNl, vec_hNlFb);
const float32x4_t vec_weightCurve_hNlFb = vmulq_f32(vec_weightCurve,
vec_hNlFb);
const float32x4_t vec_one_weightCurve = vsubq_f32(vec_one, vec_weightCurve);
const float32x4_t vec_one_weightCurve_hNl = vmulq_f32(vec_one_weightCurve,
vec_hNl);
const uint32x4_t vec_if0 = vandq_u32(vmvnq_u32(bigger),
vreinterpretq_u32_f32(vec_hNl));
const float32x4_t vec_one_weightCurve_add =
vaddq_f32(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl);
const uint32x4_t vec_if1 =
vandq_u32(bigger, vreinterpretq_u32_f32(vec_one_weightCurve_add));
vec_hNl = vreinterpretq_f32_u32(vorrq_u32(vec_if0, vec_if1));
{
const float32x4_t vec_overDriveCurve =
vld1q_f32(&WebRtcAec_overDriveCurve[i]);
const float32x4_t vec_overDriveSm_overDriveCurve =
vmulq_f32(vec_overDriveSm, vec_overDriveCurve);
vec_hNl = vpowq_f32(vec_hNl, vec_overDriveSm_overDriveCurve);
vst1q_f32(&hNl[i], vec_hNl);
}
// Suppress error signal
{
float32x4_t vec_efw_re = vld1q_f32(&efw[0][i]);
float32x4_t vec_efw_im = vld1q_f32(&efw[1][i]);
vec_efw_re = vmulq_f32(vec_efw_re, vec_hNl);
vec_efw_im = vmulq_f32(vec_efw_im, vec_hNl);
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
vec_efw_im = vmulq_f32(vec_efw_im, vec_minus_one);
vst1q_f32(&efw[0][i], vec_efw_re);
vst1q_f32(&efw[1][i], vec_efw_im);
}
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
// Weight subbands
if (hNl[i] > hNlFb) {
hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
}
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
// Suppress error signal
efw[0][i] *= hNl[i];
efw[1][i] *= hNl[i];
// Ooura fft returns incorrect sign on imaginary component. It matters
// here because we are making an additive change with comfort noise.
efw[1][i] *= -1;
}
}
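// A worked instance of the overdrive step above, with hypothetical values
// (illustration only):
//
// #include <math.h>
// #include <stdio.h>
//
// int main(void) {
//   const float hNl = 0.5f;  /* hypothetical subband suppression gain */
//   /* Exponents above one (strong overdrive) push the gain toward zero
//    * and deepen suppression; exponents below one soften it. */
//   printf("%.4f %.4f\n", powf(hNl, 2.0f), powf(hNl, 0.5f));
//   /* prints 0.2500 0.7071 */
//   return 0;
// }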
static int PartitionDelay(const AecCore* aec) {
// Measures the energy in each filter partition and returns the partition
// with the highest energy.
// TODO(bjornv): Spread computational cost by computing one partition per
// block?
float wfEnMax = 0;
int i;
int delay = 0;
for (i = 0; i < aec->num_partitions; i++) {
int j;
int pos = i * PART_LEN1;
float wfEn = 0;
float32x4_t vec_wfEn = vdupq_n_f32(0.0f);
// vectorized code (four at once)
for (j = 0; j + 3 < PART_LEN1; j += 4) {
const float32x4_t vec_wfBuf0 = vld1q_f32(&aec->wfBuf[0][pos + j]);
const float32x4_t vec_wfBuf1 = vld1q_f32(&aec->wfBuf[1][pos + j]);
vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf0, vec_wfBuf0);
vec_wfEn = vmlaq_f32(vec_wfEn, vec_wfBuf1, vec_wfBuf1);
}
{
float32x2_t vec_total;
// A B C D
vec_total = vpadd_f32(vget_low_f32(vec_wfEn), vget_high_f32(vec_wfEn));
// A+B C+D
vec_total = vpadd_f32(vec_total, vec_total);
// A+B+C+D A+B+C+D
wfEn = vget_lane_f32(vec_total, 0);
}
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
}
if (wfEn > wfEnMax) {
wfEnMax = wfEn;
delay = i;
}
}
return delay;
}
// Updates the following smoothed Power Spectral Densities (PSD):
// - sd : near-end
// - se : residual echo
// - sx : far-end
// - sde : cross-PSD of near-end and residual echo
// - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined, upon which actions are taken.
static void SmoothedPSD(AecCore* aec,
float efw[2][PART_LEN1],
float dfw[2][PART_LEN1],
float xfw[2][PART_LEN1]) {
// Power estimate smoothing coefficients.
const float* ptrGCoh = aec->extended_filter_enabled
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
int i;
float sdSum = 0, seSum = 0;
const float32x4_t vec_15 = vdupq_n_f32(WebRtcAec_kMinFarendPSD);
float32x4_t vec_sdSum = vdupq_n_f32(0.0f);
float32x4_t vec_seSum = vdupq_n_f32(0.0f);
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const float32x4_t vec_dfw0 = vld1q_f32(&dfw[0][i]);
const float32x4_t vec_dfw1 = vld1q_f32(&dfw[1][i]);
const float32x4_t vec_efw0 = vld1q_f32(&efw[0][i]);
const float32x4_t vec_efw1 = vld1q_f32(&efw[1][i]);
const float32x4_t vec_xfw0 = vld1q_f32(&xfw[0][i]);
const float32x4_t vec_xfw1 = vld1q_f32(&xfw[1][i]);
float32x4_t vec_sd = vmulq_n_f32(vld1q_f32(&aec->sd[i]), ptrGCoh[0]);
float32x4_t vec_se = vmulq_n_f32(vld1q_f32(&aec->se[i]), ptrGCoh[0]);
float32x4_t vec_sx = vmulq_n_f32(vld1q_f32(&aec->sx[i]), ptrGCoh[0]);
float32x4_t vec_dfw_sumsq = vmulq_f32(vec_dfw0, vec_dfw0);
float32x4_t vec_efw_sumsq = vmulq_f32(vec_efw0, vec_efw0);
float32x4_t vec_xfw_sumsq = vmulq_f32(vec_xfw0, vec_xfw0);
vec_dfw_sumsq = vmlaq_f32(vec_dfw_sumsq, vec_dfw1, vec_dfw1);
vec_efw_sumsq = vmlaq_f32(vec_efw_sumsq, vec_efw1, vec_efw1);
vec_xfw_sumsq = vmlaq_f32(vec_xfw_sumsq, vec_xfw1, vec_xfw1);
vec_xfw_sumsq = vmaxq_f32(vec_xfw_sumsq, vec_15);
vec_sd = vmlaq_n_f32(vec_sd, vec_dfw_sumsq, ptrGCoh[1]);
vec_se = vmlaq_n_f32(vec_se, vec_efw_sumsq, ptrGCoh[1]);
vec_sx = vmlaq_n_f32(vec_sx, vec_xfw_sumsq, ptrGCoh[1]);
vst1q_f32(&aec->sd[i], vec_sd);
vst1q_f32(&aec->se[i], vec_se);
vst1q_f32(&aec->sx[i], vec_sx);
{
float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
float32x4_t vec_dfwefw0011 = vmulq_f32(vec_dfw0, vec_efw0);
float32x4_t vec_dfwefw0110 = vmulq_f32(vec_dfw0, vec_efw1);
vec_sde.val[0] = vmulq_n_f32(vec_sde.val[0], ptrGCoh[0]);
vec_sde.val[1] = vmulq_n_f32(vec_sde.val[1], ptrGCoh[0]);
vec_dfwefw0011 = vmlaq_f32(vec_dfwefw0011, vec_dfw1, vec_efw1);
vec_dfwefw0110 = vmlsq_f32(vec_dfwefw0110, vec_dfw1, vec_efw0);
vec_sde.val[0] = vmlaq_n_f32(vec_sde.val[0], vec_dfwefw0011, ptrGCoh[1]);
vec_sde.val[1] = vmlaq_n_f32(vec_sde.val[1], vec_dfwefw0110, ptrGCoh[1]);
vst2q_f32(&aec->sde[i][0], vec_sde);
}
{
float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
float32x4_t vec_dfwxfw0011 = vmulq_f32(vec_dfw0, vec_xfw0);
float32x4_t vec_dfwxfw0110 = vmulq_f32(vec_dfw0, vec_xfw1);
vec_sxd.val[0] = vmulq_n_f32(vec_sxd.val[0], ptrGCoh[0]);
vec_sxd.val[1] = vmulq_n_f32(vec_sxd.val[1], ptrGCoh[0]);
vec_dfwxfw0011 = vmlaq_f32(vec_dfwxfw0011, vec_dfw1, vec_xfw1);
vec_dfwxfw0110 = vmlsq_f32(vec_dfwxfw0110, vec_dfw1, vec_xfw0);
vec_sxd.val[0] = vmlaq_n_f32(vec_sxd.val[0], vec_dfwxfw0011, ptrGCoh[1]);
vec_sxd.val[1] = vmlaq_n_f32(vec_sxd.val[1], vec_dfwxfw0110, ptrGCoh[1]);
vst2q_f32(&aec->sxd[i][0], vec_sxd);
}
vec_sdSum = vaddq_f32(vec_sdSum, vec_sd);
vec_seSum = vaddq_f32(vec_seSum, vec_se);
}
{
float32x2_t vec_sdSum_total;
float32x2_t vec_seSum_total;
// A B C D
vec_sdSum_total = vpadd_f32(vget_low_f32(vec_sdSum),
vget_high_f32(vec_sdSum));
vec_seSum_total = vpadd_f32(vget_low_f32(vec_seSum),
vget_high_f32(vec_seSum));
// A+B C+D
vec_sdSum_total = vpadd_f32(vec_sdSum_total, vec_sdSum_total);
vec_seSum_total = vpadd_f32(vec_seSum_total, vec_seSum_total);
// A+B+C+D A+B+C+D
sdSum = vget_lane_f32(vec_sdSum_total, 0);
seSum = vget_lane_f32(vec_seSum_total, 0);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
aec->se[i] = ptrGCoh[0] * aec->se[i] +
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
// We threshold here to protect against the ill-effects of a zero farend.
// The threshold is not arbitrarily chosen, but balances protection and
// adverse interaction with the algorithm's tuning.
// TODO(bjornv): investigate further why this is so sensitive.
aec->sx[i] =
ptrGCoh[0] * aec->sx[i] +
ptrGCoh[1] * WEBRTC_SPL_MAX(
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
WebRtcAec_kMinFarendPSD);
aec->sde[i][0] =
ptrGCoh[0] * aec->sde[i][0] +
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
aec->sde[i][1] =
ptrGCoh[0] * aec->sde[i][1] +
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
aec->sxd[i][0] =
ptrGCoh[0] * aec->sxd[i][0] +
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
aec->sxd[i][1] =
ptrGCoh[0] * aec->sxd[i][1] +
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
sdSum += aec->sd[i];
seSum += aec->se[i];
}
// Divergent filter safeguard.
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
if (aec->divergeState)
memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
// Reset if error is significantly larger than nearend (13 dB).
if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
}
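// Every per-bin update above is the same first-order recursion; a minimal
// scalar sketch follows (the coefficient values here are placeholders, not
// the tuned WebRTC smoothing tables):
//
// #include <stdio.h>
//
// static float smooth_psd(float s, float re, float im, float g0, float g1) {
//   /* s <- g0 * s + g1 * |v|^2; with g0 + g1 = 1 this is an exponential
//    * moving average of the power in one frequency bin. */
//   return g0 * s + g1 * (re * re + im * im);
// }
//
// int main(void) {
//   float s = 0.0f;
//   int k;
//   for (k = 0; k < 5; k++) {
//     s = smooth_psd(s, 1.0f, 1.0f, 0.9f, 0.1f);  /* placeholder g0, g1 */
//   }
//   printf("%.4f\n", s);  /* 0.8190; converges toward |v|^2 = 2.0 */
//   return 0;
// }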
// Window time domain data to be used by the fft.
__inline static void WindowData(float* x_windowed, const float* x) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const float32x4_t vec_Buf1 = vld1q_f32(&x[i]);
const float32x4_t vec_Buf2 = vld1q_f32(&x[PART_LEN + i]);
const float32x4_t vec_sqrtHanning = vld1q_f32(&WebRtcAec_sqrtHanning[i]);
// A B C D
float32x4_t vec_sqrtHanning_rev =
vld1q_f32(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
// B A D C
vec_sqrtHanning_rev = vrev64q_f32(vec_sqrtHanning_rev);
// D C B A
vec_sqrtHanning_rev = vcombine_f32(vget_high_f32(vec_sqrtHanning_rev),
vget_low_f32(vec_sqrtHanning_rev));
vst1q_f32(&x_windowed[i], vmulq_f32(vec_Buf1, vec_sqrtHanning));
vst1q_f32(&x_windowed[PART_LEN + i],
vmulq_f32(vec_Buf2, vec_sqrtHanning_rev));
}
}
// Puts fft output data into a complex valued array.
__inline static void StoreAsComplex(const float* data,
float data_complex[2][PART_LEN1]) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const float32x4x2_t vec_data = vld2q_f32(&data[2 * i]);
vst1q_f32(&data_complex[0][i], vec_data.val[0]);
vst1q_f32(&data_complex[1][i], vec_data.val[1]);
}
// fix beginning/end values
data_complex[1][0] = 0;
data_complex[1][PART_LEN] = 0;
data_complex[0][0] = data[0];
data_complex[0][PART_LEN] = data[1];
}
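// A note on the fixups above: Ooura's real FFT packs the two purely real
// bins into the first pair, data[0] holding the DC value and data[1] the
// Nyquist value, so after de-interleaving the complex pairs the code zeroes
// both imaginary endpoints and places the two reals at bins 0 and PART_LEN.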
static void SubbandCoherenceNEON(AecCore* aec,
float efw[2][PART_LEN1],
float xfw[2][PART_LEN1],
float* fft,
float* cohde,
float* cohxd) {
float dfw[2][PART_LEN1];
int i;
if (aec->delayEstCtr == 0)
aec->delayIdx = PartitionDelay(aec);
// Use delayed far.
memcpy(xfw,
aec->xfwBuf + aec->delayIdx * PART_LEN1,
sizeof(xfw[0][0]) * 2 * PART_LEN1);
// Windowed near fft
WindowData(fft, aec->dBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, dfw);
// Windowed error fft
WindowData(fft, aec->eBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, efw);
SmoothedPSD(aec, efw, dfw, xfw);
{
const float32x4_t vec_1eminus10 = vdupq_n_f32(1e-10f);
// Subband coherence
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const float32x4_t vec_sd = vld1q_f32(&aec->sd[i]);
const float32x4_t vec_se = vld1q_f32(&aec->se[i]);
const float32x4_t vec_sx = vld1q_f32(&aec->sx[i]);
const float32x4_t vec_sdse = vmlaq_f32(vec_1eminus10, vec_sd, vec_se);
const float32x4_t vec_sdsx = vmlaq_f32(vec_1eminus10, vec_sd, vec_sx);
float32x4x2_t vec_sde = vld2q_f32(&aec->sde[i][0]);
float32x4x2_t vec_sxd = vld2q_f32(&aec->sxd[i][0]);
float32x4_t vec_cohde = vmulq_f32(vec_sde.val[0], vec_sde.val[0]);
float32x4_t vec_cohxd = vmulq_f32(vec_sxd.val[0], vec_sxd.val[0]);
vec_cohde = vmlaq_f32(vec_cohde, vec_sde.val[1], vec_sde.val[1]);
vec_cohde = vdivq_f32(vec_cohde, vec_sdse);
vec_cohxd = vmlaq_f32(vec_cohxd, vec_sxd.val[1], vec_sxd.val[1]);
vec_cohxd = vdivq_f32(vec_cohxd, vec_sdsx);
vst1q_f32(&cohde[i], vec_cohde);
vst1q_f32(&cohxd[i], vec_cohxd);
}
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
cohde[i] =
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
(aec->sd[i] * aec->se[i] + 1e-10f);
cohxd[i] =
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
(aec->sx[i] * aec->sd[i] + 1e-10f);
}
}
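// The per-bin quantity computed above is the magnitude-squared coherence,
// restated below as a scalar sketch (illustration only); values of cohxd
// near 1 suggest a bin dominated by far-end echo, while high cohde suggests
// the error signal still tracks the near end:
//
// #include <stdio.h>
//
// static float msc(float cross_re, float cross_im, float auto_a, float auto_b) {
//   /* |S_ab|^2 / (S_aa * S_bb + eps), in [0, 1]; 1 means the two signals
//    * are fully linearly related in that subband. */
//   return (cross_re * cross_re + cross_im * cross_im) /
//          (auto_a * auto_b + 1e-10f);
// }
//
// int main(void) {
//   /* Hypothetical smoothed PSD values for one bin. */
//   printf("%.4f\n", msc(0.9f, 0.0f, 1.0f, 1.0f));  /* 0.8100 */
//   return 0;
// }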
void WebRtcAec_InitAec_neon(void) {
WebRtcAec_FilterFar = FilterFarNEON;
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalNEON;
WebRtcAec_FilterAdaptation = FilterAdaptationNEON;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressNEON;
WebRtcAec_SubbandCoherence = SubbandCoherenceNEON;
}

View File

@ -12,35 +12,33 @@
* The core AEC algorithm, SSE2 version of speed-critical functions.
*/
#include "typedefs.h"
#if defined(WEBRTC_USE_SSE2)
#include <emmintrin.h>
#include <math.h>
#include <string.h> // memset
#include "aec_core.h"
#include "aec_rdft.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_common.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
{
__inline static float MulRe(float aRe, float aIm, float bRe, float bIm) {
return aRe * bRe - aIm * bIm;
}
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
{
__inline static float MulIm(float aRe, float aIm, float bRe, float bIm) {
return aRe * bIm + aIm * bRe;
}
static void FilterFarSSE2(aec_t *aec, float yf[2][PART_LEN1])
{
static void FilterFarSSE2(AecCore* aec, float yf[2][PART_LEN1]) {
int i;
for (i = 0; i < NR_PART; i++) {
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int j;
int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= NR_PART) {
xPos -= NR_PART*(PART_LEN1);
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * (PART_LEN1);
}
// vectorized code (four at once)
@ -64,19 +62,25 @@ static void FilterFarSSE2(aec_t *aec, float yf[2][PART_LEN1])
}
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
yf[0][j] += MulRe(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
yf[1][j] += MulIm(aec->xfBuf[0][xPos + j],
aec->xfBuf[1][xPos + j],
aec->wfBuf[0][pos + j],
aec->wfBuf[1][pos + j]);
}
}
}
static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1])
{
static void ScaleErrorSignalSSE2(AecCore* aec, float ef[2][PART_LEN1]) {
const __m128 k1e_10f = _mm_set1_ps(1e-10f);
const __m128 kThresh = _mm_set1_ps(aec->errThresh);
const __m128 kMu = _mm_set1_ps(aec->mu);
const __m128 kMu = aec->extended_filter_enabled ? _mm_set1_ps(kExtendedMu)
: _mm_set1_ps(aec->normal_mu);
const __m128 kThresh = aec->extended_filter_enabled
? _mm_set1_ps(kExtendedErrorThreshold)
: _mm_set1_ps(aec->normal_error_threshold);
int i;
// vectorized code (four at once)
@ -110,36 +114,46 @@ static void ScaleErrorSignalSSE2(aec_t *aec, float ef[2][PART_LEN1])
_mm_storeu_ps(&ef[1][i], ef_im);
}
// scalar code for the remaining items.
for (; i < (PART_LEN1); i++) {
float absEf;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
{
const float mu =
aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
const float error_threshold = aec->extended_filter_enabled
? kExtendedErrorThreshold
: aec->normal_error_threshold;
for (; i < (PART_LEN1); i++) {
float abs_ef;
ef[0][i] /= (aec->xPow[i] + 1e-10f);
ef[1][i] /= (aec->xPow[i] + 1e-10f);
abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
if (absEf > aec->errThresh) {
absEf = aec->errThresh / (absEf + 1e-10f);
ef[0][i] *= absEf;
ef[1][i] *= absEf;
if (abs_ef > error_threshold) {
abs_ef = error_threshold / (abs_ef + 1e-10f);
ef[0][i] *= abs_ef;
ef[1][i] *= abs_ef;
}
// Stepsize factor
ef[0][i] *= mu;
ef[1][i] *= mu;
}
// Stepsize factor
ef[0][i] *= aec->mu;
ef[1][i] *= aec->mu;
}
}
static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
static void FilterAdaptationSSE2(AecCore* aec,
float* fft,
float ef[2][PART_LEN1]) {
int i, j;
for (i = 0; i < NR_PART; i++) {
int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
const int num_partitions = aec->num_partitions;
for (i = 0; i < num_partitions; i++) {
int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
int pos = i * PART_LEN1;
// Check for wrap
if (i + aec->xfBufBlockPos >= NR_PART) {
xPos -= NR_PART * PART_LEN1;
if (i + aec->xfBufBlockPos >= num_partitions) {
xPos -= num_partitions * PART_LEN1;
}
// Process the whole array...
for (j = 0; j < PART_LEN; j+= 4) {
for (j = 0; j < PART_LEN; j += 4) {
// Load xfBuf and ef.
const __m128 xfBuf_re = _mm_loadu_ps(&aec->xfBuf[0][xPos + j]);
const __m128 xfBuf_im = _mm_loadu_ps(&aec->xfBuf[1][xPos + j]);
@ -158,22 +172,23 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
const __m128 g = _mm_unpacklo_ps(e, f);
const __m128 h = _mm_unpackhi_ps(e, f);
// Store
_mm_storeu_ps(&fft[2*j + 0], g);
_mm_storeu_ps(&fft[2*j + 4], h);
_mm_storeu_ps(&fft[2 * j + 0], g);
_mm_storeu_ps(&fft[2 * j + 4], h);
}
// ... and fixup the first imaginary entry.
fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
-aec->xfBuf[1][xPos + PART_LEN],
ef[0][PART_LEN], ef[1][PART_LEN]);
ef[0][PART_LEN],
ef[1][PART_LEN]);
aec_rdft_inverse_128(fft);
memset(fft + PART_LEN, 0, sizeof(float)*PART_LEN);
memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
// fft scaling
{
float scale = 2.0f / PART_LEN2;
const __m128 scale_ps = _mm_load_ps1(&scale);
for (j = 0; j < PART_LEN; j+=4) {
for (j = 0; j < PART_LEN; j += 4) {
const __m128 fft_ps = _mm_loadu_ps(&fft[j]);
const __m128 fft_scale = _mm_mul_ps(fft_ps, scale_ps);
_mm_storeu_ps(&fft[j], fft_scale);
@ -184,13 +199,15 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
{
float wt1 = aec->wfBuf[1][pos];
aec->wfBuf[0][pos + PART_LEN] += fft[1];
for (j = 0; j < PART_LEN; j+= 4) {
for (j = 0; j < PART_LEN; j += 4) {
__m128 wtBuf_re = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
__m128 wtBuf_im = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
const __m128 fft0 = _mm_loadu_ps(&fft[2 * j + 0]);
const __m128 fft4 = _mm_loadu_ps(&fft[2 * j + 4]);
const __m128 fft_re = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2 ,0));
const __m128 fft_im = _mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3 ,1));
const __m128 fft_re =
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(2, 0, 2, 0));
const __m128 fft_im =
_mm_shuffle_ps(fft0, fft4, _MM_SHUFFLE(3, 1, 3, 1));
wtBuf_re = _mm_add_ps(wtBuf_re, fft_re);
wtBuf_im = _mm_add_ps(wtBuf_im, fft_im);
_mm_storeu_ps(&aec->wfBuf[0][pos + j], wtBuf_re);
@ -201,8 +218,7 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
}
}
static __m128 mm_pow_ps(__m128 a, __m128 b)
{
static __m128 mm_pow_ps(__m128 a, __m128 b) {
// a^b = exp2(b * log2(a))
// exp2(x) and log2(x) are calculated using polynomial approximations.
__m128 log2_a, b_log2_a, a_exp_b;
@ -227,55 +243,55 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
// compensate for the fact that the exponent has been shifted into the top/
// fractional part, and finally getting rid of the implicit leading one
// from the mantissa by subtracting it out.
static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END =
{0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END =
{0x43800000, 0x43800000, 0x43800000, 0x43800000};
static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END =
{0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
static const ALIGN16_BEG int float_exponent_mask[4] ALIGN16_END = {
0x7F800000, 0x7F800000, 0x7F800000, 0x7F800000};
static const ALIGN16_BEG int eight_biased_exponent[4] ALIGN16_END = {
0x43800000, 0x43800000, 0x43800000, 0x43800000};
static const ALIGN16_BEG int implicit_leading_one[4] ALIGN16_END = {
0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
static const int shift_exponent_into_top_mantissa = 8;
const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask));
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(two_n),
shift_exponent_into_top_mantissa));
const __m128 n_0 = _mm_or_ps(n_1, *((__m128 *)eight_biased_exponent));
const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one));
const __m128 two_n = _mm_and_ps(a, *((__m128*)float_exponent_mask));
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(
_mm_castps_si128(two_n), shift_exponent_into_top_mantissa));
const __m128 n_0 = _mm_or_ps(n_1, *((__m128*)eight_biased_exponent));
const __m128 n = _mm_sub_ps(n_0, *((__m128*)implicit_leading_one));
// Compute y.
static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END =
{0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END =
{0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
const __m128 mantissa = _mm_and_ps(a, *((__m128 *)mantissa_mask));
const __m128 y = _mm_or_ps(
mantissa, *((__m128 *)zero_biased_exponent_is_one));
static const ALIGN16_BEG int mantissa_mask[4] ALIGN16_END = {
0x007FFFFF, 0x007FFFFF, 0x007FFFFF, 0x007FFFFF};
static const ALIGN16_BEG int zero_biased_exponent_is_one[4] ALIGN16_END = {
0x3F800000, 0x3F800000, 0x3F800000, 0x3F800000};
const __m128 mantissa = _mm_and_ps(a, *((__m128*)mantissa_mask));
const __m128 y =
_mm_or_ps(mantissa, *((__m128*)zero_biased_exponent_is_one));
// Approximate log2(y) ~= (y - 1) * pol5(y).
// pol5(y) = C5 * y^5 + C4 * y^4 + C3 * y^3 + C2 * y^2 + C1 * y + C0
static const ALIGN16_BEG float ALIGN16_END C5[4] =
{-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
static const ALIGN16_BEG float ALIGN16_END C4[4] =
{3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
static const ALIGN16_BEG float ALIGN16_END C3[4] =
{-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
static const ALIGN16_BEG float ALIGN16_END C2[4] =
{2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
static const ALIGN16_BEG float ALIGN16_END C1[4] =
{-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
static const ALIGN16_BEG float ALIGN16_END C0[4] =
{3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128 *)C5));
const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128 *)C4));
static const ALIGN16_BEG float ALIGN16_END C5[4] = {
-3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f, -3.4436006e-2f};
static const ALIGN16_BEG float ALIGN16_END
C4[4] = {3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f, 3.1821337e-1f};
static const ALIGN16_BEG float ALIGN16_END
C3[4] = {-1.2315303f, -1.2315303f, -1.2315303f, -1.2315303f};
static const ALIGN16_BEG float ALIGN16_END
C2[4] = {2.5988452f, 2.5988452f, 2.5988452f, 2.5988452f};
static const ALIGN16_BEG float ALIGN16_END
C1[4] = {-3.3241990f, -3.3241990f, -3.3241990f, -3.3241990f};
static const ALIGN16_BEG float ALIGN16_END
C0[4] = {3.1157899f, 3.1157899f, 3.1157899f, 3.1157899f};
const __m128 pol5_y_0 = _mm_mul_ps(y, *((__m128*)C5));
const __m128 pol5_y_1 = _mm_add_ps(pol5_y_0, *((__m128*)C4));
const __m128 pol5_y_2 = _mm_mul_ps(pol5_y_1, y);
const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128 *)C3));
const __m128 pol5_y_3 = _mm_add_ps(pol5_y_2, *((__m128*)C3));
const __m128 pol5_y_4 = _mm_mul_ps(pol5_y_3, y);
const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128 *)C2));
const __m128 pol5_y_5 = _mm_add_ps(pol5_y_4, *((__m128*)C2));
const __m128 pol5_y_6 = _mm_mul_ps(pol5_y_5, y);
const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128 *)C1));
const __m128 pol5_y_7 = _mm_add_ps(pol5_y_6, *((__m128*)C1));
const __m128 pol5_y_8 = _mm_mul_ps(pol5_y_7, y);
const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128 *)C0));
const __m128 y_minus_one = _mm_sub_ps(
y, *((__m128 *)zero_biased_exponent_is_one));
const __m128 log2_y = _mm_mul_ps(y_minus_one , pol5_y);
const __m128 pol5_y = _mm_add_ps(pol5_y_8, *((__m128*)C0));
const __m128 y_minus_one =
_mm_sub_ps(y, *((__m128*)zero_biased_exponent_is_one));
const __m128 log2_y = _mm_mul_ps(y_minus_one, pol5_y);
// Combine parts.
log2_a = _mm_add_ps(n, log2_y);
@ -299,38 +315,38 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
// maximum relative error of 0.17%.
// To avoid over/underflow, we reduce the range of input to ]-127, 129].
static const ALIGN16_BEG float max_input[4] ALIGN16_END =
{129.f, 129.f, 129.f, 129.f};
static const ALIGN16_BEG float min_input[4] ALIGN16_END =
{-126.99999f, -126.99999f, -126.99999f, -126.99999f};
const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128 *)max_input));
const __m128 x_max = _mm_max_ps(x_min, *((__m128 *)min_input));
static const ALIGN16_BEG float max_input[4] ALIGN16_END = {129.f, 129.f,
129.f, 129.f};
static const ALIGN16_BEG float min_input[4] ALIGN16_END = {
-126.99999f, -126.99999f, -126.99999f, -126.99999f};
const __m128 x_min = _mm_min_ps(b_log2_a, *((__m128*)max_input));
const __m128 x_max = _mm_max_ps(x_min, *((__m128*)min_input));
// Compute n.
static const ALIGN16_BEG float half[4] ALIGN16_END =
{0.5f, 0.5f, 0.5f, 0.5f};
const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128 *)half));
static const ALIGN16_BEG float half[4] ALIGN16_END = {0.5f, 0.5f,
0.5f, 0.5f};
const __m128 x_minus_half = _mm_sub_ps(x_max, *((__m128*)half));
const __m128i x_minus_half_floor = _mm_cvtps_epi32(x_minus_half);
// Compute 2^n.
static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END =
{127, 127, 127, 127};
static const ALIGN16_BEG int float_exponent_bias[4] ALIGN16_END = {
127, 127, 127, 127};
static const int float_exponent_shift = 23;
const __m128i two_n_exponent = _mm_add_epi32(
x_minus_half_floor, *((__m128i *)float_exponent_bias));
const __m128 two_n = _mm_castsi128_ps(_mm_slli_epi32(
two_n_exponent, float_exponent_shift));
const __m128i two_n_exponent =
_mm_add_epi32(x_minus_half_floor, *((__m128i*)float_exponent_bias));
const __m128 two_n =
_mm_castsi128_ps(_mm_slli_epi32(two_n_exponent, float_exponent_shift));
// Compute y.
const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
// Approximate 2^y ~= C2 * y^2 + C1 * y + C0.
static const ALIGN16_BEG float C2[4] ALIGN16_END =
{3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
static const ALIGN16_BEG float C1[4] ALIGN16_END =
{6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
static const ALIGN16_BEG float C0[4] ALIGN16_END =
{1.0017247f, 1.0017247f, 1.0017247f, 1.0017247f};
const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128 *)C2));
const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128 *)C1));
static const ALIGN16_BEG float C2[4] ALIGN16_END = {
3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f, 3.3718944e-1f};
static const ALIGN16_BEG float C1[4] ALIGN16_END = {
6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f, 6.5763628e-1f};
static const ALIGN16_BEG float C0[4] ALIGN16_END = {1.0017247f, 1.0017247f,
1.0017247f, 1.0017247f};
const __m128 exp2_y_0 = _mm_mul_ps(y, *((__m128*)C2));
const __m128 exp2_y_1 = _mm_add_ps(exp2_y_0, *((__m128*)C1));
const __m128 exp2_y_2 = _mm_mul_ps(exp2_y_1, y);
const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128 *)C0));
const __m128 exp2_y = _mm_add_ps(exp2_y_2, *((__m128*)C0));
// Combine parts.
a_exp_b = _mm_mul_ps(exp2_y, two_n);
@ -338,10 +354,8 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
return a_exp_b;
}
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];
static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
static void OverdriveAndSuppressSSE2(AecCore* aec,
float hNl[PART_LEN1],
const float hNlFb,
float efw[2][PART_LEN1]) {
int i;
@ -350,26 +364,25 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
const __m128 vec_minus_one = _mm_set1_ps(-1.0f);
const __m128 vec_overDriveSm = _mm_set1_ps(aec->overDriveSm);
// vectorized code (four at once)
for (i = 0; i + 3 < PART_LEN1; i+=4) {
for (i = 0; i + 3 < PART_LEN1; i += 4) {
// Weight subbands
__m128 vec_hNl = _mm_loadu_ps(&hNl[i]);
const __m128 vec_weightCurve = _mm_loadu_ps(&WebRtcAec_weightCurve[i]);
const __m128 bigger = _mm_cmpgt_ps(vec_hNl, vec_hNlFb);
const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(
vec_weightCurve, vec_hNlFb);
const __m128 vec_weightCurve_hNlFb = _mm_mul_ps(vec_weightCurve, vec_hNlFb);
const __m128 vec_one_weightCurve = _mm_sub_ps(vec_one, vec_weightCurve);
const __m128 vec_one_weightCurve_hNl = _mm_mul_ps(
vec_one_weightCurve, vec_hNl);
const __m128 vec_one_weightCurve_hNl =
_mm_mul_ps(vec_one_weightCurve, vec_hNl);
const __m128 vec_if0 = _mm_andnot_ps(bigger, vec_hNl);
const __m128 vec_if1 = _mm_and_ps(
bigger, _mm_add_ps(vec_weightCurve_hNlFb, vec_one_weightCurve_hNl));
vec_hNl = _mm_or_ps(vec_if0, vec_if1);
{
const __m128 vec_overDriveCurve = _mm_loadu_ps(
&WebRtcAec_overDriveCurve[i]);
const __m128 vec_overDriveSm_overDriveCurve = _mm_mul_ps(
vec_overDriveSm, vec_overDriveCurve);
const __m128 vec_overDriveCurve =
_mm_loadu_ps(&WebRtcAec_overDriveCurve[i]);
const __m128 vec_overDriveSm_overDriveCurve =
_mm_mul_ps(vec_overDriveSm, vec_overDriveCurve);
vec_hNl = mm_pow_ps(vec_hNl, vec_overDriveSm_overDriveCurve);
_mm_storeu_ps(&hNl[i], vec_hNl);
}
@ -393,7 +406,7 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
// Weight subbands
if (hNl[i] > hNlFb) {
hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
(1 - WebRtcAec_weightCurve[i]) * hNl[i];
}
hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
@ -407,11 +420,312 @@ static void OverdriveAndSuppressSSE2(aec_t *aec, float hNl[PART_LEN1],
}
}
__inline static void _mm_add_ps_4x1(__m128 sum, float *dst) {
// A+B C+D
sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(0, 0, 3, 2)));
// A+B+C+D A+B+C+D
sum = _mm_add_ps(sum, _mm_shuffle_ps(sum, sum, _MM_SHUFFLE(1, 1, 1, 1)));
_mm_store_ss(dst, sum);
}
static int PartitionDelay(const AecCore* aec) {
// Measures the energy in each filter partition and returns the partition
// with the highest energy.
// TODO(bjornv): Spread computational cost by computing one partition per
// block?
float wfEnMax = 0;
int i;
int delay = 0;
for (i = 0; i < aec->num_partitions; i++) {
int j;
int pos = i * PART_LEN1;
float wfEn = 0;
__m128 vec_wfEn = _mm_set1_ps(0.0f);
// vectorized code (four at once)
for (j = 0; j + 3 < PART_LEN1; j += 4) {
const __m128 vec_wfBuf0 = _mm_loadu_ps(&aec->wfBuf[0][pos + j]);
const __m128 vec_wfBuf1 = _mm_loadu_ps(&aec->wfBuf[1][pos + j]);
vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf0, vec_wfBuf0));
vec_wfEn = _mm_add_ps(vec_wfEn, _mm_mul_ps(vec_wfBuf1, vec_wfBuf1));
}
_mm_add_ps_4x1(vec_wfEn, &wfEn);
// scalar code for the remaining items.
for (; j < PART_LEN1; j++) {
wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
}
if (wfEn > wfEnMax) {
wfEnMax = wfEn;
delay = i;
}
}
return delay;
}
// Updates the following smoothed Power Spectral Densities (PSD):
// - sd : near-end
// - se : residual echo
// - sx : far-end
// - sde : cross-PSD of near-end and residual echo
// - sxd : cross-PSD of near-end and far-end
//
// In addition to updating the PSDs, the filter divergence state is
// determined, upon which actions are taken.
static void SmoothedPSD(AecCore* aec,
float efw[2][PART_LEN1],
float dfw[2][PART_LEN1],
float xfw[2][PART_LEN1]) {
// Power estimate smoothing coefficients.
const float* ptrGCoh = aec->extended_filter_enabled
? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1]
: WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1];
int i;
float sdSum = 0, seSum = 0;
const __m128 vec_15 = _mm_set1_ps(WebRtcAec_kMinFarendPSD);
const __m128 vec_GCoh0 = _mm_set1_ps(ptrGCoh[0]);
const __m128 vec_GCoh1 = _mm_set1_ps(ptrGCoh[1]);
__m128 vec_sdSum = _mm_set1_ps(0.0f);
__m128 vec_seSum = _mm_set1_ps(0.0f);
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const __m128 vec_dfw0 = _mm_loadu_ps(&dfw[0][i]);
const __m128 vec_dfw1 = _mm_loadu_ps(&dfw[1][i]);
const __m128 vec_efw0 = _mm_loadu_ps(&efw[0][i]);
const __m128 vec_efw1 = _mm_loadu_ps(&efw[1][i]);
const __m128 vec_xfw0 = _mm_loadu_ps(&xfw[0][i]);
const __m128 vec_xfw1 = _mm_loadu_ps(&xfw[1][i]);
__m128 vec_sd = _mm_mul_ps(_mm_loadu_ps(&aec->sd[i]), vec_GCoh0);
__m128 vec_se = _mm_mul_ps(_mm_loadu_ps(&aec->se[i]), vec_GCoh0);
__m128 vec_sx = _mm_mul_ps(_mm_loadu_ps(&aec->sx[i]), vec_GCoh0);
__m128 vec_dfw_sumsq = _mm_mul_ps(vec_dfw0, vec_dfw0);
__m128 vec_efw_sumsq = _mm_mul_ps(vec_efw0, vec_efw0);
__m128 vec_xfw_sumsq = _mm_mul_ps(vec_xfw0, vec_xfw0);
vec_dfw_sumsq = _mm_add_ps(vec_dfw_sumsq, _mm_mul_ps(vec_dfw1, vec_dfw1));
vec_efw_sumsq = _mm_add_ps(vec_efw_sumsq, _mm_mul_ps(vec_efw1, vec_efw1));
vec_xfw_sumsq = _mm_add_ps(vec_xfw_sumsq, _mm_mul_ps(vec_xfw1, vec_xfw1));
vec_xfw_sumsq = _mm_max_ps(vec_xfw_sumsq, vec_15);
vec_sd = _mm_add_ps(vec_sd, _mm_mul_ps(vec_dfw_sumsq, vec_GCoh1));
vec_se = _mm_add_ps(vec_se, _mm_mul_ps(vec_efw_sumsq, vec_GCoh1));
vec_sx = _mm_add_ps(vec_sx, _mm_mul_ps(vec_xfw_sumsq, vec_GCoh1));
_mm_storeu_ps(&aec->sd[i], vec_sd);
_mm_storeu_ps(&aec->se[i], vec_se);
_mm_storeu_ps(&aec->sx[i], vec_sx);
{
const __m128 vec_3210 = _mm_loadu_ps(&aec->sde[i][0]);
const __m128 vec_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
__m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(2, 0, 2, 0));
__m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(3, 1, 3, 1));
__m128 vec_dfwefw0011 = _mm_mul_ps(vec_dfw0, vec_efw0);
__m128 vec_dfwefw0110 = _mm_mul_ps(vec_dfw0, vec_efw1);
vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
vec_dfwefw0011 = _mm_add_ps(vec_dfwefw0011,
_mm_mul_ps(vec_dfw1, vec_efw1));
vec_dfwefw0110 = _mm_sub_ps(vec_dfwefw0110,
_mm_mul_ps(vec_dfw1, vec_efw0));
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwefw0011, vec_GCoh1));
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwefw0110, vec_GCoh1));
_mm_storeu_ps(&aec->sde[i][0], _mm_unpacklo_ps(vec_a, vec_b));
_mm_storeu_ps(&aec->sde[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
}
{
const __m128 vec_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
const __m128 vec_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
__m128 vec_a = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(2, 0, 2, 0));
__m128 vec_b = _mm_shuffle_ps(vec_3210, vec_7654,
_MM_SHUFFLE(3, 1, 3, 1));
__m128 vec_dfwxfw0011 = _mm_mul_ps(vec_dfw0, vec_xfw0);
__m128 vec_dfwxfw0110 = _mm_mul_ps(vec_dfw0, vec_xfw1);
vec_a = _mm_mul_ps(vec_a, vec_GCoh0);
vec_b = _mm_mul_ps(vec_b, vec_GCoh0);
vec_dfwxfw0011 = _mm_add_ps(vec_dfwxfw0011,
_mm_mul_ps(vec_dfw1, vec_xfw1));
vec_dfwxfw0110 = _mm_sub_ps(vec_dfwxfw0110,
_mm_mul_ps(vec_dfw1, vec_xfw0));
vec_a = _mm_add_ps(vec_a, _mm_mul_ps(vec_dfwxfw0011, vec_GCoh1));
vec_b = _mm_add_ps(vec_b, _mm_mul_ps(vec_dfwxfw0110, vec_GCoh1));
_mm_storeu_ps(&aec->sxd[i][0], _mm_unpacklo_ps(vec_a, vec_b));
_mm_storeu_ps(&aec->sxd[i + 2][0], _mm_unpackhi_ps(vec_a, vec_b));
}
vec_sdSum = _mm_add_ps(vec_sdSum, vec_sd);
vec_seSum = _mm_add_ps(vec_seSum, vec_se);
}
_mm_add_ps_4x1(vec_sdSum, &sdSum);
_mm_add_ps_4x1(vec_seSum, &seSum);
for (; i < PART_LEN1; i++) {
aec->sd[i] = ptrGCoh[0] * aec->sd[i] +
ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
aec->se[i] = ptrGCoh[0] * aec->se[i] +
ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
// We threshold here to protect against the ill-effects of a zero farend.
// The threshold is not arbitrarily chosen, but balances protection and
// adverse interaction with the algorithm's tuning.
// TODO(bjornv): investigate further why this is so sensitive.
aec->sx[i] =
ptrGCoh[0] * aec->sx[i] +
ptrGCoh[1] * WEBRTC_SPL_MAX(
xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i],
WebRtcAec_kMinFarendPSD);
aec->sde[i][0] =
ptrGCoh[0] * aec->sde[i][0] +
ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
aec->sde[i][1] =
ptrGCoh[0] * aec->sde[i][1] +
ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
aec->sxd[i][0] =
ptrGCoh[0] * aec->sxd[i][0] +
ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
aec->sxd[i][1] =
ptrGCoh[0] * aec->sxd[i][1] +
ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
sdSum += aec->sd[i];
seSum += aec->se[i];
}
// Divergent filter safeguard.
aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum;
if (aec->divergeState)
memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1);
// Reset if error is significantly larger than nearend (13 dB).
if (!aec->extended_filter_enabled && seSum > (19.95f * sdSum))
memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
}
// Window time domain data to be used by the fft.
__inline static void WindowData(float* x_windowed, const float* x) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const __m128 vec_Buf1 = _mm_loadu_ps(&x[i]);
const __m128 vec_Buf2 = _mm_loadu_ps(&x[PART_LEN + i]);
const __m128 vec_sqrtHanning = _mm_load_ps(&WebRtcAec_sqrtHanning[i]);
// A B C D
__m128 vec_sqrtHanning_rev =
_mm_loadu_ps(&WebRtcAec_sqrtHanning[PART_LEN - i - 3]);
// D C B A
vec_sqrtHanning_rev =
_mm_shuffle_ps(vec_sqrtHanning_rev, vec_sqrtHanning_rev,
_MM_SHUFFLE(0, 1, 2, 3));
_mm_storeu_ps(&x_windowed[i], _mm_mul_ps(vec_Buf1, vec_sqrtHanning));
_mm_storeu_ps(&x_windowed[PART_LEN + i],
_mm_mul_ps(vec_Buf2, vec_sqrtHanning_rev));
}
}
// Puts fft output data into a complex valued array.
__inline static void StoreAsComplex(const float* data,
float data_complex[2][PART_LEN1]) {
int i;
for (i = 0; i < PART_LEN; i += 4) {
const __m128 vec_fft0 = _mm_loadu_ps(&data[2 * i]);
const __m128 vec_fft4 = _mm_loadu_ps(&data[2 * i + 4]);
const __m128 vec_a = _mm_shuffle_ps(vec_fft0, vec_fft4,
_MM_SHUFFLE(2, 0, 2, 0));
const __m128 vec_b = _mm_shuffle_ps(vec_fft0, vec_fft4,
_MM_SHUFFLE(3, 1, 3, 1));
_mm_storeu_ps(&data_complex[0][i], vec_a);
_mm_storeu_ps(&data_complex[1][i], vec_b);
}
// fix beginning/end values
data_complex[1][0] = 0;
data_complex[1][PART_LEN] = 0;
data_complex[0][0] = data[0];
data_complex[0][PART_LEN] = data[1];
}
static void SubbandCoherenceSSE2(AecCore* aec,
float efw[2][PART_LEN1],
float xfw[2][PART_LEN1],
float* fft,
float* cohde,
float* cohxd) {
float dfw[2][PART_LEN1];
int i;
if (aec->delayEstCtr == 0)
aec->delayIdx = PartitionDelay(aec);
// Use delayed far.
memcpy(xfw,
aec->xfwBuf + aec->delayIdx * PART_LEN1,
sizeof(xfw[0][0]) * 2 * PART_LEN1);
// Windowed near fft
WindowData(fft, aec->dBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, dfw);
// Windowed error fft
WindowData(fft, aec->eBuf);
aec_rdft_forward_128(fft);
StoreAsComplex(fft, efw);
SmoothedPSD(aec, efw, dfw, xfw);
{
const __m128 vec_1eminus10 = _mm_set1_ps(1e-10f);
// Subband coherence
for (i = 0; i + 3 < PART_LEN1; i += 4) {
const __m128 vec_sd = _mm_loadu_ps(&aec->sd[i]);
const __m128 vec_se = _mm_loadu_ps(&aec->se[i]);
const __m128 vec_sx = _mm_loadu_ps(&aec->sx[i]);
const __m128 vec_sdse = _mm_add_ps(vec_1eminus10,
_mm_mul_ps(vec_sd, vec_se));
const __m128 vec_sdsx = _mm_add_ps(vec_1eminus10,
_mm_mul_ps(vec_sd, vec_sx));
const __m128 vec_sde_3210 = _mm_loadu_ps(&aec->sde[i][0]);
const __m128 vec_sde_7654 = _mm_loadu_ps(&aec->sde[i + 2][0]);
const __m128 vec_sxd_3210 = _mm_loadu_ps(&aec->sxd[i][0]);
const __m128 vec_sxd_7654 = _mm_loadu_ps(&aec->sxd[i + 2][0]);
const __m128 vec_sde_0 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
_MM_SHUFFLE(2, 0, 2, 0));
const __m128 vec_sde_1 = _mm_shuffle_ps(vec_sde_3210, vec_sde_7654,
_MM_SHUFFLE(3, 1, 3, 1));
const __m128 vec_sxd_0 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
_MM_SHUFFLE(2, 0, 2, 0));
const __m128 vec_sxd_1 = _mm_shuffle_ps(vec_sxd_3210, vec_sxd_7654,
_MM_SHUFFLE(3, 1, 3, 1));
__m128 vec_cohde = _mm_mul_ps(vec_sde_0, vec_sde_0);
__m128 vec_cohxd = _mm_mul_ps(vec_sxd_0, vec_sxd_0);
vec_cohde = _mm_add_ps(vec_cohde, _mm_mul_ps(vec_sde_1, vec_sde_1));
vec_cohde = _mm_div_ps(vec_cohde, vec_sdse);
vec_cohxd = _mm_add_ps(vec_cohxd, _mm_mul_ps(vec_sxd_1, vec_sxd_1));
vec_cohxd = _mm_div_ps(vec_cohxd, vec_sdsx);
_mm_storeu_ps(&cohde[i], vec_cohde);
_mm_storeu_ps(&cohxd[i], vec_cohxd);
}
// scalar code for the remaining items.
for (; i < PART_LEN1; i++) {
cohde[i] =
(aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
(aec->sd[i] * aec->se[i] + 1e-10f);
cohxd[i] =
(aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
(aec->sx[i] * aec->sd[i] + 1e-10f);
}
}
}
void WebRtcAec_InitAec_SSE2(void) {
WebRtcAec_FilterFar = FilterFarSSE2;
WebRtcAec_ScaleErrorSignal = ScaleErrorSignalSSE2;
WebRtcAec_FilterAdaptation = FilterAdaptationSSE2;
WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppressSSE2;
WebRtcAec_SubbandCoherence = SubbandCoherenceSSE2;
}
#endif // WEBRTC_USE_SSE2

View File

@ -19,200 +19,193 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "aec_rdft.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include <math.h>
#include "system_wrappers/interface/cpu_features_wrapper.h"
#include "typedefs.h"
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/typedefs.h"
// constants shared by all paths (C, SSE2).
float rdft_w[64];
// constants used by the C path.
float rdft_wk3ri_first[32];
float rdft_wk3ri_second[32];
// constants used by SSE2 but initialized in C path.
ALIGN16_BEG float ALIGN16_END rdft_wk1r[32];
ALIGN16_BEG float ALIGN16_END rdft_wk2r[32];
ALIGN16_BEG float ALIGN16_END rdft_wk3r[32];
ALIGN16_BEG float ALIGN16_END rdft_wk1i[32];
ALIGN16_BEG float ALIGN16_END rdft_wk2i[32];
ALIGN16_BEG float ALIGN16_END rdft_wk3i[32];
ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
// These tables used to be computed at run-time. For example, refer to:
// https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564
// to see the initialization code.
const float rdft_w[64] = {
1.0000000000f, 0.0000000000f, 0.7071067691f, 0.7071067691f,
0.9238795638f, 0.3826834559f, 0.3826834559f, 0.9238795638f,
0.9807852507f, 0.1950903237f, 0.5555702448f, 0.8314695954f,
0.8314695954f, 0.5555702448f, 0.1950903237f, 0.9807852507f,
0.9951847196f, 0.0980171412f, 0.6343933344f, 0.7730104327f,
0.8819212914f, 0.4713967443f, 0.2902846634f, 0.9569403529f,
0.9569403529f, 0.2902846634f, 0.4713967443f, 0.8819212914f,
0.7730104327f, 0.6343933344f, 0.0980171412f, 0.9951847196f,
0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};
const float rdft_wk3ri_first[16] = {
1.000000000f, 0.000000000f, 0.382683456f, 0.923879564f,
0.831469536f, 0.555570245f, -0.195090353f, 0.980785251f,
0.956940353f, 0.290284693f, 0.098017156f, 0.995184720f,
0.634393334f, 0.773010492f, -0.471396863f, 0.881921172f,
};
const float rdft_wk3ri_second[16] = {
-0.707106769f, 0.707106769f, -0.923879564f, -0.382683456f,
-0.980785251f, 0.195090353f, -0.555570245f, -0.831469536f,
-0.881921172f, 0.471396863f, -0.773010492f, -0.634393334f,
-0.995184720f, -0.098017156f, -0.290284693f, -0.956940353f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = {
1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f,
0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f,
0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f,
0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f,
0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f,
0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f,
0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f,
0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = {
1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f,
0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f,
0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f,
0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f,
0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f,
0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f,
0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = {
1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f,
0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f,
0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f,
-0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f,
0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f,
0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f,
0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f,
-0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = {
-0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
-0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
-0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f,
-0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f,
-0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f,
-0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f,
-0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f,
-0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = {
-0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f,
-0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f,
-0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f,
-0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f,
-0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f,
-0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f,
-0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f,
-0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f,
};
ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = {
-0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f,
-0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f,
-0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f,
-0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f,
-0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f,
-0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f,
-0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f,
-0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f,
};
ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = {
0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f,
};
static int ip[16];
static void bitrv2_128_C(float* a) {
/*
The following things have been attempted but are no faster:
(a) Storing the swap indexes in a LUT (index calculations are done
for 'free' while waiting on memory/L1).
(b) Consolidate the load/store of two consecutive floats by a 64 bit
integer (execution is memory/L1 bound).
(c) Do a mix of floats and 64 bit integer to maximize register
utilization (execution is memory/L1 bound).
(d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
(e) Hard-coding the offsets to completely eliminate index
calculations.
*/
static void bitrv2_32or128(int n, int *ip, float *a) {
// n is 32 or 128
int j, j1, k, k1, m, m2;
unsigned int j, j1, k, k1;
float xr, xi, yr, yi;
ip[0] = 0;
{
int l = n;
m = 1;
while ((m << 3) < l) {
l >>= 1;
for (j = 0; j < m; j++) {
ip[m + j] = ip[j] + l;
}
m <<= 1;
}
}
m2 = 2 * m;
for (k = 0; k < m; k++) {
static const int ip[4] = {0, 64, 32, 96};
for (k = 0; k < 4; k++) {
for (j = 0; j < k; j++) {
j1 = 2 * j + ip[k];
k1 = 2 * k + ip[j];
xr = a[j1];
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += m2;
k1 += 2 * m2;
xr = a[j1];
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += m2;
k1 -= m2;
xr = a[j1];
j1 += 8;
k1 -= 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
j1 += m2;
k1 += 2 * m2;
xr = a[j1];
j1 += 8;
k1 += 16;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
j1 = 2 * k + m2 + ip[k];
k1 = j1 + m2;
xr = a[j1];
j1 = 2 * k + 8 + ip[k];
k1 = j1 + 8;
xr = a[j1 + 0];
xi = a[j1 + 1];
yr = a[k1];
yr = a[k1 + 0];
yi = a[k1 + 1];
a[j1] = yr;
a[j1 + 0] = yr;
a[j1 + 1] = yi;
a[k1] = xr;
a[k1 + 0] = xr;
a[k1 + 1] = xi;
}
}
static void makewt_32(void) {
const int nw = 32;
int j, nwh;
float delta, x, y;
ip[0] = nw;
ip[1] = 1;
nwh = nw >> 1;
delta = atanf(1.0f) / nwh;
rdft_w[0] = 1;
rdft_w[1] = 0;
rdft_w[nwh] = cosf(delta * nwh);
rdft_w[nwh + 1] = rdft_w[nwh];
for (j = 2; j < nwh; j += 2) {
x = cosf(delta * j);
y = sinf(delta * j);
rdft_w[j] = x;
rdft_w[j + 1] = y;
rdft_w[nw - j] = y;
rdft_w[nw - j + 1] = x;
}
bitrv2_32or128(nw, ip + 2, rdft_w);
// pre-calculate constants used by cft1st_128 and cftmdl_128...
cftmdl_wk1r[0] = rdft_w[2];
cftmdl_wk1r[1] = rdft_w[2];
cftmdl_wk1r[2] = rdft_w[2];
cftmdl_wk1r[3] = -rdft_w[2];
{
int k1;
for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) {
const int k2 = 2 * k1;
const float wk2r = rdft_w[k1 + 0];
const float wk2i = rdft_w[k1 + 1];
float wk1r, wk1i;
// ... scalar version.
wk1r = rdft_w[k2 + 0];
wk1i = rdft_w[k2 + 1];
rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i;
rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i;
wk1r = rdft_w[k2 + 2];
wk1i = rdft_w[k2 + 3];
rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i;
rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i;
// ... vector version.
rdft_wk1r[k2 + 0] = rdft_w[k2 + 0];
rdft_wk1r[k2 + 1] = rdft_w[k2 + 0];
rdft_wk1r[k2 + 2] = rdft_w[k2 + 2];
rdft_wk1r[k2 + 3] = rdft_w[k2 + 2];
rdft_wk2r[k2 + 0] = rdft_w[k1 + 0];
rdft_wk2r[k2 + 1] = rdft_w[k1 + 0];
rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1];
rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1];
rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0];
rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0];
rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0];
rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0];
rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1];
rdft_wk1i[k2 + 1] = rdft_w[k2 + 1];
rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3];
rdft_wk1i[k2 + 3] = rdft_w[k2 + 3];
rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1];
rdft_wk2i[k2 + 1] = rdft_w[k1 + 1];
rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0];
rdft_wk2i[k2 + 3] = rdft_w[k1 + 0];
rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1];
rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1];
rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1];
rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1];
}
}
}
static void makect_32(void) {
float *c = rdft_w + 32;
const int nc = 32;
int j, nch;
float delta;
ip[1] = nc;
nch = nc >> 1;
delta = atanf(1.0f) / nch;
c[0] = cosf(delta * nch);
c[nch] = 0.5f * c[0];
for (j = 1; j < nch; j++) {
c[j] = 0.5f * cosf(delta * j);
c[nc - j] = 0.5f * sinf(delta * j);
}
}
static void cft1st_128_C(float* a) {
const int n = 128;
int j, k1, k2;
float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
// The processing of the first set of elements was simplified in C to avoid
// some operations (multiplication by zero or one, addition of two elements
// multiplied by the same weight, ...).
x0r = a[0] + a[2];
x0i = a[1] + a[3];
x1r = a[0] - a[2];
@ -311,7 +304,7 @@ static void cft1st_128_C(float *a) {
}
}
static void cftmdl_128_C(float* a) {
const int l = 8;
const int n = 128;
const int m = 32;
@ -320,7 +313,7 @@ static void cftmdl_128_C(float *a) {
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
for (j0 = 0; j0 < l; j0 += 2) {
    j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -342,7 +335,7 @@ static void cftmdl_128_C(float *a) {
}
wk1r = rdft_w[2];
for (j0 = m; j0 < l + m; j0 += 2) {
    j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -378,7 +371,7 @@ static void cftmdl_128_C(float *a) {
wk3r = rdft_wk3ri_first[k1 + 0];
wk3i = rdft_wk3ri_first[k1 + 1];
for (j0 = k; j0 < l + k; j0 += 2) {
      j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -409,7 +402,7 @@ static void cftmdl_128_C(float *a) {
wk3r = rdft_wk3ri_second[k1 + 0];
wk3i = rdft_wk3ri_second[k1 + 1];
for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
      j1 = j0 + 8;
j2 = j0 + 16;
j3 = j0 + 24;
x0r = a[j0 + 0] + a[j1 + 0];
@ -438,7 +431,7 @@ static void cftmdl_128_C(float *a) {
}
}
static void cftfsub_128_C(float* a) {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
@ -468,7 +461,7 @@ static void cftfsub_128(float *a) {
}
}
static void cftbsub_128_C(float* a) {
int j, j1, j2, j3, l;
float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
@ -499,14 +492,14 @@ static void cftbsub_128(float *a) {
}
}
static void rftfsub_128_C(float* a) {
  const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -520,15 +513,15 @@ static void rftfsub_128_C(float *a) {
}
}
static void rftbsub_128_C(float* a) {
  const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
a[1] = -a[1];
for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -543,11 +536,9 @@ static void rftbsub_128_C(float *a) {
a[65] = -a[65];
}
void aec_rdft_forward_128(float* a) {
  float xi;
  bitrv2_128(a);
cftfsub_128(a);
rftfsub_128(a);
xi = a[0] - a[1];
@ -555,33 +546,44 @@ void aec_rdft_forward_128(float *a) {
a[1] = xi;
}
void aec_rdft_inverse_128(float* a) {
  a[1] = 0.5f * (a[0] - a[1]);
  a[0] -= a[1];
  rftbsub_128(a);
  bitrv2_128(a);
cftbsub_128(a);
}
// code path selection
RftSub128 cft1st_128;
RftSub128 cftmdl_128;
RftSub128 rftfsub_128;
RftSub128 rftbsub_128;
RftSub128 cftfsub_128;
RftSub128 cftbsub_128;
RftSub128 bitrv2_128;
void aec_rdft_init(void) {
  cft1st_128 = cft1st_128_C;
  cftmdl_128 = cftmdl_128_C;
  rftfsub_128 = rftfsub_128_C;
  rftbsub_128 = rftbsub_128_C;
  cftfsub_128 = cftfsub_128_C;
  cftbsub_128 = cftbsub_128_C;
  bitrv2_128 = bitrv2_128_C;
#if defined(WEBRTC_ARCH_X86_FAMILY)
  if (WebRtc_GetCPUInfo(kSSE2)) {
    aec_rdft_init_sse2();
  }
#endif
#if defined(MIPS_FPU_LE)
  aec_rdft_init_mips();
#endif
#if defined(WEBRTC_HAS_NEON)
  aec_rdft_init_neon();
#elif defined(WEBRTC_DETECT_NEON)
  if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) {
    aec_rdft_init_neon();
  }
#endif
}
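/* A minimal usage sketch of the entry points above (illustrative only, not
 * part of the upstream sources): aec_rdft_init() must be called once to bind
 * the platform-specific code path before either transform is used, and both
 * transforms operate in place on a 128-float buffer. In the Ooura rdft
 * convention this code follows, a forward/inverse round trip is expected to
 * leave the data scaled by n/2 = 64.
 *
 *   float buf[128];             // time-domain input samples
 *   aec_rdft_init();            // one-time code-path selection
 *   aec_rdft_forward_128(buf);  // in-place forward real FFT
 *   aec_rdft_inverse_128(buf);  // in-place inverse transform
 *   // buf[i] is now (approximately) 64 * the original input.
 */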


@ -11,6 +11,8 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#include "webrtc/modules/audio_processing/aec/aec_common.h"
// These intrinsics were unavailable before VS 2008.
// TODO(andrew): move to a common file.
#if defined(_MSC_VER) && _MSC_VER < 1500
@ -19,39 +21,41 @@ static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
#endif
// Constants shared by all paths (C, SSE2, NEON).
extern const float rdft_w[64];
// Constants used by the C path.
extern const float rdft_wk3ri_first[16];
extern const float rdft_wk3ri_second[16];
// Constants used by SSE2 and NEON but initialized in the C path.
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32];
extern ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32];
extern ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4];
// code path selection function pointers
typedef void (*RftSub128)(float* a);
extern RftSub128 rftfsub_128;
extern RftSub128 rftbsub_128;
extern RftSub128 cft1st_128;
extern RftSub128 cftmdl_128;
extern RftSub128 cftfsub_128;
extern RftSub128 cftbsub_128;
extern RftSub128 bitrv2_128;
// entry points
void aec_rdft_init(void);
void aec_rdft_init_sse2(void);
void aec_rdft_forward_128(float* a);
void aec_rdft_inverse_128(float* a);
#if defined(MIPS_FPU_LE)
void aec_rdft_init_mips(void);
#endif
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void aec_rdft_init_neon(void);
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_

File diff suppressed because it is too large.


@ -0,0 +1,355 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* The rdft AEC algorithm, neon version of speed-critical functions.
*
* Based on the sse2 version.
*/
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#include <arm_neon.h>
static const ALIGN16_BEG float ALIGN16_END
k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
static void cft1st_128_neon(float* a) {
const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
int j, k2;
for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
float32x4_t a00v = vld1q_f32(&a[j + 0]);
float32x4_t a04v = vld1q_f32(&a[j + 4]);
float32x4_t a08v = vld1q_f32(&a[j + 8]);
float32x4_t a12v = vld1q_f32(&a[j + 12]);
float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
float32x4_t x0v = vaddq_f32(a01v, a23v);
const float32x4_t x1v = vsubq_f32(a01v, a23v);
const float32x4_t x2v = vaddq_f32(a45v, a67v);
const float32x4_t x3v = vsubq_f32(a45v, a67v);
const float32x4_t x3w = vrev64q_f32(x3v);
float32x4_t x0w;
a01v = vaddq_f32(x0v, x2v);
x0v = vsubq_f32(x0v, x2v);
x0w = vrev64q_f32(x0v);
a45v = vmulq_f32(wk2rv, x0v);
a45v = vmlaq_f32(a45v, wk2iv, x0w);
x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
x0w = vrev64q_f32(x0v);
a23v = vmulq_f32(wk1rv, x0v);
a23v = vmlaq_f32(a23v, wk1iv, x0w);
x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
x0w = vrev64q_f32(x0v);
a67v = vmulq_f32(wk3rv, x0v);
a67v = vmlaq_f32(a67v, wk3iv, x0w);
a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
vst1q_f32(&a[j + 0], a00v);
vst1q_f32(&a[j + 4], a04v);
vst1q_f32(&a[j + 8], a08v);
vst1q_f32(&a[j + 12], a12v);
}
}
static void cftmdl_128_neon(float* a) {
int j;
const int l = 8;
const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);
for (j = 0; j < l; j += 2) {
const float32x2_t a_00 = vld1_f32(&a[j + 0]);
const float32x2_t a_08 = vld1_f32(&a[j + 8]);
const float32x2_t a_32 = vld1_f32(&a[j + 32]);
const float32x2_t a_40 = vld1_f32(&a[j + 40]);
const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
const float32x2_t a_16 = vld1_f32(&a[j + 16]);
const float32x2_t a_24 = vld1_f32(&a[j + 24]);
const float32x2_t a_48 = vld1_f32(&a[j + 48]);
const float32x2_t a_56 = vld1_f32(&a[j + 56]);
const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
const float32x4_t xx0 = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
const float32x4_t x1_x3_add =
vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x4_t x1_x3_sub =
vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x2_t yy0_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 0);
const float32x2_t yy0_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 0);
const float32x4_t yy0_as = vcombine_f32(yy0_a, yy0_s);
const float32x2_t yy1_a = vdup_lane_f32(vget_high_f32(x1_x3_add), 1);
const float32x2_t yy1_s = vdup_lane_f32(vget_high_f32(x1_x3_sub), 1);
const float32x4_t yy1_as = vcombine_f32(yy1_a, yy1_s);
const float32x4_t yy0 = vmlaq_f32(yy0_as, vec_swap_sign, yy1_as);
const float32x4_t yy4 = vmulq_f32(wk1rv, yy0);
const float32x4_t xx1_rev = vrev64q_f32(xx1);
const float32x4_t yy4_rev = vrev64q_f32(yy4);
vst1_f32(&a[j + 0], vget_low_f32(xx0));
vst1_f32(&a[j + 32], vget_high_f32(xx0));
vst1_f32(&a[j + 16], vget_low_f32(xx1));
vst1_f32(&a[j + 48], vget_high_f32(xx1_rev));
a[j + 48] = -a[j + 48];
vst1_f32(&a[j + 8], vget_low_f32(x1_x3_add));
vst1_f32(&a[j + 24], vget_low_f32(x1_x3_sub));
vst1_f32(&a[j + 40], vget_low_f32(yy4));
vst1_f32(&a[j + 56], vget_high_f32(yy4_rev));
}
{
const int k = 64;
const int k1 = 2;
const int k2 = 2 * k1;
const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);
wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
for (j = k; j < l + k; j += 2) {
const float32x2_t a_00 = vld1_f32(&a[j + 0]);
const float32x2_t a_08 = vld1_f32(&a[j + 8]);
const float32x2_t a_32 = vld1_f32(&a[j + 32]);
const float32x2_t a_40 = vld1_f32(&a[j + 40]);
const float32x4_t a_00_32 = vcombine_f32(a_00, a_32);
const float32x4_t a_08_40 = vcombine_f32(a_08, a_40);
const float32x4_t x0r0_0i0_0r1_x0i1 = vaddq_f32(a_00_32, a_08_40);
const float32x4_t x1r0_1i0_1r1_x1i1 = vsubq_f32(a_00_32, a_08_40);
const float32x2_t a_16 = vld1_f32(&a[j + 16]);
const float32x2_t a_24 = vld1_f32(&a[j + 24]);
const float32x2_t a_48 = vld1_f32(&a[j + 48]);
const float32x2_t a_56 = vld1_f32(&a[j + 56]);
const float32x4_t a_16_48 = vcombine_f32(a_16, a_48);
const float32x4_t a_24_56 = vcombine_f32(a_24, a_56);
const float32x4_t x2r0_2i0_2r1_x2i1 = vaddq_f32(a_16_48, a_24_56);
const float32x4_t x3r0_3i0_3r1_x3i1 = vsubq_f32(a_16_48, a_24_56);
const float32x4_t xx = vaddq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t xx1 = vsubq_f32(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const float32x4_t x3i0_3r0_3i1_x3r1 = vrev64q_f32(x3r0_3i0_3r1_x3i1);
const float32x4_t x1_x3_add =
vmlaq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
const float32x4_t x1_x3_sub =
vmlsq_f32(x1r0_1i0_1r1_x1i1, vec_swap_sign, x3i0_3r0_3i1_x3r1);
float32x4_t xx4 = vmulq_f32(wk2rv, xx1);
float32x4_t xx12 = vmulq_f32(wk1rv, x1_x3_add);
float32x4_t xx22 = vmulq_f32(wk3rv, x1_x3_sub);
xx4 = vmlaq_f32(xx4, wk2iv, vrev64q_f32(xx1));
xx12 = vmlaq_f32(xx12, wk1iv, vrev64q_f32(x1_x3_add));
xx22 = vmlaq_f32(xx22, wk3iv, vrev64q_f32(x1_x3_sub));
vst1_f32(&a[j + 0], vget_low_f32(xx));
vst1_f32(&a[j + 32], vget_high_f32(xx));
vst1_f32(&a[j + 16], vget_low_f32(xx4));
vst1_f32(&a[j + 48], vget_high_f32(xx4));
vst1_f32(&a[j + 8], vget_low_f32(xx12));
vst1_f32(&a[j + 40], vget_high_f32(xx12));
vst1_f32(&a[j + 24], vget_low_f32(xx22));
vst1_f32(&a[j + 56], vget_high_f32(xx22));
}
}
}
__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
// A B C D -> C D A B
const float32x4_t rev = vcombine_f32(vget_high_f32(in), vget_low_f32(in));
// C D A B -> D C B A
return vrev64q_f32(rev);
}
static void rftfsub_128_neon(float* a) {
const float* c = rdft_w + 32;
int j1, j2;
const float32x4_t mm_half = vdupq_n_f32(0.5f);
// Vectorized code (four at once).
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
// Load and shuffle 'a'.
// 2, 4, 6, 8, 3, 5, 7, 9
float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
// 120, 122, 124, 126, 121, 123, 125, 127,
const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
// 126, 124, 122, 120
const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
// 127, 125, 123, 121
const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
// Calculate 'x'.
const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
// 2-126, 4-124, 6-122, 8-120,
const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
// 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr - wki * xi;
// yi = wkr * xi + wki * xr;
const float32x4_t a_ = vmulq_f32(wkr_, xr_);
const float32x4_t b_ = vmulq_f32(wki_, xi_);
const float32x4_t c_ = vmulq_f32(wkr_, xi_);
const float32x4_t d_ = vmulq_f32(wki_, xr_);
const float32x4_t yr_ = vsubq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const float32x4_t yi_ = vaddq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
// a[j2 + 0] -= yr;
// a[j2 + 1] -= yi;
// a[k2 + 0] += yr;
// a[k2 + 1] -= yi;
// 126, 124, 122, 120,
const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
// 127, 125, 123, 121,
const float32x4_t a_k2_p1n = vsubq_f32(a_k2_p1, yi_);
// Shuffle in right order and store.
const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
// 124, 125, 126, 127, 120, 121, 122, 123
const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
// 2, 4, 6, 8,
a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
// 3, 5, 7, 9,
a_j2_p.val[1] = vsubq_f32(a_j2_p.val[1], yi_);
// 2, 3, 4, 5, 6, 7, 8, 9,
vst2q_f32(&a[0 + j2], a_j2_p);
vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
const int k2 = 128 - j2;
const int k1 = 32 - j1;
const float wkr = 0.5f - c[k1];
const float wki = c[j1];
const float xr = a[j2 + 0] - a[k2 + 0];
const float xi = a[j2 + 1] + a[k2 + 1];
const float yr = wkr * xr - wki * xi;
const float yi = wkr * xi + wki * xr;
a[j2 + 0] -= yr;
a[j2 + 1] -= yi;
a[k2 + 0] += yr;
a[k2 + 1] -= yi;
}
}
static void rftbsub_128_neon(float* a) {
const float* c = rdft_w + 32;
int j1, j2;
const float32x4_t mm_half = vdupq_n_f32(0.5f);
a[1] = -a[1];
// Vectorized code (four at once).
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
const float32x4_t c_j1 = vld1q_f32(&c[j1]); // 1, 2, 3, 4,
const float32x4_t c_k1 = vld1q_f32(&c[29 - j1]); // 28, 29, 30, 31,
const float32x4_t wkrt = vsubq_f32(mm_half, c_k1); // 28, 29, 30, 31,
const float32x4_t wkr_ = reverse_order_f32x4(wkrt); // 31, 30, 29, 28,
const float32x4_t wki_ = c_j1; // 1, 2, 3, 4,
// Load and shuffle 'a'.
// 2, 4, 6, 8, 3, 5, 7, 9
float32x4x2_t a_j2_p = vld2q_f32(&a[0 + j2]);
// 120, 122, 124, 126, 121, 123, 125, 127,
const float32x4x2_t k2_0_4 = vld2q_f32(&a[122 - j2]);
// 126, 124, 122, 120
const float32x4_t a_k2_p0 = reverse_order_f32x4(k2_0_4.val[0]);
// 127, 125, 123, 121
const float32x4_t a_k2_p1 = reverse_order_f32x4(k2_0_4.val[1]);
// Calculate 'x'.
const float32x4_t xr_ = vsubq_f32(a_j2_p.val[0], a_k2_p0);
// 2-126, 4-124, 6-122, 8-120,
const float32x4_t xi_ = vaddq_f32(a_j2_p.val[1], a_k2_p1);
// 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr - wki * xi;
// yi = wkr * xi + wki * xr;
const float32x4_t a_ = vmulq_f32(wkr_, xr_);
const float32x4_t b_ = vmulq_f32(wki_, xi_);
const float32x4_t c_ = vmulq_f32(wkr_, xi_);
const float32x4_t d_ = vmulq_f32(wki_, xr_);
const float32x4_t yr_ = vaddq_f32(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const float32x4_t yi_ = vsubq_f32(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
    // a[j2 + 0] = a[j2 + 0] - yr;
    // a[j2 + 1] = yi - a[j2 + 1];
    // a[k2 + 0] = yr + a[k2 + 0];
    // a[k2 + 1] = yi - a[k2 + 1];
// 126, 124, 122, 120,
const float32x4_t a_k2_p0n = vaddq_f32(a_k2_p0, yr_);
// 127, 125, 123, 121,
const float32x4_t a_k2_p1n = vsubq_f32(yi_, a_k2_p1);
// Shuffle in right order and store.
// 2, 3, 4, 5, 6, 7, 8, 9,
const float32x4_t a_k2_p0nr = vrev64q_f32(a_k2_p0n);
const float32x4_t a_k2_p1nr = vrev64q_f32(a_k2_p1n);
// 124, 125, 126, 127, 120, 121, 122, 123
const float32x4x2_t a_k2_n = vzipq_f32(a_k2_p0nr, a_k2_p1nr);
// 2, 4, 6, 8,
a_j2_p.val[0] = vsubq_f32(a_j2_p.val[0], yr_);
// 3, 5, 7, 9,
a_j2_p.val[1] = vsubq_f32(yi_, a_j2_p.val[1]);
// 2, 3, 4, 5, 6, 7, 8, 9,
vst2q_f32(&a[0 + j2], a_j2_p);
vst1q_f32(&a[122 - j2], a_k2_n.val[1]);
vst1q_f32(&a[126 - j2], a_k2_n.val[0]);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
const int k2 = 128 - j2;
const int k1 = 32 - j1;
const float wkr = 0.5f - c[k1];
const float wki = c[j1];
const float xr = a[j2 + 0] - a[k2 + 0];
const float xi = a[j2 + 1] + a[k2 + 1];
const float yr = wkr * xr + wki * xi;
const float yi = wkr * xi - wki * xr;
a[j2 + 0] = a[j2 + 0] - yr;
a[j2 + 1] = yi - a[j2 + 1];
a[k2 + 0] = yr + a[k2 + 0];
a[k2 + 1] = yi - a[k2 + 1];
}
a[65] = -a[65];
}
void aec_rdft_init_neon(void) {
cft1st_128 = cft1st_128_neon;
cftmdl_128 = cftmdl_128_neon;
rftfsub_128 = rftfsub_128_neon;
rftbsub_128 = rftbsub_128_neon;
}


@ -8,172 +8,168 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "typedefs.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"
#if defined(WEBRTC_USE_SSE2)
#include <emmintrin.h>
#include "aec_rdft.h"
static const ALIGN16_BEG float ALIGN16_END
k_swap_sign[4] = {-1.f, 1.f, -1.f, 1.f};
static const ALIGN16_BEG float ALIGN16_END k_swap_sign[4] =
{-1.f, 1.f, -1.f, 1.f};
static void cft1st_128_SSE2(float *a) {
static void cft1st_128_SSE2(float* a) {
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j, k2;
for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
__m128 a00v = _mm_loadu_ps(&a[j + 0]);
__m128 a04v = _mm_loadu_ps(&a[j + 4]);
__m128 a08v = _mm_loadu_ps(&a[j + 8]);
__m128 a12v = _mm_loadu_ps(&a[j + 12]);
__m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0));
__m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2));
__m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0));
__m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2));
const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]);
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]);
__m128 x0v = _mm_add_ps(a01v, a23v);
const __m128 x1v = _mm_sub_ps(a01v, a23v);
const __m128 x2v = _mm_add_ps(a45v, a67v);
const __m128 x3v = _mm_sub_ps(a45v, a67v);
__m128 x0w;
a01v = _mm_add_ps(x0v, x2v);
x0v = _mm_sub_ps(x0v, x2v);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
{
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
      a45v = _mm_add_ps(a45_0v, a45_1v);
}
{
__m128 a23_0v, a23_1v;
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1));
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
x0v = _mm_add_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
a23_0v = _mm_mul_ps(wk1rv, x0v);
a23_1v = _mm_mul_ps(wk1iv, x0w);
a23v = _mm_add_ps(a23_0v, a23_1v);
x0v = _mm_sub_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1));
}
{
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
      a67v = _mm_add_ps(a67_0v, a67_1v);
}
a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0));
a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0));
a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2));
a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2));
_mm_storeu_ps(&a[j + 0], a00v);
_mm_storeu_ps(&a[j + 4], a04v);
_mm_storeu_ps(&a[j + 8], a08v);
_mm_storeu_ps(&a[j + 12], a12v);
}
}
static void cftmdl_128_SSE2(float* a) {
const int l = 8;
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j0;
__m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
for (j0 = 0; j0 < l; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
_mm_castsi128_ps(a_32),
_MM_SHUFFLE(1, 0, 1, 0));
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
_mm_castsi128_ps(a_40),
_MM_SHUFFLE(1, 0, 1, 0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
_MM_SHUFFLE(1, 0, 1, 0));
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
_MM_SHUFFLE(1, 0, 1, 0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 yy0 =
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2));
const __m128 yy1 =
_mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3));
const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
const __m128 yy3 = _mm_add_ps(yy0, yy2);
const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
_mm_storel_epi64(
(__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
_mm_storel_epi64(
(__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3)));
a[j0 + 48] = -a[j0 + 48];
    _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
_mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
_mm_storel_epi64(
(__m128i*)&a[j0 + 56],
_mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3)));
}
{
int k = 64;
int k1 = 2;
int k2 = 2 * k1;
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]);
wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]);
for (j0 = k; j0 < l + k; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
      const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
                                            _mm_castsi128_ps(a_32),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
                                            _mm_castsi128_ps(a_40),
                                            _MM_SHUFFLE(1, 0, 1, 0));
      __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
@ -182,100 +178,102 @@ static void cftmdl_128_SSE2(float *a) {
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
                                            _MM_SHUFFLE(1, 0, 1, 0));
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
                                            _MM_SHUFFLE(1, 0, 1, 0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx2 = _mm_mul_ps(xx1, wk2rv);
const __m128 xx3 =
_mm_mul_ps(wk2iv,
_mm_castsi128_ps(_mm_shuffle_epi32(
_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx4 = _mm_add_ps(xx2, xx3);
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32(
_mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
const __m128 xx11 = _mm_mul_ps(
wk1iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx12 = _mm_add_ps(xx10, xx11);
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
const __m128 xx21 = _mm_mul_ps(
wk3iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
                                             _MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx22 = _mm_add_ps(xx20, xx21);
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
_mm_storel_epi64(
(__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
_mm_storel_epi64(
(__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
_mm_storel_epi64(
(__m128i*)&a[j0 + 40],
_mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
_mm_storel_epi64(
(__m128i*)&a[j0 + 56],
_mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2)));
}
}
}
static void rftfsub_128_SSE2(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
static const ALIGN16_BEG float ALIGN16_END
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 mm_half = _mm_load_ps(k_half);
// Vectorized code (four at once).
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4,
const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31,
const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31,
    const __m128 wkr_ =
        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
    const __m128 wki_ = c_j1;  // 1, 2, 3, 4,
// Load and shuffle 'a'.
const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
const __m128 a_j2_p0 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
const __m128 a_j2_p1 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
const __m128 a_k2_p0 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
const __m128 a_k2_p1 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
// Calculate 'x'.
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr - wki * xi;
// yi = wkr * xi + wki * xr;
@ -283,12 +281,12 @@ static void rftfsub_128_SSE2(float *a) {
const __m128 b_ = _mm_mul_ps(wki_, xi_);
const __m128 c_ = _mm_mul_ps(wkr_, xi_);
const __m128 d_ = _mm_mul_ps(wki_, xr_);
const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
// a[j2 + 0] -= yr;
// a[j2 + 1] -= yi;
// a[k2 + 0] += yr;
// a[k2 + 1] -= yi;
const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9,
@ -296,26 +294,26 @@ static void rftfsub_128_SSE2(float *a) {
const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121,
// Shuffle in right order and store.
const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
    // 2, 3, 4, 5,
const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
    // 6, 7, 8, 9,
const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
    // 122, 123, 120, 121,
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
    // 126, 127, 124, 125,
    const __m128 a_k2_0n = _mm_shuffle_ps(
        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
    const __m128 a_k2_4n = _mm_shuffle_ps(
        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
_mm_storeu_ps(&a[126 - j2], a_k2_4n);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -329,13 +327,13 @@ static void rftfsub_128_SSE2(float *a) {
}
}
static void rftbsub_128_SSE2(float* a) {
const float* c = rdft_w + 32;
int j1, j2, k1, k2;
float wkr, wki, xr, xi, yr, yi;
static const ALIGN16_BEG float ALIGN16_END
k_half[4] = {0.5f, 0.5f, 0.5f, 0.5f};
const __m128 mm_half = _mm_load_ps(k_half);
a[1] = -a[1];
@ -343,30 +341,30 @@ static void rftbsub_128_SSE2(float *a) {
  // Note: commented numbers are indexes for the first iteration of the loop.
for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) {
// Load 'wk'.
    const __m128 c_j1 = _mm_loadu_ps(&c[j1]);       // 1, 2, 3, 4,
    const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]);  // 28, 29, 30, 31,
    const __m128 wkrt = _mm_sub_ps(mm_half, c_k1);  // 28, 29, 30, 31,
    const __m128 wkr_ =
        _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3));  // 31, 30, 29, 28,
    const __m128 wki_ = c_j1;  // 1, 2, 3, 4,
// Load and shuffle 'a'.
const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5,
const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9,
const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123,
const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127,
const __m128 a_j2_p0 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8,
const __m128 a_j2_p1 = _mm_shuffle_ps(
a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9,
const __m128 a_k2_p0 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120,
const __m128 a_k2_p1 = _mm_shuffle_ps(
a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121,
// Calculate 'x'.
const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0);
    // 2-126, 4-124, 6-122, 8-120,
const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1);
    // 3-127, 5-125, 7-123, 9-121,
// Calculate product into 'y'.
// yr = wkr * xr + wki * xi;
// yi = wkr * xi - wki * xr;
@ -374,12 +372,12 @@ static void rftbsub_128_SSE2(float *a) {
const __m128 b_ = _mm_mul_ps(wki_, xi_);
const __m128 c_ = _mm_mul_ps(wkr_, xi_);
const __m128 d_ = _mm_mul_ps(wki_, xr_);
const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120,
const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121,
// Update 'a'.
// a[j2 + 0] = a[j2 + 0] - yr;
// a[j2 + 1] = yi - a[j2 + 1];
// a[k2 + 0] = yr + a[k2 + 0];
// a[k2 + 1] = yi - a[k2 + 1];
const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8,
const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9,
@ -387,26 +385,26 @@ static void rftbsub_128_SSE2(float *a) {
const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121,
// Shuffle in right order and store.
const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n);
    // 2, 3, 4, 5,
const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n);
    // 6, 7, 8, 9,
const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n);
    // 122, 123, 120, 121,
const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n);
    // 126, 127, 124, 125,
    const __m128 a_k2_0n = _mm_shuffle_ps(
        a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2));  // 120, 121, 122, 123,
    const __m128 a_k2_4n = _mm_shuffle_ps(
        a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2));  // 124, 125, 126, 127,
    _mm_storeu_ps(&a[0 + j2], a_j2_0n);
    _mm_storeu_ps(&a[4 + j2], a_j2_4n);
_mm_storeu_ps(&a[122 - j2], a_k2_0n);
_mm_storeu_ps(&a[126 - j2], a_k2_4n);
}
// Scalar code for the remaining items.
for (; j2 < 64; j1 += 1, j2 += 2) {
k2 = 128 - j2;
    k1 = 32 - j1;
wkr = 0.5f - c[k1];
wki = c[j1];
xr = a[j2 + 0] - a[k2 + 0];
@ -427,5 +425,3 @@ void aec_rdft_init_sse2(void) {
rftfsub_128 = rftfsub_128_SSE2;
rftbsub_128 = rftbsub_128_SSE2;
}


@ -0,0 +1,209 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for
* clock skew by resampling the farend signal.
*/
#include "webrtc/modules/audio_processing/aec/aec_resampler.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "webrtc/modules/audio_processing/aec/aec_core.h"
enum {
kEstimateLengthFrames = 400
};
typedef struct {
float buffer[kResamplerBufferSize];
float position;
int deviceSampleRateHz;
int skewData[kEstimateLengthFrames];
int skewDataIndex;
float skewEstimate;
} AecResampler;
static int EstimateSkew(const int* rawSkew,
                        int size,
                        int deviceSampleRateHz,
                        float* skewEst);
void* WebRtcAec_CreateResampler() {
return malloc(sizeof(AecResampler));
}
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz) {
AecResampler* obj = (AecResampler*)resampInst;
memset(obj->buffer, 0, sizeof(obj->buffer));
obj->position = 0.0;
obj->deviceSampleRateHz = deviceSampleRateHz;
memset(obj->skewData, 0, sizeof(obj->skewData));
obj->skewDataIndex = 0;
obj->skewEstimate = 0.0;
return 0;
}
void WebRtcAec_FreeResampler(void* resampInst) {
AecResampler* obj = (AecResampler*)resampInst;
free(obj);
}
void WebRtcAec_ResampleLinear(void* resampInst,
const float* inspeech,
size_t size,
float skew,
float* outspeech,
size_t* size_out) {
AecResampler* obj = (AecResampler*)resampInst;
float* y;
float be, tnew;
size_t tn, mm;
assert(size <= 2 * FRAME_LEN);
assert(resampInst != NULL);
assert(inspeech != NULL);
assert(outspeech != NULL);
assert(size_out != NULL);
// Add new frame data in lookahead
memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
inspeech,
size * sizeof(inspeech[0]));
// Sample rate ratio
be = 1 + skew;
// Loop over input frame
mm = 0;
y = &obj->buffer[FRAME_LEN]; // Point at current frame
tnew = be * mm + obj->position;
tn = (size_t)tnew;
while (tn < size) {
// Interpolation
outspeech[mm] = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
mm++;
tnew = be * mm + obj->position;
    tn = (size_t)tnew;
}
*size_out = mm;
obj->position += (*size_out) * be - size;
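  // Carry the fractional read position over to the next frame: the buffer is
  // shifted by 'size' samples below, so the position is rebased by -size.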
// Shift buffer
memmove(obj->buffer,
&obj->buffer[size],
(kResamplerBufferSize - size) * sizeof(obj->buffer[0]));
}
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst) {
AecResampler* obj = (AecResampler*)resampInst;
int err = 0;
if (obj->skewDataIndex < kEstimateLengthFrames) {
obj->skewData[obj->skewDataIndex] = rawSkew;
obj->skewDataIndex++;
} else if (obj->skewDataIndex == kEstimateLengthFrames) {
err = EstimateSkew(
obj->skewData, kEstimateLengthFrames, obj->deviceSampleRateHz, skewEst);
obj->skewEstimate = *skewEst;
obj->skewDataIndex++;
} else {
*skewEst = obj->skewEstimate;
}
return err;
}
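/* EstimateSkew() below fits a least-squares line to the cumulative sum of the
 * outlier-filtered raw skew samples; with x the accepted-sample index and y
 * the cumulative sum, the slope is
 *   skew = (sum(x*y) - xAvg * sum(y)) / (sum(x^2) - xAvg * sum(x)).
 * Samples beyond +/- absLimitOuter are ignored when computing the mean and
 * mean absolute deviation; the regression then keeps samples within the inner
 * limit or within roughly five absolute deviations of the mean.
 */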
int EstimateSkew(const int* rawSkew,
int size,
int deviceSampleRateHz,
float* skewEst) {
const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
int i = 0;
int n = 0;
float rawAvg = 0;
float err = 0;
float rawAbsDev = 0;
int upperLimit = 0;
int lowerLimit = 0;
float cumSum = 0;
float x = 0;
float x2 = 0;
float y = 0;
float xy = 0;
float xAvg = 0;
float denom = 0;
float skew = 0;
*skewEst = 0; // Set in case of error below.
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
n++;
rawAvg += rawSkew[i];
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
rawAvg /= n;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
err = rawSkew[i] - rawAvg;
rawAbsDev += err >= 0 ? err : -err;
}
}
assert(n > 0);
rawAbsDev /= n;
upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling.
lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor.
n = 0;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) ||
(rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) {
n++;
cumSum += rawSkew[i];
x += n;
x2 += n * n;
y += cumSum;
xy += n * cumSum;
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
xAvg = x / n;
denom = x2 - xAvg * x;
if (denom != 0) {
skew = (xy - xAvg * y) / denom;
}
*skewEst = skew;
return 0;
}


@ -0,0 +1,39 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_
#include "webrtc/modules/audio_processing/aec/aec_core.h"
enum {
kResamplingDelay = 1
};
enum {
kResamplerBufferSize = FRAME_LEN * 4
};
// Unless otherwise specified, functions return 0 on success and -1 on error.
void* WebRtcAec_CreateResampler(); // Returns NULL on error.
int WebRtcAec_InitResampler(void* resampInst, int deviceSampleRateHz);
void WebRtcAec_FreeResampler(void* resampInst);
// Estimates skew from raw measurement.
int WebRtcAec_GetSkew(void* resampInst, int rawSkew, float* skewEst);
// Resamples input using linear interpolation.
void WebRtcAec_ResampleLinear(void* resampInst,
const float* inspeech,
size_t size,
float skew,
float* outspeech,
size_t* size_out);
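/* A minimal usage sketch (illustrative only; the device rate and skew value
 * are assumptions, not requirements of this header):
 *
 *   float out[2 * FRAME_LEN];
 *   size_t out_len = 0;
 *   void* rs = WebRtcAec_CreateResampler();
 *   WebRtcAec_InitResampler(rs, 48000);
 *   // Per frame, with 'in' holding FRAME_LEN samples and 'skew' obtained
 *   // from WebRtcAec_GetSkew():
 *   WebRtcAec_ResampleLinear(rs, in, FRAME_LEN, skew, out, &out_len);
 *   WebRtcAec_FreeResampler(rs);
 */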
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_RESAMPLER_H_

File diff suppressed because it is too large.


@ -0,0 +1,67 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/modules/audio_processing/aec/aec_core.h"
typedef struct {
int delayCtr;
int sampFreq;
int splitSampFreq;
int scSampFreq;
float sampFactor; // scSampRate / sampFreq
short skewMode;
int bufSizeStart;
int knownDelay;
int rate_factor;
short initFlag; // indicates if AEC has been initialized
// Variables used for averaging far end buffer size
short counter;
int sum;
short firstVal;
short checkBufSizeCtr;
// Variables used for delay shifts
short msInSndCardBuf;
short filtDelay; // Filtered delay estimate.
int timeForDelayChange;
int startup_phase;
int checkBuffSize;
short lastDelayDiff;
#ifdef WEBRTC_AEC_DEBUG_DUMP
FILE* bufFile;
FILE* delayFile;
FILE* skewFile;
#endif
// Structures
void* resampler;
int skewFrCtr;
int resample; // if the skew is small enough we don't resample
int highSkewCtr;
float skew;
RingBuffer* far_pre_buf; // Time domain far-end pre-buffer.
int lastError;
int farend_started;
AecCore* aec;
} Aec;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_ECHO_CANCELLATION_INTERNAL_H_


@ -0,0 +1,245 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
// Errors
#define AEC_UNSPECIFIED_ERROR 12000
#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
#define AEC_UNINITIALIZED_ERROR 12002
#define AEC_NULL_POINTER_ERROR 12003
#define AEC_BAD_PARAMETER_ERROR 12004
// Warnings
#define AEC_BAD_PARAMETER_WARNING 12050
enum {
kAecNlpConservative = 0,
kAecNlpModerate,
kAecNlpAggressive
};
enum {
kAecFalse = 0,
kAecTrue
};
typedef struct {
int16_t nlpMode; // default kAecNlpModerate
int16_t skewMode; // default kAecFalse
int16_t metricsMode; // default kAecFalse
int delay_logging; // default kAecFalse
// float realSkew;
} AecConfig;
typedef struct {
int instant;
int average;
int max;
int min;
} AecLevel;
typedef struct {
AecLevel rerl;
AecLevel erl;
AecLevel erle;
AecLevel aNlp;
} AecMetrics;
struct AecCore;
#ifdef __cplusplus
extern "C" {
#endif
/*
* Allocates the memory needed by the AEC. The memory needs to be initialized
* separately using the WebRtcAec_Init() function. Returns a pointer to the
* object or NULL on error.
*/
void* WebRtcAec_Create();
/*
* This function releases the memory allocated by WebRtcAec_Create().
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
*/
void WebRtcAec_Free(void* aecInst);
/*
* Initializes an AEC instance.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* int32_t sampFreq Sampling frequency of data
* int32_t scSampFreq Soundcard sampling frequency
*
* Outputs Description
* -------------------------------------------------------------------
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_Init(void* aecInst, int32_t sampFreq, int32_t scSampFreq);
/*
* Inserts an 80 or 160 sample block of data into the farend buffer.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* const float* farend In buffer containing one frame of
* farend signal for L band
 * size_t nrOfSamples Number of samples in farend buffer
*
* Outputs Description
* -------------------------------------------------------------------
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_BufferFarend(void* aecInst,
const float* farend,
size_t nrOfSamples);
/*
 * Runs the echo canceller on an 80 or 160 sample block of data.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
* float* const* nearend In buffer containing one frame of
* nearend+echo signal for each band
 * size_t num_bands Number of bands in nearend buffer
 * size_t nrOfSamples Number of samples in nearend buffer
* int16_t msInSndCardBuf Delay estimate for sound card and
* system buffers
 * int32_t skew Difference between number of samples played
* and recorded at the soundcard (for clock skew
* compensation)
*
* Outputs Description
* -------------------------------------------------------------------
* float* const* out Out buffer, one frame of processed nearend
* for each band
* int32_t return 0: OK
* -1: error
*/
int32_t WebRtcAec_Process(void* aecInst,
const float* const* nearend,
size_t num_bands,
float* const* out,
size_t nrOfSamples,
int16_t msInSndCardBuf,
int32_t skew);
/*
* This function enables the user to set certain parameters on-the-fly.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
* AecConfig config Config instance that contains all
* properties to be set
*
* Outputs Description
* -------------------------------------------------------------------
* int return 0: OK
* -1: error
*/
int WebRtcAec_set_config(void* handle, AecConfig config);
/*
* Gets the current echo status of the nearend signal.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* status 0: Almost certainly nearend single-talk
 * 1: Might not be nearend single-talk
* int return 0: OK
* -1: error
*/
int WebRtcAec_get_echo_status(void* handle, int* status);
/*
* Gets the current echo metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecMetrics* metrics Struct which will be filled out with the
* current echo metrics.
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetMetrics(void* handle, AecMetrics* metrics);
/*
* Gets the current delay metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* median Delay median value.
* int* std Delay standard deviation.
* float* fraction_poor_delays Fraction of the delay estimates that may
* cause the AEC to perform poorly.
*
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetDelayMetrics(void* handle,
int* median,
int* std,
float* fraction_poor_delays);
/*
* Gets the last error code.
*
* Inputs Description
* -------------------------------------------------------------------
* void* aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
 * int32_t return 12000-12050: error code
*/
int32_t WebRtcAec_get_error_code(void* aecInst);
// Returns a pointer to the low level AEC handle.
//
// Input:
// - handle : Pointer to the AEC instance.
//
// Return value:
// - AecCore pointer : NULL for error.
//
struct AecCore* WebRtcAec_aec_core(void* handle);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_INCLUDE_ECHO_CANCELLATION_H_
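Tying the calls above together, a minimal single-frame sketch; the 16 kHz rates, the 40 ms delay estimate, and the zero skew are placeholder values, and the header above is assumed to be included:

#include <stdint.h>

// Sketch under stated assumptions: 160 samples per 10 ms frame at 16 kHz,
// a single band, no clock-skew compensation.
static int AecProcessOneFrame(const float* farend,
                              const float* const* nearend,
                              float* const* out) {
  AecConfig config;
  void* aec = WebRtcAec_Create();
  if (aec == NULL)
    return -1;
  if (WebRtcAec_Init(aec, 16000, 16000) != 0)
    goto fail;
  config.nlpMode = kAecNlpModerate;
  config.skewMode = kAecFalse;
  config.metricsMode = kAecFalse;
  config.delay_logging = kAecFalse;
  if (WebRtcAec_set_config(aec, config) != 0)
    goto fail;
  if (WebRtcAec_BufferFarend(aec, farend, 160) != 0)
    goto fail;
  if (WebRtcAec_Process(aec, nearend, 1, out, 160,
                        /*msInSndCardBuf=*/40, /*skew=*/0) != 0)
    goto fail;
  WebRtcAec_Free(aec);
  return 0;
fail:
  {
    int32_t err = WebRtcAec_get_error_code(aec);
    WebRtcAec_Free(aec);
    return (int)err;
  }
}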

View File

@ -1,278 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_
#include "typedefs.h"
// Errors
#define AEC_UNSPECIFIED_ERROR 12000
#define AEC_UNSUPPORTED_FUNCTION_ERROR 12001
#define AEC_UNINITIALIZED_ERROR 12002
#define AEC_NULL_POINTER_ERROR 12003
#define AEC_BAD_PARAMETER_ERROR 12004
// Warnings
#define AEC_BAD_PARAMETER_WARNING 12050
enum {
kAecNlpConservative = 0,
kAecNlpModerate,
kAecNlpAggressive
};
enum {
kAecFalse = 0,
kAecTrue
};
typedef struct {
WebRtc_Word16 nlpMode; // default kAecNlpModerate
WebRtc_Word16 skewMode; // default kAecFalse
WebRtc_Word16 metricsMode; // default kAecFalse
int delay_logging; // default kAecFalse
//float realSkew;
} AecConfig;
typedef struct {
WebRtc_Word16 instant;
WebRtc_Word16 average;
WebRtc_Word16 max;
WebRtc_Word16 min;
} AecLevel;
typedef struct {
AecLevel rerl;
AecLevel erl;
AecLevel erle;
AecLevel aNlp;
} AecMetrics;
#ifdef __cplusplus
extern "C" {
#endif
/*
* Allocates the memory needed by the AEC. The memory needs to be initialized
* separately using the WebRtcAec_Init() function.
*
* Inputs Description
* -------------------------------------------------------------------
* void **aecInst Pointer to the AEC instance to be created
* and initialized
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Create(void **aecInst);
/*
* This function releases the memory allocated by WebRtcAec_Create().
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Free(void *aecInst);
/*
* Initializes an AEC instance.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* WebRtc_Word32 sampFreq Sampling frequency of data
* WebRtc_Word32 scSampFreq Soundcard sampling frequency
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Init(void *aecInst,
WebRtc_Word32 sampFreq,
WebRtc_Word32 scSampFreq);
/*
* Inserts an 80 or 160 sample block of data into the farend buffer.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* WebRtc_Word16 *farend In buffer containing one frame of
* farend signal for L band
* WebRtc_Word16 nrOfSamples Number of samples in farend buffer
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_BufferFarend(void *aecInst,
const WebRtc_Word16 *farend,
WebRtc_Word16 nrOfSamples);
/*
 * Runs the echo canceller on an 80 or 160 sample block of data.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* WebRtc_Word16 *nearend In buffer containing one frame of
* nearend+echo signal for L band
* WebRtc_Word16 *nearendH In buffer containing one frame of
* nearend+echo signal for H band
* WebRtc_Word16 nrOfSamples Number of samples in nearend buffer
* WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and
* system buffers
* WebRtc_Word16 skew Difference between number of samples played
* and recorded at the soundcard (for clock skew
* compensation)
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word16 *out Out buffer, one frame of processed nearend
* for L band
* WebRtc_Word16 *outH Out buffer, one frame of processed nearend
* for H band
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_Process(void *aecInst,
const WebRtc_Word16 *nearend,
const WebRtc_Word16 *nearendH,
WebRtc_Word16 *out,
WebRtc_Word16 *outH,
WebRtc_Word16 nrOfSamples,
WebRtc_Word16 msInSndCardBuf,
WebRtc_Word32 skew);
/*
* This function enables the user to set certain parameters on-the-fly.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
* AecConfig config Config instance that contains all
* properties to be set
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config);
/*
 * Gets the on-the-fly parameters.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecConfig *config Pointer to the config instance that
* all properties will be written to
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config);
/*
* Gets the current echo status of the nearend signal.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word16 *status 0: Almost certainly nearend single-talk
 * 1: Might not be nearend single-talk
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status);
/*
* Gets the current echo metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecMetrics *metrics Struct which will be filled out with the
* current echo metrics.
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics);
/*
* Gets the current delay metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* median Delay median value.
* int* std Delay standard deviation.
*
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std);
/*
* Gets the last error code.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecInst Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
 * WebRtc_Word32 return 12000-12050: error code
*/
WebRtc_Word32 WebRtcAec_get_error_code(void *aecInst);
/*
* Gets a version string.
*
* Inputs Description
* -------------------------------------------------------------------
* char *versionStr Pointer to a string array
* WebRtc_Word16 len The maximum length of the string
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word8 *versionStr Pointer to a string array
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len);
#ifdef __cplusplus
}
#endif
#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_INTERFACE_ECHO_CANCELLATION_H_ */
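For contrast with the replacement header earlier in this commit, a hedged sketch of the same create/free pair in both styles (the prototypes conflict, so the two fragments would live in separate translation units):

// Against this (removed) header: instance via out-parameter, status returns.
static void OldStyleCreateFree(void) {
  void* inst = NULL;
  if (WebRtcAec_Create(&inst) == 0 && inst != NULL)
    WebRtcAec_Free(inst);  // Returns WebRtc_Word32.
}

// Against the new include/echo_cancellation.h: instance returned directly.
static void NewStyleCreateFree(void) {
  void* inst = WebRtcAec_Create();  // Returns the instance, or NULL on error.
  if (inst != NULL)
    WebRtcAec_Free(inst);  // Returns void.
}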

View File

@ -1,233 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/* Resamples a signal to an arbitrary rate. Used by the AEC to compensate for clock
* skew by resampling the farend signal.
*/
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>
#include "resampler.h"
#include "aec_core.h"
enum { kFrameBufferSize = FRAME_LEN * 4 };
enum { kEstimateLengthFrames = 400 };
typedef struct {
short buffer[kFrameBufferSize];
float position;
int deviceSampleRateHz;
int skewData[kEstimateLengthFrames];
int skewDataIndex;
float skewEstimate;
} resampler_t;
static int EstimateSkew(const int* rawSkew,
int size,
int absLimit,
float *skewEst);
int WebRtcAec_CreateResampler(void **resampInst)
{
resampler_t *obj = malloc(sizeof(resampler_t));
*resampInst = obj;
if (obj == NULL) {
return -1;
}
return 0;
}
int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz)
{
resampler_t *obj = (resampler_t*) resampInst;
memset(obj->buffer, 0, sizeof(obj->buffer));
obj->position = 0.0;
obj->deviceSampleRateHz = deviceSampleRateHz;
memset(obj->skewData, 0, sizeof(obj->skewData));
obj->skewDataIndex = 0;
obj->skewEstimate = 0.0;
return 0;
}
int WebRtcAec_FreeResampler(void *resampInst)
{
resampler_t *obj = (resampler_t*) resampInst;
free(obj);
return 0;
}
int WebRtcAec_ResampleLinear(void *resampInst,
const short *inspeech,
int size,
float skew,
short *outspeech)
{
resampler_t *obj = (resampler_t*) resampInst;
short *y;
float be, tnew, interp;
int tn, outsize, mm;
if (size < 0 || size > 2 * FRAME_LEN) {
return -1;
}
// Add new frame data in lookahead
memcpy(&obj->buffer[FRAME_LEN + kResamplingDelay],
inspeech,
size * sizeof(short));
// Sample rate ratio
be = 1 + skew;
// Loop over input frame
mm = 0;
y = &obj->buffer[FRAME_LEN]; // Point at current frame
tnew = be * mm + obj->position;
tn = (int) tnew;
while (tn < size) {
// Interpolation
interp = y[tn] + (tnew - tn) * (y[tn+1] - y[tn]);
if (interp > 32767) {
interp = 32767;
}
else if (interp < -32768) {
interp = -32768;
}
outspeech[mm] = (short) interp;
mm++;
tnew = be * mm + obj->position;
tn = (int) tnew;
}
outsize = mm;
obj->position += outsize * be - size;
// Shift buffer
memmove(obj->buffer,
&obj->buffer[size],
(kFrameBufferSize - size) * sizeof(short));
return outsize;
}
int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst)
{
resampler_t *obj = (resampler_t*)resampInst;
int err = 0;
if (obj->skewDataIndex < kEstimateLengthFrames) {
obj->skewData[obj->skewDataIndex] = rawSkew;
obj->skewDataIndex++;
}
else if (obj->skewDataIndex == kEstimateLengthFrames) {
err = EstimateSkew(obj->skewData,
kEstimateLengthFrames,
obj->deviceSampleRateHz,
skewEst);
obj->skewEstimate = *skewEst;
obj->skewDataIndex++;
}
else {
*skewEst = obj->skewEstimate;
}
return err;
}
int EstimateSkew(const int* rawSkew,
int size,
int deviceSampleRateHz,
float *skewEst)
{
const int absLimitOuter = (int)(0.04f * deviceSampleRateHz);
const int absLimitInner = (int)(0.0025f * deviceSampleRateHz);
int i = 0;
int n = 0;
float rawAvg = 0;
float err = 0;
float rawAbsDev = 0;
int upperLimit = 0;
int lowerLimit = 0;
float cumSum = 0;
float x = 0;
float x2 = 0;
float y = 0;
float xy = 0;
float xAvg = 0;
float denom = 0;
float skew = 0;
*skewEst = 0; // Set in case of error below.
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
n++;
rawAvg += rawSkew[i];
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
rawAvg /= n;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitOuter && rawSkew[i] > -absLimitOuter)) {
err = rawSkew[i] - rawAvg;
rawAbsDev += err >= 0 ? err : -err;
}
}
assert(n > 0);
rawAbsDev /= n;
upperLimit = (int)(rawAvg + 5 * rawAbsDev + 1); // +1 for ceiling.
lowerLimit = (int)(rawAvg - 5 * rawAbsDev - 1); // -1 for floor.
n = 0;
for (i = 0; i < size; i++) {
if ((rawSkew[i] < absLimitInner && rawSkew[i] > -absLimitInner) ||
(rawSkew[i] < upperLimit && rawSkew[i] > lowerLimit)) {
n++;
cumSum += rawSkew[i];
x += n;
x2 += n*n;
y += cumSum;
xy += n * cumSum;
}
}
if (n == 0) {
return -1;
}
assert(n > 0);
xAvg = x / n;
denom = x2 - xAvg*x;
if (denom != 0) {
skew = (xy - xAvg*y) / denom;
}
*skewEst = skew;
return 0;
}
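The interpolation loop above can be read in isolation: with ratio be = 1 + skew, output sample m is drawn at fractional input position tnew = be * m + position and linearly interpolated between its two input neighbours. A standalone sketch of the same idea (bounds-checked, so it needs no lookahead buffer):

// Illustration of the linear interpolation used by WebRtcAec_ResampleLinear();
// not part of the original file.
static int ResampleLinearSketch(const short* y, int size, float skew,
                                float position, short* out) {
  const float be = 1.0f + skew;  // Sample rate ratio.
  int m = 0;
  float tnew = position;
  int tn = (int)tnew;
  while (tn + 1 < size) {  // The original relies on lookahead; we stop early.
    float interp = y[tn] + (tnew - tn) * (y[tn + 1] - y[tn]);
    if (interp > 32767.0f) interp = 32767.0f;
    if (interp < -32768.0f) interp = -32768.0f;
    out[m++] = (short)interp;
    tnew = be * m + position;
    tn = (int)tnew;
  }
  return m;  // Number of output samples produced.
}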

View File

@ -1,32 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_
enum { kResamplingDelay = 1 };
// Unless otherwise specified, functions return 0 on success and -1 on error
int WebRtcAec_CreateResampler(void **resampInst);
int WebRtcAec_InitResampler(void *resampInst, int deviceSampleRateHz);
int WebRtcAec_FreeResampler(void *resampInst);
// Estimates skew from raw measurement.
int WebRtcAec_GetSkew(void *resampInst, int rawSkew, float *skewEst);
// Resamples input using linear interpolation.
// Returns size of resampled array.
int WebRtcAec_ResampleLinear(void *resampInst,
const short *inspeech,
int size,
float skew,
short *outspeech);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_RESAMPLER_H_

View File

@ -1,9 +0,0 @@
noinst_LTLIBRARIES = libaecm.la
libaecm_la_SOURCES = interface/echo_control_mobile.h \
echo_control_mobile.c \
aecm_core.c \
aecm_core.h
libaecm_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility

View File

@ -1,34 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'aecm',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'apm_util'
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/echo_control_mobile.h',
'echo_control_mobile.c',
'aecm_core.c',
'aecm_core.h',
],
},
],
}

File diff suppressed because it is too large

View File

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,217 +8,144 @@
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs echo control (suppression) with fft routines in fixed-point
// Performs echo control (suppression) with fft routines in fixed-point.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_CORE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_CORE_H_
#define AECM_DYNAMIC_Q // turn on/off dynamic Q-domain
//#define AECM_WITH_ABS_APPROX
//#define AECM_SHORT // for 32 sample partition length (otherwise 64)
#include "typedefs.h"
#include "signal_processing_library.h"
// Algorithm parameters
#define FRAME_LEN 80 // Total frame length, 10 ms
#ifdef AECM_SHORT
#define PART_LEN 32 // Length of partition
#define PART_LEN_SHIFT 6 // Length of (PART_LEN * 2) in base 2
#else
#define PART_LEN 64 // Length of partition
#define PART_LEN_SHIFT 7 // Length of (PART_LEN * 2) in base 2
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aecm/aecm_defines.h"
#include "webrtc/typedefs.h"
#ifdef _MSC_VER // visual c++
#define ALIGN8_BEG __declspec(align(8))
#define ALIGN8_END
#else // gcc or icc
#define ALIGN8_BEG
#define ALIGN8_END __attribute__((aligned(8)))
#endif
#define PART_LEN1 (PART_LEN + 1) // Unique fft coefficients
#define PART_LEN2 (PART_LEN << 1) // Length of partition * 2
#define PART_LEN4 (PART_LEN << 2) // Length of partition * 4
#define FAR_BUF_LEN PART_LEN4 // Length of buffers
#define MAX_DELAY 100
// Counter parameters
#ifdef AECM_SHORT
#define CONV_LEN 1024 // Convergence length used at startup
#else
#define CONV_LEN 512 // Convergence length used at startup
#endif
#define CONV_LEN2 (CONV_LEN << 1) // Convergence length * 2 used at startup
// Energy parameters
#define MAX_BUF_LEN 64 // History length of energy signals
#define FAR_ENERGY_MIN 1025 // Lowest Far energy level: At least 2 in energy
#define FAR_ENERGY_DIFF 929 // Allowed difference between max and min
#define ENERGY_DEV_OFFSET 0 // The energy error offset in Q8
#define ENERGY_DEV_TOL 400 // The energy estimation tolerance in Q8
#define FAR_ENERGY_VAD_REGION 230 // Far VAD tolerance region
// Stepsize parameters
#define MU_MIN 10 // Min stepsize 2^-MU_MIN (far end energy dependent)
#define MU_MAX 1 // Max stepsize 2^-MU_MAX (far end energy dependent)
#define MU_DIFF 9 // MU_MIN - MU_MAX
// Channel parameters
#define MIN_MSE_COUNT 20 // Min number of consecutive blocks with enough far end
// energy to compare channel estimates
#define MIN_MSE_DIFF 29 // The ratio between adapted and stored channel to
// accept a new storage (0.8 in Q-MSE_RESOLUTION)
#define MSE_RESOLUTION 5 // MSE parameter resolution
#define RESOLUTION_CHANNEL16 12 // W16 Channel in Q-RESOLUTION_CHANNEL16
#define RESOLUTION_CHANNEL32 28 // W32 Channel in Q-RESOLUTION_CHANNEL
#define CHANNEL_VAD 16 // Minimum energy in frequency band to update channel
// Suppression gain parameters: SUPGAIN_ parameters in Q-(RESOLUTION_SUPGAIN)
#define RESOLUTION_SUPGAIN 8 // Channel in Q-(RESOLUTION_SUPGAIN)
#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) // Default suppression gain
#define SUPGAIN_ERROR_PARAM_A 3072 // Estimation error parameter (Maximum gain) (8 in Q8)
#define SUPGAIN_ERROR_PARAM_B 1536 // Estimation error parameter (Gain before going down)
#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT // Estimation error parameter
// (Should be the same as Default) (1 in Q8)
#define SUPGAIN_EPC_DT 200 // = SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL
// Defines for "check delay estimation"
#define CORR_WIDTH 31 // Number of samples to correlate over.
#define CORR_MAX 16 // Maximum correlation offset
#define CORR_MAX_BUF 63
#define CORR_DEV 4
#define CORR_MAX_LEVEL 20
#define CORR_MAX_LOW 4
#define CORR_BUF_LEN ((CORR_MAX << 1) + 1)  // Parenthesized for safe expansion.
// Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN
#define ONE_Q14 (1 << 14)
// NLP defines
#define NLP_COMP_LOW 3277 // 0.2 in Q14
#define NLP_COMP_HIGH ONE_Q14 // 1 in Q14
extern const WebRtc_Word16 WebRtcAecm_kSqrtHanning[];
typedef struct {
WebRtc_Word16 real;
WebRtc_Word16 imag;
} complex16_t;
int16_t real;
int16_t imag;
} ComplexInt16;
typedef struct
{
typedef struct {
int farBufWritePos;
int farBufReadPos;
int knownDelay;
int lastKnownDelay;
int firstVAD; // Parameter to control poorly initialized channels
int firstVAD; // Parameter to control poorly initialized channels
void *farFrameBuf;
void *nearNoisyFrameBuf;
void *nearCleanFrameBuf;
void *outFrameBuf;
RingBuffer* farFrameBuf;
RingBuffer* nearNoisyFrameBuf;
RingBuffer* nearCleanFrameBuf;
RingBuffer* outFrameBuf;
WebRtc_Word16 farBuf[FAR_BUF_LEN];
int16_t farBuf[FAR_BUF_LEN];
WebRtc_Word16 mult;
WebRtc_UWord32 seed;
int16_t mult;
uint32_t seed;
// Delay estimation variables
void* delay_estimator_farend;
void* delay_estimator;
WebRtc_UWord16 currentDelay;
uint16_t currentDelay;
// Far end history variables
// TODO(bjornv): Replace |far_history| with ring_buffer.
uint16_t far_history[PART_LEN1 * MAX_DELAY];
int far_history_pos;
int far_q_domains[MAX_DELAY];
WebRtc_Word16 nlpFlag;
WebRtc_Word16 fixedDelay;
int16_t nlpFlag;
int16_t fixedDelay;
WebRtc_UWord32 totCount;
uint32_t totCount;
WebRtc_Word16 dfaCleanQDomain;
WebRtc_Word16 dfaCleanQDomainOld;
WebRtc_Word16 dfaNoisyQDomain;
WebRtc_Word16 dfaNoisyQDomainOld;
int16_t dfaCleanQDomain;
int16_t dfaCleanQDomainOld;
int16_t dfaNoisyQDomain;
int16_t dfaNoisyQDomainOld;
WebRtc_Word16 nearLogEnergy[MAX_BUF_LEN];
WebRtc_Word16 farLogEnergy;
WebRtc_Word16 echoAdaptLogEnergy[MAX_BUF_LEN];
WebRtc_Word16 echoStoredLogEnergy[MAX_BUF_LEN];
int16_t nearLogEnergy[MAX_BUF_LEN];
int16_t farLogEnergy;
int16_t echoAdaptLogEnergy[MAX_BUF_LEN];
int16_t echoStoredLogEnergy[MAX_BUF_LEN];
// The extra 16 or 32 bytes in the following buffers are for alignment based Neon code.
// It's designed this way since the current GCC compiler can't align a buffer in 16 or 32
// byte boundaries properly.
WebRtc_Word16 channelStored_buf[PART_LEN1 + 8];
WebRtc_Word16 channelAdapt16_buf[PART_LEN1 + 8];
WebRtc_Word32 channelAdapt32_buf[PART_LEN1 + 8];
WebRtc_Word16 xBuf_buf[PART_LEN2 + 16]; // farend
WebRtc_Word16 dBufClean_buf[PART_LEN2 + 16]; // nearend
WebRtc_Word16 dBufNoisy_buf[PART_LEN2 + 16]; // nearend
WebRtc_Word16 outBuf_buf[PART_LEN + 8];
// The extra 16 or 32 bytes in the following buffers are for alignment based
// Neon code.
// It's designed this way since the current GCC compiler can't align a
// buffer in 16 or 32 byte boundaries properly.
int16_t channelStored_buf[PART_LEN1 + 8];
int16_t channelAdapt16_buf[PART_LEN1 + 8];
int32_t channelAdapt32_buf[PART_LEN1 + 8];
int16_t xBuf_buf[PART_LEN2 + 16]; // farend
int16_t dBufClean_buf[PART_LEN2 + 16]; // nearend
int16_t dBufNoisy_buf[PART_LEN2 + 16]; // nearend
int16_t outBuf_buf[PART_LEN + 8];
// Pointers to the above buffers
WebRtc_Word16 *channelStored;
WebRtc_Word16 *channelAdapt16;
WebRtc_Word32 *channelAdapt32;
WebRtc_Word16 *xBuf;
WebRtc_Word16 *dBufClean;
WebRtc_Word16 *dBufNoisy;
WebRtc_Word16 *outBuf;
int16_t *channelStored;
int16_t *channelAdapt16;
int32_t *channelAdapt32;
int16_t *xBuf;
int16_t *dBufClean;
int16_t *dBufNoisy;
int16_t *outBuf;
WebRtc_Word32 echoFilt[PART_LEN1];
WebRtc_Word16 nearFilt[PART_LEN1];
WebRtc_Word32 noiseEst[PART_LEN1];
int32_t echoFilt[PART_LEN1];
int16_t nearFilt[PART_LEN1];
int32_t noiseEst[PART_LEN1];
int noiseEstTooLowCtr[PART_LEN1];
int noiseEstTooHighCtr[PART_LEN1];
WebRtc_Word16 noiseEstCtr;
WebRtc_Word16 cngMode;
int16_t noiseEstCtr;
int16_t cngMode;
WebRtc_Word32 mseAdaptOld;
WebRtc_Word32 mseStoredOld;
WebRtc_Word32 mseThreshold;
int32_t mseAdaptOld;
int32_t mseStoredOld;
int32_t mseThreshold;
WebRtc_Word16 farEnergyMin;
WebRtc_Word16 farEnergyMax;
WebRtc_Word16 farEnergyMaxMin;
WebRtc_Word16 farEnergyVAD;
WebRtc_Word16 farEnergyMSE;
int16_t farEnergyMin;
int16_t farEnergyMax;
int16_t farEnergyMaxMin;
int16_t farEnergyVAD;
int16_t farEnergyMSE;
int currentVADValue;
WebRtc_Word16 vadUpdateCount;
int16_t vadUpdateCount;
WebRtc_Word16 startupState;
WebRtc_Word16 mseChannelCount;
WebRtc_Word16 supGain;
WebRtc_Word16 supGainOld;
int16_t startupState;
int16_t mseChannelCount;
int16_t supGain;
int16_t supGainOld;
WebRtc_Word16 supGainErrParamA;
WebRtc_Word16 supGainErrParamD;
WebRtc_Word16 supGainErrParamDiffAB;
WebRtc_Word16 supGainErrParamDiffBD;
int16_t supGainErrParamA;
int16_t supGainErrParamD;
int16_t supGainErrParamDiffAB;
int16_t supGainErrParamDiffBD;
struct RealFFT* real_fft;
#ifdef AEC_DEBUG
FILE *farFile;
FILE *nearFile;
FILE *outFile;
#endif
} AecmCore_t;
} AecmCore;
///////////////////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CreateCore(...)
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CreateCore()
//
// Allocates the memory needed by the AECM. The memory needs to be
// initialized separately using the WebRtcAecm_InitCore() function.
//
// Input:
// - aecm : Instance that should be created
//
// Output:
// - aecm : Created instance
//
// Return value : 0 - Ok
// -1 - Error
//
int WebRtcAecm_CreateCore(AecmCore_t **aecm);
// Returns a pointer to the instance, or NULL on failure.
AecmCore* WebRtcAecm_CreateCore();
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_InitCore(...)
//
// This function initializes the AECM instance created with WebRtcAecm_CreateCore(...)
// This function initializes the AECM instance created with
// WebRtcAecm_CreateCore()
// Input:
// - aecm : Pointer to the AECM instance
// - samplingFreq : Sampling Frequency
@ -229,57 +156,58 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecm);
// Return value : 0 - Ok
// -1 - Error
//
int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq);
int WebRtcAecm_InitCore(AecmCore* const aecm, int samplingFreq);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_FreeCore(...)
//
// This function releases the memory allocated by WebRtcAecm_CreateCore()
// Input:
// - aecm : Pointer to the AECM instance
//
// Return value : 0 - Ok
// -1 - Error
// 11001-11016: Error
//
int WebRtcAecm_FreeCore(AecmCore_t *aecm);
void WebRtcAecm_FreeCore(AecmCore* aecm);
int WebRtcAecm_Control(AecmCore_t *aecm, int delay, int nlpFlag);
int WebRtcAecm_Control(AecmCore* aecm, int delay, int nlpFlag);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_InitEchoPathCore(...)
//
// This function resets the echo channel adaptation with the specified channel.
// Input:
// - aecm : Pointer to the AECM instance
// - echo_path : Pointer to the data that should initialize the echo path
// - echo_path : Pointer to the data that should initialize the echo
// path
//
// Output:
// - aecm : Initialized instance
//
void WebRtcAecm_InitEchoPathCore(AecmCore_t* aecm, const WebRtc_Word16* echo_path);
void WebRtcAecm_InitEchoPathCore(AecmCore* aecm, const int16_t* echo_path);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_ProcessFrame(...)
//
// This function processes frames and sends blocks to WebRtcAecm_ProcessBlock(...)
// This function processes frames and sends blocks to
// WebRtcAecm_ProcessBlock(...)
//
// Inputs:
// - aecm : Pointer to the AECM instance
// - farend : In buffer containing one frame of echo signal
// - nearendNoisy : In buffer containing one frame of nearend+echo signal without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal with NS
// - nearendNoisy : In buffer containing one frame of nearend+echo signal
// without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal
// with NS
//
// Output:
// - out : Out buffer, one frame of nearend signal :
//
//
int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, const WebRtc_Word16 * farend,
const WebRtc_Word16 * nearendNoisy,
const WebRtc_Word16 * nearendClean,
WebRtc_Word16 * out);
int WebRtcAecm_ProcessFrame(AecmCore* aecm,
const int16_t* farend,
const int16_t* nearendNoisy,
const int16_t* nearendClean,
int16_t* out);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_ProcessBlock(...)
//
// This function is called for every block within one frame
@ -288,19 +216,22 @@ int WebRtcAecm_ProcessFrame(AecmCore_t * aecm, const WebRtc_Word16 * farend,
// Inputs:
// - aecm : Pointer to the AECM instance
// - farend : In buffer containing one block of echo signal
// - nearendNoisy : In buffer containing one frame of nearend+echo signal without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal with NS
// - nearendNoisy : In buffer containing one frame of nearend+echo signal
// without NS
// - nearendClean : In buffer containing one frame of nearend+echo signal
// with NS
//
// Output:
// - out : Out buffer, one block of nearend signal :
//
//
int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, const WebRtc_Word16 * farend,
const WebRtc_Word16 * nearendNoisy,
const WebRtc_Word16 * noisyClean,
WebRtc_Word16 * out);
int WebRtcAecm_ProcessBlock(AecmCore* aecm,
const int16_t* farend,
const int16_t* nearendNoisy,
const int16_t* noisyClean,
int16_t* out);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_BufferFarFrame()
//
// Inserts a frame of data into farend buffer.
@ -310,10 +241,11 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm, const WebRtc_Word16 * farend,
// - farend : In buffer containing one frame of farend signal
// - farLen : Length of frame
//
void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * const farend,
void WebRtcAecm_BufferFarFrame(AecmCore* const aecm,
const int16_t* const farend,
const int farLen);
///////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_FetchFarFrame()
//
// Read the farend buffer to account for known delay
@ -324,35 +256,179 @@ void WebRtcAecm_BufferFarFrame(AecmCore_t * const aecm, const WebRtc_Word16 * co
// - farLen : Length of frame
// - knownDelay : known delay
//
void WebRtcAecm_FetchFarFrame(AecmCore_t * const aecm, WebRtc_Word16 * const farend,
const int farLen, const int knownDelay);
void WebRtcAecm_FetchFarFrame(AecmCore* const aecm,
int16_t* const farend,
const int farLen,
const int knownDelay);
///////////////////////////////////////////////////////////////////////////////////////////////
// Some internal functions shared by ARM NEON and generic C code:
// All the functions below are intended to be private
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_UpdateFarHistory()
//
// Moves the pointer to the next entry and inserts |far_spectrum| and
// corresponding Q-domain in its buffer.
//
// Inputs:
// - self : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum
// - far_q : Q-domain of far end spectrum
//
void WebRtcAecm_UpdateFarHistory(AecmCore* self,
uint16_t* far_spectrum,
int far_q);
void WebRtcAecm_CalcLinearEnergies(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echoEst,
WebRtc_UWord32* far_energy,
WebRtc_UWord32* echo_energy_adapt,
WebRtc_UWord32* echo_energy_stored);
////////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_AlignedFarend()
//
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcessFix(...) should have been
// called before AlignedFarend(...). Otherwise, you get the pointer to the
// previous frame. The memory is only valid until the next call of
// WebRtc_DelayEstimatorProcessFix(...).
//
// Inputs:
// - self : Pointer to the AECM instance.
// - delay : Current delay estimate.
//
// Output:
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const uint16_t* WebRtcAecm_AlignedFarend(AecmCore* self, int* far_q, int delay);
void WebRtcAecm_StoreAdaptiveChannel(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echo_est);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CalcSuppressionGain()
//
// This function calculates the suppression gain that is used in the
// Wiener filter.
//
// Inputs:
// - aecm : Pointer to the AECM instance.
//
// Return value:
// - supGain : Suppression gain with which to scale the noise
// level (Q14).
//
int16_t WebRtcAecm_CalcSuppressionGain(AecmCore* const aecm);
void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t *aecm);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CalcEnergies()
//
// This function calculates the log of energies for nearend, farend and
// estimated echoes. There is also an update of energy decision levels,
// i.e. internal VAD.
//
// Inputs:
// - aecm : Pointer to the AECM instance.
// - far_spectrum : Pointer to farend spectrum.
// - far_q : Q-domain of farend spectrum.
// - nearEner : Near end energy for current block in
// Q(aecm->dfaQDomain).
//
// Output:
// - echoEst : Estimated echo in Q(xfa_q+RESOLUTION_CHANNEL16).
//
void WebRtcAecm_CalcEnergies(AecmCore* aecm,
const uint16_t* far_spectrum,
const int16_t far_q,
const uint32_t nearEner,
int32_t* echoEst);
void WebRtcAecm_WindowAndFFT(WebRtc_Word16* fft,
const WebRtc_Word16* time_signal,
complex16_t* freq_signal,
int time_signal_scaling);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_CalcStepSize()
//
// This function calculates the step size used in channel estimation
//
// Inputs:
// - aecm : Pointer to the AECM instance.
//
// Return value:
// - mu : Stepsize in log2(), i.e. number of shifts.
//
int16_t WebRtcAecm_CalcStepSize(AecmCore* const aecm);
void WebRtcAecm_InverseFFTAndWindow(AecmCore_t* aecm,
WebRtc_Word16* fft,
complex16_t* efw,
WebRtc_Word16* output,
const WebRtc_Word16* nearendClean);
///////////////////////////////////////////////////////////////////////////////
// WebRtcAecm_UpdateChannel(...)
//
// This function performs channel estimation.
// NLMS and decision on channel storage.
//
// Inputs:
// - aecm : Pointer to the AECM instance.
// - far_spectrum : Absolute value of the farend signal in Q(far_q)
// - far_q : Q-domain of the farend signal
// - dfa : Absolute value of the nearend signal
// (Q[aecm->dfaQDomain])
// - mu : NLMS step size.
// Input/Output:
// - echoEst : Estimated echo in Q(far_q+RESOLUTION_CHANNEL16).
//
void WebRtcAecm_UpdateChannel(AecmCore* aecm,
const uint16_t* far_spectrum,
const int16_t far_q,
const uint16_t* const dfa,
const int16_t mu,
int32_t* echoEst);
extern const int16_t WebRtcAecm_kCosTable[];
extern const int16_t WebRtcAecm_kSinTable[];
///////////////////////////////////////////////////////////////////////////////
// Some function pointers, for internal functions shared by ARM NEON and
// generic C code.
//
typedef void (*CalcLinearEnergies)(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echoEst,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored);
extern CalcLinearEnergies WebRtcAecm_CalcLinearEnergies;
typedef void (*StoreAdaptiveChannel)(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est);
extern StoreAdaptiveChannel WebRtcAecm_StoreAdaptiveChannel;
typedef void (*ResetAdaptiveChannel)(AecmCore* aecm);
extern ResetAdaptiveChannel WebRtcAecm_ResetAdaptiveChannel;
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file aecm_core.c, while those for ARM Neon platforms
// are declared below and defined in file aecm_core_neon.c.
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored);
void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est);
void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm);
#endif
#if defined(MIPS32_LE)
void WebRtcAecm_CalcLinearEnergies_mips(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored);
#if defined(MIPS_DSP_R1_LE)
void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est);
void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore* aecm);
#endif
#endif
#endif
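The function pointers above are resolved once at start-up so the hot path can call an optimized implementation directly. A hedged sketch of that dispatch (WebRtcAecm_CalcLinearEnergiesC is a hypothetical name; per the note above, the generic versions are defined as static inside aecm_core.c):

// Sketch only; the real selection happens during AECM initialization.
static void AecmDispatchSketch(void) {
  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesC;  // Hypothetical.
#if defined(WEBRTC_HAS_NEON)
  WebRtcAecm_CalcLinearEnergies = WebRtcAecm_CalcLinearEnergiesNeon;
#endif
}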

View File

@ -0,0 +1,771 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
#include <assert.h>
#include <stddef.h>
#include <stdlib.h>
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"
#include "webrtc/system_wrappers/interface/compile_assert_c.h"
#include "webrtc/system_wrappers/interface/cpu_features_wrapper.h"
#include "webrtc/typedefs.h"
// Square root of Hanning window in Q14.
#if defined(WEBRTC_DETECT_NEON) || defined(WEBRTC_HAS_NEON)
// Table is defined in an ARM assembly file.
extern const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END;
#else
static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
};
#endif
#ifdef AECM_WITH_ABS_APPROX
// Q15 alpha = 0.99439986968132 (factor for magnitude approximation).
static const uint16_t kAlpha1 = 32584;
// Q15 beta = 0.12967166976970 (factor for magnitude approximation).
static const uint16_t kBeta1 = 4249;
// Q15 alpha = 0.94234827210087 (factor for magnitude approximation).
static const uint16_t kAlpha2 = 30879;
// Q15 beta = 0.33787806009150 (factor for magnitude approximation).
static const uint16_t kBeta2 = 11072;
// Q15 alpha = 0.82247698684306 (factor for magnitude approximation).
static const uint16_t kAlpha3 = 26951;
// Q15 beta = 0.57762063060713 (factor for magnitude approximation).
static const uint16_t kBeta3 = 18927;
#endif
static const int16_t kNoiseEstQDomain = 15;
static const int16_t kNoiseEstIncCount = 5;
static void ComfortNoise(AecmCore* aecm,
const uint16_t* dfa,
ComplexInt16* out,
const int16_t* lambda);
static void WindowAndFFT(AecmCore* aecm,
int16_t* fft,
const int16_t* time_signal,
ComplexInt16* freq_signal,
int time_signal_scaling) {
int i = 0;
// FFT of signal
for (i = 0; i < PART_LEN; i++) {
// Window time domain signal and insert into real part of
// transformation array |fft|
int16_t scaled_time_signal = time_signal[i] << time_signal_scaling;
fft[i] = (int16_t)((scaled_time_signal * WebRtcAecm_kSqrtHanning[i]) >> 14);
scaled_time_signal = time_signal[i + PART_LEN] << time_signal_scaling;
fft[PART_LEN + i] = (int16_t)((
scaled_time_signal * WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14);
}
// Do forward FFT, then take only the first PART_LEN complex samples,
// and change signs of the imaginary parts.
WebRtcSpl_RealForwardFFT(aecm->real_fft, fft, (int16_t*)freq_signal);
for (i = 0; i < PART_LEN; i++) {
freq_signal[i].imag = -freq_signal[i].imag;
}
}
static void InverseFFTAndWindow(AecmCore* aecm,
int16_t* fft,
ComplexInt16* efw,
int16_t* output,
const int16_t* nearendClean) {
int i, j, outCFFT;
int32_t tmp32no1;
// Reuse |efw| for the inverse FFT output after transferring
// the contents to |fft|.
int16_t* ifft_out = (int16_t*)efw;
// Synthesis
for (i = 1, j = 2; i < PART_LEN; i += 1, j += 2) {
fft[j] = efw[i].real;
fft[j + 1] = -efw[i].imag;
}
fft[0] = efw[0].real;
fft[1] = -efw[0].imag;
fft[PART_LEN2] = efw[PART_LEN].real;
fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
// Inverse FFT. Keep outCFFT to scale the samples in the next block.
outCFFT = WebRtcSpl_RealInverseFFT(aecm->real_fft, fft, ifft_out);
for (i = 0; i < PART_LEN; i++) {
ifft_out[i] = (int16_t)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
ifft_out[i], WebRtcAecm_kSqrtHanning[i], 14);
tmp32no1 = WEBRTC_SPL_SHIFT_W32((int32_t)ifft_out[i],
outCFFT - aecm->dfaCleanQDomain);
output[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp32no1 + aecm->outBuf[i],
WEBRTC_SPL_WORD16_MIN);
tmp32no1 = (ifft_out[PART_LEN + i] *
WebRtcAecm_kSqrtHanning[PART_LEN - i]) >> 14;
tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1,
outCFFT - aecm->dfaCleanQDomain);
aecm->outBuf[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp32no1,
WEBRTC_SPL_WORD16_MIN);
}
// Copy the current block to the old position
// (aecm->outBuf is shifted elsewhere)
memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
memcpy(aecm->dBufNoisy,
aecm->dBufNoisy + PART_LEN,
sizeof(int16_t) * PART_LEN);
if (nearendClean != NULL)
{
memcpy(aecm->dBufClean,
aecm->dBufClean + PART_LEN,
sizeof(int16_t) * PART_LEN);
}
}
// Transforms a time domain signal into the frequency domain, outputting the
// complex valued signal, absolute value and sum of absolute values.
//
// time_signal [in] Pointer to time domain signal
// freq_signal_real [out] Pointer to real part of frequency domain array
// freq_signal_imag [out] Pointer to imaginary part of frequency domain
// array
// freq_signal_abs [out] Pointer to absolute value of frequency domain
// array
// freq_signal_sum_abs [out] Pointer to the sum of all absolute values in
// the frequency domain array
// return value The Q-domain of current frequency values
//
static int TimeToFrequencyDomain(AecmCore* aecm,
const int16_t* time_signal,
ComplexInt16* freq_signal,
uint16_t* freq_signal_abs,
uint32_t* freq_signal_sum_abs) {
int i = 0;
int time_signal_scaling = 0;
int32_t tmp32no1 = 0;
int32_t tmp32no2 = 0;
// In fft_buf, +16 for 32-byte alignment.
int16_t fft_buf[PART_LEN4 + 16];
int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);
int16_t tmp16no1;
#ifndef WEBRTC_ARCH_ARM_V7
int16_t tmp16no2;
#endif
#ifdef AECM_WITH_ABS_APPROX
int16_t max_value = 0;
int16_t min_value = 0;
uint16_t alpha = 0;
uint16_t beta = 0;
#endif
#ifdef AECM_DYNAMIC_Q
tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
#endif
WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);
// Extract imaginary and real part, calculate the magnitude for
// all frequency bins
freq_signal[0].imag = 0;
freq_signal[PART_LEN].imag = 0;
freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
freq_signal[PART_LEN].real);
(*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
(uint32_t)(freq_signal_abs[PART_LEN]);
for (i = 1; i < PART_LEN; i++)
{
if (freq_signal[i].real == 0)
{
freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
}
else if (freq_signal[i].imag == 0)
{
freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[i].real);
}
else
{
// Approximation for magnitude of complex fft output
// magn = sqrt(real^2 + imag^2)
// magn ~= alpha * max(|imag|,|real|) + beta * min(|imag|,|real|)
//
// The parameters alpha and beta are stored in Q15
#ifdef AECM_WITH_ABS_APPROX
tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
if(tmp16no1 > tmp16no2)
{
max_value = tmp16no1;
min_value = tmp16no2;
} else
{
max_value = tmp16no2;
min_value = tmp16no1;
}
// Magnitude in Q(-6)
if ((max_value >> 2) > min_value)
{
alpha = kAlpha1;
beta = kBeta1;
} else if ((max_value >> 1) > min_value)
{
alpha = kAlpha2;
beta = kBeta2;
} else
{
alpha = kAlpha3;
beta = kBeta3;
}
tmp16no1 = (int16_t)((max_value * alpha) >> 15);
tmp16no2 = (int16_t)((min_value * beta) >> 15);
freq_signal_abs[i] = (uint16_t)tmp16no1 + (uint16_t)tmp16no2;
#else
#ifdef WEBRTC_ARCH_ARM_V7
__asm __volatile(
"smulbb %[tmp32no1], %[real], %[real]\n\t"
"smlabb %[tmp32no2], %[imag], %[imag], %[tmp32no1]\n\t"
:[tmp32no1]"+&r"(tmp32no1),
[tmp32no2]"=r"(tmp32no2)
:[real]"r"(freq_signal[i].real),
[imag]"r"(freq_signal[i].imag)
);
#else
tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
tmp32no1 = tmp16no1 * tmp16no1;
tmp32no2 = tmp16no2 * tmp16no2;
tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
#endif // WEBRTC_ARCH_ARM_V7
tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);
freq_signal_abs[i] = (uint16_t)tmp32no1;
#endif // AECM_WITH_ABS_APPROX
}
(*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
}
return time_signal_scaling;
}
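// Floating-point reference for the alpha * max + beta * min magnitude
// approximation above; illustration only, never called by the AECM code.
// The constants mirror kAlpha1..3 / kBeta1..3 divided by 2^15.
static float ApproxMagnitude(float real, float imag) {
  float a = real < 0 ? -real : real;
  float b = imag < 0 ? -imag : imag;
  float max_v = a > b ? a : b;
  float min_v = a > b ? b : a;
  if (max_v > 4 * min_v)
    return 0.99440f * max_v + 0.12967f * min_v;
  if (max_v > 2 * min_v)
    return 0.94235f * max_v + 0.33788f * min_v;
  return 0.82248f * max_v + 0.57762f * min_v;
}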
int WebRtcAecm_ProcessBlock(AecmCore* aecm,
const int16_t* farend,
const int16_t* nearendNoisy,
const int16_t* nearendClean,
int16_t* output) {
int i;
uint32_t xfaSum;
uint32_t dfaNoisySum;
uint32_t dfaCleanSum;
uint32_t echoEst32Gained;
uint32_t tmpU32;
int32_t tmp32no1;
uint16_t xfa[PART_LEN1];
uint16_t dfaNoisy[PART_LEN1];
uint16_t dfaClean[PART_LEN1];
uint16_t* ptrDfaClean = dfaClean;
const uint16_t* far_spectrum_ptr = NULL;
// 32 byte aligned buffers (with +8 or +16).
// TODO(kma): define fft with ComplexInt16.
int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe.
int32_t echoEst32_buf[PART_LEN1 + 8];
int32_t dfw_buf[PART_LEN2 + 8];
int32_t efw_buf[PART_LEN2 + 8];
int16_t* fft = (int16_t*) (((uintptr_t) fft_buf + 31) & ~ 31);
int32_t* echoEst32 = (int32_t*) (((uintptr_t) echoEst32_buf + 31) & ~ 31);
ComplexInt16* dfw = (ComplexInt16*)(((uintptr_t)dfw_buf + 31) & ~31);
ComplexInt16* efw = (ComplexInt16*)(((uintptr_t)efw_buf + 31) & ~31);
int16_t hnl[PART_LEN1];
int16_t numPosCoef = 0;
int16_t nlpGain = ONE_Q14;
int delay;
int16_t tmp16no1;
int16_t tmp16no2;
int16_t mu;
int16_t supGain;
int16_t zeros32, zeros16;
int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf;
int far_q;
int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff;
const int kMinPrefBand = 4;
const int kMaxPrefBand = 24;
int32_t avgHnl32 = 0;
// Determine startup state. There are three states:
// (0) the first CONV_LEN blocks
// (1) another CONV_LEN blocks
// (2) the rest
if (aecm->startupState < 2)
{
aecm->startupState = (aecm->totCount >= CONV_LEN) +
(aecm->totCount >= CONV_LEN2);
}
// END: Determine startup state
// Buffer near and far end signals
memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN);
memcpy(aecm->dBufNoisy + PART_LEN, nearendNoisy, sizeof(int16_t) * PART_LEN);
if (nearendClean != NULL)
{
memcpy(aecm->dBufClean + PART_LEN,
nearendClean,
sizeof(int16_t) * PART_LEN);
}
// Transform far end signal from time domain to frequency domain.
far_q = TimeToFrequencyDomain(aecm,
aecm->xBuf,
dfw,
xfa,
&xfaSum);
// Transform noisy near end signal from time domain to frequency domain.
zerosDBufNoisy = TimeToFrequencyDomain(aecm,
aecm->dBufNoisy,
dfw,
dfaNoisy,
&dfaNoisySum);
aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy;
if (nearendClean == NULL)
{
ptrDfaClean = dfaNoisy;
aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld;
aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain;
dfaCleanSum = dfaNoisySum;
} else
{
// Transform clean near end signal from time domain to frequency domain.
zerosDBufClean = TimeToFrequencyDomain(aecm,
aecm->dBufClean,
dfw,
dfaClean,
&dfaCleanSum);
aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
aecm->dfaCleanQDomain = (int16_t)zerosDBufClean;
}
// Get the delay
// Save far-end history and estimate delay
WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q);
if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend,
xfa,
PART_LEN1,
far_q) == -1) {
return -1;
}
delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator,
dfaNoisy,
PART_LEN1,
zerosDBufNoisy);
if (delay == -1)
{
return -1;
}
else if (delay == -2)
{
// If the delay is unknown, we assume zero.
// NOTE: this will have to be adjusted if we ever add lookahead.
delay = 0;
}
if (aecm->fixedDelay >= 0)
{
// Use fixed delay
delay = aecm->fixedDelay;
}
// Get aligned far end spectrum
far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay);
zerosXBuf = (int16_t) far_q;
if (far_spectrum_ptr == NULL)
{
return -1;
}
// Calculate log(energy) and update energy threshold levels
WebRtcAecm_CalcEnergies(aecm,
far_spectrum_ptr,
zerosXBuf,
dfaNoisySum,
echoEst32);
// Calculate stepsize
mu = WebRtcAecm_CalcStepSize(aecm);
// Update counters
aecm->totCount++;
// This is the channel estimation algorithm.
// It is based on NLMS but has a variable step length,
// which was calculated above.
WebRtcAecm_UpdateChannel(aecm,
far_spectrum_ptr,
zerosXBuf,
dfaNoisy,
mu,
echoEst32);
supGain = WebRtcAecm_CalcSuppressionGain(aecm);
// Calculate Wiener filter hnl[]
for (i = 0; i < PART_LEN1; i++)
{
// Far end signal through channel estimate in Q8
// How much can we shift right to preserve resolution
tmp32no1 = echoEst32[i] - aecm->echoFilt[i];
aecm->echoFilt[i] += (tmp32no1 * 50) >> 8;
zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1;
zeros16 = WebRtcSpl_NormW16(supGain) + 1;
if (zeros32 + zeros16 > 16)
{
// Multiplication is safe
// Result in
// Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+
// aecm->xfaQDomainBuf[diff])
echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
(uint16_t)supGain);
resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN;
resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
} else
{
tmp16no1 = 17 - zeros32 - zeros16;
resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 -
RESOLUTION_SUPGAIN;
resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf);
if (zeros32 > tmp16no1)
{
echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i],
supGain >> tmp16no1);
} else
{
// Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16)
echoEst32Gained = (aecm->echoFilt[i] >> tmp16no1) * supGain;
}
}
zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]);
assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative.
dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld;
if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) {
tmp16no1 = aecm->nearFilt[i] << zeros16;
qDomainDiff = zeros16 - dfa_clean_q_domain_diff;
tmp16no2 = ptrDfaClean[i] >> -qDomainDiff;
} else {
tmp16no1 = dfa_clean_q_domain_diff < 0
? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff
: aecm->nearFilt[i] << dfa_clean_q_domain_diff;
qDomainDiff = 0;
tmp16no2 = ptrDfaClean[i];
}
tmp32no1 = (int32_t)(tmp16no2 - tmp16no1);
tmp16no2 = (int16_t)(tmp32no1 >> 4);
tmp16no2 += tmp16no1;
zeros16 = WebRtcSpl_NormW16(tmp16no2);
if ((tmp16no2) & (-qDomainDiff > zeros16)) {
aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX;
} else {
aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff
: tmp16no2 >> qDomainDiff;
}
// Wiener filter coefficients, resulting hnl in Q14
if (echoEst32Gained == 0)
{
hnl[i] = ONE_Q14;
} else if (aecm->nearFilt[i] == 0)
{
hnl[i] = 0;
} else
{
// Multiply the suppression gain
// Rounding
echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1);
tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained,
(uint16_t)aecm->nearFilt[i]);
// Current resolution is
// Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN- max(0,17-zeros16- zeros32))
// Make sure we are in Q14
tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff);
if (tmp32no1 > ONE_Q14)
{
hnl[i] = 0;
} else if (tmp32no1 < 0)
{
hnl[i] = ONE_Q14;
} else
{
// 1-echoEst/dfa
hnl[i] = ONE_Q14 - (int16_t)tmp32no1;
if (hnl[i] < 0)
{
hnl[i] = 0;
}
}
}
if (hnl[i])
{
numPosCoef++;
}
}
// Only in wideband. Prevent the gain in upper band from being larger than
// in lower band.
if (aecm->mult == 2)
{
// TODO(bjornv): Investigate if the scaling of hnl[i] below can cause
// speech distortion in double-talk.
for (i = 0; i < PART_LEN1; i++)
{
hnl[i] = (int16_t)((hnl[i] * hnl[i]) >> 14);
}
for (i = kMinPrefBand; i <= kMaxPrefBand; i++)
{
avgHnl32 += (int32_t)hnl[i];
}
assert(kMaxPrefBand - kMinPrefBand + 1 > 0);
avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1);
for (i = kMaxPrefBand; i < PART_LEN1; i++)
{
if (hnl[i] > (int16_t)avgHnl32)
{
hnl[i] = (int16_t)avgHnl32;
}
}
}
// Calculate NLP gain, result is in Q14
if (aecm->nlpFlag)
{
for (i = 0; i < PART_LEN1; i++)
{
// Truncate values close to zero and one.
if (hnl[i] > NLP_COMP_HIGH)
{
hnl[i] = ONE_Q14;
} else if (hnl[i] < NLP_COMP_LOW)
{
hnl[i] = 0;
}
// Remove outliers
if (numPosCoef < 3)
{
nlpGain = 0;
} else
{
nlpGain = ONE_Q14;
}
// NLP
if ((hnl[i] == ONE_Q14) && (nlpGain == ONE_Q14))
{
hnl[i] = ONE_Q14;
} else
{
hnl[i] = (int16_t)((hnl[i] * nlpGain) >> 14);
}
// multiply with Wiener coefficients
efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
hnl[i], 14));
efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
hnl[i], 14));
}
}
else
{
// multiply with Wiener coefficients
for (i = 0; i < PART_LEN1; i++)
{
efw[i].real = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real,
hnl[i], 14));
efw[i].imag = (int16_t)(WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag,
hnl[i], 14));
}
}
if (aecm->cngMode == AecmTrue)
{
ComfortNoise(aecm, ptrDfaClean, efw, hnl);
}
InverseFFTAndWindow(aecm, fft, efw, output, nearendClean);
return 0;
}
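The loop above builds the Wiener gain hnl[] in Q14: ONE_Q14 (1 << 14) represents a gain of 1.0, and the final multiply with dfw[] uses WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND to scale the spectrum by that gain. As a rough, self-contained sketch of that fixed-point idiom (apply_q14_gain is a hypothetical helper, not part of these sources, assuming the macro's usual multiply, round, shift definition):

#include <stdint.h>

static int16_t apply_q14_gain(int16_t sample, int16_t gain_q14) {
  /* Multiply, add the rounding constant for a 14-bit shift, shift down. */
  int32_t product = (int32_t)sample * gain_q14 + (1 << 13);
  return (int16_t)(product >> 14);
}

A gain of ONE_Q14 (16384) returns the sample unchanged and a gain of 0 mutes it, matching the two extremes that hnl[] is truncated to in the NLP step above.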
static void ComfortNoise(AecmCore* aecm,
const uint16_t* dfa,
ComplexInt16* out,
const int16_t* lambda) {
int16_t i;
int16_t tmp16;
int32_t tmp32;
int16_t randW16[PART_LEN];
int16_t uReal[PART_LEN1];
int16_t uImag[PART_LEN1];
int32_t outLShift32;
int16_t noiseRShift16[PART_LEN1];
int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain;
int16_t minTrackShift;
assert(shiftFromNearToNoise >= 0);
assert(shiftFromNearToNoise < 16);
if (aecm->noiseEstCtr < 100)
{
// Track the minimum more quickly initially.
aecm->noiseEstCtr++;
minTrackShift = 6;
} else
{
minTrackShift = 9;
}
// Estimate noise power.
for (i = 0; i < PART_LEN1; i++)
{
// Shift to the noise domain.
tmp32 = (int32_t)dfa[i];
outLShift32 = tmp32 << shiftFromNearToNoise;
if (outLShift32 < aecm->noiseEst[i])
{
// Reset "too low" counter
aecm->noiseEstTooLowCtr[i] = 0;
// Track the minimum.
if (aecm->noiseEst[i] < (1 << minTrackShift))
{
// For small values, decrease noiseEst[i] every
// |kNoiseEstIncCount| blocks. The regular approach below
// cannot go further down due to truncation.
aecm->noiseEstTooHighCtr[i]++;
if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount)
{
aecm->noiseEst[i]--;
aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter
}
}
else
{
aecm->noiseEst[i] -= ((aecm->noiseEst[i] - outLShift32)
>> minTrackShift);
}
} else
{
// Reset "too high" counter
aecm->noiseEstTooHighCtr[i] = 0;
// Ramp slowly upwards until we hit the minimum again.
if ((aecm->noiseEst[i] >> 19) > 0)
{
// Avoid overflow.
// Multiplication with 2049 will cause wrap around. Scale
// down first and then multiply
aecm->noiseEst[i] >>= 11;
aecm->noiseEst[i] *= 2049;
}
else if ((aecm->noiseEst[i] >> 11) > 0)
{
// Large enough for relative increase
aecm->noiseEst[i] *= 2049;
aecm->noiseEst[i] >>= 11;
}
else
{
// Make incremental increases based on size every
// |kNoiseEstIncCount| blocks.
aecm->noiseEstTooLowCtr[i]++;
if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount)
{
aecm->noiseEst[i] += (aecm->noiseEst[i] >> 9) + 1;
aecm->noiseEstTooLowCtr[i] = 0; // Reset counter
}
}
}
}
for (i = 0; i < PART_LEN1; i++)
{
tmp32 = aecm->noiseEst[i] >> shiftFromNearToNoise;
if (tmp32 > 32767)
{
tmp32 = 32767;
aecm->noiseEst[i] = tmp32 << shiftFromNearToNoise;
}
noiseRShift16[i] = (int16_t)tmp32;
tmp16 = ONE_Q14 - lambda[i];
noiseRShift16[i] = (int16_t)((tmp16 * noiseRShift16[i]) >> 14);
}
// Generate a uniform random array on [0 2^15-1].
WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed);
// Generate noise according to estimated energy.
uReal[0] = 0; // Reject LF noise.
uImag[0] = 0;
for (i = 1; i < PART_LEN1; i++)
{
// Get a random index for the cos and sin tables over [0 359].
tmp16 = (int16_t)((359 * randW16[i - 1]) >> 15);
// Tables are in Q13.
uReal[i] = (int16_t)((noiseRShift16[i] * WebRtcAecm_kCosTable[tmp16]) >>
13);
uImag[i] = (int16_t)((-noiseRShift16[i] * WebRtcAecm_kSinTable[tmp16]) >>
13);
}
uImag[PART_LEN] = 0;
for (i = 0; i < PART_LEN1; i++)
{
out[i].real = WebRtcSpl_AddSatW16(out[i].real, uReal[i]);
out[i].imag = WebRtcSpl_AddSatW16(out[i].imag, uImag[i]);
}
}
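ComfortNoise() above fills suppressed bins with synthetic noise: each bin's noise magnitude estimate is weighted by (ONE_Q14 - lambda[i]) in Q14, so fully suppressed bins (lambda == 0) receive full comfort noise while passed-through bins receive none, and the result is then rotated by a random phase drawn from the Q13 sine/cosine tables. A minimal per-bin sketch of that shaping (shape_noise_bin is a hypothetical helper, not upstream code):

#include <stdint.h>

static void shape_noise_bin(int16_t noise_mag,   /* per-bin noise estimate */
                            int16_t lambda_q14,  /* Wiener gain, Q14 */
                            int16_t cos_q13,     /* random-phase cosine, Q13 */
                            int16_t sin_q13,     /* random-phase sine, Q13 */
                            int16_t* u_real,
                            int16_t* u_imag) {
  /* Weight the noise magnitude by (1 - gain) in Q14; 16384 == ONE_Q14. */
  int16_t amp = (int16_t)(((16384 - lambda_q14) * noise_mag) >> 14);
  /* Rotate by the random phase; the imaginary part is negated as above. */
  *u_real = (int16_t)((amp * cos_q13) >> 13);
  *u_imag = (int16_t)((-amp * sin_q13) >> 13);
}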

File diff suppressed because it is too large


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -7,308 +7,206 @@
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)
#include "aecm_core.h"
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
#include <arm_neon.h>
#include <assert.h>
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
// TODO(kma): Re-write the corresponding assembly file, the offset
// generating script and makefile, to replace these C functions.
// Square root of Hanning window in Q14.
static const WebRtc_Word16 kSqrtHanningReversed[] __attribute__ ((aligned (8))) = {
16384, 16373, 16354, 16325,
16286, 16237, 16179, 16111,
16034, 15947, 15851, 15746,
15631, 15506, 15373, 15231,
15079, 14918, 14749, 14571,
14384, 14189, 13985, 13773,
13553, 13325, 13089, 12845,
12594, 12335, 12068, 11795,
11514, 11227, 10933, 10633,
10326, 10013, 9695, 9370,
9040, 8705, 8364, 8019,
7668, 7313, 6954, 6591,
6224, 5853, 5478, 5101,
4720, 4337, 3951, 3562,
3172, 2780, 2386, 1990,
1594, 1196, 798, 399
const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
0,
399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
};
void WebRtcAecm_WindowAndFFT(WebRtc_Word16* fft,
const WebRtc_Word16* time_signal,
complex16_t* freq_signal,
int time_signal_scaling)
{
int i, j;
int16x4_t tmp16x4_scaling = vdup_n_s16(time_signal_scaling);
__asm__("vmov.i16 d21, #0" ::: "d21");
for(i = 0, j = 0; i < PART_LEN; i += 4, j += 8)
{
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int32x4_t tmp32x4_0;
/* Window near end */
// fft[j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT((time_signal[i]
// << time_signal_scaling), WebRtcAecm_kSqrtHanning[i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&time_signal[i]));
tmp16x4_0 = vshl_s16(tmp16x4_0, tmp16x4_scaling);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&WebRtcAecm_kSqrtHanning[i]));
tmp32x4_0 = vmull_s16(tmp16x4_0, tmp16x4_1);
__asm__("vshrn.i32 d20, %q0, #14" : : "w"(tmp32x4_0) : "d20");
__asm__("vst2.16 {d20, d21}, [%0, :128]" : : "r"(&fft[j]) : "q10");
// fft[PART_LEN2 + j] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(
// (time_signal[PART_LEN + i] << time_signal_scaling),
// WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&time_signal[i + PART_LEN]));
tmp16x4_0 = vshl_s16(tmp16x4_0, tmp16x4_scaling);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&kSqrtHanningReversed[i]));
tmp32x4_0 = vmull_s16(tmp16x4_0, tmp16x4_1);
__asm__("vshrn.i32 d20, %q0, #14" : : "w"(tmp32x4_0) : "d20");
__asm__("vst2.16 {d20, d21}, [%0, :128]" : : "r"(&fft[PART_LEN2 + j]) : "q10");
}
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
// Take only the first PART_LEN2 samples, and switch the sign of the imaginary part.
for(i = 0, j = 0; j < PART_LEN2; i += 8, j += 16)
{
__asm__("vld2.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&fft[j]) : "q10", "q11");
__asm__("vneg.s16 d22, d22" : : : "q10");
__asm__("vneg.s16 d23, d23" : : : "q11");
__asm__("vst2.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&freq_signal[i].real): "q10", "q11");
}
static inline void AddLanes(uint32_t* ptr, uint32x4_t v) {
#if defined(WEBRTC_ARCH_ARM64)
*(ptr) = vaddvq_u32(v);
#else
uint32x2_t tmp_v;
tmp_v = vadd_u32(vget_low_u32(v), vget_high_u32(v));
tmp_v = vpadd_u32(tmp_v, tmp_v);
*(ptr) = vget_lane_u32(tmp_v, 0);
#endif
}
void WebRtcAecm_InverseFFTAndWindow(AecmCore_t* aecm,
WebRtc_Word16* fft,
complex16_t* efw,
WebRtc_Word16* output,
const WebRtc_Word16* nearendClean)
{
int i, j, outCFFT;
WebRtc_Word32 tmp32no1;
void WebRtcAecm_CalcLinearEnergiesNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est,
uint32_t* far_energy,
uint32_t* echo_energy_adapt,
uint32_t* echo_energy_stored) {
int16_t* start_stored_p = aecm->channelStored;
int16_t* start_adapt_p = aecm->channelAdapt16;
int32_t* echo_est_p = echo_est;
const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
const uint16_t* far_spectrum_p = far_spectrum;
int16x8_t store_v, adapt_v;
uint16x8_t spectrum_v;
uint32x4_t echo_est_v_low, echo_est_v_high;
uint32x4_t far_energy_v, echo_stored_v, echo_adapt_v;
// Synthesis
for(i = 0, j = 0; i < PART_LEN; i += 4, j += 8)
{
// We overwrite two more elements in fft[], but it's ok.
__asm__("vld2.16 {d20, d21}, [%0, :128]" : : "r"(&(efw[i].real)) : "q10");
__asm__("vmov q11, q10" : : : "q10", "q11");
far_energy_v = vdupq_n_u32(0);
echo_adapt_v = vdupq_n_u32(0);
echo_stored_v = vdupq_n_u32(0);
__asm__("vneg.s16 d23, d23" : : : "q11");
__asm__("vst2.16 {d22, d23}, [%0, :128]" : : "r"(&fft[j]): "q11");
// Get energy for the delayed far end signal and estimated
// echo using both stored and adapted channels.
// The C code:
// for (i = 0; i < PART_LEN1; i++) {
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
// far_spectrum[i]);
// (*far_energy) += (uint32_t)(far_spectrum[i]);
// *echo_energy_adapt += aecm->channelAdapt16[i] * far_spectrum[i];
// (*echo_energy_stored) += (uint32_t)echo_est[i];
// }
while (start_stored_p < end_stored_p) {
spectrum_v = vld1q_u16(far_spectrum_p);
adapt_v = vld1q_s16(start_adapt_p);
store_v = vld1q_s16(start_stored_p);
__asm__("vrev64.16 q10, q10" : : : "q10");
__asm__("vst2.16 {d20, d21}, [%0]" : : "r"(&fft[PART_LEN4 - j - 6]): "q10");
}
far_energy_v = vaddw_u16(far_energy_v, vget_low_u16(spectrum_v));
far_energy_v = vaddw_u16(far_energy_v, vget_high_u16(spectrum_v));
fft[PART_LEN2] = efw[PART_LEN].real;
fft[PART_LEN2 + 1] = -efw[PART_LEN].imag;
echo_est_v_low = vmull_u16(vreinterpret_u16_s16(vget_low_s16(store_v)),
vget_low_u16(spectrum_v));
echo_est_v_high = vmull_u16(vreinterpret_u16_s16(vget_high_s16(store_v)),
vget_high_u16(spectrum_v));
vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
// Inverse FFT, result should be scaled with outCFFT.
WebRtcSpl_ComplexBitReverse(fft, PART_LEN_SHIFT);
outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
echo_stored_v = vaddq_u32(echo_est_v_low, echo_stored_v);
echo_stored_v = vaddq_u32(echo_est_v_high, echo_stored_v);
// Take only the real values and scale with outCFFT.
for (i = 0, j = 0; i < PART_LEN2; i += 8, j+= 16)
{
__asm__("vld2.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&fft[j]) : "q10", "q11");
__asm__("vst1.16 {d20, d21}, [%0, :128]" : : "r"(&fft[i]): "q10");
}
echo_adapt_v = vmlal_u16(echo_adapt_v,
vreinterpret_u16_s16(vget_low_s16(adapt_v)),
vget_low_u16(spectrum_v));
echo_adapt_v = vmlal_u16(echo_adapt_v,
vreinterpret_u16_s16(vget_high_s16(adapt_v)),
vget_high_u16(spectrum_v));
int32x4_t tmp32x4_2;
__asm__("vdup.32 %q0, %1" : "=w"(tmp32x4_2) : "r"((WebRtc_Word32)
(outCFFT - aecm->dfaCleanQDomain)));
for (i = 0; i < PART_LEN; i += 4)
{
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int32x4_t tmp32x4_0;
int32x4_t tmp32x4_1;
start_stored_p += 8;
start_adapt_p += 8;
far_spectrum_p += 8;
echo_est_p += 8;
}
// fft[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
// fft[i], WebRtcAecm_kSqrtHanning[i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&fft[i]));
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&WebRtcAecm_kSqrtHanning[i]));
__asm__("vmull.s16 %q0, %P1, %P2" : "=w"(tmp32x4_0) : "w"(tmp16x4_0), "w"(tmp16x4_1));
__asm__("vrshr.s32 %q0, %q1, #14" : "=w"(tmp32x4_0) : "0"(tmp32x4_0));
AddLanes(far_energy, far_energy_v);
AddLanes(echo_energy_stored, echo_stored_v);
AddLanes(echo_energy_adapt, echo_adapt_v);
// tmp32no1 = WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)fft[i],
// outCFFT - aecm->dfaCleanQDomain);
__asm__("vshl.s32 %q0, %q1, %q2" : "=w"(tmp32x4_0) : "0"(tmp32x4_0), "w"(tmp32x4_2));
// fft[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
// tmp32no1 + outBuf[i], WEBRTC_SPL_WORD16_MIN);
// output[i] = fft[i];
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&aecm->outBuf[i]));
__asm__("vmovl.s16 %q0, %P1" : "=w"(tmp32x4_1) : "w"(tmp16x4_0));
__asm__("vadd.i32 %q0, %q1" : : "w"(tmp32x4_0), "w"(tmp32x4_1));
__asm__("vqshrn.s32 %P0, %q1, #0" : "=w"(tmp16x4_0) : "w"(tmp32x4_0));
__asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&fft[i]));
__asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&output[i]));
// tmp32no1 = WEBRTC_SPL_MUL_16_16_RSFT(
// fft[PART_LEN + i], WebRtcAecm_kSqrtHanning[PART_LEN - i], 14);
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_0) : "r"(&fft[PART_LEN + i]));
__asm__("vld1.16 %P0, [%1, :64]" : "=w"(tmp16x4_1) : "r"(&kSqrtHanningReversed[i]));
__asm__("vmull.s16 %q0, %P1, %P2" : "=w"(tmp32x4_0) : "w"(tmp16x4_0), "w"(tmp16x4_1));
__asm__("vshr.s32 %q0, %q1, #14" : "=w"(tmp32x4_0) : "0"(tmp32x4_0));
// tmp32no1 = WEBRTC_SPL_SHIFT_W32(tmp32no1, outCFFT - aecm->dfaCleanQDomain);
__asm__("vshl.s32 %q0, %q1, %q2" : "=w"(tmp32x4_0) : "0"(tmp32x4_0), "w"(tmp32x4_2));
// outBuf[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(
// WEBRTC_SPL_WORD16_MAX, tmp32no1, WEBRTC_SPL_WORD16_MIN);
__asm__("vqshrn.s32 %P0, %q1, #0" : "=w"(tmp16x4_0) : "w"(tmp32x4_0));
__asm__("vst1.16 %P0, [%1, :64]" : : "w"(tmp16x4_0), "r"(&aecm->outBuf[i]));
}
// Copy the current block to the old position (outBuf is shifted elsewhere).
for (i = 0; i < PART_LEN; i += 16)
{
__asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->xBuf[i + PART_LEN]) : "q10");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&aecm->xBuf[i]): "q10");
}
for (i = 0; i < PART_LEN; i += 16)
{
__asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufNoisy[i + PART_LEN]) : "q10");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufNoisy[i]): "q10");
}
if (nearendClean != NULL) {
for (i = 0; i < PART_LEN; i += 16)
{
__asm__("vld1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufClean[i + PART_LEN]) : "q10");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->dBufClean[i]): "q10");
}
}
echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
far_spectrum[PART_LEN]);
*echo_energy_stored += (uint32_t)echo_est[PART_LEN];
*far_energy += (uint32_t)far_spectrum[PART_LEN];
*echo_energy_adapt += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
}
void WebRtcAecm_CalcLinearEnergies(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echo_est,
WebRtc_UWord32* far_energy,
WebRtc_UWord32* echo_energy_adapt,
WebRtc_UWord32* echo_energy_stored)
{
int i;
void WebRtcAecm_StoreAdaptiveChannelNeon(AecmCore* aecm,
const uint16_t* far_spectrum,
int32_t* echo_est) {
assert((uintptr_t)echo_est % 32 == 0);
assert((uintptr_t)(aecm->channelStored) % 16 == 0);
assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
register WebRtc_UWord32 far_energy_r;
register WebRtc_UWord32 echo_energy_stored_r;
register WebRtc_UWord32 echo_energy_adapt_r;
uint32x4_t tmp32x4_0;
// This is the C code of the following optimized code.
// During startup we store the channel every block.
// memcpy(aecm->channelStored,
// aecm->channelAdapt16,
// sizeof(int16_t) * PART_LEN1);
// Recalculate echo estimate
// for (i = 0; i < PART_LEN; i += 4) {
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
// far_spectrum[i]);
// echo_est[i + 1] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 1],
// far_spectrum[i + 1]);
// echo_est[i + 2] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 2],
// far_spectrum[i + 2]);
// echo_est[i + 3] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i + 3],
// far_spectrum[i + 3]);
// }
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
// far_spectrum[i]);
const uint16_t* far_spectrum_p = far_spectrum;
int16_t* start_adapt_p = aecm->channelAdapt16;
int16_t* start_stored_p = aecm->channelStored;
const int16_t* end_stored_p = aecm->channelStored + PART_LEN;
int32_t* echo_est_p = echo_est;
__asm__("vmov.i32 q14, #0" : : : "q14"); // far_energy
__asm__("vmov.i32 q8, #0" : : : "q8"); // echo_energy_stored
__asm__("vmov.i32 q9, #0" : : : "q9"); // echo_energy_adapt
uint16x8_t far_spectrum_v;
int16x8_t adapt_v;
uint32x4_t echo_est_v_low, echo_est_v_high;
for(i = 0; i < PART_LEN -7; i += 8)
{
// far_energy += (WebRtc_UWord32)(far_spectrum[i]);
__asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&far_spectrum[i]) : "q13");
__asm__("vaddw.u16 q14, q14, d26" : : : "q14", "q13");
__asm__("vaddw.u16 q14, q14, d27" : : : "q14", "q13");
while (start_stored_p < end_stored_p) {
far_spectrum_v = vld1q_u16(far_spectrum_p);
adapt_v = vld1q_s16(start_adapt_p);
// Get estimated echo energies for adaptive channel and stored channel.
// echoEst[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
__asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelStored[i]) : "q12");
__asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10");
__asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11");
__asm__("vst1.32 {d20, d21, d22, d23}, [%0, :256]" : : "r"(&echo_est[i]):
"q10", "q11");
vst1q_s16(start_stored_p, adapt_v);
// echo_energy_stored += (WebRtc_UWord32)echoEst[i];
__asm__("vadd.u32 q8, q10" : : : "q10", "q8");
__asm__("vadd.u32 q8, q11" : : : "q11", "q8");
echo_est_v_low = vmull_u16(vget_low_u16(far_spectrum_v),
vget_low_u16(vreinterpretq_u16_s16(adapt_v)));
echo_est_v_high = vmull_u16(vget_high_u16(far_spectrum_v),
vget_high_u16(vreinterpretq_u16_s16(adapt_v)));
// echo_energy_adapt += WEBRTC_SPL_UMUL_16_16(
// aecm->channelAdapt16[i], far_spectrum[i]);
__asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelAdapt16[i]) : "q12");
__asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10");
__asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11");
__asm__("vadd.u32 q9, q10" : : : "q9", "q15");
__asm__("vadd.u32 q9, q11" : : : "q9", "q11");
}
vst1q_s32(echo_est_p, vreinterpretq_s32_u32(echo_est_v_low));
vst1q_s32(echo_est_p + 4, vreinterpretq_s32_u32(echo_est_v_high));
__asm__("vadd.u32 d28, d29" : : : "q14");
__asm__("vpadd.u32 d28, d28" : : : "q14");
__asm__("vmov.32 %0, d28[0]" : "=r"(far_energy_r): : "q14");
__asm__("vadd.u32 d18, d19" : : : "q9");
__asm__("vpadd.u32 d18, d18" : : : "q9");
__asm__("vmov.32 %0, d18[0]" : "=r"(echo_energy_adapt_r): : "q9");
__asm__("vadd.u32 d16, d17" : : : "q8");
__asm__("vpadd.u32 d16, d16" : : : "q8");
__asm__("vmov.32 %0, d16[0]" : "=r"(echo_energy_stored_r): : "q8");
// Get estimated echo energies for adaptive channel and stored channel.
echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
*echo_energy_stored = echo_energy_stored_r + (WebRtc_UWord32)echo_est[i];
*far_energy = far_energy_r + (WebRtc_UWord32)(far_spectrum[i]);
*echo_energy_adapt = echo_energy_adapt_r + WEBRTC_SPL_UMUL_16_16(
aecm->channelAdapt16[i], far_spectrum[i]);
far_spectrum_p += 8;
start_adapt_p += 8;
start_stored_p += 8;
echo_est_p += 8;
}
aecm->channelStored[PART_LEN] = aecm->channelAdapt16[PART_LEN];
echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
far_spectrum[PART_LEN]);
}
void WebRtcAecm_StoreAdaptiveChannel(AecmCore_t* aecm,
const WebRtc_UWord16* far_spectrum,
WebRtc_Word32* echo_est)
{
int i;
void WebRtcAecm_ResetAdaptiveChannelNeon(AecmCore* aecm) {
assert((uintptr_t)(aecm->channelStored) % 16 == 0);
assert((uintptr_t)(aecm->channelAdapt16) % 16 == 0);
assert((uintptr_t)(aecm->channelAdapt32) % 32 == 0);
// During startup we store the channel every block.
// Recalculate echo estimate.
for(i = 0; i < PART_LEN -7; i += 8)
{
// aecm->channelStored[i] = aecm->channelAdapt16[i];
// echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
__asm__("vld1.16 {d26, d27}, [%0]" : : "r"(&far_spectrum[i]) : "q13");
__asm__("vld1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelAdapt16[i]) : "q12");
__asm__("vst1.16 {d24, d25}, [%0, :128]" : : "r"(&aecm->channelStored[i]) : "q12");
__asm__("vmull.u16 q10, d26, d24" : : : "q12", "q13", "q10");
__asm__("vmull.u16 q11, d27, d25" : : : "q12", "q13", "q11");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&echo_est[i]) : "q10", "q11");
}
aecm->channelStored[i] = aecm->channelAdapt16[i];
echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i], far_spectrum[i]);
// The C code of the following optimized code.
// for (i = 0; i < PART_LEN1; i++) {
// aecm->channelAdapt16[i] = aecm->channelStored[i];
// aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
// (int32_t)aecm->channelStored[i], 16);
// }
int16_t* start_stored_p = aecm->channelStored;
int16_t* start_adapt16_p = aecm->channelAdapt16;
int32_t* start_adapt32_p = aecm->channelAdapt32;
const int16_t* end_stored_p = start_stored_p + PART_LEN;
int16x8_t stored_v;
int32x4_t adapt32_v_low, adapt32_v_high;
while (start_stored_p < end_stored_p) {
stored_v = vld1q_s16(start_stored_p);
vst1q_s16(start_adapt16_p, stored_v);
adapt32_v_low = vshll_n_s16(vget_low_s16(stored_v), 16);
adapt32_v_high = vshll_n_s16(vget_high_s16(stored_v), 16);
vst1q_s32(start_adapt32_p, adapt32_v_low);
vst1q_s32(start_adapt32_p + 4, adapt32_v_high);
start_stored_p += 8;
start_adapt16_p += 8;
start_adapt32_p += 8;
}
aecm->channelAdapt16[PART_LEN] = aecm->channelStored[PART_LEN];
aecm->channelAdapt32[PART_LEN] = (int32_t)aecm->channelStored[PART_LEN] << 16;
}
void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm)
{
int i;
for(i = 0; i < PART_LEN -7; i += 8)
{
// aecm->channelAdapt16[i] = aecm->channelStored[i];
// aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)
// aecm->channelStored[i], 16);
__asm__("vld1.16 {d24, d25}, [%0, :128]" : :
"r"(&aecm->channelStored[i]) : "q12");
__asm__("vst1.16 {d24, d25}, [%0, :128]" : :
"r"(&aecm->channelAdapt16[i]) : "q12");
__asm__("vshll.s16 q10, d24, #16" : : : "q12", "q13", "q10");
__asm__("vshll.s16 q11, d25, #16" : : : "q12", "q13", "q11");
__asm__("vst1.16 {d20, d21, d22, d23}, [%0, :256]" : :
"r"(&aecm->channelAdapt32[i]): "q10", "q11");
}
aecm->channelAdapt16[i] = aecm->channelStored[i];
aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
(WebRtc_Word32)aecm->channelStored[i], 16);
}
#endif // #if defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON)
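The NEON version above processes eight bins per iteration and keeps the three energy accumulators in vector registers until the final horizontal add (AddLanes). A scalar reference, reconstructed from the C code quoted in the function's own comments (a sketch for orientation, not the shipped implementation; the accumulators are zeroed here to keep it self-contained):

#include <stdint.h>

static void CalcLinearEnergiesRef(const int16_t* channel_stored,
                                  const int16_t* channel_adapt16,
                                  const uint16_t* far_spectrum,
                                  int num_bins, /* PART_LEN1 upstream */
                                  int32_t* echo_est,
                                  uint32_t* far_energy,
                                  uint32_t* echo_energy_adapt,
                                  uint32_t* echo_energy_stored) {
  int i;
  *far_energy = *echo_energy_adapt = *echo_energy_stored = 0;
  for (i = 0; i < num_bins; i++) {
    /* Echo estimate: stored channel response times far-end spectrum. */
    echo_est[i] = channel_stored[i] * (int32_t)far_spectrum[i];
    *far_energy += (uint32_t)far_spectrum[i];
    *echo_energy_adapt += (uint32_t)(channel_adapt16[i] *
                                     (int32_t)far_spectrum[i]);
    *echo_energy_stored += (uint32_t)echo_est[i];
  }
}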


@ -0,0 +1,87 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_AECM_DEFINES_H_
#define AECM_DYNAMIC_Q /* Turn on/off dynamic Q-domain. */
/* Algorithm parameters */
#define FRAME_LEN 80 /* Total frame length, 10 ms. */
#define PART_LEN 64 /* Length of partition. */
#define PART_LEN_SHIFT 7 /* Log2 of (PART_LEN * 2). */
#define PART_LEN1 (PART_LEN + 1) /* Unique fft coefficients. */
#define PART_LEN2 (PART_LEN << 1) /* Length of partition * 2. */
#define PART_LEN4 (PART_LEN << 2) /* Length of partition * 4. */
#define FAR_BUF_LEN PART_LEN4 /* Length of buffers. */
#define MAX_DELAY 100
/* Counter parameters */
#define CONV_LEN 512 /* Convergence length used at startup. */
#define CONV_LEN2 (CONV_LEN << 1) /* Used at startup. */
/* Energy parameters */
#define MAX_BUF_LEN 64 /* History length of energy signals. */
#define FAR_ENERGY_MIN 1025 /* Lowest Far energy level: At least 2 */
/* in energy. */
#define FAR_ENERGY_DIFF 929 /* Allowed difference between max */
/* and min. */
#define ENERGY_DEV_OFFSET 0 /* The energy error offset in Q8. */
#define ENERGY_DEV_TOL 400 /* The energy estimation tolerance (Q8). */
#define FAR_ENERGY_VAD_REGION 230 /* Far VAD tolerance region. */
/* Stepsize parameters */
#define MU_MIN 10 /* Min stepsize 2^-MU_MIN (far end energy */
/* dependent). */
#define MU_MAX 1 /* Max stepsize 2^-MU_MAX (far end energy */
/* dependent). */
#define MU_DIFF 9 /* MU_MIN - MU_MAX */
/* Channel parameters */
#define MIN_MSE_COUNT 20 /* Min number of consecutive blocks with enough */
/* far end energy to compare channel estimates. */
#define MIN_MSE_DIFF 29 /* The ratio between adapted and stored channel to */
/* accept a new storage (0.8 in Q-MSE_RESOLUTION). */
#define MSE_RESOLUTION 5 /* MSE parameter resolution. */
#define RESOLUTION_CHANNEL16 12 /* W16 Channel in Q-RESOLUTION_CHANNEL16. */
#define RESOLUTION_CHANNEL32 28 /* W32 Channel in Q-RESOLUTION_CHANNEL. */
#define CHANNEL_VAD 16 /* Minimum energy in frequency band */
/* to update channel. */
/* Suppression gain parameters: SUPGAIN parameters in Q-(RESOLUTION_SUPGAIN). */
#define RESOLUTION_SUPGAIN 8 /* Channel in Q-(RESOLUTION_SUPGAIN). */
#define SUPGAIN_DEFAULT (1 << RESOLUTION_SUPGAIN) /* Default. */
#define SUPGAIN_ERROR_PARAM_A 3072 /* Estimation error parameter */
/* (Maximum gain) (8 in Q8). */
#define SUPGAIN_ERROR_PARAM_B 1536 /* Estimation error parameter */
/* (Gain before going down). */
#define SUPGAIN_ERROR_PARAM_D SUPGAIN_DEFAULT /* Estimation error parameter */
/* (Should be the same as Default) (1 in Q8). */
#define SUPGAIN_EPC_DT 200 /* SUPGAIN_ERROR_PARAM_C * ENERGY_DEV_TOL */
/* Defines for "check delay estimation" */
#define CORR_WIDTH 31 /* Number of samples to correlate over. */
#define CORR_MAX 16 /* Maximum correlation offset. */
#define CORR_MAX_BUF 63
#define CORR_DEV 4
#define CORR_MAX_LEVEL 20
#define CORR_MAX_LOW 4
#define CORR_BUF_LEN (CORR_MAX << 1) + 1
/* Note that CORR_WIDTH + 2*CORR_MAX <= MAX_BUF_LEN. */
#define ONE_Q14 (1 << 14)
/* NLP defines */
#define NLP_COMP_LOW 3277 /* 0.2 in Q14 */
#define NLP_COMP_HIGH ONE_Q14 /* 1 in Q14 */
#endif
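Most of the constants above are Q-format bookkeeping: a value "in Q-N" stores x * 2^N as an integer, so RESOLUTION_CHANNEL16 == 12 means the 16-bit channel holds Q12 values and ONE_Q14 is 1.0 in Q14. A small illustrative converter (hypothetical helper, shown only to make the notation concrete):

#include <stdint.h>

static float q_to_float(int32_t value, int q_bits) {
  /* Interpret |value| as fixed point with |q_bits| fraction bits. */
  return (float)value / (float)(1 << q_bits);
}

/* e.g. q_to_float(channelStored[i], RESOLUTION_CHANNEL16)  -- Q12 channel
 *      q_to_float(supGain, RESOLUTION_SUPGAIN)             -- Q8 gain
 *      q_to_float(hnl[i], 14)                              -- Q14 Wiener gain */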


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,22 +8,16 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <stdlib.h>
//#include <string.h>
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
#include "echo_control_mobile.h"
#include "aecm_core.h"
#include "ring_buffer.h"
#ifdef AEC_DEBUG
#include <stdio.h>
#endif
#ifdef MAC_IPHONE_PRINT
#include <time.h>
#include <stdio.h>
#elif defined ARM_WINM_LOG
#include "windows.h"
extern HANDLE logFile;
#endif
#include <stdlib.h>
#include "webrtc/common_audio/ring_buffer.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aecm/aecm_core.h"
#define BUF_SIZE_FRAMES 50 // buffer size (frames)
// Maximum length of resampled signal. Must be an integer multiple of frames
@ -31,7 +25,7 @@ extern HANDLE logFile;
// The factor of 2 handles wb, and the + 1 is as a safety margin
#define MAX_RESAMP_LEN (5 * FRAME_LEN)
static const int kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
static const size_t kBufSizeSamp = BUF_SIZE_FRAMES * FRAME_LEN; // buffer size (samples)
static const int kSampMsNb = 8; // samples per ms in nb
// Target suppression levels for nlp modes
// log{0.001, 0.00001, 0.00000001}
@ -63,7 +57,7 @@ typedef struct
int delayChange;
short lastDelayDiff;
WebRtc_Word16 echoMode;
int16_t echoMode;
#ifdef AEC_DEBUG
FILE *bufFile;
@ -72,47 +66,37 @@ typedef struct
FILE *postCompFile;
#endif // AEC_DEBUG
// Structures
void *farendBuf;
RingBuffer *farendBuf;
int lastError;
AecmCore_t *aecmCore;
} aecmob_t;
AecmCore* aecmCore;
} AecMobile;
// Estimates delay to set the position of the farend buffer read pointer
// (controlled by knownDelay)
static int WebRtcAecm_EstBufDelay(aecmob_t *aecmInst, short msInSndCardBuf);
static int WebRtcAecm_EstBufDelay(AecMobile* aecmInst, short msInSndCardBuf);
// Stuffs the farend buffer if the estimated delay is too large
static int WebRtcAecm_DelayComp(aecmob_t *aecmInst);
static int WebRtcAecm_DelayComp(AecMobile* aecmInst);
WebRtc_Word32 WebRtcAecm_Create(void **aecmInst)
{
aecmob_t *aecm;
if (aecmInst == NULL)
{
return -1;
void* WebRtcAecm_Create() {
AecMobile* aecm = malloc(sizeof(AecMobile));
WebRtcSpl_Init();
aecm->aecmCore = WebRtcAecm_CreateCore();
if (!aecm->aecmCore) {
WebRtcAecm_Free(aecm);
return NULL;
}
aecm = malloc(sizeof(aecmob_t));
*aecmInst = aecm;
if (aecm == NULL)
{
return -1;
}
if (WebRtcAecm_CreateCore(&aecm->aecmCore) == -1)
aecm->farendBuf = WebRtc_CreateBuffer(kBufSizeSamp,
sizeof(int16_t));
if (!aecm->farendBuf)
{
WebRtcAecm_Free(aecm);
aecm = NULL;
return -1;
}
if (WebRtcApm_CreateBuffer(&aecm->farendBuf, kBufSizeSamp) == -1)
{
WebRtcAecm_Free(aecm);
aecm = NULL;
return -1;
return NULL;
}
aecm->initFlag = 0;
@ -129,16 +113,14 @@ WebRtc_Word32 WebRtcAecm_Create(void **aecmInst)
aecm->preCompFile = fopen("preComp.pcm", "wb");
aecm->postCompFile = fopen("postComp.pcm", "wb");
#endif // AEC_DEBUG
return 0;
return aecm;
}
WebRtc_Word32 WebRtcAecm_Free(void *aecmInst)
{
aecmob_t *aecm = aecmInst;
void WebRtcAecm_Free(void* aecmInst) {
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
return -1;
if (aecm == NULL) {
return;
}
#ifdef AEC_DEBUG
@ -153,15 +135,13 @@ WebRtc_Word32 WebRtcAecm_Free(void *aecmInst)
fclose(aecm->postCompFile);
#endif // AEC_DEBUG
WebRtcAecm_FreeCore(aecm->aecmCore);
WebRtcApm_FreeBuffer(aecm->farendBuf);
WebRtc_FreeBuffer(aecm->farendBuf);
free(aecm);
return 0;
}
WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
int32_t WebRtcAecm_Init(void *aecmInst, int32_t sampFreq)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
AecmConfig aecConfig;
if (aecm == NULL)
@ -184,11 +164,7 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
}
// Initialize farend buffer
if (WebRtcApm_InitBuffer(aecm->farendBuf) == -1)
{
aecm->lastError = AECM_UNSPECIFIED_ERROR;
return -1;
}
WebRtc_InitBuffer(aecm->farendBuf);
aecm->initFlag = kInitCheck; // indicates that initialization has been done
@ -222,11 +198,11 @@ WebRtc_Word32 WebRtcAecm_Init(void *aecmInst, WebRtc_Word32 sampFreq)
return 0;
}
WebRtc_Word32 WebRtcAecm_BufferFarend(void *aecmInst, const WebRtc_Word16 *farend,
WebRtc_Word16 nrOfSamples)
int32_t WebRtcAecm_BufferFarend(void *aecmInst, const int16_t *farend,
size_t nrOfSamples)
{
aecmob_t *aecm = aecmInst;
WebRtc_Word32 retVal = 0;
AecMobile* aecm = aecmInst;
int32_t retVal = 0;
if (aecm == NULL)
{
@ -257,38 +233,25 @@ WebRtc_Word32 WebRtcAecm_BufferFarend(void *aecmInst, const WebRtc_Word16 *faren
WebRtcAecm_DelayComp(aecm);
}
WebRtcApm_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
WebRtc_WriteBuffer(aecm->farendBuf, farend, nrOfSamples);
return retVal;
}
WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoisy,
const WebRtc_Word16 *nearendClean, WebRtc_Word16 *out,
WebRtc_Word16 nrOfSamples, WebRtc_Word16 msInSndCardBuf)
int32_t WebRtcAecm_Process(void *aecmInst, const int16_t *nearendNoisy,
const int16_t *nearendClean, int16_t *out,
size_t nrOfSamples, int16_t msInSndCardBuf)
{
aecmob_t *aecm = aecmInst;
WebRtc_Word32 retVal = 0;
short i;
short farend[FRAME_LEN];
AecMobile* aecm = aecmInst;
int32_t retVal = 0;
size_t i;
short nmbrOfFilledBuffers;
short nBlocks10ms;
short nFrames;
size_t nBlocks10ms;
size_t nFrames;
#ifdef AEC_DEBUG
short msInAECBuf;
#endif
#ifdef ARM_WINM_LOG
__int64 freq, start, end, diff;
unsigned int milliseconds;
DWORD temp;
#elif defined MAC_IPHONE_PRINT
// double endtime = 0, starttime = 0;
struct timeval starttime;
struct timeval endtime;
static long int timeused = 0;
static int timecount = 0;
#endif
if (aecm == NULL)
{
return -1;
@ -339,13 +302,17 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
{
if (nearendClean == NULL)
{
memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
} else
if (out != nearendNoisy)
{
memcpy(out, nearendNoisy, sizeof(short) * nrOfSamples);
}
} else if (out != nearendClean)
{
memcpy(out, nearendClean, sizeof(short) * nrOfSamples);
}
nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN;
nmbrOfFilledBuffers =
(short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
// The AECM is in the start up mode
// AECM is disabled until the soundcard buffer and farend buffers are OK
@ -407,10 +374,9 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
aecm->ECstartup = 0; // Enable the AECM
} else if (nmbrOfFilledBuffers > aecm->bufSizeStart)
{
WebRtcApm_FlushBuffer(
aecm->farendBuf,
WebRtcApm_get_buffer_size(aecm->farendBuf)
- aecm->bufSizeStart * FRAME_LEN);
WebRtc_MoveReadPtr(aecm->farendBuf,
(int) WebRtc_available_read(aecm->farendBuf)
- (int) aecm->bufSizeStart * FRAME_LEN);
aecm->ECstartup = 0;
}
}
@ -422,20 +388,27 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
// Note only 1 block supported for nb and 2 blocks for wb
for (i = 0; i < nFrames; i++)
{
nmbrOfFilledBuffers = WebRtcApm_get_buffer_size(aecm->farendBuf) / FRAME_LEN;
int16_t farend[FRAME_LEN];
const int16_t* farend_ptr = NULL;
nmbrOfFilledBuffers =
(short) WebRtc_available_read(aecm->farendBuf) / FRAME_LEN;
// Check that there is data in the far end buffer
if (nmbrOfFilledBuffers > 0)
{
// Get the next 80 samples from the farend buffer
WebRtcApm_ReadBuffer(aecm->farendBuf, farend, FRAME_LEN);
WebRtc_ReadBuffer(aecm->farendBuf, (void**) &farend_ptr, farend,
FRAME_LEN);
// Always store the last frame for use when we run out of data
memcpy(&(aecm->farendOld[i][0]), farend, FRAME_LEN * sizeof(short));
memcpy(&(aecm->farendOld[i][0]), farend_ptr,
FRAME_LEN * sizeof(short));
} else
{
// We have no data so we use the last played frame
memcpy(farend, &(aecm->farendOld[i][0]), FRAME_LEN * sizeof(short));
farend_ptr = farend;
}
// Call buffer delay estimator when all data is extracted,
@ -445,77 +418,23 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
WebRtcAecm_EstBufDelay(aecm, aecm->msInSndCardBuf);
}
#ifdef ARM_WINM_LOG
// measure tick start
QueryPerformanceFrequency((LARGE_INTEGER*)&freq);
QueryPerformanceCounter((LARGE_INTEGER*)&start);
#elif defined MAC_IPHONE_PRINT
// starttime = clock()/(double)CLOCKS_PER_SEC;
gettimeofday(&starttime, NULL);
#endif
// Call the AECM
/*WebRtcAecm_ProcessFrame(aecm->aecmCore, farend, &nearend[FRAME_LEN * i],
&out[FRAME_LEN * i], aecm->knownDelay);*/
if (nearendClean == NULL)
{
if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
farend,
&nearendNoisy[FRAME_LEN * i],
NULL,
&out[FRAME_LEN * i]) == -1)
{
return -1;
}
} else
{
if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
farend,
&nearendNoisy[FRAME_LEN * i],
&nearendClean[FRAME_LEN * i],
&out[FRAME_LEN * i]) == -1)
{
return -1;
}
}
#ifdef ARM_WINM_LOG
// measure tick end
QueryPerformanceCounter((LARGE_INTEGER*)&end);
if(end > start)
{
diff = ((end - start) * 1000) / (freq/1000);
milliseconds = (unsigned int)(diff & 0xffffffff);
WriteFile (logFile, &milliseconds, sizeof(unsigned int), &temp, NULL);
}
#elif defined MAC_IPHONE_PRINT
// endtime = clock()/(double)CLOCKS_PER_SEC;
// printf("%f\n", endtime - starttime);
gettimeofday(&endtime, NULL);
if( endtime.tv_usec > starttime.tv_usec)
{
timeused += endtime.tv_usec - starttime.tv_usec;
} else
{
timeused += endtime.tv_usec + 1000000 - starttime.tv_usec;
}
if(++timecount == 1000)
{
timecount = 0;
printf("AEC: %ld\n", timeused);
timeused = 0;
}
#endif
if (WebRtcAecm_ProcessFrame(aecm->aecmCore,
farend_ptr,
&nearendNoisy[FRAME_LEN * i],
(nearendClean
? &nearendClean[FRAME_LEN * i]
: NULL),
&out[FRAME_LEN * i]) == -1)
return -1;
}
}
#ifdef AEC_DEBUG
msInAECBuf = WebRtcApm_get_buffer_size(aecm->farendBuf) / (kSampMsNb*aecm->aecmCore->mult);
msInAECBuf = (short) WebRtc_available_read(aecm->farendBuf) /
(kSampMsNb * aecm->aecmCore->mult);
fwrite(&msInAECBuf, 2, 1, aecm->bufFile);
fwrite(&(aecm->knownDelay), sizeof(aecm->knownDelay), 1, aecm->delayFile);
#endif
@ -523,9 +442,9 @@ WebRtc_Word32 WebRtcAecm_Process(void *aecmInst, const WebRtc_Word16 *nearendNoi
return retVal;
}
WebRtc_Word32 WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
int32_t WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
@ -605,9 +524,9 @@ WebRtc_Word32 WebRtcAecm_set_config(void *aecmInst, AecmConfig config)
return 0;
}
WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
@ -632,17 +551,19 @@ WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst, AecmConfig *config)
return 0;
}
WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes)
int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes)
{
aecmob_t *aecm = aecmInst;
const WebRtc_Word16* echo_path_ptr = echo_path;
AecMobile* aecm = aecmInst;
const int16_t* echo_path_ptr = echo_path;
if ((aecm == NULL) || (echo_path == NULL))
{
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
if (aecmInst == NULL) {
return -1;
}
if (echo_path == NULL) {
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
}
if (size_bytes != WebRtcAecm_echo_path_size_bytes())
{
@ -661,17 +582,19 @@ WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
return 0;
}
WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes)
int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes)
{
aecmob_t *aecm = aecmInst;
WebRtc_Word16* echo_path_ptr = echo_path;
AecMobile* aecm = aecmInst;
int16_t* echo_path_ptr = echo_path;
if ((aecm == NULL) || (echo_path == NULL))
{
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
if (aecmInst == NULL) {
return -1;
}
if (echo_path == NULL) {
aecm->lastError = AECM_NULL_POINTER_ERROR;
return -1;
}
if (size_bytes != WebRtcAecm_echo_path_size_bytes())
{
@ -691,31 +614,12 @@ WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst,
size_t WebRtcAecm_echo_path_size_bytes()
{
return (PART_LEN1 * sizeof(WebRtc_Word16));
return (PART_LEN1 * sizeof(int16_t));
}
WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len)
int32_t WebRtcAecm_get_error_code(void *aecmInst)
{
const char version[] = "AECM 1.2.0";
const short versionLen = (short)strlen(version) + 1; // +1 for null-termination
if (versionStr == NULL)
{
return -1;
}
if (versionLen > len)
{
return -1;
}
strncpy(versionStr, version, versionLen);
return 0;
}
WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst)
{
aecmob_t *aecm = aecmInst;
AecMobile* aecm = aecmInst;
if (aecm == NULL)
{
@ -725,19 +629,18 @@ WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst)
return aecm->lastError;
}
static int WebRtcAecm_EstBufDelay(aecmob_t *aecm, short msInSndCardBuf)
{
short delayNew, nSampFar, nSampSndCard;
static int WebRtcAecm_EstBufDelay(AecMobile* aecm, short msInSndCardBuf) {
short delayNew, nSampSndCard;
short nSampFar = (short) WebRtc_available_read(aecm->farendBuf);
short diff;
nSampFar = WebRtcApm_get_buffer_size(aecm->farendBuf);
nSampSndCard = msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
delayNew = nSampSndCard - nSampFar;
if (delayNew < FRAME_LEN)
{
WebRtcApm_FlushBuffer(aecm->farendBuf, FRAME_LEN);
WebRtc_MoveReadPtr(aecm->farendBuf, FRAME_LEN);
delayNew += FRAME_LEN;
}
@ -775,12 +678,11 @@ static int WebRtcAecm_EstBufDelay(aecmob_t *aecm, short msInSndCardBuf)
return 0;
}
static int WebRtcAecm_DelayComp(aecmob_t *aecm)
{
int nSampFar, nSampSndCard, delayNew, nSampAdd;
static int WebRtcAecm_DelayComp(AecMobile* aecm) {
int nSampFar = (int) WebRtc_available_read(aecm->farendBuf);
int nSampSndCard, delayNew, nSampAdd;
const int maxStuffSamp = 10 * FRAME_LEN;
nSampFar = WebRtcApm_get_buffer_size(aecm->farendBuf);
nSampSndCard = aecm->msInSndCardBuf * kSampMsNb * aecm->aecmCore->mult;
delayNew = nSampSndCard - nSampFar;
@ -792,7 +694,7 @@ static int WebRtcAecm_DelayComp(aecmob_t *aecm)
FRAME_LEN));
nSampAdd = WEBRTC_SPL_MIN(nSampAdd, maxStuffSamp);
WebRtcApm_StuffBuffer(aecm->farendBuf, nSampAdd);
WebRtc_MoveReadPtr(aecm->farendBuf, -nSampAdd);
aecm->delayChange = 1; // the delay needs to be updated
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,10 +8,12 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
#include "typedefs.h"
#include <stdlib.h>
#include "webrtc/typedefs.h"
enum {
AecmFalse = 0,
@ -29,8 +31,8 @@ enum {
#define AECM_BAD_PARAMETER_WARNING 12100
typedef struct {
WebRtc_Word16 cngMode; // AECM_FALSE, AECM_TRUE (default)
WebRtc_Word16 echoMode; // 0, 1, 2, 3 (default), 4
int16_t cngMode; // AECM_FALSE, AECM_TRUE (default)
int16_t echoMode; // 0, 1, 2, 3 (default), 4
} AecmConfig;
#ifdef __cplusplus
@ -40,133 +42,116 @@ extern "C" {
/*
* Allocates the memory needed by the AECM. The memory needs to be
* initialized separately using the WebRtcAecm_Init() function.
*
* Inputs Description
* -------------------------------------------------------------------
* void **aecmInst Pointer to the AECM instance to be
* created and initialized
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
* Returns a pointer to the instance, or a nullptr on failure.
*/
WebRtc_Word32 WebRtcAecm_Create(void **aecmInst);
void* WebRtcAecm_Create();
/*
* This function releases the memory allocated by WebRtcAecm_Create()
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* -1: error
* void* aecmInst Pointer to the AECM instance
*/
WebRtc_Word32 WebRtcAecm_Free(void *aecmInst);
void WebRtcAecm_Free(void* aecmInst);
/*
* Initializes an AECM instance.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* WebRtc_Word32 sampFreq Sampling frequency of data
* void* aecmInst Pointer to the AECM instance
* int32_t sampFreq Sampling frequency of data
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_Init(void* aecmInst,
WebRtc_Word32 sampFreq);
int32_t WebRtcAecm_Init(void* aecmInst, int32_t sampFreq);
/*
* Inserts an 80 or 160 sample block of data into the farend buffer.
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* WebRtc_Word16 *farend In buffer containing one frame of
* void* aecmInst Pointer to the AECM instance
* int16_t* farend In buffer containing one frame of
* farend signal
* WebRtc_Word16 nrOfSamples Number of samples in farend buffer
* int16_t nrOfSamples Number of samples in farend buffer
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_BufferFarend(void* aecmInst,
const WebRtc_Word16* farend,
WebRtc_Word16 nrOfSamples);
int32_t WebRtcAecm_BufferFarend(void* aecmInst,
const int16_t* farend,
size_t nrOfSamples);
/*
* Runs the AECM on an 80 or 160 sample block of data.
*
* Inputs Description
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* WebRtc_Word16 *nearendNoisy In buffer containing one frame of
* void* aecmInst Pointer to the AECM instance
* int16_t* nearendNoisy In buffer containing one frame of
* reference nearend+echo signal. If
* noise reduction is active, provide
* the noisy signal here.
* WebRtc_Word16 *nearendClean In buffer containing one frame of
* int16_t* nearendClean In buffer containing one frame of
* nearend+echo signal. If noise
* reduction is active, provide the
* clean signal here. Otherwise pass a
* NULL pointer.
* WebRtc_Word16 nrOfSamples Number of samples in nearend buffer
* WebRtc_Word16 msInSndCardBuf Delay estimate for sound card and
* int16_t nrOfSamples Number of samples in nearend buffer
* int16_t msInSndCardBuf Delay estimate for sound card and
* system buffers
*
* Outputs Description
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word16 *out Out buffer, one frame of processed nearend
* WebRtc_Word32 return 0: OK
* -1: error
* int16_t* out Out buffer, one frame of processed nearend
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_Process(void* aecmInst,
const WebRtc_Word16* nearendNoisy,
const WebRtc_Word16* nearendClean,
WebRtc_Word16* out,
WebRtc_Word16 nrOfSamples,
WebRtc_Word16 msInSndCardBuf);
int32_t WebRtcAecm_Process(void* aecmInst,
const int16_t* nearendNoisy,
const int16_t* nearendClean,
int16_t* out,
size_t nrOfSamples,
int16_t msInSndCardBuf);
/*
* This function enables the user to set certain parameters on-the-fly
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* AecmConfig config Config instance that contains all
* void* aecmInst Pointer to the AECM instance
* AecmConfig config Config instance that contains all
* properties to be set
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_set_config(void* aecmInst,
AecmConfig config);
int32_t WebRtcAecm_set_config(void* aecmInst, AecmConfig config);
/*
* This function enables the user to retrieve the current parameter settings on-the-fly
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* void* aecmInst Pointer to the AECM instance
*
* Outputs Description
* -------------------------------------------------------------------
* AecmConfig *config Pointer to the config instance that
* AecmConfig* config Pointer to the config instance that
* all properties will be written to
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst,
AecmConfig *config);
int32_t WebRtcAecm_get_config(void *aecmInst, AecmConfig *config);
/*
* This function enables the user to set the echo path on-the-fly.
@ -179,12 +164,12 @@ WebRtc_Word32 WebRtcAecm_get_config(void *aecmInst,
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes);
int32_t WebRtcAecm_InitEchoPath(void* aecmInst,
const void* echo_path,
size_t size_bytes);
/*
* This function enables the user to get the currently used echo path
@ -198,19 +183,19 @@ WebRtc_Word32 WebRtcAecm_InitEchoPath(void* aecmInst,
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 0: OK
* int32_t return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes);
int32_t WebRtcAecm_GetEchoPath(void* aecmInst,
void* echo_path,
size_t size_bytes);
/*
* This function enables the user to get the echo path size in bytes
*
* Outputs Description
* -------------------------------------------------------------------
* size_t return : size in bytes
* size_t return Size in bytes
*/
size_t WebRtcAecm_echo_path_size_bytes();
@ -219,32 +204,15 @@ size_t WebRtcAecm_echo_path_size_bytes();
*
* Inputs Description
* -------------------------------------------------------------------
* void *aecmInst Pointer to the AECM instance
* void* aecmInst Pointer to the AECM instance
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word32 return 11000-11100: error code
* int32_t return 11000-11100: error code
*/
WebRtc_Word32 WebRtcAecm_get_error_code(void *aecmInst);
/*
* Gets a version string
*
* Inputs Description
* -------------------------------------------------------------------
* char *versionStr Pointer to a string array
* WebRtc_Word16 len The maximum length of the string
*
* Outputs Description
* -------------------------------------------------------------------
* WebRtc_Word8 *versionStr Pointer to a string array
* WebRtc_Word32 return 0: OK
* -1: error
*/
WebRtc_Word32 WebRtcAecm_get_version(WebRtc_Word8 *versionStr,
WebRtc_Word16 len);
int32_t WebRtcAecm_get_error_code(void *aecmInst);
#ifdef __cplusplus
}
#endif
#endif /* WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_INTERFACE_ECHO_CONTROL_MOBILE_H_ */
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_INCLUDE_ECHO_CONTROL_MOBILE_H_
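Taken together, the interface now follows a create/init/process/free pattern with the instance passed around as an opaque pointer. A minimal calling sketch under assumptions not stated in the header (8 kHz mono, one 80-sample frame, no separate clean near-end input, error handling trimmed):

#include <stdint.h>
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"

static void run_aecm_once(const int16_t* farend, const int16_t* nearend,
                          int16_t* out, int16_t delay_ms) {
  void* inst = WebRtcAecm_Create();
  if (inst == NULL)
    return;
  if (WebRtcAecm_Init(inst, 8000) == 0) {
    /* Queue one far-end frame, then process the matching near-end frame. */
    WebRtcAecm_BufferFarend(inst, farend, 80);
    WebRtcAecm_Process(inst, nearend, NULL, out, 80, delay_ms);
  }
  WebRtcAecm_Free(inst);
}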


@ -1,10 +0,0 @@
noinst_LTLIBRARIES = libagc.la
libagc_la_SOURCES = interface/gain_control.h \
analog_agc.c \
analog_agc.h \
digital_agc.c \
digital_agc.h
libagc_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility


@ -0,0 +1,101 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc.h"
#include <cmath>
#include <cstdlib>
#include <algorithm>
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include "webrtc/modules/audio_processing/agc/utility.h"
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
namespace {
const int kDefaultLevelDbfs = -18;
const int kNumAnalysisFrames = 100;
const double kActivityThreshold = 0.3;
} // namespace
Agc::Agc()
: target_level_loudness_(Dbfs2Loudness(kDefaultLevelDbfs)),
target_level_dbfs_(kDefaultLevelDbfs),
histogram_(Histogram::Create(kNumAnalysisFrames)),
inactive_histogram_(Histogram::Create()) {
}
Agc::~Agc() {}
float Agc::AnalyzePreproc(const int16_t* audio, size_t length) {
assert(length > 0);
size_t num_clipped = 0;
for (size_t i = 0; i < length; ++i) {
if (audio[i] == 32767 || audio[i] == -32768)
++num_clipped;
}
return 1.0f * num_clipped / length;
}
int Agc::Process(const int16_t* audio, size_t length, int sample_rate_hz) {
vad_.ProcessChunk(audio, length, sample_rate_hz);
const std::vector<double>& rms = vad_.chunkwise_rms();
const std::vector<double>& probabilities =
vad_.chunkwise_voice_probabilities();
RTC_DCHECK_EQ(rms.size(), probabilities.size());
for (size_t i = 0; i < rms.size(); ++i) {
histogram_->Update(rms[i], probabilities[i]);
}
return 0;
}
bool Agc::GetRmsErrorDb(int* error) {
if (!error) {
assert(false);
return false;
}
if (histogram_->num_updates() < kNumAnalysisFrames) {
// We haven't yet received enough frames.
return false;
}
if (histogram_->AudioContent() < kNumAnalysisFrames * kActivityThreshold) {
// We are likely in an inactive segment.
return false;
}
double loudness = Linear2Loudness(histogram_->CurrentRms());
*error = std::floor(Loudness2Db(target_level_loudness_ - loudness) + 0.5);
histogram_->Reset();
return true;
}
void Agc::Reset() {
histogram_->Reset();
}
int Agc::set_target_level_dbfs(int level) {
// TODO(turajs): just some arbitrary sanity check. We can come up with better
// limits. The upper limit should be chosen such that the risk of clipping is
// low. The lower limit should not result in a too quiet signal.
if (level >= 0 || level <= -100)
return -1;
target_level_dbfs_ = level;
target_level_loudness_ = Dbfs2Loudness(level);
return 0;
}
} // namespace webrtc


@ -1,34 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'agc',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/gain_control.h',
'analog_agc.c',
'analog_agc.h',
'digital_agc.c',
'digital_agc.h',
],
},
],
}


@ -0,0 +1,58 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/vad/voice_activity_detector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
class AudioFrame;
class Histogram;
class Agc {
public:
Agc();
virtual ~Agc();
// Returns the proportion of samples in the buffer which are at full-scale
// (and presumably clipped).
virtual float AnalyzePreproc(const int16_t* audio, size_t length);
// |audio| must be mono; in a multi-channel stream, provide the first (usually
// left) channel.
virtual int Process(const int16_t* audio, size_t length, int sample_rate_hz);
// Retrieves the difference between the target RMS level and the current
// signal RMS level in dB. Returns true if an update is available and false
// otherwise, in which case |error| should be ignored and no action taken.
virtual bool GetRmsErrorDb(int* error);
virtual void Reset();
virtual int set_target_level_dbfs(int level);
virtual int target_level_dbfs() const { return target_level_dbfs_; }
virtual float voice_probability() const {
return vad_.last_voice_probability();
}
private:
double target_level_loudness_;
int target_level_dbfs_;
rtc::scoped_ptr<Histogram> histogram_;
rtc::scoped_ptr<Histogram> inactive_histogram_;
VoiceActivityDetector vad_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_H_
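
Note: a minimal usage sketch of the Agc class declared above, assuming 10 ms mono frames at 16 kHz (the frame source and gain stage below are hypothetical placeholders, not WebRTC APIs):

#include <cstdint>
#include "webrtc/modules/audio_processing/agc/agc.h"

// Placeholder for the caller's audio source; returns false when out of data.
bool GetNextFrame(int16_t* frame, size_t length);
// Placeholder for the caller's gain stage.
void ApplyGainDb(int gain_db);

void RunAgcLoop() {
  webrtc::Agc agc;
  agc.set_target_level_dbfs(-3);  // Valid range is (-100, 0).
  const size_t kFrameLen = 160;   // 10 ms at 16 kHz.
  int16_t frame[kFrameLen];
  while (GetNextFrame(frame, kFrameLen)) {
    agc.Process(frame, kFrameLen, 16000);
    int error_db = 0;
    if (agc.GetRmsErrorDb(&error_db)) {
      // A positive error means the signal is below target; apply more gain.
      ApplyGainDb(error_db);
    }
  }
}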

View File

@ -0,0 +1,442 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"
#include <cassert>
#include <cmath>
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <cstdio>
#endif
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
namespace {
// Lowest level to which the microphone can be lowered due to clipping.
const int kClippedLevelMin = 170;
// Amount the microphone level is lowered with every clipping event.
const int kClippedLevelStep = 15;
// Proportion of clipped samples required to declare a clipping event.
const float kClippedRatioThreshold = 0.1f;
// Time in frames to wait after a clipping event before checking again.
const int kClippedWaitFrames = 300;
// Amount of error we tolerate in the microphone level (presumably due to OS
// quantization) before we assume the user has manually adjusted the microphone.
const int kLevelQuantizationSlack = 25;
const int kDefaultCompressionGain = 7;
const int kMaxCompressionGain = 12;
const int kMinCompressionGain = 2;
// Controls the rate of compression changes towards the target.
const float kCompressionGainStep = 0.05f;
const int kMaxMicLevel = 255;
static_assert(kGainMapSize > kMaxMicLevel, "gain map too small");
const int kMinMicLevel = 12;
// Prevent very large microphone level changes.
const int kMaxResidualGainChange = 15;
// Maximum additional gain allowed to compensate for microphone level
// restrictions from clipping events.
const int kSurplusCompressionGain = 6;
int ClampLevel(int mic_level) {
return std::min(std::max(kMinMicLevel, mic_level), kMaxMicLevel);
}
int LevelFromGainError(int gain_error, int level) {
assert(level >= 0 && level <= kMaxMicLevel);
if (gain_error == 0) {
return level;
}
// TODO(ajm): Could be made more efficient with a binary search.
int new_level = level;
if (gain_error > 0) {
while (kGainMap[new_level] - kGainMap[level] < gain_error &&
new_level < kMaxMicLevel) {
++new_level;
}
} else {
while (kGainMap[new_level] - kGainMap[level] > gain_error &&
new_level > kMinMicLevel) {
--new_level;
}
}
return new_level;
}
} // namespace
// Facility for dumping debug audio files. All methods are no-ops in the
// default case where WEBRTC_AGC_DEBUG_DUMP is undefined.
class DebugFile {
#ifdef WEBRTC_AGC_DEBUG_DUMP
public:
explicit DebugFile(const char* filename)
: file_(fopen(filename, "wb")) {
assert(file_);
}
~DebugFile() {
fclose(file_);
}
void Write(const int16_t* data, size_t length_samples) {
fwrite(data, 1, length_samples * sizeof(int16_t), file_);
}
private:
FILE* file_;
#else
public:
explicit DebugFile(const char* filename) {
}
~DebugFile() {
}
void Write(const int16_t* data, size_t length_samples) {
}
#endif // WEBRTC_AGC_DEBUG_DUMP
};
AgcManagerDirect::AgcManagerDirect(GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level)
: agc_(new Agc()),
gctrl_(gctrl),
volume_callbacks_(volume_callbacks),
frames_since_clipped_(kClippedWaitFrames),
level_(0),
max_level_(kMaxMicLevel),
max_compression_gain_(kMaxCompressionGain),
target_compression_(kDefaultCompressionGain),
compression_(target_compression_),
compression_accumulator_(compression_),
capture_muted_(false),
check_volume_on_next_process_(true), // Check at startup.
startup_(true),
startup_min_level_(ClampLevel(startup_min_level)),
file_preproc_(new DebugFile("agc_preproc.pcm")),
file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
AgcManagerDirect::AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level)
: agc_(agc),
gctrl_(gctrl),
volume_callbacks_(volume_callbacks),
frames_since_clipped_(kClippedWaitFrames),
level_(0),
max_level_(kMaxMicLevel),
max_compression_gain_(kMaxCompressionGain),
target_compression_(kDefaultCompressionGain),
compression_(target_compression_),
compression_accumulator_(compression_),
capture_muted_(false),
check_volume_on_next_process_(true), // Check at startup.
startup_(true),
startup_min_level_(ClampLevel(startup_min_level)),
file_preproc_(new DebugFile("agc_preproc.pcm")),
file_postproc_(new DebugFile("agc_postproc.pcm")) {
}
AgcManagerDirect::~AgcManagerDirect() {}
int AgcManagerDirect::Initialize() {
max_level_ = kMaxMicLevel;
max_compression_gain_ = kMaxCompressionGain;
target_compression_ = kDefaultCompressionGain;
compression_ = target_compression_;
compression_accumulator_ = compression_;
capture_muted_ = false;
check_volume_on_next_process_ = true;
// TODO(bjornv): Investigate if we need to reset |startup_| as well. For
// example, what happens when we change devices.
if (gctrl_->set_mode(GainControl::kFixedDigital) != 0) {
LOG_FERR1(LS_ERROR, set_mode, GainControl::kFixedDigital);
return -1;
}
if (gctrl_->set_target_level_dbfs(2) != 0) {
LOG_FERR1(LS_ERROR, set_target_level_dbfs, 2);
return -1;
}
if (gctrl_->set_compression_gain_db(kDefaultCompressionGain) != 0) {
LOG_FERR1(LS_ERROR, set_compression_gain_db, kDefaultCompressionGain);
return -1;
}
if (gctrl_->enable_limiter(true) != 0) {
LOG_FERR1(LS_ERROR, enable_limiter, true);
return -1;
}
return 0;
}
void AgcManagerDirect::AnalyzePreProcess(int16_t* audio,
int num_channels,
size_t samples_per_channel) {
size_t length = num_channels * samples_per_channel;
if (capture_muted_) {
return;
}
file_preproc_->Write(audio, length);
if (frames_since_clipped_ < kClippedWaitFrames) {
++frames_since_clipped_;
return;
}
// Check for clipped samples, as the AGC has difficulty detecting pitch
// under clipping distortion. We do this in the preprocessing phase in order
// to catch clipped echo as well.
//
// If we find a sufficiently clipped frame, drop the current microphone level
// and enforce a new maximum level, dropped by the same amount from the current
// maximum. This harsh treatment is an effort to avoid repeated clipped echo
// events. As compensation for this restriction, the maximum compression
// gain is increased, through SetMaxLevel().
float clipped_ratio = agc_->AnalyzePreproc(audio, length);
if (clipped_ratio > kClippedRatioThreshold) {
LOG(LS_INFO) << "[agc] Clipping detected. clipped_ratio="
<< clipped_ratio;
// Always decrease the maximum level, even if the current level is below
// threshold.
SetMaxLevel(std::max(kClippedLevelMin, max_level_ - kClippedLevelStep));
if (level_ > kClippedLevelMin) {
// Don't try to adjust the level if we're already below the limit. As
// a consequence, if the user has brought the level above the limit, we
// will still not react until the postproc updates the level.
SetLevel(std::max(kClippedLevelMin, level_ - kClippedLevelStep));
// Reset the AGC since the level has changed.
agc_->Reset();
}
frames_since_clipped_ = 0;
}
}
void AgcManagerDirect::Process(const int16_t* audio,
size_t length,
int sample_rate_hz) {
if (capture_muted_) {
return;
}
if (check_volume_on_next_process_) {
check_volume_on_next_process_ = false;
// We have to wait until the first process call to check the volume,
// because Chromium doesn't guarantee it to be valid any earlier.
CheckVolumeAndReset();
}
if (agc_->Process(audio, length, sample_rate_hz) != 0) {
LOG_FERR0(LS_ERROR, Agc::Process);
assert(false);
}
UpdateGain();
UpdateCompressor();
file_postproc_->Write(audio, length);
}
void AgcManagerDirect::SetLevel(int new_level) {
int voe_level = volume_callbacks_->GetMicVolume();
if (voe_level < 0) {
return;
}
if (voe_level == 0) {
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
return;
}
if (voe_level > kMaxMicLevel) {
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << voe_level;
return;
}
if (voe_level > level_ + kLevelQuantizationSlack ||
voe_level < level_ - kLevelQuantizationSlack) {
LOG(LS_INFO) << "[agc] Mic volume was manually adjusted. Updating "
<< "stored level from " << level_ << " to " << voe_level;
level_ = voe_level;
// Always allow the user to increase the volume.
if (level_ > max_level_) {
SetMaxLevel(level_);
}
// Take no action in this case, since we can't be sure when the volume
// was manually adjusted. The compressor will still provide some of the
// desired gain change.
agc_->Reset();
return;
}
new_level = std::min(new_level, max_level_);
if (new_level == level_) {
return;
}
volume_callbacks_->SetMicVolume(new_level);
LOG(LS_INFO) << "[agc] voe_level=" << voe_level << ", "
<< "level_=" << level_ << ", "
<< "new_level=" << new_level;
level_ = new_level;
}
void AgcManagerDirect::SetMaxLevel(int level) {
assert(level >= kClippedLevelMin);
max_level_ = level;
// Scale the |kSurplusCompressionGain| linearly across the restricted
// level range.
max_compression_gain_ = kMaxCompressionGain + std::floor(
(1.f * kMaxMicLevel - max_level_) / (kMaxMicLevel - kClippedLevelMin) *
kSurplusCompressionGain + 0.5f);
LOG(LS_INFO) << "[agc] max_level_=" << max_level_
<< ", max_compression_gain_=" << max_compression_gain_;
}
void AgcManagerDirect::SetCaptureMuted(bool muted) {
if (capture_muted_ == muted) {
return;
}
capture_muted_ = muted;
if (!muted) {
// When we unmute, we should reset things to be safe.
check_volume_on_next_process_ = true;
}
}
float AgcManagerDirect::voice_probability() {
return agc_->voice_probability();
}
int AgcManagerDirect::CheckVolumeAndReset() {
int level = volume_callbacks_->GetMicVolume();
if (level < 0) {
return -1;
}
// Reasons for taking action at startup:
// 1) A person starting a call is expected to be heard.
// 2) Independent of the interpretation of |level| == 0, we should raise it so
//    the AGC can do its job properly.
if (level == 0 && !startup_) {
LOG(LS_INFO) << "[agc] VolumeCallbacks returned level=0, taking no action.";
return 0;
}
if (level > kMaxMicLevel) {
LOG(LS_ERROR) << "VolumeCallbacks returned an invalid level=" << level;
return -1;
}
LOG(LS_INFO) << "[agc] Initial GetMicVolume()=" << level;
int minLevel = startup_ ? startup_min_level_ : kMinMicLevel;
if (level < minLevel) {
level = minLevel;
LOG(LS_INFO) << "[agc] Initial volume too low, raising to " << level;
volume_callbacks_->SetMicVolume(level);
}
agc_->Reset();
level_ = level;
startup_ = false;
return 0;
}
// Requests the RMS error from AGC and distributes the required gain change
// between the digital compression stage and volume slider. We use the
// compressor first, providing a slack region around the current slider
// position to reduce movement.
//
// If the slider needs to be moved, we check first if the user has adjusted
// it, in which case we take no action and cache the updated level.
void AgcManagerDirect::UpdateGain() {
int rms_error = 0;
if (!agc_->GetRmsErrorDb(&rms_error)) {
// No error update ready.
return;
}
// The compressor will always add at least kMinCompressionGain. In effect,
// this adjusts our target gain upward by the same amount and rms_error
// needs to reflect that.
rms_error += kMinCompressionGain;
// Handle as much error as possible with the compressor first.
int raw_compression = std::max(std::min(rms_error, max_compression_gain_),
kMinCompressionGain);
// Deemphasize the compression gain error. Move halfway between the current
// target and the newly received target. This serves to soften perceptible
// intra-talkspurt adjustments, at the cost of some adaptation speed.
if ((raw_compression == max_compression_gain_ &&
target_compression_ == max_compression_gain_ - 1) ||
(raw_compression == kMinCompressionGain &&
target_compression_ == kMinCompressionGain + 1)) {
// Special case to allow the target to reach the endpoints of the
// compression range. The deemphasis would otherwise halt it at 1 dB shy.
target_compression_ = raw_compression;
} else {
target_compression_ = (raw_compression - target_compression_) / 2
+ target_compression_;
}
// Residual error will be handled by adjusting the volume slider. Use the
// raw rather than deemphasized compression here as we would otherwise
// shrink the amount of slack the compressor provides.
int residual_gain = rms_error - raw_compression;
residual_gain = std::min(std::max(residual_gain, -kMaxResidualGainChange),
kMaxResidualGainChange);
LOG(LS_INFO) << "[agc] rms_error=" << rms_error << ", "
<< "target_compression=" << target_compression_ << ", "
<< "residual_gain=" << residual_gain;
if (residual_gain == 0)
return;
SetLevel(LevelFromGainError(residual_gain, level_));
}
void AgcManagerDirect::UpdateCompressor() {
if (compression_ == target_compression_) {
return;
}
// Adapt the compression gain slowly towards the target, in order to avoid
// highly perceptible changes.
if (target_compression_ > compression_) {
compression_accumulator_ += kCompressionGainStep;
} else {
compression_accumulator_ -= kCompressionGainStep;
}
// The compressor accepts integer gains in dB. Adjust the gain when
// we've come within half a stepsize of the nearest integer. (We don't
// check for equality due to potential floating point imprecision).
int new_compression = compression_;
int nearest_neighbor = std::floor(compression_accumulator_ + 0.5);
if (std::fabs(compression_accumulator_ - nearest_neighbor) <
kCompressionGainStep / 2) {
new_compression = nearest_neighbor;
}
// Set the new compression gain.
if (new_compression != compression_) {
compression_ = new_compression;
compression_accumulator_ = new_compression;
if (gctrl_->set_compression_gain_db(compression_) != 0) {
LOG_FERR1(LS_ERROR, set_compression_gain_db, compression_);
}
}
}
} // namespace webrtc
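
Note: as a sanity check on the UpdateCompressor() pacing above, a standalone sketch (illustrative, not upstream code) that mirrors the accumulator and counts how many calls a 1 dB compression change takes; at one call per 10 ms frame this works out to roughly 200 ms:

#include <cmath>
#include <cstdio>

int main() {
  const float kCompressionGainStep = 0.05f;  // Matches the constant above.
  int compression = 7;                       // kDefaultCompressionGain.
  const int target = 8;
  float accumulator = compression;
  int calls = 0;
  while (compression != target && calls < 1000) {
    accumulator += kCompressionGainStep;
    const int nearest = static_cast<int>(std::floor(accumulator + 0.5f));
    // Commit only within half a step of the nearest integer, as above.
    if (std::fabs(accumulator - nearest) < kCompressionGainStep / 2) {
      compression = nearest;
      accumulator = nearest;
    }
    ++calls;
  }
  printf("1 dB compression change committed after %d calls\n", calls);
  return 0;
}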

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/agc/agc.h"
namespace webrtc {
class AudioFrame;
class DebugFile;
class GainControl;
// Callbacks that need to be injected into AgcManagerDirect to read and control
// the volume values. This is done to remove the VoiceEngine dependency in
// AgcManagerDirect.
// TODO(aluebs): Remove VolumeCallbacks.
class VolumeCallbacks {
public:
virtual ~VolumeCallbacks() {}
virtual void SetMicVolume(int volume) = 0;
virtual int GetMicVolume() = 0;
};
// Direct interface to use AGC to set volume and compression values.
// AudioProcessing uses this interface directly to integrate the callback-less
// AGC.
//
// This class is not thread-safe.
class AgcManagerDirect final {
public:
// AgcManagerDirect will configure GainControl internally. The user is
// responsible for processing the audio using it after the call to Process.
// The operating range of startup_min_level is [12, 255] and any input value
// outside that range will be clamped.
AgcManagerDirect(GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level);
// Dependency injection for testing. Don't delete |agc| as the memory is owned
// by the manager.
AgcManagerDirect(Agc* agc,
GainControl* gctrl,
VolumeCallbacks* volume_callbacks,
int startup_min_level);
~AgcManagerDirect();
int Initialize();
void AnalyzePreProcess(int16_t* audio,
int num_channels,
size_t samples_per_channel);
void Process(const int16_t* audio, size_t length, int sample_rate_hz);
// Call when the capture stream has been muted/unmuted. This causes the
// manager to disregard all incoming audio; chances are good it's background
// noise to which we'd like to avoid adapting.
void SetCaptureMuted(bool muted);
bool capture_muted() { return capture_muted_; }
float voice_probability();
private:
// Sets a new microphone level, after first checking that it hasn't been
// updated by the user, in which case no action is taken.
void SetLevel(int new_level);
// Set the maximum level the AGC is allowed to apply. Also updates the
// maximum compression gain to compensate. The level must be at least
// |kClippedLevelMin|.
void SetMaxLevel(int level);
int CheckVolumeAndReset();
void UpdateGain();
void UpdateCompressor();
rtc::scoped_ptr<Agc> agc_;
GainControl* gctrl_;
VolumeCallbacks* volume_callbacks_;
int frames_since_clipped_;
int level_;
int max_level_;
int max_compression_gain_;
int target_compression_;
int compression_;
float compression_accumulator_;
bool capture_muted_;
bool check_volume_on_next_process_;
bool startup_;
int startup_min_level_;
rtc::scoped_ptr<DebugFile> file_preproc_;
rtc::scoped_ptr<DebugFile> file_postproc_;
RTC_DISALLOW_COPY_AND_ASSIGN(AgcManagerDirect);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_AGC_MANAGER_DIRECT_H_
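
Note: a minimal sketch of wiring AgcManagerDirect up through VolumeCallbacks, assuming hypothetical platform mixer hooks (GetSystemMicVolume/SetSystemMicVolume are placeholders, and |gain_control| would come from the surrounding AudioProcessing instance):

#include "webrtc/modules/audio_processing/agc/agc_manager_direct.h"

// Hypothetical platform mixer hooks; not WebRTC APIs.
int GetSystemMicVolume();
void SetSystemMicVolume(int volume);

class PlatformVolumeCallbacks : public webrtc::VolumeCallbacks {
 public:
  void SetMicVolume(int volume) override { SetSystemMicVolume(volume); }
  int GetMicVolume() override { return GetSystemMicVolume(); }
};

void SetUpAgc(webrtc::GainControl* gain_control) {
  static PlatformVolumeCallbacks callbacks;
  // A startup_min_level outside [12, 255] is clamped, per the header comment.
  webrtc::AgcManagerDirect manager(gain_control, &callbacks, 50);
  if (manager.Initialize() != 0) {
    // Gain control configuration failed; bail out.
    return;
  }
  // Per frame: AnalyzePreProcess() before the audio is processed through
  // GainControl, then Process() on the processed audio.
}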

View File

@ -1,133 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_
#include "typedefs.h"
#include "gain_control.h"
#include "digital_agc.h"
//#define AGC_DEBUG
//#define MIC_LEVEL_FEEDBACK
#ifdef AGC_DEBUG
#include <stdio.h>
#endif
/* Analog Automatic Gain Control variables:
* Constant declarations (inner limits inside which no changes are done)
* In the beginning the range is narrower to widen as soon as the measure
* 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
* and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
* go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
* The limits are created by running the AGC with a file having the desired
* signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
* by out=10*log10(in/260537279.7); Set the target level to the average level
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
*/
#define RXX_BUFFER_LEN 10
static const WebRtc_Word16 kMsecSpeechInner = 520;
static const WebRtc_Word16 kMsecSpeechOuter = 340;
static const WebRtc_Word16 kNormalVadThreshold = 400;
static const WebRtc_Word16 kAlphaShortTerm = 6; // 1 >> 6 = 0.0156
static const WebRtc_Word16 kAlphaLongTerm = 10; // 1 >> 10 = 0.000977
typedef struct
{
// Configurable parameters/variables
WebRtc_UWord32 fs; // Sampling frequency
WebRtc_Word16 compressionGaindB; // Fixed gain level in dB
WebRtc_Word16 targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
WebRtc_Word16 agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
WebRtc_UWord8 limiterEnable; // Enabling limiter (on/off (default off))
WebRtcAgc_config_t defaultConfig;
WebRtcAgc_config_t usedConfig;
// General variables
WebRtc_Word16 initFlag;
WebRtc_Word16 lastError;
// Target level parameters
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
WebRtc_Word32 analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs
WebRtc_Word32 startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs
WebRtc_Word32 startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs
WebRtc_Word32 upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs
WebRtc_Word32 lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs
WebRtc_Word32 upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs
WebRtc_Word32 lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs
WebRtc_UWord16 targetIdx; // Table index for corresponding target level
#ifdef MIC_LEVEL_FEEDBACK
WebRtc_UWord16 targetIdxOffset; // Table index offset for level compensation
#endif
WebRtc_Word16 analogTarget; // Digital reference level in ENV scale
// Analog AGC specific variables
WebRtc_Word32 filterState[8]; // For downsampling wb to nb
WebRtc_Word32 upperLimit; // Upper limit for mic energy
WebRtc_Word32 lowerLimit; // Lower limit for mic energy
WebRtc_Word32 Rxx160w32; // Average energy for one frame
WebRtc_Word32 Rxx16_LPw32; // Low pass filtered subframe energies
WebRtc_Word32 Rxx160_LPw32; // Low pass filtered frame energies
WebRtc_Word32 Rxx16_LPw32Max; // Keeps track of largest energy subframe
WebRtc_Word32 Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies
WebRtc_Word32 Rxx16w32_array[2][5];// Energy values of microphone signal
WebRtc_Word32 env[2][10]; // Envelope values of subframes
WebRtc_Word16 Rxx16pos; // Current position in the Rxx16_vectorw32
WebRtc_Word16 envSum; // Filtered scaled envelope in subframes
WebRtc_Word16 vadThreshold; // Threshold for VAD decision
WebRtc_Word16 inActive; // Inactive time in milliseconds
WebRtc_Word16 msTooLow; // Milliseconds of speech at a too low level
WebRtc_Word16 msTooHigh; // Milliseconds of speech at a too high level
WebRtc_Word16 changeToSlowMode; // Change to slow mode after some time at target
WebRtc_Word16 firstCall; // First call to the process-function
WebRtc_Word16 msZero; // Milliseconds of zero input
WebRtc_Word16 msecSpeechOuterChange;// Min ms of speech between volume changes
WebRtc_Word16 msecSpeechInnerChange;// Min ms of speech between volume changes
WebRtc_Word16 activeSpeech; // Milliseconds of active speech
WebRtc_Word16 muteGuardMs; // Counter to prevent mute action
WebRtc_Word16 inQueue; // 10 ms batch indicator
// Microphone level variables
WebRtc_Word32 micRef; // Remember ref. mic level for virtual mic
WebRtc_UWord16 gainTableIdx; // Current position in virtual gain table
WebRtc_Word32 micGainIdx; // Gain index of mic level to increase slowly
WebRtc_Word32 micVol; // Remember volume between frames
WebRtc_Word32 maxLevel; // Max possible vol level, incl dig gain
WebRtc_Word32 maxAnalog; // Maximum possible analog volume level
WebRtc_Word32 maxInit; // Initial value of "max"
WebRtc_Word32 minLevel; // Minimum possible volume level
WebRtc_Word32 minOutput; // Minimum output volume level
WebRtc_Word32 zeroCtrlMax; // Remember max gain => don't amp low input
WebRtc_Word16 scale; // Scale factor for internal volume levels
#ifdef MIC_LEVEL_FEEDBACK
WebRtc_Word16 numBlocksMicLvlSat;
WebRtc_UWord8 micLvlSat;
#endif
// Structs for VAD and digital_agc
AgcVad_t vadMic;
DigitalAgc_t digitalAgc;
#ifdef AGC_DEBUG
FILE* fpt;
FILE* agcLog;
WebRtc_Word32 fcount;
#endif
WebRtc_Word16 lowLevelSignal;
} Agc_t;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_

View File

@ -1,76 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_DIGITAL_AGC_H_
#ifdef AGC_DEBUG
#include <stdio.h>
#endif
#include "typedefs.h"
#include "signal_processing_library.h"
// the 32 most significant bits of A(19) * B(26) >> 13
#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))
// C + the 32 most significant bits of A * B
#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 ))
typedef struct
{
WebRtc_Word32 downState[8];
WebRtc_Word16 HPstate;
WebRtc_Word16 counter;
WebRtc_Word16 logRatio; // log( P(active) / P(inactive) ) (Q10)
WebRtc_Word16 meanLongTerm; // Q10
WebRtc_Word32 varianceLongTerm; // Q8
WebRtc_Word16 stdLongTerm; // Q10
WebRtc_Word16 meanShortTerm; // Q10
WebRtc_Word32 varianceShortTerm; // Q8
WebRtc_Word16 stdShortTerm; // Q10
} AgcVad_t; // total = 54 bytes
typedef struct
{
WebRtc_Word32 capacitorSlow;
WebRtc_Word32 capacitorFast;
WebRtc_Word32 gain;
WebRtc_Word32 gainTable[32];
WebRtc_Word16 gatePrevious;
WebRtc_Word16 agcMode;
AgcVad_t vadNearend;
AgcVad_t vadFarend;
#ifdef AGC_DEBUG
FILE* logFile;
int frameCounter;
#endif
} DigitalAgc_t;
WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *digitalAgcInst, WebRtc_Word16 agcMode);
WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inNear,
const WebRtc_Word16 *inNear_H, WebRtc_Word16 *out,
WebRtc_Word16 *out_H, WebRtc_UWord32 FS,
WebRtc_Word16 lowLevelSignal);
WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *digitalAgcInst, const WebRtc_Word16 *inFar,
WebRtc_Word16 nrSamples);
void WebRtcAgc_InitVad(AgcVad_t *vadInst);
WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *vadInst, // (i) VAD state
const WebRtc_Word16 *in, // (i) Speech signal
WebRtc_Word16 nrSamples); // (i) number of samples
WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
WebRtc_Word16 compressionGaindB, // Q0 (in dB)
WebRtc_Word16 targetLevelDbfs,// Q0 (in dB)
WebRtc_UWord8 limiterEnable, WebRtc_Word16 analogTarget);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_SOURCE_ANALOG_AGC_H_

View File

@ -0,0 +1,275 @@
/*
* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
static const int kGainMapSize = 256;
// Uses parameters: si = 2, sf = 0.25, D = 8/256
static const int kGainMap[kGainMapSize] = {
-56,
-54,
-52,
-50,
-48,
-47,
-45,
-43,
-42,
-40,
-38,
-37,
-35,
-34,
-33,
-31,
-30,
-29,
-27,
-26,
-25,
-24,
-23,
-22,
-20,
-19,
-18,
-17,
-16,
-15,
-14,
-14,
-13,
-12,
-11,
-10,
-9,
-8,
-8,
-7,
-6,
-5,
-5,
-4,
-3,
-2,
-2,
-1,
0,
0,
1,
1,
2,
3,
3,
4,
4,
5,
5,
6,
6,
7,
7,
8,
8,
9,
9,
10,
10,
11,
11,
12,
12,
13,
13,
13,
14,
14,
15,
15,
15,
16,
16,
17,
17,
17,
18,
18,
18,
19,
19,
19,
20,
20,
21,
21,
21,
22,
22,
22,
23,
23,
23,
24,
24,
24,
24,
25,
25,
25,
26,
26,
26,
27,
27,
27,
28,
28,
28,
28,
29,
29,
29,
30,
30,
30,
30,
31,
31,
31,
32,
32,
32,
32,
33,
33,
33,
33,
34,
34,
34,
35,
35,
35,
35,
36,
36,
36,
36,
37,
37,
37,
38,
38,
38,
38,
39,
39,
39,
39,
40,
40,
40,
40,
41,
41,
41,
41,
42,
42,
42,
42,
43,
43,
43,
44,
44,
44,
44,
45,
45,
45,
45,
46,
46,
46,
46,
47,
47,
47,
47,
48,
48,
48,
48,
49,
49,
49,
49,
50,
50,
50,
50,
51,
51,
51,
51,
52,
52,
52,
52,
53,
53,
53,
53,
54,
54,
54,
54,
55,
55,
55,
55,
56,
56,
56,
56,
57,
57,
57,
57,
58,
58,
58,
58,
59,
59,
59,
59,
60,
60,
60,
60,
61,
61,
61,
61,
62,
62,
62,
62,
63,
63,
63,
63,
64
};
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_GAIN_MAP_INTERNAL_H_
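
Note: the table maps a mic level in [0, 255] to a cumulative analog gain in dB, so moving the slider from level a to level b changes the gain by roughly kGainMap[b] - kGainMap[a]; this is the relation that LevelFromGainError() in agc_manager_direct.cc inverts. A standalone sketch (illustrative only):

#include <cstdio>
#include "webrtc/modules/audio_processing/agc/gain_map_internal.h"

int main() {
  const int from = 128;
  const int to = 180;
  // Per the table, this slider move adds kGainMap[180] - kGainMap[128] dB.
  printf("gain change: %d dB\n", kGainMap[to] - kGainMap[from]);
  return 0;
}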

View File

@ -0,0 +1,228 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/histogram.h"
#include <cmath>
#include <cstring>
#include "webrtc/modules/interface/module_common_types.h"
namespace webrtc {
static const double kHistBinCenters[] = {
7.59621091765857e-02, 9.02036021061016e-02, 1.07115112009343e-01,
1.27197217770508e-01, 1.51044347572047e-01, 1.79362373905283e-01,
2.12989507320644e-01, 2.52921107370304e-01, 3.00339145144454e-01,
3.56647189489147e-01, 4.23511952494003e-01, 5.02912623991786e-01,
5.97199455365749e-01, 7.09163326739184e-01, 8.42118356728544e-01,
1.00000000000000e+00, 1.18748153630660e+00, 1.41011239906908e+00,
1.67448243801153e+00, 1.98841697800836e+00, 2.36120844786349e+00,
2.80389143520905e+00, 3.32956930911896e+00, 3.95380207843188e+00,
4.69506696634852e+00, 5.57530533426190e+00, 6.62057214370769e+00,
7.86180718043869e+00, 9.33575086877358e+00, 1.10860317842269e+01,
1.31644580546776e+01, 1.56325508754123e+01, 1.85633655299256e+01,
2.20436538184971e+01, 2.61764319021997e+01, 3.10840295702492e+01,
3.69117111886792e+01, 4.38319755100383e+01, 5.20496616180135e+01,
6.18080121423973e+01, 7.33958732149108e+01, 8.71562442838066e+01,
1.03496430860848e+02, 1.22900100720889e+02, 1.45941600416277e+02,
1.73302955873365e+02, 2.05794060286978e+02, 2.44376646872353e+02,
2.90192756065437e+02, 3.44598539797631e+02, 4.09204403447902e+02,
4.85922673669740e+02, 5.77024203055553e+02, 6.85205587130498e+02,
8.13668983291589e+02, 9.66216894324125e+02, 1.14736472207740e+03,
1.36247442287647e+03, 1.61791322085579e+03, 1.92124207711260e+03,
2.28143949334655e+03, 2.70916727454970e+03, 3.21708611729384e+03,
3.82023036499473e+03, 4.53645302286906e+03, 5.38695420497926e+03,
6.39690865534207e+03, 7.59621091765857e+03, 9.02036021061016e+03,
1.07115112009343e+04, 1.27197217770508e+04, 1.51044347572047e+04,
1.79362373905283e+04, 2.12989507320644e+04, 2.52921107370304e+04,
3.00339145144454e+04, 3.56647189489147e+04};
static const double kProbQDomain = 1024.0;
// Loudness of -15 dB (smallest expected loudness) in log domain,
// loudness_db = 13.5 * log10(rms);
static const double kLogDomainMinBinCenter = -2.57752062648587;
// Loudness step of 1 dB in log domain
static const double kLogDomainStepSizeInverse = 5.81954605750359;
static const int kTransientWidthThreshold = 7;
static const double kLowProbabilityThreshold = 0.2;
static const int kLowProbThresholdQ10 = static_cast<int>(
kLowProbabilityThreshold * kProbQDomain);
Histogram::Histogram()
: num_updates_(0),
audio_content_q10_(0),
bin_count_q10_(),
activity_probability_(),
hist_bin_index_(),
buffer_index_(0),
buffer_is_full_(false),
len_circular_buffer_(0),
len_high_activity_(0) {
static_assert(
kHistSize == sizeof(kHistBinCenters) / sizeof(kHistBinCenters[0]),
"histogram bin centers incorrect size");
}
Histogram::Histogram(int window_size)
: num_updates_(0),
audio_content_q10_(0),
bin_count_q10_(),
activity_probability_(new int[window_size]),
hist_bin_index_(new int[window_size]),
buffer_index_(0),
buffer_is_full_(false),
len_circular_buffer_(window_size),
len_high_activity_(0) {}
Histogram::~Histogram() {}
void Histogram::Update(double rms, double activity_probability) {
// If circular histogram is activated then remove the oldest entry.
if (len_circular_buffer_ > 0)
RemoveOldestEntryAndUpdate();
// Find the corresponding bin.
int hist_index = GetBinIndex(rms);
// To Q10 domain.
int prob_q10 = static_cast<int16_t>(floor(activity_probability *
kProbQDomain));
InsertNewestEntryAndUpdate(prob_q10, hist_index);
}
// Does nothing if the buffer is not yet full.
void Histogram::RemoveOldestEntryAndUpdate() {
assert(len_circular_buffer_ > 0);
// Do nothing if circular buffer is not full.
if (!buffer_is_full_)
return;
int oldest_prob = activity_probability_[buffer_index_];
int oldest_hist_index = hist_bin_index_[buffer_index_];
UpdateHist(-oldest_prob, oldest_hist_index);
}
void Histogram::RemoveTransient() {
// Don't expect to be here if high-activity region is longer than
// |kTransientWidthThreshold| or there has not been any transient.
assert(len_high_activity_ <= kTransientWidthThreshold);
int index = (buffer_index_ > 0) ? (buffer_index_ - 1) :
len_circular_buffer_ - 1;
while (len_high_activity_ > 0) {
UpdateHist(-activity_probability_[index], hist_bin_index_[index]);
activity_probability_[index] = 0;
index = (index > 0) ? (index - 1) : (len_circular_buffer_ - 1);
len_high_activity_--;
}
}
void Histogram::InsertNewestEntryAndUpdate(int activity_prob_q10,
int hist_index) {
// Update the circular buffer if it is enabled.
if (len_circular_buffer_ > 0) {
// Removing transient.
if (activity_prob_q10 <= kLowProbThresholdQ10) {
// Lower than threshold probability, set it to zero.
activity_prob_q10 = 0;
// Check if this has been a transient.
if (len_high_activity_ <= kTransientWidthThreshold)
RemoveTransient(); // Remove this transient.
len_high_activity_ = 0;
} else if (len_high_activity_ <= kTransientWidthThreshold) {
len_high_activity_++;
}
// Updating the circular buffer.
activity_probability_[buffer_index_] = activity_prob_q10;
hist_bin_index_[buffer_index_] = hist_index;
// Increment the buffer index and check for wrap-around.
buffer_index_++;
if (buffer_index_ >= len_circular_buffer_) {
buffer_index_ = 0;
buffer_is_full_ = true;
}
}
num_updates_++;
// Guard against |num_updates_| wrapping negative on overflow.
if (num_updates_ < 0)
num_updates_--;
UpdateHist(activity_prob_q10, hist_index);
}
void Histogram::UpdateHist(int activity_prob_q10, int hist_index) {
bin_count_q10_[hist_index] += activity_prob_q10;
audio_content_q10_ += activity_prob_q10;
}
double Histogram::AudioContent() const {
return audio_content_q10_ / kProbQDomain;
}
Histogram* Histogram::Create() {
return new Histogram;
}
Histogram* Histogram::Create(int window_size) {
if (window_size < 0)
return NULL;
return new Histogram(window_size);
}
void Histogram::Reset() {
// Reset the histogram, audio-content and number of updates.
memset(bin_count_q10_, 0, sizeof(bin_count_q10_));
audio_content_q10_ = 0;
num_updates_ = 0;
// Empty the circular buffer.
buffer_index_ = 0;
buffer_is_full_ = false;
len_high_activity_ = 0;
}
int Histogram::GetBinIndex(double rms) {
// First exclude overload cases.
if (rms <= kHistBinCenters[0]) {
return 0;
} else if (rms >= kHistBinCenters[kHistSize - 1]) {
return kHistSize - 1;
} else {
// The quantizer is uniform in log domain. Alternatively we could do binary
// search in linear domain.
double rms_log = log(rms);
int index = static_cast<int>(floor((rms_log - kLogDomainMinBinCenter) *
kLogDomainStepSizeInverse));
// The final decision is in linear domain.
double b = 0.5 * (kHistBinCenters[index] + kHistBinCenters[index + 1]);
if (rms > b) {
return index + 1;
}
return index;
}
}
double Histogram::CurrentRms() const {
double p;
double mean_val = 0;
if (audio_content_q10_ > 0) {
double p_total_inverse = 1. / static_cast<double>(audio_content_q10_);
for (int n = 0; n < kHistSize; n++) {
p = static_cast<double>(bin_count_q10_[n]) * p_total_inverse;
mean_val += p * kHistBinCenters[n];
}
} else {
mean_val = kHistBinCenters[0];
}
return mean_val;
}
} // namespace webrtc
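
Note: the two log-domain constants above follow from the bin centers themselves: kLogDomainMinBinCenter is ln(kHistBinCenters[0]) and kLogDomainStepSizeInverse is 1 / ln(kHistBinCenters[1] / kHistBinCenters[0]), since the bins are uniformly spaced in the log domain. A quick standalone check (illustrative only):

#include <cmath>
#include <cstdio>

int main() {
  const double c0 = 7.59621091765857e-02;  // kHistBinCenters[0]
  const double c1 = 9.02036021061016e-02;  // kHistBinCenters[1]
  // Expect values close to -2.57752062648587 and 5.81954605750359.
  printf("log of first bin center: %.14f\n", std::log(c0));
  printf("inverse log step size:   %.14f\n", 1.0 / std::log(c1 / c0));
  return 0;
}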

View File

@ -0,0 +1,91 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
#include <string.h>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/typedefs.h"
namespace webrtc {
// This class implements a histogram of loudness using circular buffers, so
// that the histogram tracks the loudness over the last T seconds.
class Histogram {
public:
// Create a non-sliding Histogram.
static Histogram* Create();
// Create a sliding Histogram, i.e. the histogram represents the last
// |window_size| samples.
static Histogram* Create(int window_size);
~Histogram();
// Insert RMS and the corresponding activity probability.
void Update(double rms, double activity_probability);
// Reset the histogram, forget the past.
void Reset();
// Current loudness, which is actually the mean of histogram in loudness
// domain.
double CurrentRms() const;
// Sum of the histogram content.
double AudioContent() const;
// Number of times the histogram has been updated.
int num_updates() const { return num_updates_; }
private:
Histogram();
explicit Histogram(int window);
// Find the histogram bin associated with the given |rms|.
int GetBinIndex(double rms);
void RemoveOldestEntryAndUpdate();
void InsertNewestEntryAndUpdate(int activity_prob_q10, int hist_index);
void UpdateHist(int activity_prob_q10, int hist_index);
void RemoveTransient();
// Number of histogram bins.
static const int kHistSize = 77;
// Number of times the histogram is updated
int num_updates_;
// Audio content, this should be equal to the sum of the components of
// |bin_count_q10_|.
int64_t audio_content_q10_;
// Histogram of input RMS in Q10 with |kHistSize| bins. In each Update() we
// increment the associated histogram bin with the given probability. The
// increment is implemented in Q10 to avoid rounding errors.
int64_t bin_count_q10_[kHistSize];
// Circular buffer for probabilities
rtc::scoped_ptr<int[]> activity_probability_;
// Circular buffer for histogram-indices of probabilities.
rtc::scoped_ptr<int[]> hist_bin_index_;
// Current index of the circular buffer, where the newest data is written;
// when the buffer is full this also points at the oldest data.
int buffer_index_;
// Indicates whether the buffer is full and has wrapped around.
int buffer_is_full_;
// Size of circular buffer.
int len_circular_buffer_;
int len_high_activity_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_HISTOGRAM_H_
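
Note: a minimal usage sketch of the interface above (the 10 ms frame cadence is an assumption, not a requirement of the class):

#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/agc/histogram.h"

void TrackLoudness() {
  // Sliding histogram over the last 100 updates, e.g. ~1 s of 10 ms frames.
  rtc::scoped_ptr<webrtc::Histogram> hist(webrtc::Histogram::Create(100));
  // Per frame: feed the measured RMS and the VAD voice probability.
  hist->Update(/*rms=*/0.5, /*activity_probability=*/0.9);
  if (hist->num_updates() > 0) {
    const double rms = hist->CurrentRms();  // Activity-weighted mean RMS.
    (void)rms;
  }
}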

View File

@ -0,0 +1,133 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
//#define MIC_LEVEL_FEEDBACK
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
#include "webrtc/typedefs.h"
/* Analog Automatic Gain Control variables:
* Constant declarations (inner limits inside which no changes are done)
* In the beginning the range is narrower to widen as soon as the measure
* 'Rxx160_LP' is inside it. Currently the starting limits are -22.2+/-1dBm0
* and the final limits -22.2+/-2.5dBm0. These levels makes the speech signal
* go towards -25.4dBm0 (-31.4dBov). Tuned with wbfile-31.4dBov.pcm
* The limits are created by running the AGC with a file having the desired
* signal level and thereafter plotting Rxx160_LP in the dBm0-domain defined
* by out=10*log10(in/260537279.7); Set the target level to the average level
* of our measure Rxx160_LP. Remember that the levels are in blocks of 16 in
* Q(-7). (Example matlab code: round(db2pow(-21.2)*16/2^7) )
*/
#define RXX_BUFFER_LEN 10
static const int16_t kMsecSpeechInner = 520;
static const int16_t kMsecSpeechOuter = 340;
static const int16_t kNormalVadThreshold = 400;
static const int16_t kAlphaShortTerm = 6; // 1 >> 6 = 0.0156
static const int16_t kAlphaLongTerm = 10; // 1 >> 10 = 0.000977
typedef struct
{
// Configurable parameters/variables
uint32_t fs; // Sampling frequency
int16_t compressionGaindB; // Fixed gain level in dB
int16_t targetLevelDbfs; // Target level in -dBfs of envelope (default -3)
int16_t agcMode; // Hard coded mode (adaptAna/adaptDig/fixedDig)
uint8_t limiterEnable; // Enabling limiter (on/off (default off))
WebRtcAgcConfig defaultConfig;
WebRtcAgcConfig usedConfig;
// General variables
int16_t initFlag;
int16_t lastError;
// Target level parameters
// Based on the above: analogTargetLevel = round((32767*10^(-22/20))^2*16/2^7)
int32_t analogTargetLevel; // = RXX_BUFFER_LEN * 846805; -22 dBfs
int32_t startUpperLimit; // = RXX_BUFFER_LEN * 1066064; -21 dBfs
int32_t startLowerLimit; // = RXX_BUFFER_LEN * 672641; -23 dBfs
int32_t upperPrimaryLimit; // = RXX_BUFFER_LEN * 1342095; -20 dBfs
int32_t lowerPrimaryLimit; // = RXX_BUFFER_LEN * 534298; -24 dBfs
int32_t upperSecondaryLimit;// = RXX_BUFFER_LEN * 2677832; -17 dBfs
int32_t lowerSecondaryLimit;// = RXX_BUFFER_LEN * 267783; -27 dBfs
uint16_t targetIdx; // Table index for corresponding target level
#ifdef MIC_LEVEL_FEEDBACK
uint16_t targetIdxOffset; // Table index offset for level compensation
#endif
int16_t analogTarget; // Digital reference level in ENV scale
// Analog AGC specific variables
int32_t filterState[8]; // For downsampling wb to nb
int32_t upperLimit; // Upper limit for mic energy
int32_t lowerLimit; // Lower limit for mic energy
int32_t Rxx160w32; // Average energy for one frame
int32_t Rxx16_LPw32; // Low pass filtered subframe energies
int32_t Rxx160_LPw32; // Low pass filtered frame energies
int32_t Rxx16_LPw32Max; // Keeps track of largest energy subframe
int32_t Rxx16_vectorw32[RXX_BUFFER_LEN];// Array with subframe energies
int32_t Rxx16w32_array[2][5];// Energy values of microphone signal
int32_t env[2][10]; // Envelope values of subframes
int16_t Rxx16pos; // Current position in the Rxx16_vectorw32
int16_t envSum; // Filtered scaled envelope in subframes
int16_t vadThreshold; // Threshold for VAD decision
int16_t inActive; // Inactive time in milliseconds
int16_t msTooLow; // Milliseconds of speech at a too low level
int16_t msTooHigh; // Milliseconds of speech at a too high level
int16_t changeToSlowMode; // Change to slow mode after some time at target
int16_t firstCall; // First call to the process-function
int16_t msZero; // Milliseconds of zero input
int16_t msecSpeechOuterChange;// Min ms of speech between volume changes
int16_t msecSpeechInnerChange;// Min ms of speech between volume changes
int16_t activeSpeech; // Milliseconds of active speech
int16_t muteGuardMs; // Counter to prevent mute action
int16_t inQueue; // 10 ms batch indicator
// Microphone level variables
int32_t micRef; // Remember ref. mic level for virtual mic
uint16_t gainTableIdx; // Current position in virtual gain table
int32_t micGainIdx; // Gain index of mic level to increase slowly
int32_t micVol; // Remember volume between frames
int32_t maxLevel; // Max possible vol level, incl dig gain
int32_t maxAnalog; // Maximum possible analog volume level
int32_t maxInit; // Initial value of "max"
int32_t minLevel; // Minimum possible volume level
int32_t minOutput; // Minimum output volume level
int32_t zeroCtrlMax; // Remember max gain => don't amp low input
int32_t lastInMicLevel;
int16_t scale; // Scale factor for internal volume levels
#ifdef MIC_LEVEL_FEEDBACK
int16_t numBlocksMicLvlSat;
uint8_t micLvlSat;
#endif
// Structs for VAD and digital_agc
AgcVad vadMic;
DigitalAgc digitalAgc;
#ifdef WEBRTC_AGC_DEBUG_DUMP
FILE* fpt;
FILE* agcLog;
int32_t fcount;
#endif
int16_t lowLevelSignal;
} LegacyAgc;
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_ANALOG_AGC_H_
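
Note: the commented derivation of analogTargetLevel above can be reproduced directly; a standalone sketch (illustrative only) of the -22 dBfs per-element value:

#include <cmath>
#include <cstdio>

int main() {
  // analogTargetLevel element = round((32767 * 10^(-22/20))^2 * 16 / 2^7);
  // the struct comment lists 846805 (scaled by RXX_BUFFER_LEN in use).
  const double env = 32767.0 * std::pow(10.0, -22.0 / 20.0);
  printf("%.0f\n", std::floor(env * env * 16.0 / 128.0 + 0.5));
  return 0;
}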

View File

@ -12,12 +12,15 @@
*
*/
#include "webrtc/modules/audio_processing/agc/legacy/digital_agc.h"
#include <assert.h>
#include <string.h>
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "digital_agc.h"
#include "gain_control.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
// To generate the gaintable, copy&paste the following lines to a Matlab window:
// MaxGain = 6; MinGain = 0; CompRatio = 3; Knee = 1;
@ -33,7 +36,8 @@
// zoom on;
// Generator table for y=log2(1+e^x) in Q8.
static const WebRtc_UWord16 kGenFuncTable[128] = {
enum { kGenFuncTableSize = 128 };
static const uint16_t kGenFuncTable[kGenFuncTableSize] = {
256, 485, 786, 1126, 1484, 1849, 2217, 2586,
2955, 3324, 3693, 4063, 4432, 4801, 5171, 5540,
5909, 6279, 6648, 7017, 7387, 7756, 8125, 8495,
@ -52,29 +56,29 @@ static const WebRtc_UWord16 kGenFuncTable[128] = {
44320, 44689, 45058, 45428, 45797, 46166, 46536, 46905
};
static const WebRtc_Word16 kAvgDecayTime = 250; // frames; < 3000
static const int16_t kAvgDecayTime = 250; // frames; < 3000
WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
WebRtc_Word16 digCompGaindB, // Q0
WebRtc_Word16 targetLevelDbfs,// Q0
WebRtc_UWord8 limiterEnable,
WebRtc_Word16 analogTarget) // Q0
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t digCompGaindB, // Q0
int16_t targetLevelDbfs,// Q0
uint8_t limiterEnable,
int16_t analogTarget) // Q0
{
// This function generates the compressor gain table used in the fixed digital part.
WebRtc_UWord32 tmpU32no1, tmpU32no2, absInLevel, logApprox;
WebRtc_Word32 inLevel, limiterLvl;
WebRtc_Word32 tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
const WebRtc_UWord16 kLog10 = 54426; // log2(10) in Q14
const WebRtc_UWord16 kLog10_2 = 49321; // 10*log10(2) in Q14
const WebRtc_UWord16 kLogE_1 = 23637; // log2(e) in Q14
WebRtc_UWord16 constMaxGain;
WebRtc_UWord16 tmpU16, intPart, fracPart;
const WebRtc_Word16 kCompRatio = 3;
const WebRtc_Word16 kSoftLimiterLeft = 1;
WebRtc_Word16 limiterOffset = 0; // Limiter offset
WebRtc_Word16 limiterIdx, limiterLvlX;
WebRtc_Word16 constLinApprox, zeroGainLvl, maxGain, diffGain;
WebRtc_Word16 i, tmp16, tmp16no1;
uint32_t tmpU32no1, tmpU32no2, absInLevel, logApprox;
int32_t inLevel, limiterLvl;
int32_t tmp32, tmp32no1, tmp32no2, numFIX, den, y32;
const uint16_t kLog10 = 54426; // log2(10) in Q14
const uint16_t kLog10_2 = 49321; // 10*log10(2) in Q14
const uint16_t kLogE_1 = 23637; // log2(e) in Q14
uint16_t constMaxGain;
uint16_t tmpU16, intPart, fracPart;
const int16_t kCompRatio = 3;
const int16_t kSoftLimiterLeft = 1;
int16_t limiterOffset = 0; // Limiter offset
int16_t limiterIdx, limiterLvlX;
int16_t constLinApprox, zeroGainLvl, maxGain, diffGain;
int16_t i, tmp16, tmp16no1;
int zeros, zerosScale;
// Constants
@ -83,11 +87,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// kLog10_2 = 49321; // 10*log10(2) in Q14
// Calculate maximum digital gain and zero gain level
tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB - analogTarget, kCompRatio - 1);
tmp32no1 = (digCompGaindB - analogTarget) * (kCompRatio - 1);
tmp16no1 = analogTarget - targetLevelDbfs;
tmp16no1 += WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
maxGain = WEBRTC_SPL_MAX(tmp16no1, (analogTarget - targetLevelDbfs));
tmp32no1 = WEBRTC_SPL_MUL_16_16(maxGain, kCompRatio);
tmp32no1 = maxGain * kCompRatio;
zeroGainLvl = digCompGaindB;
zeroGainLvl -= WebRtcSpl_DivW32W16ResW16(tmp32no1 + ((kCompRatio - 1) >> 1),
kCompRatio - 1);
@ -100,10 +104,11 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// Calculate the difference between maximum gain and gain at 0dB0v:
// diffGain = maxGain + (compRatio-1)*zeroGainLvl/compRatio
// = (compRatio-1)*digCompGaindB/compRatio
tmp32no1 = WEBRTC_SPL_MUL_16_16(digCompGaindB, kCompRatio - 1);
tmp32no1 = digCompGaindB * (kCompRatio - 1);
diffGain = WebRtcSpl_DivW32W16ResW16(tmp32no1 + (kCompRatio >> 1), kCompRatio);
if (diffGain < 0)
if (diffGain < 0 || diffGain >= kGenFuncTableSize)
{
assert(0);
return -1;
}
@ -111,9 +116,8 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
// limiterLvlX = analogTarget - limiterOffset
// limiterLvl = targetLevelDbfs + limiterOffset/compRatio
limiterLvlX = analogTarget - limiterOffset;
limiterIdx = 2
+ WebRtcSpl_DivW32W16ResW16(WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)limiterLvlX, 13),
WEBRTC_SPL_RSHIFT_U16(kLog10_2, 1));
limiterIdx =
2 + WebRtcSpl_DivW32W16ResW16((int32_t)limiterLvlX << 13, kLog10_2 / 2);
tmp16no1 = WebRtcSpl_DivW32W16ResW16(limiterOffset + (kCompRatio >> 1), kCompRatio);
limiterLvl = targetLevelDbfs + tmp16no1;
@ -134,23 +138,23 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
{
// Calculate scaled input level (compressor):
// inLevel = fix((-constLog10_2*(compRatio-1)*(1-i)+fix(compRatio/2))/compRatio)
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(kCompRatio - 1, i - 1); // Q0
tmp16 = (int16_t)((kCompRatio - 1) * (i - 1)); // Q0
tmp32 = WEBRTC_SPL_MUL_16_U16(tmp16, kLog10_2) + 1; // Q14
inLevel = WebRtcSpl_DivW32W16(tmp32, kCompRatio); // Q14
// Calculate diffGain-inLevel, to map using the genFuncTable
inLevel = WEBRTC_SPL_LSHIFT_W32((WebRtc_Word32)diffGain, 14) - inLevel; // Q14
inLevel = ((int32_t)diffGain << 14) - inLevel; // Q14
// Make calculations on abs(inLevel) and compensate for the sign afterwards.
absInLevel = (WebRtc_UWord32)WEBRTC_SPL_ABS_W32(inLevel); // Q14
absInLevel = (uint32_t)WEBRTC_SPL_ABS_W32(inLevel); // Q14
// LUT with interpolation
intPart = (WebRtc_UWord16)WEBRTC_SPL_RSHIFT_U32(absInLevel, 14);
fracPart = (WebRtc_UWord16)(absInLevel & 0x00003FFF); // extract the fractional part
intPart = (uint16_t)(absInLevel >> 14);
fracPart = (uint16_t)(absInLevel & 0x00003FFF); // extract the fractional part
tmpU16 = kGenFuncTable[intPart + 1] - kGenFuncTable[intPart]; // Q8
tmpU32no1 = WEBRTC_SPL_UMUL_16_16(tmpU16, fracPart); // Q22
tmpU32no1 += WEBRTC_SPL_LSHIFT_U32((WebRtc_UWord32)kGenFuncTable[intPart], 14); // Q22
logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 8); // Q14
tmpU32no1 = tmpU16 * fracPart; // Q22
tmpU32no1 += (uint32_t)kGenFuncTable[intPart] << 14; // Q22
logApprox = tmpU32no1 >> 8; // Q14
// Compensate for negative exponent using the relation:
// log2(1 + 2^-x) = log2(1 + 2^x) - x
if (inLevel < 0)
@ -160,83 +164,89 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
if (zeros < 15)
{
// Not enough space for multiplication
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(absInLevel, 15 - zeros); // Q(zeros-1)
tmpU32no2 = absInLevel >> (15 - zeros); // Q(zeros-1)
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(tmpU32no2, kLogE_1); // Q(zeros+13)
if (zeros < 9)
{
tmpU32no1 = WEBRTC_SPL_RSHIFT_U32(tmpU32no1, 9 - zeros); // Q(zeros+13)
zerosScale = 9 - zeros;
tmpU32no1 >>= zerosScale; // Q(zeros+13)
} else
{
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, zeros - 9); // Q22
tmpU32no2 >>= zeros - 9; // Q22
}
} else
{
tmpU32no2 = WEBRTC_SPL_UMUL_32_16(absInLevel, kLogE_1); // Q28
tmpU32no2 = WEBRTC_SPL_RSHIFT_U32(tmpU32no2, 6); // Q22
tmpU32no2 >>= 6; // Q22
}
logApprox = 0;
if (tmpU32no2 < tmpU32no1)
{
logApprox = WEBRTC_SPL_RSHIFT_U32(tmpU32no1 - tmpU32no2, 8 - zerosScale); //Q14
logApprox = (tmpU32no1 - tmpU32no2) >> (8 - zerosScale); //Q14
}
}
numFIX = WEBRTC_SPL_LSHIFT_W32(WEBRTC_SPL_MUL_16_U16(maxGain, constMaxGain), 6); // Q14
numFIX -= WEBRTC_SPL_MUL_32_16((WebRtc_Word32)logApprox, diffGain); // Q14
numFIX = (maxGain * constMaxGain) << 6; // Q14
numFIX -= (int32_t)logApprox * diffGain; // Q14
// Calculate ratio
// Shift numFIX as much as possible
zeros = WebRtcSpl_NormW32(numFIX);
numFIX = WEBRTC_SPL_LSHIFT_W32(numFIX, zeros); // Q(14+zeros)
// Shift |numFIX| as much as possible.
// Ensure we avoid wrap-around in |den| as well.
if (numFIX > (den >> 8)) // |den| is Q8.
{
zeros = WebRtcSpl_NormW32(numFIX);
} else
{
zeros = WebRtcSpl_NormW32(den) + 8;
}
numFIX <<= zeros; // Q(14+zeros)
// Shift den so we end up in Qy1
tmp32no1 = WEBRTC_SPL_SHIFT_W32(den, zeros - 8); // Q(zeros)
if (numFIX < 0)
{
numFIX -= WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1);
numFIX -= tmp32no1 / 2;
} else
{
numFIX += WEBRTC_SPL_RSHIFT_W32(tmp32no1, 1);
numFIX += tmp32no1 / 2;
}
y32 = WEBRTC_SPL_DIV(numFIX, tmp32no1); // in Q14
y32 = numFIX / tmp32no1; // in Q14
if (limiterEnable && (i < limiterIdx))
{
tmp32 = WEBRTC_SPL_MUL_16_U16(i - 1, kLog10_2); // Q14
tmp32 -= WEBRTC_SPL_LSHIFT_W32(limiterLvl, 14); // Q14
tmp32 -= limiterLvl << 14; // Q14
y32 = WebRtcSpl_DivW32W16(tmp32 + 10, 20);
}
if (y32 > 39000)
{
tmp32 = WEBRTC_SPL_MUL(y32 >> 1, kLog10) + 4096; // in Q27
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 13); // in Q14
tmp32 = (y32 >> 1) * kLog10 + 4096; // in Q27
tmp32 >>= 13; // In Q14.
} else
{
tmp32 = WEBRTC_SPL_MUL(y32, kLog10) + 8192; // in Q28
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 14); // in Q14
tmp32 = y32 * kLog10 + 8192; // in Q28
tmp32 >>= 14; // In Q14.
}
tmp32 += WEBRTC_SPL_LSHIFT_W32(16, 14); // in Q14 (Make sure final output is in Q16)
tmp32 += 16 << 14; // in Q14 (Make sure final output is in Q16)
// Calculate power
if (tmp32 > 0)
{
intPart = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 14);
fracPart = (WebRtc_UWord16)(tmp32 & 0x00003FFF); // in Q14
if (WEBRTC_SPL_RSHIFT_W32(fracPart, 13))
intPart = (int16_t)(tmp32 >> 14);
fracPart = (uint16_t)(tmp32 & 0x00003FFF); // in Q14
if ((fracPart >> 13) != 0)
{
tmp16 = WEBRTC_SPL_LSHIFT_W16(2, 14) - constLinApprox;
tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - fracPart;
tmp32no2 = WEBRTC_SPL_MUL_32_16(tmp32no2, tmp16);
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13);
tmp32no2 = WEBRTC_SPL_LSHIFT_W32(1, 14) - tmp32no2;
tmp16 = (2 << 14) - constLinApprox;
tmp32no2 = (1 << 14) - fracPart;
tmp32no2 *= tmp16;
tmp32no2 >>= 13;
tmp32no2 = (1 << 14) - tmp32no2;
} else
{
tmp16 = constLinApprox - WEBRTC_SPL_LSHIFT_W16(1, 14);
tmp32no2 = WEBRTC_SPL_MUL_32_16(fracPart, tmp16);
tmp32no2 = WEBRTC_SPL_RSHIFT_W32(tmp32no2, 13);
tmp16 = constLinApprox - (1 << 14);
tmp32no2 = (fracPart * tmp16) >> 13;
}
fracPart = (WebRtc_UWord16)tmp32no2;
gainTable[i] = WEBRTC_SPL_LSHIFT_W32(1, intPart)
+ WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
fracPart = (uint16_t)tmp32no2;
gainTable[i] =
(1 << intPart) + WEBRTC_SPL_SHIFT_W32(fracPart, intPart - 14);
} else
{
gainTable[i] = 0;
@ -246,9 +256,7 @@ WebRtc_Word32 WebRtcAgc_CalculateGainTable(WebRtc_Word32 *gainTable, // Q16
return 0;
}
WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
{
int32_t WebRtcAgc_InitDigital(DigitalAgc* stt, int16_t agcMode) {
if (agcMode == kAgcModeFixedDigital)
{
// start at minimum to find correct gain faster
@ -256,13 +264,13 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
} else
{
// start out with 0 dB gain
stt->capacitorSlow = 134217728; // (WebRtc_Word32)(0.125f * 32768.0f * 32768.0f);
stt->capacitorSlow = 134217728; // (int32_t)(0.125f * 32768.0f * 32768.0f);
}
stt->capacitorFast = 0;
stt->gain = 65536;
stt->gatePrevious = 0;
stt->agcMode = agcMode;
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
stt->frameCounter = 0;
#endif
@ -273,52 +281,45 @@ WebRtc_Word32 WebRtcAgc_InitDigital(DigitalAgc_t *stt, WebRtc_Word16 agcMode)
return 0;
}
WebRtc_Word32 WebRtcAgc_AddFarendToDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_far,
WebRtc_Word16 nrSamples)
{
// Check for valid pointer
if (&stt->vadFarend == NULL)
{
return -1;
}
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* stt,
const int16_t* in_far,
size_t nrSamples) {
assert(stt != NULL);
// VAD for far end
WebRtcAgc_ProcessVad(&stt->vadFarend, in_far, nrSamples);
return 0;
}
WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *in_near,
const WebRtc_Word16 *in_near_H, WebRtc_Word16 *out,
WebRtc_Word16 *out_H, WebRtc_UWord32 FS,
WebRtc_Word16 lowlevelSignal)
{
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* stt,
const int16_t* const* in_near,
size_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowlevelSignal) {
// array for gains (one value per ms, incl start & end)
WebRtc_Word32 gains[11];
int32_t gains[11];
WebRtc_Word32 out_tmp, tmp32;
WebRtc_Word32 env[10];
WebRtc_Word32 nrg, max_nrg;
WebRtc_Word32 cur_level;
WebRtc_Word32 gain32, delta;
WebRtc_Word16 logratio;
WebRtc_Word16 lower_thr, upper_thr;
WebRtc_Word16 zeros, zeros_fast, frac;
WebRtc_Word16 decay;
WebRtc_Word16 gate, gain_adj;
WebRtc_Word16 k, n;
WebRtc_Word16 L, L2; // samples/subframe
int32_t out_tmp, tmp32;
int32_t env[10];
int32_t max_nrg;
int32_t cur_level;
int32_t gain32, delta;
int16_t logratio;
int16_t lower_thr, upper_thr;
int16_t zeros = 0, zeros_fast, frac = 0;
int16_t decay;
int16_t gate, gain_adj;
int16_t k;
size_t n, i, L;
int16_t L2; // samples/subframe
// determine number of samples per ms
if (FS == 8000)
{
L = 8;
L2 = 3;
} else if (FS == 16000)
{
L = 16;
L2 = 4;
} else if (FS == 32000)
} else if (FS == 16000 || FS == 32000 || FS == 48000)
{
L = 16;
L2 = 4;
@ -327,27 +328,22 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
return -1;
}
// TODO(andrew): again, we don't need input and output pointers...
if (in_near != out)
for (i = 0; i < num_bands; ++i)
{
// Only needed if they don't already point to the same place.
memcpy(out, in_near, 10 * L * sizeof(WebRtc_Word16));
}
if (FS == 32000)
{
if (in_near_H != out_H)
if (in_near[i] != out[i])
{
memcpy(out_H, in_near_H, 10 * L * sizeof(WebRtc_Word16));
// Only needed if they don't already point to the same place.
memcpy(out[i], in_near[i], 10 * L * sizeof(in_near[i][0]));
}
}
// VAD for near end
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out, L * 10);
logratio = WebRtcAgc_ProcessVad(&stt->vadNearend, out[0], L * 10);
// Account for far end VAD
if (stt->vadFarend.counter > 10)
{
tmp32 = WEBRTC_SPL_MUL_16_16(3, logratio);
logratio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 - stt->vadFarend.logRatio, 2);
tmp32 = 3 * logratio;
logratio = (int16_t)((tmp32 - stt->vadFarend.logRatio) >> 2);
}
// Determine decay factor depending on VAD
@ -364,11 +360,11 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
} else
{
// decay = (WebRtc_Word16)(((lower_thr - logratio)
// decay = (int16_t)(((lower_thr - logratio)
// * (2^27/(DecayTime*(upper_thr-lower_thr)))) >> 10);
// SUBSTITUTED: 2^27/(DecayTime*(upper_thr-lower_thr)) -> 65
tmp32 = WEBRTC_SPL_MUL_16_16((lower_thr - logratio), 65);
decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 10);
tmp32 = (lower_thr - logratio) * 65;
decay = (int16_t)(tmp32 >> 10);
}
// adjust decay factor for long silence (detected as low standard deviation)
@ -380,9 +376,9 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
} else if (stt->vadNearend.stdLongTerm < 8096)
{
// decay = (WebRtc_Word16)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12);
tmp32 = WEBRTC_SPL_MUL_16_16((stt->vadNearend.stdLongTerm - 4000), decay);
decay = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 12);
// decay = (int16_t)(((stt->vadNearend.stdLongTerm - 4000) * decay) >> 12);
tmp32 = (stt->vadNearend.stdLongTerm - 4000) * decay;
decay = (int16_t)(tmp32 >> 12);
}
if (lowlevelSignal != 0)
@ -390,9 +386,14 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
decay = 0;
}
}
#ifdef AGC_DEBUG
#ifdef WEBRTC_AGC_DEBUG_DUMP
stt->frameCounter++;
fprintf(stt->logFile, "%5.2f\t%d\t%d\t%d\t", (float)(stt->frameCounter) / 100, logratio, decay, stt->vadNearend.stdLongTerm);
fprintf(stt->logFile,
"%5.2f\t%d\t%d\t%d\t",
(float)(stt->frameCounter) / 100,
logratio,
decay,
stt->vadNearend.stdLongTerm);
#endif
// Find max amplitude per sub frame
// iterate over sub frames
@ -402,7 +403,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
max_nrg = 0;
for (n = 0; n < L; n++)
{
nrg = WEBRTC_SPL_MUL_16_16(out[k * L + n], out[k * L + n]);
int32_t nrg = out[0][k * L + n] * out[0][k * L + n];
if (nrg > max_nrg)
{
max_nrg = nrg;
@ -445,34 +446,39 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
}
// Translate signal level into gain, using a piecewise linear approximation
// find number of leading zeros
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)cur_level);
zeros = WebRtcSpl_NormU32((uint32_t)cur_level);
if (cur_level == 0)
{
zeros = 31;
}
tmp32 = (WEBRTC_SPL_LSHIFT_W32(cur_level, zeros) & 0x7FFFFFFF);
frac = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 19); // Q12
tmp32 = WEBRTC_SPL_MUL((stt->gainTable[zeros-1] - stt->gainTable[zeros]), frac);
gains[k + 1] = stt->gainTable[zeros] + WEBRTC_SPL_RSHIFT_W32(tmp32, 12);
#ifdef AGC_DEBUG
if (k == 0)
{
fprintf(stt->logFile, "%d\t%d\t%d\t%d\t%d\n", env[0], cur_level, stt->capacitorFast, stt->capacitorSlow, zeros);
tmp32 = (cur_level << zeros) & 0x7FFFFFFF;
frac = (int16_t)(tmp32 >> 19); // Q12.
tmp32 = (stt->gainTable[zeros-1] - stt->gainTable[zeros]) * frac;
gains[k + 1] = stt->gainTable[zeros] + (tmp32 >> 12);
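// Clarifying note (not in the original source): |zeros| is the leading-zero
// count of cur_level, so 31 - zeros is the integer part of log2(cur_level)
// and |frac| carries the next 12 bits (Q12). The gain is then interpolated
// linearly between adjacent table entries; e.g. cur_level = 0x18000 gives
// zeros = 15, frac = 0x800 (0.5 in Q12), landing halfway between
// gainTable[15] and gainTable[14].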
#ifdef WEBRTC_AGC_DEBUG_DUMP
if (k == 0) {
fprintf(stt->logFile,
"%d\t%d\t%d\t%d\t%d\n",
env[0],
cur_level,
stt->capacitorFast,
stt->capacitorSlow,
zeros);
}
#endif
}
// Gate processing (lower gain during absence of speech)
zeros = WEBRTC_SPL_LSHIFT_W16(zeros, 9) - WEBRTC_SPL_RSHIFT_W16(frac, 3);
zeros = (zeros << 9) - (frac >> 3);
// find number of leading zeros
zeros_fast = WebRtcSpl_NormU32((WebRtc_UWord32)stt->capacitorFast);
zeros_fast = WebRtcSpl_NormU32((uint32_t)stt->capacitorFast);
if (stt->capacitorFast == 0)
{
zeros_fast = 31;
}
tmp32 = (WEBRTC_SPL_LSHIFT_W32(stt->capacitorFast, zeros_fast) & 0x7FFFFFFF);
zeros_fast = WEBRTC_SPL_LSHIFT_W16(zeros_fast, 9);
zeros_fast -= (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 22);
tmp32 = (stt->capacitorFast << zeros_fast) & 0x7FFFFFFF;
zeros_fast <<= 9;
zeros_fast -= (int16_t)(tmp32 >> 22);
gate = 1000 + zeros_fast - zeros - stt->vadNearend.stdShortTerm;
@ -481,8 +487,8 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
stt->gatePrevious = 0;
} else
{
tmp32 = WEBRTC_SPL_MUL_16_16(stt->gatePrevious, 7);
gate = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32((WebRtc_Word32)gate + tmp32, 3);
tmp32 = stt->gatePrevious * 7;
gate = (int16_t)((gate + tmp32) >> 3);
stt->gatePrevious = gate;
}
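// Clarifying note (added): this branch smooths the gate with a one-pole
// filter, gate = (new_gate + 7 * gatePrevious) / 8, so the gate level
// adapts over roughly eight 10 ms frames.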
// gate < 0 -> no gate
@ -491,7 +497,7 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
{
if (gate < 2500)
{
gain_adj = WEBRTC_SPL_RSHIFT_W16(2500 - gate, 5);
gain_adj = (2500 - gate) >> 5;
} else
{
gain_adj = 0;
@ -501,12 +507,12 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
if ((gains[k + 1] - stt->gainTable[0]) > 8388608)
{
// To prevent wraparound
tmp32 = WEBRTC_SPL_RSHIFT_W32((gains[k+1] - stt->gainTable[0]), 8);
tmp32 = WEBRTC_SPL_MUL(tmp32, (178 + gain_adj));
tmp32 = (gains[k + 1] - stt->gainTable[0]) >> 8;
tmp32 *= 178 + gain_adj;
} else
{
tmp32 = WEBRTC_SPL_MUL((gains[k+1] - stt->gainTable[0]), (178 + gain_adj));
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 8);
tmp32 = (gains[k+1] - stt->gainTable[0]) * (178 + gain_adj);
tmp32 >>= 8;
}
gains[k + 1] = stt->gainTable[0] + tmp32;
}
@ -521,23 +527,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
{
zeros = 16 - WebRtcSpl_NormW32(gains[k + 1]);
}
gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1;
gain32 = WEBRTC_SPL_MUL(gain32, gain32);
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
// check for overflow
while (AGC_MUL32(WEBRTC_SPL_RSHIFT_W32(env[k], 12) + 1, gain32)
> WEBRTC_SPL_SHIFT_W32((WebRtc_Word32)32767, 2 * (1 - zeros + 10)))
while (AGC_MUL32((env[k] >> 12) + 1, gain32)
> WEBRTC_SPL_SHIFT_W32((int32_t)32767, 2 * (1 - zeros + 10)))
{
// multiply by 253/256 ==> -0.1 dB
if (gains[k + 1] > 8388607)
{
// Prevent wrap around
gains[k + 1] = WEBRTC_SPL_MUL(WEBRTC_SPL_RSHIFT_W32(gains[k+1], 8), 253);
gains[k + 1] = (gains[k+1] / 256) * 253;
} else
{
gains[k + 1] = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL(gains[k+1], 253), 8);
gains[k + 1] = (gains[k+1] * 253) / 256;
}
gain32 = WEBRTC_SPL_RSHIFT_W32(gains[k+1], zeros) + 1;
gain32 = WEBRTC_SPL_MUL(gain32, gain32);
gain32 = (gains[k + 1] >> zeros) + 1;
gain32 *= gain32;
}
}
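// Clarifying note (added): gain32 holds the squared, right-shifted gain, so
// the while-condition above compares the worst-case output power of the
// subframe against the int16 ceiling; each pass trims the gain by 253/256,
// i.e. about 0.1 dB, until the product fits.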
// gain reductions should be done 1 ms earlier than gain increases
@ -553,42 +559,25 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
// Apply gain
// handle first sub frame separately
delta = WEBRTC_SPL_LSHIFT_W32(gains[1] - gains[0], (4 - L2));
gain32 = WEBRTC_SPL_LSHIFT_W32(gains[0], 4);
delta = (gains[1] - gains[0]) << (4 - L2);
gain32 = gains[0] << 4;
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7));
out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
if (out_tmp > 4095)
for (i = 0; i < num_bands; ++i)
{
out[n] = (WebRtc_Word16)32767;
} else if (out_tmp < -4096)
{
out[n] = (WebRtc_Word16)-32768;
} else
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[n], WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
}
// For higher band
if (FS == 32000)
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n],
WEBRTC_SPL_RSHIFT_W32(gain32 + 127, 7));
out_tmp = WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][n] * ((gain32 + 127) >> 7);
out_tmp = tmp32 >> 16;
if (out_tmp > 4095)
{
out_H[n] = (WebRtc_Word16)32767;
out[i][n] = (int16_t)32767;
} else if (out_tmp < -4096)
{
out_H[n] = (WebRtc_Word16)-32768;
out[i][n] = (int16_t)-32768;
} else
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out_H[n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][n] * (gain32 >> 4);
out[i][n] = (int16_t)(tmp32 >> 16);
}
}
//
@ -598,21 +587,15 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
// iterate over subframes
for (k = 1; k < 10; k++)
{
delta = WEBRTC_SPL_LSHIFT_W32(gains[k+1] - gains[k], (4 - L2));
gain32 = WEBRTC_SPL_LSHIFT_W32(gains[k], 4);
delta = (gains[k+1] - gains[k]) << (4 - L2);
gain32 = gains[k] << 4;
// iterate over samples
for (n = 0; n < L; n++)
{
// For lower band
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out[k * L + n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
// For higher band
if (FS == 32000)
for (i = 0; i < num_bands; ++i)
{
tmp32 = WEBRTC_SPL_MUL((WebRtc_Word32)out_H[k * L + n],
WEBRTC_SPL_RSHIFT_W32(gain32, 4));
out_H[k * L + n] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32 , 16);
tmp32 = out[i][k * L + n] * (gain32 >> 4);
out[i][k * L + n] = (int16_t)(tmp32 >> 16);
}
gain32 += delta;
}
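// Clarifying note (added): gains[] are Q16 and gain32 runs in Q20
// (gains[k] << 4). Since L2 = log2(L), the (4 - L2) shift makes the L
// per-sample increments of delta sum to exactly gains[k+1] - gains[k];
// each sample is scaled by the Q16 value gain32 >> 4 and the product is
// shifted down 16 bits back to the int16 domain.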
@ -621,24 +604,23 @@ WebRtc_Word32 WebRtcAgc_ProcessDigital(DigitalAgc_t *stt, const WebRtc_Word16 *i
return 0;
}
void WebRtcAgc_InitVad(AgcVad_t *state)
{
WebRtc_Word16 k;
void WebRtcAgc_InitVad(AgcVad* state) {
int16_t k;
state->HPstate = 0; // state of high pass filter
state->logRatio = 0; // log( P(active) / P(inactive) )
// average input level (Q10)
state->meanLongTerm = WEBRTC_SPL_LSHIFT_W16(15, 10);
state->meanLongTerm = 15 << 10;
// variance of input level (Q8)
state->varianceLongTerm = WEBRTC_SPL_LSHIFT_W32(500, 8);
state->varianceLongTerm = 500 << 8;
state->stdLongTerm = 0; // standard deviation of input level in dB
// short-term average input level (Q10)
state->meanShortTerm = WEBRTC_SPL_LSHIFT_W16(15, 10);
state->meanShortTerm = 15 << 10;
// short-term variance of input level (Q8)
state->varianceShortTerm = WEBRTC_SPL_LSHIFT_W32(500, 8);
state->varianceShortTerm = 500 << 8;
state->stdShortTerm = 0; // short-term standard deviation of input level in dB
state->counter = 3; // counts updates
@ -649,17 +631,17 @@ void WebRtcAgc_InitVad(AgcVad_t *state)
}
}
WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
const WebRtc_Word16 *in, // (i) Speech signal
WebRtc_Word16 nrSamples) // (i) number of samples
int16_t WebRtcAgc_ProcessVad(AgcVad* state, // (i) VAD state
const int16_t* in, // (i) Speech signal
size_t nrSamples) // (i) number of samples
{
WebRtc_Word32 out, nrg, tmp32, tmp32b;
WebRtc_UWord16 tmpU16;
WebRtc_Word16 k, subfr, tmp16;
WebRtc_Word16 buf1[8];
WebRtc_Word16 buf2[4];
WebRtc_Word16 HPstate;
WebRtc_Word16 zeros, dB;
int32_t out, nrg, tmp32, tmp32b;
uint16_t tmpU16;
int16_t k, subfr, tmp16;
int16_t buf1[8];
int16_t buf2[4];
int16_t HPstate;
int16_t zeros, dB;
// process in 10 sub frames of 1 ms (to save on memory)
nrg = 0;
@ -671,9 +653,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
{
for (k = 0; k < 8; k++)
{
tmp32 = (WebRtc_Word32)in[2 * k] + (WebRtc_Word32)in[2 * k + 1];
tmp32 = WEBRTC_SPL_RSHIFT_W32(tmp32, 1);
buf1[k] = (WebRtc_Word16)tmp32;
tmp32 = (int32_t)in[2 * k] + (int32_t)in[2 * k + 1];
tmp32 >>= 1;
buf1[k] = (int16_t)tmp32;
}
in += 16;
@ -688,10 +670,9 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
for (k = 0; k < 4; k++)
{
out = buf2[k] + HPstate;
tmp32 = WEBRTC_SPL_MUL(600, out);
HPstate = (WebRtc_Word16)(WEBRTC_SPL_RSHIFT_W32(tmp32, 10) - buf2[k]);
tmp32 = WEBRTC_SPL_MUL(out, out);
nrg += WEBRTC_SPL_RSHIFT_W32(tmp32, 6);
tmp32 = 600 * out;
HPstate = (int16_t)((tmp32 >> 10) - buf2[k]);
nrg += (out * out) >> 6;
}
}
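// Clarifying note (added): with y = buf2[k] + HPstate and
// HPstate' = (600 * y >> 10) - buf2[k], each iteration realizes the
// first-order high-pass y[n] = x[n] - x[n-1] + (600/1024) * y[n-1] on the
// decimated signal, and |nrg| accumulates its energy scaled down by 2^6.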
state->HPstate = HPstate;
@ -722,7 +703,7 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
}
// energy level (range {-32..30}) (Q10)
dB = WEBRTC_SPL_LSHIFT_W16(15 - zeros, 11);
dB = (15 - zeros) << 11;
// Update statistics
@ -733,44 +714,49 @@ WebRtc_Word16 WebRtcAgc_ProcessVad(AgcVad_t *state, // (i) VAD state
}
// update short-term estimate of mean energy level (Q10)
tmp32 = (WEBRTC_SPL_MUL_16_16(state->meanShortTerm, 15) + (WebRtc_Word32)dB);
state->meanShortTerm = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 4);
tmp32 = state->meanShortTerm * 15 + dB;
state->meanShortTerm = (int16_t)(tmp32 >> 4);
// update short-term estimate of variance in energy level (Q8)
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12);
tmp32 += WEBRTC_SPL_MUL(state->varianceShortTerm, 15);
state->varianceShortTerm = WEBRTC_SPL_RSHIFT_W32(tmp32, 4);
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceShortTerm * 15;
state->varianceShortTerm = tmp32 / 16;
// update short-term estimate of standard deviation in energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanShortTerm, state->meanShortTerm);
tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceShortTerm, 12) - tmp32;
state->stdShortTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32);
tmp32 = state->meanShortTerm * state->meanShortTerm;
tmp32 = (state->varianceShortTerm << 12) - tmp32;
state->stdShortTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
// update long-term estimate of mean energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->counter) + (WebRtc_Word32)dB;
state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(tmp32,
WEBRTC_SPL_ADD_SAT_W16(state->counter, 1));
tmp32 = state->meanLongTerm * state->counter + dB;
state->meanLongTerm = WebRtcSpl_DivW32W16ResW16(
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
// update long-term estimate of variance in energy level (Q8)
tmp32 = WEBRTC_SPL_RSHIFT_W32(WEBRTC_SPL_MUL_16_16(dB, dB), 12);
tmp32 += WEBRTC_SPL_MUL(state->varianceLongTerm, state->counter);
state->varianceLongTerm = WebRtcSpl_DivW32W16(tmp32,
WEBRTC_SPL_ADD_SAT_W16(state->counter, 1));
tmp32 = (dB * dB) >> 12;
tmp32 += state->varianceLongTerm * state->counter;
state->varianceLongTerm = WebRtcSpl_DivW32W16(
tmp32, WebRtcSpl_AddSatW16(state->counter, 1));
// update long-term estimate of standard deviation in energy level (Q10)
tmp32 = WEBRTC_SPL_MUL_16_16(state->meanLongTerm, state->meanLongTerm);
tmp32 = WEBRTC_SPL_LSHIFT_W32(state->varianceLongTerm, 12) - tmp32;
state->stdLongTerm = (WebRtc_Word16)WebRtcSpl_Sqrt(tmp32);
tmp32 = state->meanLongTerm * state->meanLongTerm;
tmp32 = (state->varianceLongTerm << 12) - tmp32;
state->stdLongTerm = (int16_t)WebRtcSpl_Sqrt(tmp32);
// update voice activity measure (Q10)
tmp16 = WEBRTC_SPL_LSHIFT_W16(3, 12);
tmp32 = WEBRTC_SPL_MUL_16_16(tmp16, (dB - state->meanLongTerm));
tmp16 = 3 << 12;
// TODO(bjornv): (dB - state->meanLongTerm) can overflow, e.g., in
// ApmTest.Process unit test. Previously the macro WEBRTC_SPL_MUL_16_16()
// was used, which did an intermediate cast to (int16_t), hence losing
// significant bits. This cause logRatio to max out positive, rather than
// negative. This is a bug, but has very little significance.
tmp32 = tmp16 * (int16_t)(dB - state->meanLongTerm);
tmp32 = WebRtcSpl_DivW32W16(tmp32, state->stdLongTerm);
tmpU16 = WEBRTC_SPL_LSHIFT_U16((WebRtc_UWord16)13, 12);
tmpU16 = (13 << 12);
tmp32b = WEBRTC_SPL_MUL_16_U16(state->logRatio, tmpU16);
tmp32 += WEBRTC_SPL_RSHIFT_W32(tmp32b, 10);
tmp32 += tmp32b >> 10;
state->logRatio = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp32, 6);
state->logRatio = (int16_t)(tmp32 >> 6);
// limit
if (state->logRatio > 2048)


@ -0,0 +1,80 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
#ifdef WEBRTC_AGC_DEBUG_DUMP
#include <stdio.h>
#endif
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/typedefs.h"
// the 32 most significant bits of A(19) * B(26) >> 13
#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))
// C + the 32 most significant bits of A * B
#define AGC_SCALEDIFF32(A, B, C) ((C) + ((B)>>16)*(A) + ( ((0x0000FFFF & (B))*(A)) >> 16 ))
typedef struct
{
int32_t downState[8];
int16_t HPstate;
int16_t counter;
int16_t logRatio; // log( P(active) / P(inactive) ) (Q10)
int16_t meanLongTerm; // Q10
int32_t varianceLongTerm; // Q8
int16_t stdLongTerm; // Q10
int16_t meanShortTerm; // Q10
int32_t varianceShortTerm; // Q8
int16_t stdShortTerm; // Q10
} AgcVad; // total = 54 bytes
typedef struct
{
int32_t capacitorSlow;
int32_t capacitorFast;
int32_t gain;
int32_t gainTable[32];
int16_t gatePrevious;
int16_t agcMode;
AgcVad vadNearend;
AgcVad vadFarend;
#ifdef WEBRTC_AGC_DEBUG_DUMP
FILE* logFile;
int frameCounter;
#endif
} DigitalAgc;
int32_t WebRtcAgc_InitDigital(DigitalAgc* digitalAgcInst, int16_t agcMode);
int32_t WebRtcAgc_ProcessDigital(DigitalAgc* digitalAgcInst,
const int16_t* const* inNear,
size_t num_bands,
int16_t* const* out,
uint32_t FS,
int16_t lowLevelSignal);
int32_t WebRtcAgc_AddFarendToDigital(DigitalAgc* digitalAgcInst,
const int16_t* inFar,
size_t nrSamples);
void WebRtcAgc_InitVad(AgcVad* vadInst);
int16_t WebRtcAgc_ProcessVad(AgcVad* vadInst, // (i) VAD state
const int16_t* in, // (i) Speech signal
size_t nrSamples); // (i) number of samples
int32_t WebRtcAgc_CalculateGainTable(int32_t *gainTable, // Q16
int16_t compressionGaindB, // Q0 (in dB)
int16_t targetLevelDbfs, // Q0 (in dB)
uint8_t limiterEnable,
int16_t analogTarget);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_DIGITAL_AGC_H_
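The AGC_MUL32 macro in this header is worth a second look: it splits B into its high bits and its low 13 bits so that (A * B) >> 13 can be formed in 32-bit arithmetic, exact whenever A fits in 19 bits, B in 26, and signed right shift is arithmetic. A minimal standalone check (illustrative only, not part of the tree):

#include <assert.h>
#include <stdint.h>

#define AGC_MUL32(A, B) (((B)>>13)*(A) + ( ((0x00001FFF & (B))*(A)) >> 13 ))

int main(void) {
  int32_t a = 123456;                     // < 2^19
  int32_t b = 45000000;                   // < 2^26
  int64_t wide = ((int64_t)a * b) >> 13;  // 64-bit reference result
  assert(AGC_MUL32(a, b) == (int32_t)wide);
  return 0;
}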


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,10 +8,10 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
#include "typedefs.h"
#include "webrtc/typedefs.h"
// Errors
#define AGC_UNSPECIFIED_ERROR 18000
@ -39,10 +39,10 @@ enum
typedef struct
{
WebRtc_Word16 targetLevelDbfs; // default 3 (-3 dBOv)
WebRtc_Word16 compressionGaindB; // default 9 dB
WebRtc_UWord8 limiterEnable; // default kAgcTrue (on)
} WebRtcAgc_config_t;
int16_t targetLevelDbfs; // default 3 (-3 dBOv)
int16_t compressionGaindB; // default 9 dB
uint8_t limiterEnable; // default kAgcTrue (on)
} WebRtcAgcConfig;
#if defined(__cplusplus)
extern "C"
@ -50,14 +50,14 @@ extern "C"
#endif
/*
* This function processes a 10/20ms frame of far-end speech to determine
* if there is active speech. Far-end speech length can be either 10ms or
* 20ms. The length of the input speech vector must be given in samples
* (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000).
* This function processes a 10 ms frame of far-end speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inFar : Far-end input speech vector (10 or 20ms)
* - inFar : Far-end input speech vector
* - samples : Number of samples in input vector
*
* Return value:
@ -65,26 +65,23 @@ extern "C"
* : -1 - Error
*/
int WebRtcAgc_AddFarend(void* agcInst,
const WebRtc_Word16* inFar,
WebRtc_Word16 samples);
const int16_t* inFar,
size_t samples);
/*
* This function processes a 10/20ms frame of microphone speech to determine
* if there is active speech. Microphone speech length can be either 10ms or
* 20ms. The length of the input speech vector must be given in samples
* (80/160 when FS=8000, and 160/320 when FS=16000 or FS=32000). For very low
* input levels, the input signal is increased in level by multiplying and
* overwriting the samples in inMic[].
* This function processes a 10 ms frame of microphone speech to determine
* if there is active speech. The length of the input speech vector must be
* given in samples (80 when FS=8000, and 160 when FS=16000, FS=32000 or
* FS=48000). For very low input levels, the input signal is increased in level
* by multiplying and overwriting the samples in inMic[].
*
* This function should be called before any further processing of the
* near-end microphone signal.
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector (10 or 20 ms) for
* L band
* - inMic_H : Microphone input speech vector (10 or 20 ms) for
* H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
*
* Return value:
@ -92,24 +89,21 @@ int WebRtcAgc_AddFarend(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_AddMic(void* agcInst,
WebRtc_Word16* inMic,
WebRtc_Word16* inMic_H,
WebRtc_Word16 samples);
int16_t* const* inMic,
size_t num_bands,
size_t samples);
/*
* This function replaces the analog microphone with a virtual one.
* It is a digital gain applied to the input signal and is used in the
* agcAdaptiveDigital mode where no microphone level is adjustable.
* Microphone speech length can be either 10ms or 20ms. The length of the
* input speech vector must be given in samples (80/160 when FS=8000, and
* 160/320 when FS=16000 or FS=32000).
* agcAdaptiveDigital mode where no microphone level is adjustable. The length
* of the input speech vector must be given in samples (80 when FS=8000, and 160
* when FS=16000, FS=32000 or FS=48000).
*
* Input:
* - agcInst : AGC instance.
* - inMic : Microphone input speech vector for (10 or 20 ms)
* L band
* - inMic_H : Microphone input speech vector for (10 or 20 ms)
* H band
* - inMic : Microphone input speech vector for each band
* - num_bands : Number of bands in input vector
* - samples : Number of samples in input vector
* - micLevelIn : Input level of microphone (static)
*
@ -123,30 +117,27 @@ int WebRtcAgc_AddMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_VirtualMic(void* agcInst,
WebRtc_Word16* inMic,
WebRtc_Word16* inMic_H,
WebRtc_Word16 samples,
WebRtc_Word32 micLevelIn,
WebRtc_Word32* micLevelOut);
int16_t* const* inMic,
size_t num_bands,
size_t samples,
int32_t micLevelIn,
int32_t* micLevelOut);
/*
* This function processes a 10/20ms frame and adjusts (normalizes) the gain
* both analog and digitally. The gain adjustments are done only during
* active periods of speech. The input speech length can be either 10ms or
* 20ms and the output is of the same length. The length of the speech
* vectors must be given in samples (80/160 when FS=8000, and 160/320 when
* FS=16000 or FS=32000). The echo parameter can be used to ensure the AGC will
* not adjust upward in the presence of echo.
* This function processes a 10 ms frame and adjusts (normalizes) the gain both
* analog and digitally. The gain adjustments are done only during active
* periods of speech. The length of the speech vectors must be given in samples
* (80 when FS=8000, and 160 when FS=16000, FS=32000 or FS=48000). The echo
* parameter can be used to ensure the AGC will not adjust upward in the
* presence of echo.
*
* This function should be called after processing the near-end microphone
* signal, in any case after any echo cancellation.
*
* Input:
* - agcInst : AGC instance
* - inNear : Near-end input speech vector (10 or 20 ms) for
* L band
* - inNear_H : Near-end input speech vector (10 or 20 ms) for
* H band
* - inNear : Near-end input speech vector for each band
* - num_bands : Number of bands in input/output vector
* - samples : Number of samples in input/output vector
* - inMicLevel : Current microphone volume level
* - echo : Set to 0 if the signal passed to add_mic is
@ -156,9 +147,8 @@ int WebRtcAgc_VirtualMic(void* agcInst,
*
* Output:
* - outMicLevel : Adjusted microphone volume level
* - out : Gain-adjusted near-end speech vector (L band)
* - out : Gain-adjusted near-end speech vector
* : May be the same vector as the input.
* - out_H : Gain-adjusted near-end speech vector (H band)
* - saturationWarning : A returned value of 1 indicates a saturation event
* has occurred and the volume cannot be further
* reduced. Otherwise will be set to 0.
@ -168,15 +158,14 @@ int WebRtcAgc_VirtualMic(void* agcInst,
* : -1 - Error
*/
int WebRtcAgc_Process(void* agcInst,
const WebRtc_Word16* inNear,
const WebRtc_Word16* inNear_H,
WebRtc_Word16 samples,
WebRtc_Word16* out,
WebRtc_Word16* out_H,
WebRtc_Word32 inMicLevel,
WebRtc_Word32* outMicLevel,
WebRtc_Word16 echo,
WebRtc_UWord8* saturationWarning);
const int16_t* const* inNear,
size_t num_bands,
size_t samples,
int16_t* const* out,
int32_t inMicLevel,
int32_t* outMicLevel,
int16_t echo,
uint8_t* saturationWarning);
/*
* This function sets the config parameters (targetLevelDbfs,
@ -192,7 +181,7 @@ int WebRtcAgc_Process(void* agcInst,
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config);
int WebRtcAgc_set_config(void* agcInst, WebRtcAgcConfig config);
/*
* This function returns the config parameters (targetLevelDbfs,
@ -208,27 +197,21 @@ int WebRtcAgc_set_config(void* agcInst, WebRtcAgc_config_t config);
* : 0 - Normal operation.
* : -1 - Error
*/
int WebRtcAgc_get_config(void* agcInst, WebRtcAgc_config_t* config);
int WebRtcAgc_get_config(void* agcInst, WebRtcAgcConfig* config);
/*
* This function creates an AGC instance, which will contain the state
* information for one (duplex) channel.
*
* Return value : AGC instance if successful
* : 0 (i.e., a NULL pointer) if unsuccessful
* This function creates and returns an AGC instance, which will contain the
* state information for one (duplex) channel.
*/
int WebRtcAgc_Create(void **agcInst);
void* WebRtcAgc_Create();
/*
* This function frees the AGC instance created at the beginning.
*
* Input:
* - agcInst : AGC instance.
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcAgc_Free(void *agcInst);
void WebRtcAgc_Free(void* agcInst);
/*
* This function initializes an AGC instance.
@ -247,27 +230,13 @@ int WebRtcAgc_Free(void *agcInst);
* -1 - Error
*/
int WebRtcAgc_Init(void *agcInst,
WebRtc_Word32 minLevel,
WebRtc_Word32 maxLevel,
WebRtc_Word16 agcMode,
WebRtc_UWord32 fs);
/*
* This function returns a text string containing the version.
*
* Input:
* - length : Length of the char array pointed to by version
* Output:
* - version : Pointer to a char array of to which the version
* : string will be copied.
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcAgc_Version(WebRtc_Word8 *versionStr, WebRtc_Word16 length);
int32_t minLevel,
int32_t maxLevel,
int16_t agcMode,
uint32_t fs);
#if defined(__cplusplus)
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_MAIN_INTERFACE_GAIN_CONTROL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_LEGACY_GAIN_CONTROL_H_
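With the band-based signatures in place, a minimal fixed-digital session looks roughly as follows. This is a sketch under assumptions: kAgcModeFixedDigital comes from the mode enum elided above, and one 10 ms mono band at 16 kHz is processed in place (the header allows out to alias the input).

#include <stdint.h>
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"

void agc_demo(int16_t frame[160]) {
  void* agc = WebRtcAgc_Create();
  WebRtcAgc_Init(agc, 0, 255, kAgcModeFixedDigital, 16000);
  WebRtcAgcConfig cfg = {3, 9, 1};  // -3 dBFS target, 9 dB gain, limiter on
  WebRtcAgc_set_config(agc, cfg);
  int16_t* bands[1] = {frame};
  int32_t mic_level_out = 0;
  uint8_t saturation_warning = 0;
  WebRtcAgc_Process(agc, (const int16_t* const*)bands, 1, 160, bands,
                    0, &mic_level_out, 0, &saturation_warning);
  WebRtcAgc_Free(agc);
}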


@ -0,0 +1,35 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/agc/utility.h"
#include <math.h>
static const double kLog10 = 2.30258509299;
static const double kLinear2DbScale = 20.0 / kLog10;
static const double kLinear2LoudnessScale = 13.4 / kLog10;
double Loudness2Db(double loudness) {
return loudness * kLinear2DbScale / kLinear2LoudnessScale;
}
double Linear2Loudness(double rms) {
if (rms == 0)
return -15;
return kLinear2LoudnessScale * log(rms);
}
double Db2Loudness(double db) {
return db * kLinear2LoudnessScale / kLinear2DbScale;
}
double Dbfs2Loudness(double dbfs) {
return Db2Loudness(90 + dbfs);
}
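Both scale constants share the 1/ln 10 factor, so each conversion is a pure change of slope: dB moves 20 units per decade of amplitude while loudness moves 13.4. A short worked example (illustrative):

#include "webrtc/modules/audio_processing/agc/utility.h"

double utility_demo() {
  double loudness = Dbfs2Loudness(-18.0);  // Db2Loudness(72) = 72 * 13.4 / 20 = 48.24
  return Loudness2Db(loudness);            // 48.24 * 20 / 13.4 = 72.0 again
}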


@ -0,0 +1,23 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_
// TODO(turajs): Add description of function.
double Loudness2Db(double loudness);
double Linear2Loudness(double rms);
double Db2Loudness(double db);
double Dbfs2Loudness(double dbfs);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AGC_UTILITY_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,173 +8,331 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "audio_buffer.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/resampler/push_sinc_resampler.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/common.h"
namespace webrtc {
namespace {
enum {
kSamplesPer8kHzChannel = 80,
kSamplesPer16kHzChannel = 160,
kSamplesPer32kHzChannel = 320
};
const size_t kSamplesPer16kHzChannel = 160;
const size_t kSamplesPer32kHzChannel = 320;
const size_t kSamplesPer48kHzChannel = 480;
void StereoToMono(const WebRtc_Word16* left, const WebRtc_Word16* right,
WebRtc_Word16* out, int samples_per_channel) {
WebRtc_Word32 data_int32 = 0;
for (int i = 0; i < samples_per_channel; i++) {
data_int32 = (left[i] + right[i]) >> 1;
if (data_int32 > 32767) {
data_int32 = 32767;
} else if (data_int32 < -32768) {
data_int32 = -32768;
}
out[i] = static_cast<WebRtc_Word16>(data_int32);
int KeyboardChannelIndex(const StreamConfig& stream_config) {
if (!stream_config.has_keyboard()) {
assert(false);
return -1;
}
return stream_config.num_channels();
}
size_t NumBandsFromSamplesPerChannel(size_t num_frames) {
size_t num_bands = 1;
if (num_frames == kSamplesPer32kHzChannel ||
num_frames == kSamplesPer48kHzChannel) {
num_bands = rtc::CheckedDivExact(num_frames, kSamplesPer16kHzChannel);
}
return num_bands;
}
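// Clarifying note (added): a 10 ms frame is 160 samples at 16 kHz, 320 at
// 32 kHz and 480 at 48 kHz, so this yields 1, 2 and 3 bands of 160 samples
// respectively; 8 kHz input (80 samples) likewise stays in a single band.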
} // namespace
struct AudioChannel {
AudioChannel() {
memset(data, 0, sizeof(data));
}
WebRtc_Word16 data[kSamplesPer32kHzChannel];
};
struct SplitAudioChannel {
SplitAudioChannel() {
memset(low_pass_data, 0, sizeof(low_pass_data));
memset(high_pass_data, 0, sizeof(high_pass_data));
memset(analysis_filter_state1, 0, sizeof(analysis_filter_state1));
memset(analysis_filter_state2, 0, sizeof(analysis_filter_state2));
memset(synthesis_filter_state1, 0, sizeof(synthesis_filter_state1));
memset(synthesis_filter_state2, 0, sizeof(synthesis_filter_state2));
}
WebRtc_Word16 low_pass_data[kSamplesPer16kHzChannel];
WebRtc_Word16 high_pass_data[kSamplesPer16kHzChannel];
WebRtc_Word32 analysis_filter_state1[6];
WebRtc_Word32 analysis_filter_state2[6];
WebRtc_Word32 synthesis_filter_state1[6];
WebRtc_Word32 synthesis_filter_state2[6];
};
// TODO(andrew): check range of input parameters?
AudioBuffer::AudioBuffer(int max_num_channels,
int samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
AudioBuffer::AudioBuffer(size_t input_num_frames,
int num_input_channels,
size_t process_num_frames,
int num_process_channels,
size_t output_num_frames)
: input_num_frames_(input_num_frames),
num_input_channels_(num_input_channels),
proc_num_frames_(process_num_frames),
num_proc_channels_(num_process_channels),
output_num_frames_(output_num_frames),
num_channels_(num_process_channels),
num_bands_(NumBandsFromSamplesPerChannel(proc_num_frames_)),
num_split_frames_(rtc::CheckedDivExact(proc_num_frames_, num_bands_)),
mixed_low_pass_valid_(false),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
if (max_num_channels_ > 1) {
channels_ = new AudioChannel[max_num_channels_];
mixed_low_pass_channels_ = new AudioChannel[max_num_channels_];
}
low_pass_reference_channels_ = new AudioChannel[max_num_channels_];
keyboard_data_(NULL),
data_(new IFChannelBuffer(proc_num_frames_, num_proc_channels_)) {
assert(input_num_frames_ > 0);
assert(proc_num_frames_ > 0);
assert(output_num_frames_ > 0);
assert(num_input_channels_ > 0);
assert(num_proc_channels_ > 0 && num_proc_channels_ <= num_input_channels_);
if (samples_per_channel_ == kSamplesPer32kHzChannel) {
split_channels_ = new SplitAudioChannel[max_num_channels_];
samples_per_split_channel_ = kSamplesPer16kHzChannel;
if (input_num_frames_ != proc_num_frames_ ||
output_num_frames_ != proc_num_frames_) {
// Create an intermediate buffer for resampling.
process_buffer_.reset(new ChannelBuffer<float>(proc_num_frames_,
num_proc_channels_));
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_.push_back(
new PushSincResampler(input_num_frames_,
proc_num_frames_));
}
}
if (output_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
output_resamplers_.push_back(
new PushSincResampler(proc_num_frames_,
output_num_frames_));
}
}
}
if (num_bands_ > 1) {
split_data_.reset(new IFChannelBuffer(proc_num_frames_,
num_proc_channels_,
num_bands_));
splitting_filter_.reset(new SplittingFilter(num_proc_channels_,
num_bands_,
proc_num_frames_));
}
}
AudioBuffer::~AudioBuffer() {
if (channels_ != NULL) {
delete [] channels_;
AudioBuffer::~AudioBuffer() {}
void AudioBuffer::CopyFrom(const float* const* data,
const StreamConfig& stream_config) {
assert(stream_config.num_frames() == input_num_frames_);
assert(stream_config.num_channels() == num_input_channels_);
InitForNewData();
// Initialized lazily because there's a different condition in
// DeinterleaveFrom.
const bool need_to_downmix =
num_input_channels_ > 1 && num_proc_channels_ == 1;
if (need_to_downmix && !input_buffer_) {
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
if (mixed_low_pass_channels_ != NULL) {
delete [] mixed_low_pass_channels_;
if (stream_config.has_keyboard()) {
keyboard_data_ = data[KeyboardChannelIndex(stream_config)];
}
if (low_pass_reference_channels_ != NULL) {
delete [] low_pass_reference_channels_;
// Downmix.
const float* const* data_ptr = data;
if (need_to_downmix) {
DownmixToMono<float, float>(data, input_num_frames_, num_input_channels_,
input_buffer_->fbuf()->channels()[0]);
data_ptr = input_buffer_->fbuf_const()->channels();
}
if (split_channels_ != NULL) {
delete [] split_channels_;
// Resample.
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(data_ptr[i],
input_num_frames_,
process_buffer_->channels()[i],
proc_num_frames_);
}
data_ptr = process_buffer_->channels();
}
// Convert to the S16 range.
for (int i = 0; i < num_proc_channels_; ++i) {
FloatToFloatS16(data_ptr[i],
proc_num_frames_,
data_->fbuf()->channels()[i]);
}
}
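// Clarifying note (added): CopyFrom thus runs (optional) downmix, then
// (optional) resampling, then FloatToFloatS16, leaving |data_| in the
// [-32768, 32767] float range that the fixed-point components expect.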
WebRtc_Word16* AudioBuffer::data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (data_ != NULL) {
return data_;
void AudioBuffer::CopyTo(const StreamConfig& stream_config,
float* const* data) {
assert(stream_config.num_frames() == output_num_frames_);
assert(stream_config.num_channels() == num_channels_);
// Convert to the float range.
float* const* data_ptr = data;
if (output_num_frames_ != proc_num_frames_) {
// Convert to an intermediate buffer for subsequent resampling.
data_ptr = process_buffer_->channels();
}
for (int i = 0; i < num_channels_; ++i) {
FloatS16ToFloat(data_->fbuf()->channels()[i],
proc_num_frames_,
data_ptr[i]);
}
return channels_[channel].data;
// Resample.
if (output_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_channels_; ++i) {
output_resamplers_[i]->Resample(data_ptr[i],
proc_num_frames_,
data[i],
output_num_frames_);
}
}
}
WebRtc_Word16* AudioBuffer::low_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return data(channel);
void AudioBuffer::InitForNewData() {
keyboard_data_ = NULL;
mixed_low_pass_valid_ = false;
reference_copied_ = false;
activity_ = AudioFrame::kVadUnknown;
num_channels_ = num_proc_channels_;
}
const int16_t* const* AudioBuffer::channels_const() const {
return data_->ibuf_const()->channels();
}
int16_t* const* AudioBuffer::channels() {
mixed_low_pass_valid_ = false;
return data_->ibuf()->channels();
}
const int16_t* const* AudioBuffer::split_bands_const(int channel) const {
return split_data_.get() ?
split_data_->ibuf_const()->bands(channel) :
data_->ibuf_const()->bands(channel);
}
int16_t* const* AudioBuffer::split_bands(int channel) {
mixed_low_pass_valid_ = false;
return split_data_.get() ?
split_data_->ibuf()->bands(channel) :
data_->ibuf()->bands(channel);
}
const int16_t* const* AudioBuffer::split_channels_const(Band band) const {
if (split_data_.get()) {
return split_data_->ibuf_const()->channels(band);
} else {
return band == kBand0To8kHz ? data_->ibuf_const()->channels() : nullptr;
}
}
int16_t* const* AudioBuffer::split_channels(Band band) {
mixed_low_pass_valid_ = false;
if (split_data_.get()) {
return split_data_->ibuf()->channels(band);
} else {
return band == kBand0To8kHz ? data_->ibuf()->channels() : nullptr;
}
}
ChannelBuffer<int16_t>* AudioBuffer::data() {
mixed_low_pass_valid_ = false;
return data_->ibuf();
}
const ChannelBuffer<int16_t>* AudioBuffer::data() const {
return data_->ibuf_const();
}
ChannelBuffer<int16_t>* AudioBuffer::split_data() {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->ibuf() : data_->ibuf();
}
const ChannelBuffer<int16_t>* AudioBuffer::split_data() const {
return split_data_.get() ? split_data_->ibuf_const() : data_->ibuf_const();
}
const float* const* AudioBuffer::channels_const_f() const {
return data_->fbuf_const()->channels();
}
float* const* AudioBuffer::channels_f() {
mixed_low_pass_valid_ = false;
return data_->fbuf()->channels();
}
const float* const* AudioBuffer::split_bands_const_f(int channel) const {
return split_data_.get() ?
split_data_->fbuf_const()->bands(channel) :
data_->fbuf_const()->bands(channel);
}
float* const* AudioBuffer::split_bands_f(int channel) {
mixed_low_pass_valid_ = false;
return split_data_.get() ?
split_data_->fbuf()->bands(channel) :
data_->fbuf()->bands(channel);
}
const float* const* AudioBuffer::split_channels_const_f(Band band) const {
if (split_data_.get()) {
return split_data_->fbuf_const()->channels(band);
} else {
return band == kBand0To8kHz ? data_->fbuf_const()->channels() : nullptr;
}
}
float* const* AudioBuffer::split_channels_f(Band band) {
mixed_low_pass_valid_ = false;
if (split_data_.get()) {
return split_data_->fbuf()->channels(band);
} else {
return band == kBand0To8kHz ? data_->fbuf()->channels() : nullptr;
}
}
ChannelBuffer<float>* AudioBuffer::data_f() {
mixed_low_pass_valid_ = false;
return data_->fbuf();
}
const ChannelBuffer<float>* AudioBuffer::data_f() const {
return data_->fbuf_const();
}
ChannelBuffer<float>* AudioBuffer::split_data_f() {
mixed_low_pass_valid_ = false;
return split_data_.get() ? split_data_->fbuf() : data_->fbuf();
}
const ChannelBuffer<float>* AudioBuffer::split_data_f() const {
return split_data_.get() ? split_data_->fbuf_const() : data_->fbuf_const();
}
const int16_t* AudioBuffer::mixed_low_pass_data() {
if (num_proc_channels_ == 1) {
return split_bands_const(0)[kBand0To8kHz];
}
return split_channels_[channel].low_pass_data;
}
if (!mixed_low_pass_valid_) {
if (!mixed_low_pass_channels_.get()) {
mixed_low_pass_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_, 1));
}
WebRtc_Word16* AudioBuffer::high_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return NULL;
DownmixToMono<int16_t, int32_t>(split_channels_const(kBand0To8kHz),
num_split_frames_, num_channels_,
mixed_low_pass_channels_->channels()[0]);
mixed_low_pass_valid_ = true;
}
return split_channels_[channel].high_pass_data;
return mixed_low_pass_channels_->channels()[0];
}
WebRtc_Word16* AudioBuffer::mixed_low_pass_data(int channel) const {
assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
return mixed_low_pass_channels_[channel].data;
}
WebRtc_Word16* AudioBuffer::low_pass_reference(int channel) const {
assert(channel >= 0 && channel < num_channels_);
const int16_t* AudioBuffer::low_pass_reference(int channel) const {
if (!reference_copied_) {
return NULL;
}
return low_pass_reference_channels_[channel].data;
return low_pass_reference_channels_->channels()[channel];
}
WebRtc_Word32* AudioBuffer::analysis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state1;
}
WebRtc_Word32* AudioBuffer::analysis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state2;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state1;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state2;
const float* AudioBuffer::keyboard_data() const {
return keyboard_data_;
}
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
activity_ = activity;
}
AudioFrame::VADActivity AudioBuffer::activity() {
AudioFrame::VADActivity AudioBuffer::activity() const {
return activity_;
}
@ -182,107 +340,123 @@ int AudioBuffer::num_channels() const {
return num_channels_;
}
int AudioBuffer::samples_per_channel() const {
return samples_per_channel_;
void AudioBuffer::set_num_channels(int num_channels) {
num_channels_ = num_channels;
}
int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_;
size_t AudioBuffer::num_frames() const {
return proc_num_frames_;
}
// TODO(andrew): Do deinterleaving and mixing in one step?
size_t AudioBuffer::num_frames_per_band() const {
return num_split_frames_;
}
size_t AudioBuffer::num_keyboard_frames() const {
// We don't resample the keyboard channel.
return input_num_frames_;
}
size_t AudioBuffer::num_bands() const {
return num_bands_;
}
// The resampler is only for supporting 48kHz to 16kHz in the reverse stream.
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(frame->_audioChannel <= max_num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
assert(frame->num_channels_ == num_input_channels_);
assert(frame->samples_per_channel_ == input_num_frames_);
InitForNewData();
// Initialized lazily because there's a different condition in CopyFrom.
if ((input_num_frames_ != proc_num_frames_) && !input_buffer_) {
input_buffer_.reset(
new IFChannelBuffer(input_num_frames_, num_proc_channels_));
}
activity_ = frame->vad_activity_;
num_channels_ = frame->_audioChannel;
num_mixed_channels_ = 0;
num_mixed_low_pass_channels_ = 0;
reference_copied_ = false;
activity_ = frame->_vadActivity;
int16_t* const* deinterleaved;
if (input_num_frames_ == proc_num_frames_) {
deinterleaved = data_->ibuf()->channels();
} else {
deinterleaved = input_buffer_->ibuf()->channels();
}
if (num_proc_channels_ == 1) {
// Downmix and deinterleave simultaneously.
DownmixInterleavedToMono(frame->data_, input_num_frames_,
num_input_channels_, deinterleaved[0]);
} else {
assert(num_proc_channels_ == num_input_channels_);
Deinterleave(frame->data_,
input_num_frames_,
num_proc_channels_,
deinterleaved);
}
if (num_channels_ == 1) {
// We can get away with a pointer assignment in this case.
data_ = frame->_payloadData;
// Resample.
if (input_num_frames_ != proc_num_frames_) {
for (int i = 0; i < num_proc_channels_; ++i) {
input_resamplers_[i]->Resample(input_buffer_->fbuf_const()->channels()[i],
input_num_frames_,
data_->fbuf()->channels()[i],
proc_num_frames_);
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame, bool data_changed) {
frame->vad_activity_ = activity_;
if (!data_changed) {
return;
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
deinterleaved[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels_;
assert(frame->num_channels_ == num_channels_ || num_channels_ == 1);
assert(frame->samples_per_channel_ == output_num_frames_);
// Resample if necessary.
IFChannelBuffer* data_ptr = data_.get();
if (proc_num_frames_ != output_num_frames_) {
if (!output_buffer_) {
output_buffer_.reset(
new IFChannelBuffer(output_num_frames_, num_channels_));
}
}
}
void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
assert(frame->_audioChannel == num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
frame->_vadActivity = activity_;
if (num_channels_ == 1) {
if (num_mixed_channels_ == 1) {
memcpy(frame->_payloadData,
channels_[0].data,
sizeof(WebRtc_Word16) * samples_per_channel_);
} else {
// These should point to the same buffer in this case.
assert(data_ == frame->_payloadData);
for (int i = 0; i < num_channels_; ++i) {
output_resamplers_[i]->Resample(
data_->fbuf()->channels()[i], proc_num_frames_,
output_buffer_->fbuf()->channels()[i], output_num_frames_);
}
return;
data_ptr = output_buffer_.get();
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
interleaved_idx += num_channels_;
}
if (frame->num_channels_ == num_channels_) {
Interleave(data_ptr->ibuf()->channels(), proc_num_frames_, num_channels_,
frame->data_);
} else {
UpmixMonoToInterleaved(data_ptr->ibuf()->channels()[0], proc_num_frames_,
frame->num_channels_, frame->data_);
}
}
// TODO(andrew): would be good to support the no-mix case with pointer
// assignment.
// TODO(andrew): handle mixing to multiple channels?
void AudioBuffer::Mix(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
StereoToMono(channels_[0].data,
channels_[1].data,
channels_[0].data,
samples_per_channel_);
num_channels_ = num_mixed_channels;
num_mixed_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
StereoToMono(low_pass_split_data(0),
low_pass_split_data(1),
mixed_low_pass_channels_[0].data,
samples_per_split_channel_);
num_mixed_low_pass_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyLowPassToReference() {
reference_copied_ = true;
for (int i = 0; i < num_channels_; i++) {
memcpy(low_pass_reference_channels_[i].data,
low_pass_split_data(i),
sizeof(WebRtc_Word16) * samples_per_split_channel_);
if (!low_pass_reference_channels_.get() ||
low_pass_reference_channels_->num_channels() != num_channels_) {
low_pass_reference_channels_.reset(
new ChannelBuffer<int16_t>(num_split_frames_,
num_proc_channels_));
}
for (int i = 0; i < num_proc_channels_; i++) {
memcpy(low_pass_reference_channels_->channels()[i],
split_bands_const(i)[kBand0To8kHz],
low_pass_reference_channels_->num_frames_per_band() *
sizeof(split_bands_const(i)[kBand0To8kHz][0]));
}
}
void AudioBuffer::SplitIntoFrequencyBands() {
splitting_filter_->Analysis(data_.get(), split_data_.get());
}
void AudioBuffer::MergeFrequencyBands() {
splitting_filter_->Synthesis(split_data_.get(), data_.get());
}
} // namespace webrtc


@ -8,64 +8,156 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
#include "module_common_types.h"
#include "typedefs.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/splitting_filter.h"
#include "webrtc/modules/interface/module_common_types.h"
#include "webrtc/system_wrappers/interface/scoped_vector.h"
#include "webrtc/typedefs.h"
namespace webrtc {
struct AudioChannel;
struct SplitAudioChannel;
class PushSincResampler;
class IFChannelBuffer;
enum Band {
kBand0To8kHz = 0,
kBand8To16kHz = 1,
kBand16To24kHz = 2
};
class AudioBuffer {
public:
AudioBuffer(int max_num_channels, int samples_per_channel);
// TODO(ajm): Switch to take ChannelLayouts.
AudioBuffer(size_t input_num_frames,
int num_input_channels,
size_t process_num_frames,
int num_process_channels,
size_t output_num_frames);
virtual ~AudioBuffer();
int num_channels() const;
int samples_per_channel() const;
int samples_per_split_channel() const;
void set_num_channels(int num_channels);
size_t num_frames() const;
size_t num_frames_per_band() const;
size_t num_keyboard_frames() const;
size_t num_bands() const;
WebRtc_Word16* data(int channel) const;
WebRtc_Word16* low_pass_split_data(int channel) const;
WebRtc_Word16* high_pass_split_data(int channel) const;
WebRtc_Word16* mixed_low_pass_data(int channel) const;
WebRtc_Word16* low_pass_reference(int channel) const;
// Returns a pointer array to the full-band channels.
// Usage:
// channels()[channel][sample].
// Where:
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |proc_num_frames_|
int16_t* const* channels();
const int16_t* const* channels_const() const;
float* const* channels_f();
const float* const* channels_const_f() const;
WebRtc_Word32* analysis_filter_state1(int channel) const;
WebRtc_Word32* analysis_filter_state2(int channel) const;
WebRtc_Word32* synthesis_filter_state1(int channel) const;
WebRtc_Word32* synthesis_filter_state2(int channel) const;
// Returns a pointer array to the bands for a specific channel.
// Usage:
// split_bands(channel)[band][sample].
// Where:
// 0 <= channel < |num_proc_channels_|
// 0 <= band < |num_bands_|
// 0 <= sample < |num_split_frames_|
int16_t* const* split_bands(int channel);
const int16_t* const* split_bands_const(int channel) const;
float* const* split_bands_f(int channel);
const float* const* split_bands_const_f(int channel) const;
// Returns a pointer array to the channels for a specific band.
// Usage:
// split_channels(band)[channel][sample].
// Where:
// 0 <= band < |num_bands_|
// 0 <= channel < |num_proc_channels_|
// 0 <= sample < |num_split_frames_|
int16_t* const* split_channels(Band band);
const int16_t* const* split_channels_const(Band band) const;
float* const* split_channels_f(Band band);
const float* const* split_channels_const_f(Band band) const;
// Returns a pointer to the ChannelBuffer that encapsulates the full-band
// data.
ChannelBuffer<int16_t>* data();
const ChannelBuffer<int16_t>* data() const;
ChannelBuffer<float>* data_f();
const ChannelBuffer<float>* data_f() const;
// Returns a pointer to the ChannelBuffer that encapsulates the split data.
ChannelBuffer<int16_t>* split_data();
const ChannelBuffer<int16_t>* split_data() const;
ChannelBuffer<float>* split_data_f();
const ChannelBuffer<float>* split_data_f() const;
// Returns a pointer to the low-pass data downmixed to mono. If this data
// isn't already available it re-calculates it.
const int16_t* mixed_low_pass_data();
const int16_t* low_pass_reference(int channel) const;
const float* keyboard_data() const;
void set_activity(AudioFrame::VADActivity activity);
AudioFrame::VADActivity activity();
AudioFrame::VADActivity activity() const;
// Use for int16 interleaved data.
void DeinterleaveFrom(AudioFrame* audioFrame);
void InterleaveTo(AudioFrame* audioFrame) const;
void Mix(int num_mixed_channels);
void CopyAndMixLowPass(int num_mixed_channels);
// If |data_changed| is false, only the non-audio data members will be copied
// to |frame|.
void InterleaveTo(AudioFrame* frame, bool data_changed);
// Use for float deinterleaved data.
void CopyFrom(const float* const* data, const StreamConfig& stream_config);
void CopyTo(const StreamConfig& stream_config, float* const* data);
void CopyLowPassToReference();
// Splits the signal into different bands.
void SplitIntoFrequencyBands();
// Recombine the different bands into one signal.
void MergeFrequencyBands();
private:
const int max_num_channels_;
// Called from DeinterleaveFrom() and CopyFrom().
void InitForNewData();
// The audio is passed into DeinterleaveFrom() or CopyFrom() with input
// format (samples per channel and number of channels).
const size_t input_num_frames_;
const int num_input_channels_;
// The audio is stored by DeinterleaveFrom() or CopyFrom() with processing
// format.
const size_t proc_num_frames_;
const int num_proc_channels_;
// The audio is returned by InterleaveTo() and CopyTo() with output samples
// per channel and the current number of channels, which can be changed at
// any time using set_num_channels().
const size_t output_num_frames_;
int num_channels_;
int num_mixed_channels_;
int num_mixed_low_pass_channels_;
const int samples_per_channel_;
int samples_per_split_channel_;
size_t num_bands_;
size_t num_split_frames_;
bool mixed_low_pass_valid_;
bool reference_copied_;
AudioFrame::VADActivity activity_;
WebRtc_Word16* data_;
// TODO(andrew): use vectors here.
AudioChannel* channels_;
SplitAudioChannel* split_channels_;
// TODO(andrew): improve this, we don't need the full 32 kHz space here.
AudioChannel* mixed_low_pass_channels_;
AudioChannel* low_pass_reference_channels_;
const float* keyboard_data_;
rtc::scoped_ptr<IFChannelBuffer> data_;
rtc::scoped_ptr<IFChannelBuffer> split_data_;
rtc::scoped_ptr<SplittingFilter> splitting_filter_;
rtc::scoped_ptr<ChannelBuffer<int16_t> > mixed_low_pass_channels_;
rtc::scoped_ptr<ChannelBuffer<int16_t> > low_pass_reference_channels_;
rtc::scoped_ptr<IFChannelBuffer> input_buffer_;
rtc::scoped_ptr<IFChannelBuffer> output_buffer_;
rtc::scoped_ptr<ChannelBuffer<float> > process_buffer_;
ScopedVector<PushSincResampler> input_resamplers_;
ScopedVector<PushSincResampler> output_resamplers_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_BUFFER_H_
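A rough sketch of how these accessors compose for a mono 48 kHz stream (hypothetical values; it assumes StreamConfig's (rate, channels) constructor from include/audio_processing.h, and the real call sites live in audio_processing_impl.cc, whose diff is suppressed below):

#include "webrtc/modules/audio_processing/audio_buffer.h"

void audio_buffer_demo() {
  webrtc::AudioBuffer buf(480, 1, 480, 1, 480);  // 10 ms at 48 kHz, mono, same rate out
  float frame[480] = {0.f};
  const float* in[1] = {frame};
  webrtc::StreamConfig cfg(48000, 1);
  buf.CopyFrom(in, cfg);                // deinterleaved float input
  buf.SplitIntoFrequencyBands();        // three bands of 160 samples each
  int16_t* low = buf.split_bands(0)[webrtc::kBand0To8kHz];
  (void)low;                            // per-band processing would go here
  buf.MergeFrequencyBands();
  float* out[1] = {frame};
  buf.CopyTo(cfg, out);
}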

File diff suppressed because it is too large

@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,81 +8,140 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
#include <list>
#include <string>
#include <vector>
#include "audio_processing.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/base/thread_annotations.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
namespace webrtc {
namespace audioproc {
class Event;
} // audioproc
class AgcManagerDirect;
class AudioBuffer;
class AudioConverter;
template<typename T>
class Beamformer;
class CriticalSectionWrapper;
class EchoCancellationImpl;
class EchoControlMobileImpl;
class FileWrapper;
class GainControlImpl;
class GainControlForNewAgc;
class HighPassFilterImpl;
class LevelEstimatorImpl;
class NoiseSuppressionImpl;
class ProcessingComponent;
class TransientSuppressor;
class VoiceDetectionImpl;
class IntelligibilityEnhancer;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
namespace audioproc {
class Event;
} // namespace audioproc
#endif
class AudioProcessingImpl : public AudioProcessing {
public:
enum {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000
};
explicit AudioProcessingImpl(const Config& config);
explicit AudioProcessingImpl(int id);
// AudioProcessingImpl takes ownership of beamformer.
AudioProcessingImpl(const Config& config, Beamformer<float>* beamformer);
virtual ~AudioProcessingImpl();
CriticalSectionWrapper* crit() const;
int split_sample_rate_hz() const;
bool was_stream_delay_set() const;
// AudioProcessing methods.
virtual int Initialize();
virtual int InitializeLocked();
virtual int set_sample_rate_hz(int rate);
virtual int sample_rate_hz() const;
virtual int set_num_channels(int input_channels, int output_channels);
virtual int num_input_channels() const;
virtual int num_output_channels() const;
virtual int set_num_reverse_channels(int channels);
virtual int num_reverse_channels() const;
virtual int ProcessStream(AudioFrame* frame);
virtual int AnalyzeReverseStream(AudioFrame* frame);
virtual int set_stream_delay_ms(int delay);
virtual int stream_delay_ms() const;
virtual int StartDebugRecording(const char filename[kMaxFilenameSize]);
virtual int StopDebugRecording();
virtual EchoCancellation* echo_cancellation() const;
virtual EchoControlMobile* echo_control_mobile() const;
virtual GainControl* gain_control() const;
virtual HighPassFilter* high_pass_filter() const;
virtual LevelEstimator* level_estimator() const;
virtual NoiseSuppression* noise_suppression() const;
virtual VoiceDetection* voice_detection() const;
int Initialize() override;
int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout) override;
int Initialize(const ProcessingConfig& processing_config) override;
void SetExtraOptions(const Config& config) override;
int proc_sample_rate_hz() const override;
int proc_split_sample_rate_hz() const override;
int num_input_channels() const override;
int num_output_channels() const override;
int num_reverse_channels() const override;
void set_output_will_be_muted(bool muted) override;
int ProcessStream(AudioFrame* frame) override;
int ProcessStream(const float* const* src,
size_t samples_per_channel,
int input_sample_rate_hz,
ChannelLayout input_layout,
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest) override;
int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) override;
int AnalyzeReverseStream(AudioFrame* frame) override;
int ProcessReverseStream(AudioFrame* frame) override;
int AnalyzeReverseStream(const float* const* data,
size_t samples_per_channel,
int sample_rate_hz,
ChannelLayout layout) override;
int ProcessReverseStream(const float* const* src,
const StreamConfig& reverse_input_config,
const StreamConfig& reverse_output_config,
float* const* dest) override;
int set_stream_delay_ms(int delay) override;
int stream_delay_ms() const override;
bool was_stream_delay_set() const override;
void set_delay_offset_ms(int offset) override;
int delay_offset_ms() const override;
void set_stream_key_pressed(bool key_pressed) override;
int StartDebugRecording(const char filename[kMaxFilenameSize]) override;
int StartDebugRecording(FILE* handle) override;
int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) override;
int StopDebugRecording() override;
void UpdateHistogramsOnCallEnd() override;
EchoCancellation* echo_cancellation() const override;
EchoControlMobile* echo_control_mobile() const override;
GainControl* gain_control() const override;
HighPassFilter* high_pass_filter() const override;
LevelEstimator* level_estimator() const override;
NoiseSuppression* noise_suppression() const override;
VoiceDetection* voice_detection() const override;
// Module methods.
virtual WebRtc_Word32 Version(WebRtc_Word8* version,
WebRtc_UWord32& remainingBufferInBytes,
WebRtc_UWord32& position) const;
virtual WebRtc_Word32 ChangeUniqueId(const WebRtc_Word32 id);
protected:
// Overridden in a mock.
virtual int InitializeLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
private:
int WriteMessageToDebugFile();
int WriteInitMessage();
int InitializeLocked(const ProcessingConfig& config)
EXCLUSIVE_LOCKS_REQUIRED(crit_);
int MaybeInitializeLocked(const ProcessingConfig& config)
EXCLUSIVE_LOCKS_REQUIRED(crit_);
// TODO(ekm): Remove once all clients updated to new interface.
int AnalyzeReverseStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config);
int ProcessStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
int ProcessReverseStreamLocked() EXCLUSIVE_LOCKS_REQUIRED(crit_);
int id_;
bool is_data_processed() const;
bool output_copy_needed(bool is_data_processed) const;
bool synthesis_needed(bool is_data_processed) const;
bool analysis_needed(bool is_data_processed) const;
bool is_rev_processed() const;
bool rev_conversion_needed() const;
void InitializeExperimentalAgc() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeTransient() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeBeamformer() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void InitializeIntelligibility() EXCLUSIVE_LOCKS_REQUIRED(crit_);
void MaybeUpdateHistograms() EXCLUSIVE_LOCKS_REQUIRED(crit_);
EchoCancellationImpl* echo_cancellation_;
EchoControlMobileImpl* echo_control_mobile_;
@ -91,27 +150,69 @@ class AudioProcessingImpl : public AudioProcessing {
LevelEstimatorImpl* level_estimator_;
NoiseSuppressionImpl* noise_suppression_;
VoiceDetectionImpl* voice_detection_;
rtc::scoped_ptr<GainControlForNewAgc> gain_control_for_new_agc_;
std::list<ProcessingComponent*> component_list_;
FileWrapper* debug_file_;
audioproc::Event* event_msg_; // Protobuf message.
std::string event_str_; // Memory for protobuf serialization.
CriticalSectionWrapper* crit_;
rtc::scoped_ptr<AudioBuffer> render_audio_;
rtc::scoped_ptr<AudioBuffer> capture_audio_;
rtc::scoped_ptr<AudioConverter> render_converter_;
#ifdef WEBRTC_AUDIOPROC_DEBUG_DUMP
// TODO(andrew): make this more graceful. Ideally we would split this stuff
// out into a separate class with an "enabled" and "disabled" implementation.
int WriteMessageToDebugFile();
int WriteInitMessage();
AudioBuffer* render_audio_;
AudioBuffer* capture_audio_;
// Writes Config message. If not |forced|, only writes the current config if
// it is different from the last saved one; if |forced|, writes the config
// regardless of the last saved one.
int WriteConfigMessage(bool forced);
rtc::scoped_ptr<FileWrapper> debug_file_;
rtc::scoped_ptr<audioproc::Event> event_msg_; // Protobuf message.
std::string event_str_; // Memory for protobuf serialization.
// Serialized string of last saved APM configuration.
std::string last_serialized_config_;
#endif
// Format of processing streams at input/output call sites.
ProcessingConfig api_format_;
// Only the rate and samples fields of fwd_proc_format_ are used because the
// forward processing number of channels is mutable and is tracked by
// capture_audio_.
StreamConfig fwd_proc_format_;
StreamConfig rev_proc_format_;
int split_rate_;
int sample_rate_hz_;
int split_sample_rate_hz_;
int samples_per_channel_;
int stream_delay_ms_;
int delay_offset_ms_;
bool was_stream_delay_set_;
int last_stream_delay_ms_;
int last_aec_system_delay_ms_;
int stream_delay_jumps_;
int aec_system_delay_jumps_;
int num_reverse_channels_;
int num_input_channels_;
int num_output_channels_;
bool output_will_be_muted_ GUARDED_BY(crit_);
bool key_pressed_;
// Only set through the constructor's Config parameter.
const bool use_new_agc_;
rtc::scoped_ptr<AgcManagerDirect> agc_manager_ GUARDED_BY(crit_);
int agc_startup_min_volume_;
bool transient_suppressor_enabled_;
rtc::scoped_ptr<TransientSuppressor> transient_suppressor_;
const bool beamformer_enabled_;
rtc::scoped_ptr<Beamformer<float>> beamformer_;
const std::vector<Point> array_geometry_;
bool intelligibility_enabled_;
rtc::scoped_ptr<IntelligibilityEnhancer> intelligibility_enhancer_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_PROCESSING_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AUDIO_PROCESSING_IMPL_H_
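To make the call flow above concrete, here is a hedged sketch of driving the float ProcessStream() overload through the public AudioProcessing interface. The StreamConfig(rate, channels) constructor is assumed from this era of the API and is not shown in this diff.

#include "webrtc/modules/audio_processing/include/audio_processing.h"

// Sketch: process one 10 ms chunk of deinterleaved float capture audio,
// downmixing stereo input to mono output.
int ProcessCaptureChunk(webrtc::AudioProcessing* apm,
                        const float* const* near_end,
                        float* const* out) {
  webrtc::StreamConfig input_config(48000, 2);   // assumed ctor
  webrtc::StreamConfig output_config(48000, 1);  // assumed ctor
  apm->set_stream_delay_ms(60);  // render-to-capture delay estimate
  return apm->ProcessStream(near_end, input_config, output_config, out);
}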

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
#include <cmath>
namespace webrtc {
// Coordinates in meters.
template<typename T>
struct CartesianPoint {
CartesianPoint(T x, T y, T z) {
c[0] = x;
c[1] = y;
c[2] = z;
}
T x() const { return c[0]; }
T y() const { return c[1]; }
T z() const { return c[2]; }
T c[3];
};
using Point = CartesianPoint<float>;
template<typename T>
float Distance(CartesianPoint<T> a, CartesianPoint<T> b) {
return std::sqrt((a.x() - b.x()) * (a.x() - b.x()) +
(a.y() - b.y()) * (a.y() - b.y()) +
(a.z() - b.z()) * (a.z() - b.z()));
}
template <typename T>
struct SphericalPoint {
SphericalPoint(T azimuth, T elevation, T radius) {
s[0] = azimuth;
s[1] = elevation;
s[2] = radius;
}
T azimuth() const { return s[0]; }
T elevation() const { return s[1]; }
T distance() const { return s[2]; }
T s[3];
};
using SphericalPointf = SphericalPoint<float>;
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_ARRAY_UTIL_H_
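A minimal usage example of the geometry helpers above; everything here is grounded in the header itself.

#include "webrtc/modules/audio_processing/beamformer/array_util.h"

void ArrayGeometryExample() {
  // Two-element linear array along the x axis, 5 cm apart.
  webrtc::Point mic0(0.f, 0.f, 0.f);
  webrtc::Point mic1(0.05f, 0.f, 0.f);
  const float spacing = webrtc::Distance(mic0, mic1);  // == 0.05f
  static_cast<void>(spacing);  // silence unused-variable warnings
}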

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
namespace webrtc {
template<typename T>
class Beamformer {
public:
virtual ~Beamformer() {}
// Process one time-domain chunk of audio. The audio is expected to be split
// into frequency bands inside the ChannelBuffer. The number of frames and
// channels must correspond to the constructor parameters. The same
// ChannelBuffer can be passed in as |input| and |output|.
virtual void ProcessChunk(const ChannelBuffer<T>& input,
ChannelBuffer<T>* output) = 0;
// Sample rate corresponds to the lower band.
// Needs to be called before the Beamformer can be used.
virtual void Initialize(int chunk_size_ms, int sample_rate_hz) = 0;
// Indicates whether a given point is inside of the beam.
virtual bool IsInBeam(const SphericalPointf& spherical_point) { return true; }
// Returns true if the current data contains the target signal.
// Which signals are considered "targets" is implementation dependent.
virtual bool is_target_present() = 0;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_BEAMFORMER_H_
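The expected call order against this abstract interface, as a sketch; the concrete type behind |bf| would be e.g. the NonlinearBeamformer defined later in this commit.

#include "webrtc/modules/audio_processing/beamformer/beamformer.h"

// Sketch: Initialize() once, then feed band-split chunks and gate further
// processing on is_target_present().
void RunBeamformer(webrtc::Beamformer<float>* bf,
                   const webrtc::ChannelBuffer<float>& split_input,
                   webrtc::ChannelBuffer<float>* split_output) {
  bf->Initialize(10, 16000);  // 10 ms chunks; rate of the lower band
  bf->ProcessChunk(split_input, split_output);
  if (bf->is_target_present()) {
    // Treat this chunk as target speech rather than interference.
  }
}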

View File

@ -0,0 +1,97 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
#include <complex>
#include "webrtc/base/checks.h"
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"
namespace webrtc {
using std::complex;
// An extension of Matrix for operations that only work on a complex type.
template <typename T>
class ComplexMatrix : public Matrix<complex<T> > {
public:
ComplexMatrix() : Matrix<complex<T> >() {}
ComplexMatrix(int num_rows, int num_columns)
: Matrix<complex<T> >(num_rows, num_columns) {}
ComplexMatrix(const complex<T>* data, int num_rows, int num_columns)
: Matrix<complex<T> >(data, num_rows, num_columns) {}
// Complex Matrix operations.
ComplexMatrix& PointwiseConjugate() {
complex<T>* const data = this->data();
size_t size = this->num_rows() * this->num_columns();
for (size_t i = 0; i < size; ++i) {
data[i] = conj(data[i]);
}
return *this;
}
ComplexMatrix& PointwiseConjugate(const ComplexMatrix& operand) {
this->CopyFrom(operand);
return PointwiseConjugate();
}
ComplexMatrix& ConjugateTranspose() {
this->CopyDataToScratch();
int num_rows = this->num_rows();
this->SetNumRows(this->num_columns());
this->SetNumColumns(num_rows);
this->Resize();
return ConjugateTranspose(this->scratch_elements());
}
ComplexMatrix& ConjugateTranspose(const ComplexMatrix& operand) {
RTC_CHECK_EQ(operand.num_rows(), this->num_columns());
RTC_CHECK_EQ(operand.num_columns(), this->num_rows());
return ConjugateTranspose(operand.elements());
}
ComplexMatrix& ZeroImag() {
complex<T>* const data = this->data();
size_t size = this->num_rows() * this->num_columns();
for (size_t i = 0; i < size; ++i) {
data[i] = complex<T>(data[i].real(), 0);
}
return *this;
}
ComplexMatrix& ZeroImag(const ComplexMatrix& operand) {
this->CopyFrom(operand);
return ZeroImag();
}
private:
ComplexMatrix& ConjugateTranspose(const complex<T>* const* src) {
complex<T>* const* elements = this->elements();
for (int i = 0; i < this->num_rows(); ++i) {
for (int j = 0; j < this->num_columns(); ++j) {
elements[i][j] = conj(src[j][i]);
}
}
return *this;
}
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COMPLEX_MATRIX_H_
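A short illustration of the in-place ConjugateTranspose() declared above, grounded in this header.

#include <complex>
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"

void HermitianTransposeExample() {
  const std::complex<float> values[] = {
      {1.f, 2.f}, {3.f, -1.f},   // row 0
      {0.f, 4.f}, {5.f, 0.f}};   // row 1
  webrtc::ComplexMatrix<float> m(values, 2, 2);
  // In place: m[i][j] becomes the conjugate of the old m[j][i].
  m.ConjugateTranspose();
}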

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
#include <cmath>
namespace {
float BesselJ0(float x) {
#if WEBRTC_WIN
return _j0(x);
#else
return j0(x);
#endif
}
} // namespace
namespace webrtc {
void CovarianceMatrixGenerator::UniformCovarianceMatrix(
float wave_number,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
complex<float>* const* mat_els = mat->elements();
for (size_t i = 0; i < geometry.size(); ++i) {
for (size_t j = 0; j < geometry.size(); ++j) {
if (wave_number > 0.f) {
mat_els[i][j] =
BesselJ0(wave_number * Distance(geometry[i], geometry[j]));
} else {
mat_els[i][j] = i == j ? 1.f : 0.f;
}
}
}
}
void CovarianceMatrixGenerator::AngledCovarianceMatrix(
float sound_speed,
float angle,
size_t frequency_bin,
size_t fft_size,
size_t num_freq_bins,
int sample_rate,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
ComplexMatrix<float> interf_cov_vector(1, geometry.size());
ComplexMatrix<float> interf_cov_vector_transposed(geometry.size(), 1);
PhaseAlignmentMasks(frequency_bin,
fft_size,
sample_rate,
sound_speed,
geometry,
angle,
&interf_cov_vector);
interf_cov_vector_transposed.Transpose(interf_cov_vector);
interf_cov_vector.PointwiseConjugate();
mat->Multiply(interf_cov_vector_transposed, interf_cov_vector);
}
void CovarianceMatrixGenerator::PhaseAlignmentMasks(
size_t frequency_bin,
size_t fft_size,
int sample_rate,
float sound_speed,
const std::vector<Point>& geometry,
float angle,
ComplexMatrix<float>* mat) {
RTC_CHECK_EQ(1, mat->num_rows());
RTC_CHECK_EQ(static_cast<int>(geometry.size()), mat->num_columns());
float freq_in_hertz =
(static_cast<float>(frequency_bin) / fft_size) * sample_rate;
complex<float>* const* mat_els = mat->elements();
for (size_t c_ix = 0; c_ix < geometry.size(); ++c_ix) {
float distance = std::cos(angle) * geometry[c_ix].x() +
std::sin(angle) * geometry[c_ix].y();
float phase_shift = -2.f * M_PI * distance * freq_in_hertz / sound_speed;
// Euler's formula for mat[0][c_ix] = e^(j * phase_shift).
mat_els[0][c_ix] = complex<float>(cos(phase_shift), sin(phase_shift));
}
}
} // namespace webrtc

View File

@ -0,0 +1,54 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
namespace webrtc {
// Helper class for Beamformer in charge of generating covariance matrices.
// For the covariance functions, the passed-in ComplexMatrix is expected to be
// of size |num_input_channels| x |num_input_channels|; PhaseAlignmentMasks
// expects a |1| x |num_input_channels| matrix.
class CovarianceMatrixGenerator {
public:
// A uniform covariance matrix with a gap at the target location. WARNING:
// The target angle is assumed to be 0.
static void UniformCovarianceMatrix(float wave_number,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat);
// The covariance matrix of a source at the given angle.
static void AngledCovarianceMatrix(float sound_speed,
float angle,
size_t frequency_bin,
size_t fft_size,
size_t num_freq_bins,
int sample_rate,
const std::vector<Point>& geometry,
ComplexMatrix<float>* mat);
// Calculates phase shifts that, when applied to a multichannel signal and
// added together, cause constructive interference for sources located at
// the given angle.
static void PhaseAlignmentMasks(size_t frequency_bin,
size_t fft_size,
int sample_rate,
float sound_speed,
const std::vector<Point>& geometry,
float angle,
ComplexMatrix<float>* mat);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_COVARIANCE_MATRIX_GENERATOR_H_
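A usage sketch for the uniform covariance matrix, grounded in this header and the .cc above; the wave number follows the k = 2*pi*f/c definition used in nonlinear_beamformer.cc.

#include <vector>
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"

void UniformCovarianceExample() {
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
  // Wave number for 1 kHz at c = 343 m/s.
  const float wave_number = 2.f * 3.14159265f * 1000.f / 343.f;
  // Must be |num_input_channels| x |num_input_channels|.
  webrtc::ComplexMatrix<float> cov(2, 2);
  webrtc::CovarianceMatrixGenerator::UniformCovarianceMatrix(
      wave_number, geometry, &cov);
}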

View File

@ -0,0 +1,368 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
#include <algorithm>
#include <cstring>
#include <string>
#include <vector>
#include "webrtc/base/checks.h"
#include "webrtc/base/constructormagic.h"
#include "webrtc/base/scoped_ptr.h"
namespace {
// Wrappers to get around the compiler warning resulting from the fact that
// there's no std::sqrt overload for ints. We cast all non-complex types to
// a double for the sqrt method.
template <typename T>
T sqrt_wrapper(T x) {
return sqrt(static_cast<double>(x));
}
template <typename S>
std::complex<S> sqrt_wrapper(std::complex<S> x) {
return sqrt(x);
}
} // namespace
namespace webrtc {
// Matrix is a class for doing standard matrix operations on 2 dimensional
// matrices of any size. Results of matrix operations are stored in the
// calling object. Function overloads exist for both in-place (the calling
// object is used as both an operand and the result) and out-of-place (all
// operands are passed in as parameters) operations. If operand dimensions
// mismatch, the program crashes. Out-of-place operations change the size of
// the calling object, if necessary, before operating.
//
// 'In-place' operations that inherently change the size of the matrix (e.g.
// Transpose, Multiply on different-sized matrices) must make temporary copies
// (|scratch_elements_| and |scratch_data_|) of existing data to complete the
// operations.
//
// The data is stored contiguously. Data can be accessed internally as a flat
// array, |data_|, or as an array of row pointers, |elements_|, but is
// available to users only as an array of row pointers through |elements()|.
// Memory for storage is allocated when a matrix is resized only if the new
// size overflows capacity. Memory needed temporarily for any operations is
// similarly resized only if the new size overflows capacity.
//
// If you pass in storage through the ctor, that storage is copied into the
// matrix. TODO(claguna): albeit tricky, allow for data to be referenced
// instead of copied, and owned by the user.
template <typename T>
class Matrix {
public:
Matrix() : num_rows_(0), num_columns_(0) {}
// Allocates space for the elements and initializes all values to zero.
Matrix(int num_rows, int num_columns)
: num_rows_(num_rows), num_columns_(num_columns) {
Resize();
scratch_data_.resize(num_rows_ * num_columns_);
scratch_elements_.resize(num_rows_);
}
// Copies |data| into the new Matrix.
Matrix(const T* data, int num_rows, int num_columns)
: num_rows_(0), num_columns_(0) {
CopyFrom(data, num_rows, num_columns);
scratch_data_.resize(num_rows_ * num_columns_);
scratch_elements_.resize(num_rows_);
}
virtual ~Matrix() {}
// Deep copy an existing matrix.
void CopyFrom(const Matrix& other) {
CopyFrom(&other.data_[0], other.num_rows_, other.num_columns_);
}
// Copy |data| into the Matrix. The current data is lost.
void CopyFrom(const T* const data, int num_rows, int num_columns) {
Resize(num_rows, num_columns);
memcpy(&data_[0], data, num_rows_ * num_columns_ * sizeof(data_[0]));
}
Matrix& CopyFromColumn(const T* const* src,
size_t column_index,
int num_rows) {
Resize(1, num_rows);
for (int i = 0; i < num_columns_; ++i) {
data_[i] = src[i][column_index];
}
return *this;
}
void Resize(int num_rows, int num_columns) {
if (num_rows != num_rows_ || num_columns != num_columns_) {
num_rows_ = num_rows;
num_columns_ = num_columns;
Resize();
}
}
// Accessors and mutators.
int num_rows() const { return num_rows_; }
int num_columns() const { return num_columns_; }
T* const* elements() { return &elements_[0]; }
const T* const* elements() const { return &elements_[0]; }
T Trace() {
RTC_CHECK_EQ(num_rows_, num_columns_);
T trace = 0;
for (int i = 0; i < num_rows_; ++i) {
trace += elements_[i][i];
}
return trace;
}
// Matrix Operations. Returns *this to support method chaining.
Matrix& Transpose() {
CopyDataToScratch();
Resize(num_columns_, num_rows_);
return Transpose(scratch_elements());
}
Matrix& Transpose(const Matrix& operand) {
RTC_CHECK_EQ(operand.num_rows_, num_columns_);
RTC_CHECK_EQ(operand.num_columns_, num_rows_);
return Transpose(operand.elements());
}
template <typename S>
Matrix& Scale(const S& scalar) {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= scalar;
}
return *this;
}
template <typename S>
Matrix& Scale(const Matrix& operand, const S& scalar) {
CopyFrom(operand);
return Scale(scalar);
}
Matrix& Add(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] += operand.data_[i];
}
return *this;
}
Matrix& Add(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return Add(rhs);
}
Matrix& Subtract(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] -= operand.data_[i];
}
return *this;
}
Matrix& Subtract(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return Subtract(rhs);
}
Matrix& PointwiseMultiply(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= operand.data_[i];
}
return *this;
}
Matrix& PointwiseMultiply(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return PointwiseMultiply(rhs);
}
Matrix& PointwiseDivide(const Matrix& operand) {
RTC_CHECK_EQ(num_rows_, operand.num_rows_);
RTC_CHECK_EQ(num_columns_, operand.num_columns_);
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] /= operand.data_[i];
}
return *this;
}
Matrix& PointwiseDivide(const Matrix& lhs, const Matrix& rhs) {
CopyFrom(lhs);
return PointwiseDivide(rhs);
}
Matrix& PointwiseSquareRoot() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = sqrt_wrapper(data_[i]);
}
return *this;
}
Matrix& PointwiseSquareRoot(const Matrix& operand) {
CopyFrom(operand);
return PointwiseSquareRoot();
}
Matrix& PointwiseAbsoluteValue() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] = abs(data_[i]);
}
return *this;
}
Matrix& PointwiseAbsoluteValue(const Matrix& operand) {
CopyFrom(operand);
return PointwiseAbsoluteValue();
}
Matrix& PointwiseSquare() {
for (size_t i = 0; i < data_.size(); ++i) {
data_[i] *= data_[i];
}
return *this;
}
Matrix& PointwiseSquare(const Matrix& operand) {
CopyFrom(operand);
return PointwiseSquare();
}
Matrix& Multiply(const Matrix& lhs, const Matrix& rhs) {
RTC_CHECK_EQ(lhs.num_columns_, rhs.num_rows_);
RTC_CHECK_EQ(num_rows_, lhs.num_rows_);
RTC_CHECK_EQ(num_columns_, rhs.num_columns_);
return Multiply(lhs.elements(), rhs.num_rows_, rhs.elements());
}
Matrix& Multiply(const Matrix& rhs) {
RTC_CHECK_EQ(num_columns_, rhs.num_rows_);
CopyDataToScratch();
Resize(num_rows_, rhs.num_columns_);
return Multiply(scratch_elements(), rhs.num_rows_, rhs.elements());
}
std::string ToString() const {
std::ostringstream ss;
ss << std::endl << "Matrix" << std::endl;
for (int i = 0; i < num_rows_; ++i) {
for (int j = 0; j < num_columns_; ++j) {
ss << elements_[i][j] << " ";
}
ss << std::endl;
}
ss << std::endl;
return ss.str();
}
protected:
void SetNumRows(const int num_rows) { num_rows_ = num_rows; }
void SetNumColumns(const int num_columns) { num_columns_ = num_columns; }
T* data() { return &data_[0]; }
const T* data() const { return &data_[0]; }
const T* const* scratch_elements() const { return &scratch_elements_[0]; }
// Resize the matrix. If an increase in capacity is required, the current
// data is lost.
void Resize() {
size_t size = num_rows_ * num_columns_;
data_.resize(size);
elements_.resize(num_rows_);
for (int i = 0; i < num_rows_; ++i) {
elements_[i] = &data_[i * num_columns_];
}
}
// Copies data_ into scratch_data_ and updates scratch_elements_ accordingly.
void CopyDataToScratch() {
scratch_data_ = data_;
scratch_elements_.resize(num_rows_);
for (int i = 0; i < num_rows_; ++i) {
scratch_elements_[i] = &scratch_data_[i * num_columns_];
}
}
private:
int num_rows_;
int num_columns_;
std::vector<T> data_;
std::vector<T*> elements_;
// Stores temporary copies of |data_| and |elements_| for in-place operations
// where referring to original data is necessary.
std::vector<T> scratch_data_;
std::vector<T*> scratch_elements_;
// Helpers for Transpose and Multiply operations that unify in-place and
// out-of-place solutions.
Matrix& Transpose(const T* const* src) {
for (int i = 0; i < num_rows_; ++i) {
for (int j = 0; j < num_columns_; ++j) {
elements_[i][j] = src[j][i];
}
}
return *this;
}
Matrix& Multiply(const T* const* lhs, int num_rows_rhs, const T* const* rhs) {
for (int row = 0; row < num_rows_; ++row) {
for (int col = 0; col < num_columns_; ++col) {
T cur_element = 0;
for (int i = 0; i < num_rows_rhs; ++i) {
cur_element += lhs[row][i] * rhs[i][col];
}
elements_[row][col] = cur_element;
}
}
return *this;
}
RTC_DISALLOW_COPY_AND_ASSIGN(Matrix);
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_H_
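A small example of the chaining convention described above (operations return *this), grounded in this header.

#include "webrtc/modules/audio_processing/beamformer/matrix.h"

void MatrixChainingExample() {
  const float data[] = {1.f, 2.f,
                        3.f, 4.f};
  webrtc::Matrix<float> a(data, 2, 2);
  webrtc::Matrix<float> b(data, 2, 2);
  webrtc::Matrix<float> product(2, 2);
  product.Multiply(a, b);           // out-of-place: product = a * b
  product.Transpose().Scale(0.5f);  // in-place operations, chained
}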

View File

@ -0,0 +1,102 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
#include "webrtc/modules/audio_processing/beamformer/matrix.h"
namespace {
const float kTolerance = 0.001f;
}
namespace webrtc {
using std::complex;
// Functions used in both matrix_unittest and complex_matrix_unittest.
class MatrixTestHelpers {
public:
template <typename T>
static void ValidateMatrixEquality(const Matrix<T>& expected,
const Matrix<T>& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const T* const* expected_elements = expected.elements();
const T* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_EQ(expected_elements[i][j], actual_elements[i][j]);
}
}
}
static void ValidateMatrixEqualityFloat(const Matrix<float>& expected,
const Matrix<float>& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const float* const* expected_elements = expected.elements();
const float* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j], actual_elements[i][j], kTolerance);
}
}
}
static void ValidateMatrixEqualityComplexFloat(
const Matrix<complex<float> >& expected,
const Matrix<complex<float> >& actual) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const complex<float>* const* expected_elements = expected.elements();
const complex<float>* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j].real(),
actual_elements[i][j].real(),
kTolerance);
EXPECT_NEAR(expected_elements[i][j].imag(),
actual_elements[i][j].imag(),
kTolerance);
}
}
}
static void ValidateMatrixNearEqualityComplexFloat(
const Matrix<complex<float> >& expected,
const Matrix<complex<float> >& actual,
float tolerance) {
EXPECT_EQ(expected.num_rows(), actual.num_rows());
EXPECT_EQ(expected.num_columns(), actual.num_columns());
const complex<float>* const* expected_elements = expected.elements();
const complex<float>* const* actual_elements = actual.elements();
for (int i = 0; i < expected.num_rows(); ++i) {
for (int j = 0; j < expected.num_columns(); ++j) {
EXPECT_NEAR(expected_elements[i][j].real(),
actual_elements[i][j].real(),
tolerance);
EXPECT_NEAR(expected_elements[i][j].imag(),
actual_elements[i][j].imag(),
tolerance);
}
}
}
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_MATRIX_TEST_HELPERS_H_
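A sketch of how these helpers are meant to be used from a gtest test case:

#include "testing/gtest/include/gtest/gtest.h"
#include "webrtc/modules/audio_processing/beamformer/matrix_test_helpers.h"

TEST(MatrixTest, TransposeTwiceRestoresMatrix) {
  const float data[] = {1.f, 2.f, 3.f,
                        4.f, 5.f, 6.f};
  webrtc::Matrix<float> expected(data, 2, 3);
  webrtc::Matrix<float> actual(data, 2, 3);
  actual.Transpose().Transpose();  // transposing twice is the identity
  webrtc::MatrixTestHelpers::ValidateMatrixEqualityFloat(expected, actual);
}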

View File

@ -0,0 +1,516 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#define _USE_MATH_DEFINES
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>
#include "webrtc/base/arraysize.h"
#include "webrtc/common_audio/window_generator.h"
#include "webrtc/modules/audio_processing/beamformer/covariance_matrix_generator.h"
namespace webrtc {
namespace {
// Alpha for the Kaiser Bessel Derived window.
const float kKbdAlpha = 1.5f;
// The minimum value a post-processing mask can take.
const float kMaskMinimum = 0.01f;
const float kSpeedOfSoundMeterSeconds = 343;
// For both target and interference angles, PI / 2 is perpendicular to the
// microphone array, facing forwards. The positive direction goes
// counterclockwise.
// The angle at which we amplify sound.
const float kTargetAngleRadians = static_cast<float>(M_PI) / 2.f;
// The angle at which we suppress sound. Suppression is symmetric around PI / 2
// radians, so sound is suppressed at both +|kInterfAngleRadians| and
// PI - |kInterfAngleRadians|. Since the beamformer is robust, this should
// suppress sound coming from close angles as well.
const float kInterfAngleRadians = static_cast<float>(M_PI) / 4.f;
// When calculating the interference covariance matrix, this is the weight for
// the weighted average between the uniform covariance matrix and the angled
// covariance matrix.
// Rpsi = Rpsi_angled * kBalance + Rpsi_uniform * (1 - kBalance)
const float kBalance = 0.4f;
const float kHalfBeamWidthRadians = static_cast<float>(M_PI) * 20.f / 180.f;
// TODO(claguna): need comment here.
const float kBeamwidthConstant = 0.00002f;
// Alpha coefficients for mask smoothing.
const float kMaskTimeSmoothAlpha = 0.2f;
const float kMaskFrequencySmoothAlpha = 0.6f;
// The average mask is computed from masks in this mid-frequency range. If these
// ranges are changed, |kMaskQuantile| might need to be adjusted.
const int kLowMeanStartHz = 200;
const int kLowMeanEndHz = 400;
const int kHighMeanStartHz = 3000;
const int kHighMeanEndHz = 5000;
// Quantile of mask values which is used to estimate target presence.
const float kMaskQuantile = 0.7f;
// Mask threshold over which the data is considered signal and not interference.
const float kMaskTargetThreshold = 0.3f;
// Time in seconds after which the data is considered interference if the mask
// does not pass |kMaskTargetThreshold|.
const float kHoldTargetSeconds = 0.25f;
// Does conjugate(|norm_mat|) * |mat| * transpose(|norm_mat|). No extra space is
// used; to accomplish this, we compute both multiplications in the same loop.
// The returned norm is clamped to be non-negative.
float Norm(const ComplexMatrix<float>& mat,
const ComplexMatrix<float>& norm_mat) {
RTC_CHECK_EQ(norm_mat.num_rows(), 1);
RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_rows());
RTC_CHECK_EQ(norm_mat.num_columns(), mat.num_columns());
complex<float> first_product = complex<float>(0.f, 0.f);
complex<float> second_product = complex<float>(0.f, 0.f);
const complex<float>* const* mat_els = mat.elements();
const complex<float>* const* norm_mat_els = norm_mat.elements();
for (int i = 0; i < norm_mat.num_columns(); ++i) {
for (int j = 0; j < norm_mat.num_columns(); ++j) {
first_product += conj(norm_mat_els[0][j]) * mat_els[j][i];
}
second_product += first_product * norm_mat_els[0][i];
first_product = 0.f;
}
return std::max(second_product.real(), 0.f);
}
// Does conjugate(|lhs|) * |rhs| for row vectors |lhs| and |rhs|.
complex<float> ConjugateDotProduct(const ComplexMatrix<float>& lhs,
const ComplexMatrix<float>& rhs) {
RTC_CHECK_EQ(lhs.num_rows(), 1);
RTC_CHECK_EQ(rhs.num_rows(), 1);
RTC_CHECK_EQ(lhs.num_columns(), rhs.num_columns());
const complex<float>* const* lhs_elements = lhs.elements();
const complex<float>* const* rhs_elements = rhs.elements();
complex<float> result = complex<float>(0.f, 0.f);
for (int i = 0; i < lhs.num_columns(); ++i) {
result += conj(lhs_elements[0][i]) * rhs_elements[0][i];
}
return result;
}
// Works for positive numbers only.
size_t Round(float x) {
return static_cast<size_t>(std::floor(x + 0.5f));
}
// Calculates the sum of absolute values of a complex matrix.
float SumAbs(const ComplexMatrix<float>& mat) {
float sum_abs = 0.f;
const complex<float>* const* mat_els = mat.elements();
for (int i = 0; i < mat.num_rows(); ++i) {
for (int j = 0; j < mat.num_columns(); ++j) {
sum_abs += std::abs(mat_els[i][j]);
}
}
return sum_abs;
}
// Calculates the sum of squares of a complex matrix.
float SumSquares(const ComplexMatrix<float>& mat) {
float sum_squares = 0.f;
const complex<float>* const* mat_els = mat.elements();
for (int i = 0; i < mat.num_rows(); ++i) {
for (int j = 0; j < mat.num_columns(); ++j) {
float abs_value = std::abs(mat_els[i][j]);
sum_squares += abs_value * abs_value;
}
}
return sum_squares;
}
// Does |out| = |in|.' * conj(|in|) for row vector |in|.
void TransposedConjugatedProduct(const ComplexMatrix<float>& in,
ComplexMatrix<float>* out) {
RTC_CHECK_EQ(in.num_rows(), 1);
RTC_CHECK_EQ(out->num_rows(), in.num_columns());
RTC_CHECK_EQ(out->num_columns(), in.num_columns());
const complex<float>* in_elements = in.elements()[0];
complex<float>* const* out_elements = out->elements();
for (int i = 0; i < out->num_rows(); ++i) {
for (int j = 0; j < out->num_columns(); ++j) {
out_elements[i][j] = in_elements[i] * conj(in_elements[j]);
}
}
}
std::vector<Point> GetCenteredArray(std::vector<Point> array_geometry) {
for (int dim = 0; dim < 3; ++dim) {
float center = 0.f;
for (size_t i = 0; i < array_geometry.size(); ++i) {
center += array_geometry[i].c[dim];
}
center /= array_geometry.size();
for (size_t i = 0; i < array_geometry.size(); ++i) {
array_geometry[i].c[dim] -= center;
}
}
return array_geometry;
}
} // namespace
// static
const size_t NonlinearBeamformer::kNumFreqBins;
NonlinearBeamformer::NonlinearBeamformer(
const std::vector<Point>& array_geometry)
: num_input_channels_(array_geometry.size()),
array_geometry_(GetCenteredArray(array_geometry)) {
WindowGenerator::KaiserBesselDerived(kKbdAlpha, kFftSize, window_);
}
void NonlinearBeamformer::Initialize(int chunk_size_ms, int sample_rate_hz) {
chunk_length_ =
static_cast<size_t>(sample_rate_hz / (1000.f / chunk_size_ms));
sample_rate_hz_ = sample_rate_hz;
low_mean_start_bin_ = Round(kLowMeanStartHz * kFftSize / sample_rate_hz_);
low_mean_end_bin_ = Round(kLowMeanEndHz * kFftSize / sample_rate_hz_);
high_mean_start_bin_ = Round(kHighMeanStartHz * kFftSize / sample_rate_hz_);
high_mean_end_bin_ = Round(kHighMeanEndHz * kFftSize / sample_rate_hz_);
// These bin indexes determine the regions over which a mean is taken. This
// is applied as a constant value over the adjacent end "frequency correction"
// regions.
//
// low_mean_start_bin_ high_mean_start_bin_
// v v constant
// |----------------|--------|----------------|-------|----------------|
// constant ^ ^
// low_mean_end_bin_ high_mean_end_bin_
//
RTC_DCHECK_GT(low_mean_start_bin_, 0U);
RTC_DCHECK_LT(low_mean_start_bin_, low_mean_end_bin_);
RTC_DCHECK_LT(low_mean_end_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_start_bin_, high_mean_end_bin_);
RTC_DCHECK_LT(high_mean_end_bin_, kNumFreqBins - 1);
high_pass_postfilter_mask_ = 1.f;
is_target_present_ = false;
hold_target_blocks_ = kHoldTargetSeconds * 2 * sample_rate_hz / kFftSize;
interference_blocks_count_ = hold_target_blocks_;
lapped_transform_.reset(new LappedTransform(num_input_channels_,
1,
chunk_length_,
window_,
kFftSize,
kFftSize / 2,
this));
for (size_t i = 0; i < kNumFreqBins; ++i) {
time_smooth_mask_[i] = 1.f;
final_mask_[i] = 1.f;
float freq_hz = (static_cast<float>(i) / kFftSize) * sample_rate_hz_;
wave_numbers_[i] = 2 * M_PI * freq_hz / kSpeedOfSoundMeterSeconds;
mask_thresholds_[i] = num_input_channels_ * num_input_channels_ *
kBeamwidthConstant * wave_numbers_[i] *
wave_numbers_[i];
}
// Initialize all nonadaptive values before looping through the frames.
InitDelaySumMasks();
InitTargetCovMats();
InitInterfCovMats();
for (size_t i = 0; i < kNumFreqBins; ++i) {
rxiws_[i] = Norm(target_cov_mats_[i], delay_sum_masks_[i]);
rpsiws_[i] = Norm(interf_cov_mats_[i], delay_sum_masks_[i]);
reflected_rpsiws_[i] =
Norm(reflected_interf_cov_mats_[i], delay_sum_masks_[i]);
}
}
void NonlinearBeamformer::InitDelaySumMasks() {
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
delay_sum_masks_[f_ix].Resize(1, num_input_channels_);
CovarianceMatrixGenerator::PhaseAlignmentMasks(f_ix,
kFftSize,
sample_rate_hz_,
kSpeedOfSoundMeterSeconds,
array_geometry_,
kTargetAngleRadians,
&delay_sum_masks_[f_ix]);
complex_f norm_factor = sqrt(
ConjugateDotProduct(delay_sum_masks_[f_ix], delay_sum_masks_[f_ix]));
delay_sum_masks_[f_ix].Scale(1.f / norm_factor);
normalized_delay_sum_masks_[f_ix].CopyFrom(delay_sum_masks_[f_ix]);
normalized_delay_sum_masks_[f_ix].Scale(1.f / SumAbs(
normalized_delay_sum_masks_[f_ix]));
}
}
void NonlinearBeamformer::InitTargetCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
target_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
TransposedConjugatedProduct(delay_sum_masks_[i], &target_cov_mats_[i]);
complex_f normalization_factor = target_cov_mats_[i].Trace();
target_cov_mats_[i].Scale(1.f / normalization_factor);
}
}
void NonlinearBeamformer::InitInterfCovMats() {
for (size_t i = 0; i < kNumFreqBins; ++i) {
interf_cov_mats_[i].Resize(num_input_channels_, num_input_channels_);
ComplexMatrixF uniform_cov_mat(num_input_channels_, num_input_channels_);
ComplexMatrixF angled_cov_mat(num_input_channels_, num_input_channels_);
CovarianceMatrixGenerator::UniformCovarianceMatrix(wave_numbers_[i],
array_geometry_,
&uniform_cov_mat);
CovarianceMatrixGenerator::AngledCovarianceMatrix(kSpeedOfSoundMeterSeconds,
kInterfAngleRadians,
i,
kFftSize,
kNumFreqBins,
sample_rate_hz_,
array_geometry_,
&angled_cov_mat);
// Normalize matrices before averaging them.
complex_f normalization_factor = uniform_cov_mat.Trace();
uniform_cov_mat.Scale(1.f / normalization_factor);
normalization_factor = angled_cov_mat.Trace();
angled_cov_mat.Scale(1.f / normalization_factor);
// Average matrices.
uniform_cov_mat.Scale(1 - kBalance);
angled_cov_mat.Scale(kBalance);
interf_cov_mats_[i].Add(uniform_cov_mat, angled_cov_mat);
reflected_interf_cov_mats_[i].PointwiseConjugate(interf_cov_mats_[i]);
}
}
void NonlinearBeamformer::ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) {
RTC_DCHECK_EQ(input.num_channels(), num_input_channels_);
RTC_DCHECK_EQ(input.num_frames_per_band(), chunk_length_);
float old_high_pass_mask = high_pass_postfilter_mask_;
lapped_transform_->ProcessChunk(input.channels(0), output->channels(0));
// Ramp up/down for smoothing. 1 mask per 10ms results in audible
// discontinuities.
const float ramp_increment =
(high_pass_postfilter_mask_ - old_high_pass_mask) /
input.num_frames_per_band();
// Apply delay and sum and post-filter in the time domain. WARNING: only works
// because delay-and-sum is not frequency dependent.
for (size_t i = 1; i < input.num_bands(); ++i) {
float smoothed_mask = old_high_pass_mask;
for (size_t j = 0; j < input.num_frames_per_band(); ++j) {
smoothed_mask += ramp_increment;
// Applying the delay and sum (at zero degrees, this is equivalent to
// averaging).
float sum = 0.f;
for (int k = 0; k < input.num_channels(); ++k) {
sum += input.channels(i)[k][j];
}
output->channels(i)[0][j] = sum / input.num_channels() * smoothed_mask;
}
}
}
bool NonlinearBeamformer::IsInBeam(const SphericalPointf& spherical_point) {
// If more than half-beamwidth degrees away from the beam's center,
// you are out of the beam.
return fabs(spherical_point.azimuth() - kTargetAngleRadians) <
kHalfBeamWidthRadians;
}
void NonlinearBeamformer::ProcessAudioBlock(const complex_f* const* input,
int num_input_channels,
size_t num_freq_bins,
int num_output_channels,
complex_f* const* output) {
RTC_CHECK_EQ(num_freq_bins, kNumFreqBins);
RTC_CHECK_EQ(num_input_channels, num_input_channels_);
RTC_CHECK_EQ(num_output_channels, 1);
// Calculating the post-filter masks. Note that we need two for each
// frequency bin to account for the positive and negative interferer
// angle.
for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
eig_m_.CopyFromColumn(input, i, num_input_channels_);
float eig_m_norm_factor = std::sqrt(SumSquares(eig_m_));
if (eig_m_norm_factor != 0.f) {
eig_m_.Scale(1.f / eig_m_norm_factor);
}
float rxim = Norm(target_cov_mats_[i], eig_m_);
float ratio_rxiw_rxim = 0.f;
if (rxim > 0.f) {
ratio_rxiw_rxim = rxiws_[i] / rxim;
}
complex_f rmw = abs(ConjugateDotProduct(delay_sum_masks_[i], eig_m_));
rmw *= rmw;
float rmw_r = rmw.real();
new_mask_[i] = CalculatePostfilterMask(interf_cov_mats_[i],
rpsiws_[i],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
new_mask_[i] *= CalculatePostfilterMask(reflected_interf_cov_mats_[i],
reflected_rpsiws_[i],
ratio_rxiw_rxim,
rmw_r,
mask_thresholds_[i]);
}
ApplyMaskTimeSmoothing();
EstimateTargetPresence();
ApplyLowFrequencyCorrection();
ApplyHighFrequencyCorrection();
ApplyMaskFrequencySmoothing();
ApplyMasks(input, output);
}
float NonlinearBeamformer::CalculatePostfilterMask(
const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmw_r,
float mask_threshold) {
float rpsim = Norm(interf_cov_mat, eig_m_);
// Find lambda.
float ratio = 0.f;
if (rpsim > 0.f) {
ratio = rpsiw / rpsim;
}
float numerator = rmw_r - ratio;
float denominator = ratio_rxiw_rxim - ratio;
float mask = 1.f;
if (denominator > mask_threshold) {
float lambda = numerator / denominator;
mask = std::max(lambda * ratio_rxiw_rxim / rmw_r, kMaskMinimum);
}
return mask;
}
void NonlinearBeamformer::ApplyMasks(const complex_f* const* input,
complex_f* const* output) {
complex_f* output_channel = output[0];
for (size_t f_ix = 0; f_ix < kNumFreqBins; ++f_ix) {
output_channel[f_ix] = complex_f(0.f, 0.f);
const complex_f* delay_sum_mask_els =
normalized_delay_sum_masks_[f_ix].elements()[0];
for (int c_ix = 0; c_ix < num_input_channels_; ++c_ix) {
output_channel[f_ix] += input[c_ix][f_ix] * delay_sum_mask_els[c_ix];
}
output_channel[f_ix] *= final_mask_[f_ix];
}
}
// Smooth new_mask_ into time_smooth_mask_.
void NonlinearBeamformer::ApplyMaskTimeSmoothing() {
for (size_t i = low_mean_start_bin_; i <= high_mean_end_bin_; ++i) {
time_smooth_mask_[i] = kMaskTimeSmoothAlpha * new_mask_[i] +
(1 - kMaskTimeSmoothAlpha) * time_smooth_mask_[i];
}
}
// Copy time_smooth_mask_ to final_mask_ and smooth over frequency.
void NonlinearBeamformer::ApplyMaskFrequencySmoothing() {
// Smooth over frequency in both directions. The "frequency correction"
// regions have constant value, but we enter them to smooth over the jump
// that exists at the boundary. However, this does mean that when smoothing
// "away" from the region, only its last element needs to be used.
//
// Upward smoothing:
// low_mean_start_bin_
// v
// |------|------------|------|
// ^------------------>^
//
// Downward smoothing:
// high_mean_end_bin_
// v
// |------|------------|------|
// ^<------------------^
std::copy(time_smooth_mask_, time_smooth_mask_ + kNumFreqBins, final_mask_);
for (size_t i = low_mean_start_bin_; i < kNumFreqBins; ++i) {
final_mask_[i] = kMaskFrequencySmoothAlpha * final_mask_[i] +
(1 - kMaskFrequencySmoothAlpha) * final_mask_[i - 1];
}
for (size_t i = high_mean_end_bin_ + 1; i > 0; --i) {
final_mask_[i - 1] = kMaskFrequencySmoothAlpha * final_mask_[i - 1] +
(1 - kMaskFrequencySmoothAlpha) * final_mask_[i];
}
}
// Apply low frequency correction to time_smooth_mask_.
void NonlinearBeamformer::ApplyLowFrequencyCorrection() {
const float low_frequency_mask =
MaskRangeMean(low_mean_start_bin_, low_mean_end_bin_ + 1);
std::fill(time_smooth_mask_, time_smooth_mask_ + low_mean_start_bin_,
low_frequency_mask);
}
// Apply high frequency correction to time_smooth_mask_. Update
// high_pass_postfilter_mask_ to use for the high frequency time-domain bands.
void NonlinearBeamformer::ApplyHighFrequencyCorrection() {
high_pass_postfilter_mask_ =
MaskRangeMean(high_mean_start_bin_, high_mean_end_bin_ + 1);
std::fill(time_smooth_mask_ + high_mean_end_bin_ + 1,
time_smooth_mask_ + kNumFreqBins, high_pass_postfilter_mask_);
}
// Compute mean over the given range of time_smooth_mask_, [first, last).
float NonlinearBeamformer::MaskRangeMean(size_t first, size_t last) {
RTC_DCHECK_GT(last, first);
const float sum = std::accumulate(time_smooth_mask_ + first,
time_smooth_mask_ + last, 0.f);
return sum / (last - first);
}
void NonlinearBeamformer::EstimateTargetPresence() {
const size_t quantile = static_cast<size_t>(
(high_mean_end_bin_ - low_mean_start_bin_) * kMaskQuantile +
low_mean_start_bin_);
std::nth_element(new_mask_ + low_mean_start_bin_, new_mask_ + quantile,
new_mask_ + high_mean_end_bin_ + 1);
if (new_mask_[quantile] > kMaskTargetThreshold) {
is_target_present_ = true;
interference_blocks_count_ = 0;
} else {
is_target_present_ = interference_blocks_count_++ < hold_target_blocks_;
}
}
} // namespace webrtc
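For reference, CalculatePostfilterMask() above can be restated in math notation (the symbol-to-variable mapping is ours, not upstream's). With \rho = rpsiw / rpsim, \rho_x = ratio_rxiw_rxim and r_{mw} = rmw_r:

\lambda = \frac{r_{mw} - \rho}{\rho_x - \rho}, \qquad
\text{mask} = \max\!\left(\lambda \,\frac{\rho_x}{r_{mw}},\; 0.01\right)
\quad \text{when } \rho_x - \rho > \text{mask\_threshold}, \text{ else } 1.

The smoothing steps are plain exponential averages, in time with \alpha = 0.2 per block and in frequency with \alpha = 0.6 per bin:

M_t[i] = \alpha\, M_{\text{new}}[i] + (1-\alpha)\, M_{t-1}[i].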

View File

@ -0,0 +1,177 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
#include <vector>
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/beamformer/beamformer.h"
#include "webrtc/modules/audio_processing/beamformer/complex_matrix.h"
namespace webrtc {
// Enhances sound sources coming directly in front of a uniform linear array
// and suppresses sound sources coming from all other directions. Operates on
// multichannel signals and produces single-channel output.
//
// The implemented nonlinear postfilter algorithm is taken from "A Robust
// Nonlinear Beamforming Postprocessor" by Bastiaan Kleijn.
//
// TODO(aluebs): Target angle assumed to be 0. Parameterize target angle.
class NonlinearBeamformer
: public Beamformer<float>,
public LappedTransform::Callback {
public:
// At the moment it only accepts uniform linear microphone arrays. Using the
// first microphone as a reference position [0, 0, 0] is a natural choice.
explicit NonlinearBeamformer(const std::vector<Point>& array_geometry);
// Sample rate corresponds to the lower band.
// Needs to be called before the NonlinearBeamformer can be used.
void Initialize(int chunk_size_ms, int sample_rate_hz) override;
// Process one time-domain chunk of audio. The audio is expected to be split
// into frequency bands inside the ChannelBuffer. The number of frames and
// channels must correspond to the constructor parameters. The same
// ChannelBuffer can be passed in as |input| and |output|.
void ProcessChunk(const ChannelBuffer<float>& input,
ChannelBuffer<float>* output) override;
bool IsInBeam(const SphericalPointf& spherical_point) override;
// After processing each block, |is_target_present_| is set to true if the
// target signal is present and to false otherwise. This method can be called
// to determine whether the data is target signal or interference, and to
// process it accordingly.
bool is_target_present() override { return is_target_present_; }
protected:
// Process one frequency-domain block of audio. This is where the fun
// happens. Implements LappedTransform::Callback.
void ProcessAudioBlock(const complex<float>* const* input,
int num_input_channels,
size_t num_freq_bins,
int num_output_channels,
complex<float>* const* output) override;
private:
typedef Matrix<float> MatrixF;
typedef ComplexMatrix<float> ComplexMatrixF;
typedef complex<float> complex_f;
void InitDelaySumMasks();
void InitTargetCovMats(); // TODO(aluebs): Make this depend on target angle.
void InitInterfCovMats();
// An implementation of equation 18, which calculates postfilter masks that,
// when applied, minimize the mean-square error of our estimation of the
// desired signal. A sub-task is to calculate lambda, which is solved via
// equation 13.
float CalculatePostfilterMask(const ComplexMatrixF& interf_cov_mat,
float rpsiw,
float ratio_rxiw_rxim,
float rmxi_r,
float mask_threshold);
// Prevents the postfilter masks from degenerating too quickly (a cause of
// musical noise).
void ApplyMaskTimeSmoothing();
void ApplyMaskFrequencySmoothing();
// The postfilter masks are unreliable at low frequencies. Calculates a better
// mask by averaging mid-low frequency values.
void ApplyLowFrequencyCorrection();
// Postfilter masks are also unreliable at high frequencies. Average mid-high
// frequency masks to calculate a single mask per block which can be applied
// in the time-domain. Further, we average these block-masks over a chunk,
// resulting in one postfilter mask per audio chunk. This allows us to skip
// both transforming and blocking the high-frequency signal.
void ApplyHighFrequencyCorrection();
// Compute the means needed for the above frequency correction.
float MaskRangeMean(size_t start_bin, size_t end_bin);
// Applies both sets of masks to |input| and store in |output|.
void ApplyMasks(const complex_f* const* input, complex_f* const* output);
void EstimateTargetPresence();
static const size_t kFftSize = 256;
static const size_t kNumFreqBins = kFftSize / 2 + 1;
// Deals with the fft transform and blocking.
size_t chunk_length_;
rtc::scoped_ptr<LappedTransform> lapped_transform_;
float window_[kFftSize];
// Parameters exposed to the user.
const int num_input_channels_;
int sample_rate_hz_;
const std::vector<Point> array_geometry_;
// Calculated based on user-input and constants in the .cc file.
size_t low_mean_start_bin_;
size_t low_mean_end_bin_;
size_t high_mean_start_bin_;
size_t high_mean_end_bin_;
// Quickly varying mask updated every block.
float new_mask_[kNumFreqBins];
// Time smoothed mask.
float time_smooth_mask_[kNumFreqBins];
// Time and frequency smoothed mask.
float final_mask_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |1| x |num_input_channels_|.
ComplexMatrixF delay_sum_masks_[kNumFreqBins];
ComplexMatrixF normalized_delay_sum_masks_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF target_cov_mats_[kNumFreqBins];
// Array of length |kNumFreqBins|, Matrix of size |num_input_channels_| x
// |num_input_channels_|.
ComplexMatrixF interf_cov_mats_[kNumFreqBins];
ComplexMatrixF reflected_interf_cov_mats_[kNumFreqBins];
// Of length |kNumFreqBins|.
float mask_thresholds_[kNumFreqBins];
float wave_numbers_[kNumFreqBins];
// Preallocated for ProcessAudioBlock()
// Of length |kNumFreqBins|.
float rxiws_[kNumFreqBins];
float rpsiws_[kNumFreqBins];
float reflected_rpsiws_[kNumFreqBins];
// The microphone normalization factor.
ComplexMatrixF eig_m_;
// For processing the high-frequency input signal.
float high_pass_postfilter_mask_;
// True when the target signal is present.
bool is_target_present_;
// Number of blocks after which the data is considered interference if the
// mask does not pass |kMaskSignalThreshold|.
size_t hold_target_blocks_;
// Number of blocks since the last mask that passed |kMaskSignalThreshold|.
size_t interference_blocks_count_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_BEAMFORMER_NONLINEAR_BEAMFORMER_H_
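An end-to-end construction sketch, grounded in this header; the microphone spacing is an arbitrary example value, and a real caller would construct and Initialize() once rather than per chunk.

#include <vector>
#include "webrtc/modules/audio_processing/beamformer/nonlinear_beamformer.h"

void BeamformChunk(const webrtc::ChannelBuffer<float>& split_input,
                   webrtc::ChannelBuffer<float>* split_output) {
  // Two-mic uniform linear array, 5 cm spacing, first mic at the origin.
  std::vector<webrtc::Point> geometry;
  geometry.push_back(webrtc::Point(0.f, 0.f, 0.f));
  geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
  webrtc::NonlinearBeamformer bf(geometry);
  bf.Initialize(10, 16000);  // 10 ms chunks; sample rate of the lower band
  bf.ProcessChunk(split_input, split_output);  // input must be band-split
}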

View File

@ -0,0 +1,35 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
#include <assert.h>
#include "webrtc/modules/audio_processing/include/audio_processing.h"
namespace webrtc {
static inline int ChannelsFromLayout(AudioProcessing::ChannelLayout layout) {
switch (layout) {
case AudioProcessing::kMono:
case AudioProcessing::kMonoAndKeyboard:
return 1;
case AudioProcessing::kStereo:
case AudioProcessing::kStereoAndKeyboard:
return 2;
}
assert(false);
return -1;
}
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_COMMON_H_
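A usage note: ChannelsFromLayout() counts only primary channels, so the keyboard-mic layouts map to the same counts as their plain counterparts. A hypothetical caller might use it for validation:

#include "webrtc/modules/audio_processing/common.h"

// Hypothetical helper: check that a buffer's channel count matches its
// declared layout. kMonoAndKeyboard still reports 1 because the keyboard
// mic is not a primary channel.
bool LayoutMatchesChannels(webrtc::AudioProcessing::ChannelLayout layout,
                           int num_channels) {
  return webrtc::ChannelsFromLayout(layout) == num_channels;
}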


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,23 +8,24 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "echo_cancellation_impl.h"
#include "webrtc/modules/audio_processing/echo_cancellation_impl.h"
#include <cassert>
#include <assert.h>
#include <string.h>
#include "critical_section_wrapper.h"
#include "echo_cancellation.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
extern "C" {
#include "webrtc/modules/audio_processing/aec/aec_core.h"
}
#include "webrtc/modules/audio_processing/aec/include/echo_cancellation.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
namespace webrtc {
typedef void Handle;
namespace {
WebRtc_Word16 MapSetting(EchoCancellation::SuppressionLevel level) {
int16_t MapSetting(EchoCancellation::SuppressionLevel level) {
switch (level) {
case EchoCancellation::kLowSuppression:
return kAecNlpConservative;
@ -32,22 +33,19 @@ WebRtc_Word16 MapSetting(EchoCancellation::SuppressionLevel level) {
return kAecNlpModerate;
case EchoCancellation::kHighSuppression:
return kAecNlpAggressive;
default:
return -1;
}
assert(false);
return -1;
}
int MapError(int err) {
AudioProcessing::Error MapError(int err) {
switch (err) {
case AEC_UNSUPPORTED_FUNCTION_ERROR:
return AudioProcessing::kUnsupportedFunctionError;
break;
case AEC_BAD_PARAMETER_ERROR:
return AudioProcessing::kBadParameterError;
break;
case AEC_BAD_PARAMETER_WARNING:
return AudioProcessing::kBadStreamParameterWarning;
break;
default:
// AEC_UNSPECIFIED_ERROR
// AEC_UNINITIALIZED_ERROR
@ -57,17 +55,21 @@ int MapError(int err) {
}
} // namespace
EchoCancellationImpl::EchoCancellationImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
apm_(apm),
drift_compensation_enabled_(false),
metrics_enabled_(false),
suppression_level_(kModerateSuppression),
device_sample_rate_hz_(48000),
stream_drift_samples_(0),
was_stream_drift_set_(false),
stream_has_echo_(false),
delay_logging_enabled_(false) {}
EchoCancellationImpl::EchoCancellationImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
drift_compensation_enabled_(false),
metrics_enabled_(false),
suppression_level_(kModerateSuppression),
stream_drift_samples_(0),
was_stream_drift_set_(false),
stream_has_echo_(false),
delay_logging_enabled_(false),
extended_filter_enabled_(false),
delay_agnostic_enabled_(false) {
}
EchoCancellationImpl::~EchoCancellationImpl() {}
@ -76,7 +78,7 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_reverse_channels());
int err = apm_->kNoError;
@ -88,8 +90,8 @@ int EchoCancellationImpl::ProcessRenderAudio(const AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(handle_index));
err = WebRtcAec_BufferFarend(
my_handle,
audio->low_pass_split_data(j),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->split_bands_const_f(j)[kBand0To8kHz],
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle); // TODO(ajm): warning possible?
@ -115,7 +117,7 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kStreamParameterNotSetError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_output_channels());
int err = apm_->kNoError;
@ -128,11 +130,10 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
Handle* my_handle = handle(handle_index);
err = WebRtcAec_Process(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
audio->split_bands_const_f(i),
audio->num_bands(),
audio->split_bands_f(i),
audio->num_frames_per_band(),
apm_->stream_delay_ms(),
stream_drift_samples_);
@ -144,7 +145,7 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
}
WebRtc_Word16 status = 0;
int status = 0;
err = WebRtcAec_get_echo_status(my_handle, &status);
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -163,7 +164,7 @@ int EchoCancellationImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
int EchoCancellationImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
// Ensure AEC and AECM are not both enabled.
if (enable && apm_->echo_control_mobile()->is_enabled()) {
return apm_->kBadParameterError;
@ -177,7 +178,7 @@ bool EchoCancellationImpl::is_enabled() const {
}
int EchoCancellationImpl::set_suppression_level(SuppressionLevel level) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(level) == -1) {
return apm_->kBadParameterError;
}
@ -192,7 +193,7 @@ EchoCancellation::SuppressionLevel EchoCancellationImpl::suppression_level()
}
int EchoCancellationImpl::enable_drift_compensation(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
drift_compensation_enabled_ = enable;
return Configure();
}
@ -201,24 +202,9 @@ bool EchoCancellationImpl::is_drift_compensation_enabled() const {
return drift_compensation_enabled_;
}
int EchoCancellationImpl::set_device_sample_rate_hz(int rate) {
CriticalSectionScoped crit_scoped(*apm_->crit());
if (rate < 8000 || rate > 96000) {
return apm_->kBadParameterError;
}
device_sample_rate_hz_ = rate;
return Initialize();
}
int EchoCancellationImpl::device_sample_rate_hz() const {
return device_sample_rate_hz_;
}
int EchoCancellationImpl::set_stream_drift_samples(int drift) {
void EchoCancellationImpl::set_stream_drift_samples(int drift) {
was_stream_drift_set_ = true;
stream_drift_samples_ = drift;
return apm_->kNoError;
}
int EchoCancellationImpl::stream_drift_samples() const {
@ -226,7 +212,7 @@ int EchoCancellationImpl::stream_drift_samples() const {
}
int EchoCancellationImpl::enable_metrics(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
metrics_enabled_ = enable;
return Configure();
}
@ -238,7 +224,7 @@ bool EchoCancellationImpl::are_metrics_enabled() const {
// TODO(ajm): we currently just use the metrics from the first AEC. Think more
// about the best way to extend this to multi-channel.
int EchoCancellationImpl::GetMetrics(Metrics* metrics) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (metrics == NULL) {
return apm_->kNullPointerError;
}
@ -285,7 +271,7 @@ bool EchoCancellationImpl::stream_has_echo() const {
}
int EchoCancellationImpl::enable_delay_logging(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
delay_logging_enabled_ = enable;
return Configure();
}
@ -294,9 +280,23 @@ bool EchoCancellationImpl::is_delay_logging_enabled() const {
return delay_logging_enabled_;
}
bool EchoCancellationImpl::is_delay_agnostic_enabled() const {
return delay_agnostic_enabled_;
}
bool EchoCancellationImpl::is_extended_filter_enabled() const {
return extended_filter_enabled_;
}
// TODO(bjornv): How should we handle the multi-channel case?
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
CriticalSectionScoped crit_scoped(*apm_->crit());
float fraction_poor_delays = 0;
return GetDelayMetrics(median, std, &fraction_poor_delays);
}
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std,
float* fraction_poor_delays) {
CriticalSectionScoped crit_scoped(crit_);
if (median == NULL) {
return apm_->kNullPointerError;
}
@ -309,7 +309,7 @@ int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
}
Handle* my_handle = static_cast<Handle*>(handle(0));
if (WebRtcAec_GetDelayMetrics(my_handle, median, std) !=
if (WebRtcAec_GetDelayMetrics(my_handle, median, std, fraction_poor_delays) !=
apm_->kNoError) {
return GetHandleError(my_handle);
}
@ -317,47 +317,47 @@ int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
return apm_->kNoError;
}
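The three-argument GetDelayMetrics() overload added here also surfaces the fraction of poor delays; the legacy two-argument form now simply discards it. A usage sketch, assuming an initialized |apm| with AEC and delay logging enabled:

// Hypothetical caller polling delay metrics after a processed stream.
int median_ms = 0;
int std_ms = 0;
float fraction_poor_delays = 0.f;
if (apm->echo_cancellation()->GetDelayMetrics(
        &median_ms, &std_ms, &fraction_poor_delays) ==
    webrtc::AudioProcessing::kNoError) {
  // A large |fraction_poor_delays| suggests the reported system delays are
  // unreliable; the delay-agnostic mode targets such platforms.
}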
struct AecCore* EchoCancellationImpl::aec_core() const {
CriticalSectionScoped crit_scoped(crit_);
if (!is_component_enabled()) {
return NULL;
}
Handle* my_handle = static_cast<Handle*>(handle(0));
return WebRtcAec_aec_core(my_handle);
}
int EchoCancellationImpl::Initialize() {
int err = ProcessingComponent::Initialize();
if (err != apm_->kNoError || !is_component_enabled()) {
return err;
}
was_stream_drift_set_ = false;
return apm_->kNoError;
}
int EchoCancellationImpl::get_version(char* version,
int version_len_bytes) const {
if (WebRtcAec_get_version(version, version_len_bytes) != 0) {
return apm_->kBadParameterError;
}
return apm_->kNoError;
void EchoCancellationImpl::SetExtraOptions(const Config& config) {
extended_filter_enabled_ = config.Get<ExtendedFilter>().enabled;
delay_agnostic_enabled_ = config.Get<DelayAgnostic>().enabled;
Configure();
}
void* EchoCancellationImpl::CreateHandle() const {
Handle* handle = NULL;
if (WebRtcAec_Create(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
return WebRtcAec_Create();
}
int EchoCancellationImpl::DestroyHandle(void* handle) const {
void EchoCancellationImpl::DestroyHandle(void* handle) const {
assert(handle != NULL);
return WebRtcAec_Free(static_cast<Handle*>(handle));
WebRtcAec_Free(static_cast<Handle*>(handle));
}
int EchoCancellationImpl::InitializeHandle(void* handle) const {
assert(handle != NULL);
// TODO(ajm): Drift compensation is disabled in practice. If restored, it
// should be managed internally and not depend on the hardware sample rate.
// For now, just hardcode a 48 kHz value.
return WebRtcAec_Init(static_cast<Handle*>(handle),
apm_->sample_rate_hz(),
device_sample_rate_hz_);
apm_->proc_sample_rate_hz(),
48000);
}
int EchoCancellationImpl::ConfigureHandle(void* handle) const {
@ -368,6 +368,12 @@ int EchoCancellationImpl::ConfigureHandle(void* handle) const {
config.skewMode = drift_compensation_enabled_;
config.delay_logging = delay_logging_enabled_;
WebRtcAec_enable_extended_filter(
WebRtcAec_aec_core(static_cast<Handle*>(handle)),
extended_filter_enabled_ ? 1 : 0);
WebRtcAec_enable_delay_agnostic(
WebRtcAec_aec_core(static_cast<Handle*>(handle)),
delay_agnostic_enabled_ ? 1 : 0);
return WebRtcAec_set_config(static_cast<Handle*>(handle), config);
}


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,69 +8,79 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class EchoCancellationImpl : public EchoCancellation,
public ProcessingComponent {
public:
explicit EchoCancellationImpl(const AudioProcessingImpl* apm);
EchoCancellationImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~EchoCancellationImpl();
int ProcessRenderAudio(const AudioBuffer* audio);
int ProcessCaptureAudio(AudioBuffer* audio);
// EchoCancellation implementation.
virtual bool is_enabled() const;
virtual int device_sample_rate_hz() const;
virtual int stream_drift_samples() const;
bool is_enabled() const override;
int stream_drift_samples() const override;
SuppressionLevel suppression_level() const override;
bool is_drift_compensation_enabled() const override;
// ProcessingComponent implementation.
virtual int Initialize();
virtual int get_version(char* version, int version_len_bytes) const;
int Initialize() override;
void SetExtraOptions(const Config& config) override;
bool is_delay_agnostic_enabled() const;
bool is_extended_filter_enabled() const;
private:
// EchoCancellation implementation.
virtual int Enable(bool enable);
virtual int enable_drift_compensation(bool enable);
virtual bool is_drift_compensation_enabled() const;
virtual int set_device_sample_rate_hz(int rate);
virtual int set_stream_drift_samples(int drift);
virtual int set_suppression_level(SuppressionLevel level);
virtual SuppressionLevel suppression_level() const;
virtual int enable_metrics(bool enable);
virtual bool are_metrics_enabled() const;
virtual bool stream_has_echo() const;
virtual int GetMetrics(Metrics* metrics);
virtual int enable_delay_logging(bool enable);
virtual bool is_delay_logging_enabled() const;
virtual int GetDelayMetrics(int* median, int* std);
int Enable(bool enable) override;
int enable_drift_compensation(bool enable) override;
void set_stream_drift_samples(int drift) override;
int set_suppression_level(SuppressionLevel level) override;
int enable_metrics(bool enable) override;
bool are_metrics_enabled() const override;
bool stream_has_echo() const override;
int GetMetrics(Metrics* metrics) override;
int enable_delay_logging(bool enable) override;
bool is_delay_logging_enabled() const override;
int GetDelayMetrics(int* median, int* std) override;
int GetDelayMetrics(int* median,
int* std,
float* fraction_poor_delays) override;
struct AecCore* aec_core() const override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
bool drift_compensation_enabled_;
bool metrics_enabled_;
SuppressionLevel suppression_level_;
int device_sample_rate_hz_;
int stream_drift_samples_;
bool was_stream_drift_set_;
bool stream_has_echo_;
bool delay_logging_enabled_;
bool extended_filter_enabled_;
bool delay_agnostic_enabled_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CANCELLATION_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CANCELLATION_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,23 +8,22 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "echo_control_mobile_impl.h"
#include "webrtc/modules/audio_processing/echo_control_mobile_impl.h"
#include <cassert>
#include <cstring>
#include <assert.h>
#include <string.h>
#include "critical_section_wrapper.h"
#include "echo_control_mobile.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/system_wrappers/interface/logging.h"
namespace webrtc {
typedef void Handle;
namespace {
WebRtc_Word16 MapSetting(EchoControlMobile::RoutingMode mode) {
int16_t MapSetting(EchoControlMobile::RoutingMode mode) {
switch (mode) {
case EchoControlMobile::kQuietEarpieceOrHeadset:
return 0;
@ -36,12 +35,12 @@ WebRtc_Word16 MapSetting(EchoControlMobile::RoutingMode mode) {
return 3;
case EchoControlMobile::kLoudSpeakerphone:
return 4;
default:
return -1;
}
assert(false);
return -1;
}
int MapError(int err) {
AudioProcessing::Error MapError(int err) {
switch (err) {
case AECM_UNSUPPORTED_FUNCTION_ERROR:
return AudioProcessing::kUnsupportedFunctionError;
@ -63,9 +62,11 @@ size_t EchoControlMobile::echo_path_size_bytes() {
return WebRtcAecm_echo_path_size_bytes();
}
EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
EchoControlMobileImpl::EchoControlMobileImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
routing_mode_(kSpeakerphone),
comfort_noise_enabled_(true),
external_echo_path_(NULL) {}
@ -82,7 +83,7 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_reverse_channels());
int err = apm_->kNoError;
@ -94,8 +95,8 @@ int EchoControlMobileImpl::ProcessRenderAudio(const AudioBuffer* audio) {
Handle* my_handle = static_cast<Handle*>(handle(handle_index));
err = WebRtcAecm_BufferFarend(
my_handle,
audio->low_pass_split_data(j),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->split_bands_const(j)[kBand0To8kHz],
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle); // TODO(ajm): warning possible?
@ -117,7 +118,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kStreamParameterNotSetError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == apm_->num_output_channels());
int err = apm_->kNoError;
@ -127,8 +128,8 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
for (int i = 0; i < audio->num_channels(); i++) {
// TODO(ajm): improve how this works, possibly inside AECM.
// This is kind of hacked up.
WebRtc_Word16* noisy = audio->low_pass_reference(i);
WebRtc_Word16* clean = audio->low_pass_split_data(i);
const int16_t* noisy = audio->low_pass_reference(i);
const int16_t* clean = audio->split_bands_const(i)[kBand0To8kHz];
if (noisy == NULL) {
noisy = clean;
clean = NULL;
@ -139,8 +140,8 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
my_handle,
noisy,
clean,
audio->low_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
audio->split_bands(i)[kBand0To8kHz],
audio->num_frames_per_band(),
apm_->stream_delay_ms());
if (err != apm_->kNoError) {
@ -155,7 +156,7 @@ int EchoControlMobileImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
int EchoControlMobileImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
// Ensure AEC and AECM are not both enabled.
if (enable && apm_->echo_cancellation()->is_enabled()) {
return apm_->kBadParameterError;
@ -169,7 +170,7 @@ bool EchoControlMobileImpl::is_enabled() const {
}
int EchoControlMobileImpl::set_routing_mode(RoutingMode mode) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(mode) == -1) {
return apm_->kBadParameterError;
}
@ -184,7 +185,7 @@ EchoControlMobile::RoutingMode EchoControlMobileImpl::routing_mode()
}
int EchoControlMobileImpl::enable_comfort_noise(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
comfort_noise_enabled_ = enable;
return Configure();
}
@ -195,7 +196,7 @@ bool EchoControlMobileImpl::is_comfort_noise_enabled() const {
int EchoControlMobileImpl::SetEchoPath(const void* echo_path,
size_t size_bytes) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (echo_path == NULL) {
return apm_->kNullPointerError;
}
@ -214,7 +215,7 @@ int EchoControlMobileImpl::SetEchoPath(const void* echo_path,
int EchoControlMobileImpl::GetEchoPath(void* echo_path,
size_t size_bytes) const {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (echo_path == NULL) {
return apm_->kNullPointerError;
}
@ -240,42 +241,26 @@ int EchoControlMobileImpl::Initialize() {
return apm_->kNoError;
}
if (apm_->sample_rate_hz() == apm_->kSampleRate32kHz) {
// AECM doesn't support super-wideband.
if (apm_->proc_sample_rate_hz() > apm_->kSampleRate16kHz) {
LOG(LS_ERROR) << "AECM only supports 16 kHz or lower sample rates";
return apm_->kBadSampleRateError;
}
return ProcessingComponent::Initialize();
}
int EchoControlMobileImpl::get_version(char* version,
int version_len_bytes) const {
if (WebRtcAecm_get_version(version, version_len_bytes) != 0) {
return apm_->kBadParameterError;
}
return apm_->kNoError;
}
void* EchoControlMobileImpl::CreateHandle() const {
Handle* handle = NULL;
if (WebRtcAecm_Create(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
return WebRtcAecm_Create();
}
int EchoControlMobileImpl::DestroyHandle(void* handle) const {
return WebRtcAecm_Free(static_cast<Handle*>(handle));
void EchoControlMobileImpl::DestroyHandle(void* handle) const {
WebRtcAecm_Free(static_cast<Handle*>(handle));
}
int EchoControlMobileImpl::InitializeHandle(void* handle) const {
assert(handle != NULL);
Handle* my_handle = static_cast<Handle*>(handle);
if (WebRtcAecm_Init(my_handle, apm_->sample_rate_hz()) != 0) {
if (WebRtcAecm_Init(my_handle, apm_->proc_sample_rate_hz()) != 0) {
return GetHandleError(my_handle);
}
if (external_echo_path_ != NULL) {


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,55 +8,57 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class EchoControlMobileImpl : public EchoControlMobile,
public ProcessingComponent {
public:
explicit EchoControlMobileImpl(const AudioProcessingImpl* apm);
EchoControlMobileImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~EchoControlMobileImpl();
int ProcessRenderAudio(const AudioBuffer* audio);
int ProcessCaptureAudio(AudioBuffer* audio);
// EchoControlMobile implementation.
virtual bool is_enabled() const;
bool is_enabled() const override;
RoutingMode routing_mode() const override;
bool is_comfort_noise_enabled() const override;
// ProcessingComponent implementation.
virtual int Initialize();
virtual int get_version(char* version, int version_len_bytes) const;
int Initialize() override;
private:
// EchoControlMobile implementation.
virtual int Enable(bool enable);
virtual int set_routing_mode(RoutingMode mode);
virtual RoutingMode routing_mode() const;
virtual int enable_comfort_noise(bool enable);
virtual bool is_comfort_noise_enabled() const;
virtual int SetEchoPath(const void* echo_path, size_t size_bytes);
virtual int GetEchoPath(void* echo_path, size_t size_bytes) const;
int Enable(bool enable) override;
int set_routing_mode(RoutingMode mode) override;
int enable_comfort_noise(bool enable) override;
int SetEchoPath(const void* echo_path, size_t size_bytes) override;
int GetEchoPath(void* echo_path, size_t size_bytes) const override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
RoutingMode routing_mode_;
bool comfort_noise_enabled_;
unsigned char* external_echo_path_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_ECHO_CONTROL_MOBILE_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_ECHO_CONTROL_MOBILE_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,54 +8,38 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "gain_control_impl.h"
#include "webrtc/modules/audio_processing/gain_control_impl.h"
#include <cassert>
#include <assert.h>
#include "critical_section_wrapper.h"
#include "gain_control.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/agc/legacy/gain_control.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
namespace webrtc {
typedef void Handle;
/*template <class T>
class GainControlHandle : public ComponentHandle<T> {
public:
GainControlHandle();
virtual ~GainControlHandle();
virtual int Create();
virtual T* ptr() const;
private:
T* handle;
};*/
namespace {
WebRtc_Word16 MapSetting(GainControl::Mode mode) {
int16_t MapSetting(GainControl::Mode mode) {
switch (mode) {
case GainControl::kAdaptiveAnalog:
return kAgcModeAdaptiveAnalog;
break;
case GainControl::kAdaptiveDigital:
return kAgcModeAdaptiveDigital;
break;
case GainControl::kFixedDigital:
return kAgcModeFixedDigital;
break;
default:
return -1;
}
assert(false);
return -1;
}
} // namespace
GainControlImpl::GainControlImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
GainControlImpl::GainControlImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
mode_(kAdaptiveAnalog),
minimum_capture_level_(0),
maximum_capture_level_(255),
@ -73,20 +57,14 @@ int GainControlImpl::ProcessRenderAudio(AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
if (audio->num_channels() > 1) {
audio->CopyAndMixLowPass(1);
mixed_data = audio->mixed_low_pass_data(0);
}
assert(audio->num_frames_per_band() <= 160);
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
int err = WebRtcAgc_AddFarend(
my_handle,
mixed_data,
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->mixed_low_pass_data(),
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -101,19 +79,20 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
int err = apm_->kNoError;
if (mode_ == kAdaptiveAnalog) {
capture_levels_.assign(num_handles(), analog_capture_level_);
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
err = WebRtcAgc_AddMic(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()));
audio->split_bands(i),
audio->num_bands(),
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -123,14 +102,13 @@ int GainControlImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
WebRtc_Word32 capture_level_out = 0;
int32_t capture_level_out = 0;
err = WebRtcAgc_VirtualMic(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
//capture_levels_[i],
audio->split_bands(i),
audio->num_bands(),
audio->num_frames_per_band(),
analog_capture_level_,
&capture_level_out);
@ -155,22 +133,21 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kStreamParameterNotSetError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
stream_is_saturated_ = false;
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
WebRtc_Word32 capture_level_out = 0;
WebRtc_UWord8 saturation_warning = 0;
int32_t capture_level_out = 0;
uint8_t saturation_warning = 0;
int err = WebRtcAgc_Process(
my_handle,
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
static_cast<WebRtc_Word16>(audio->samples_per_split_channel()),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->split_bands_const(i),
audio->num_bands(),
audio->num_frames_per_band(),
audio->split_bands(i),
capture_levels_[i],
&capture_level_out,
apm_->echo_cancellation()->stream_has_echo(),
@ -202,17 +179,11 @@ int GainControlImpl::ProcessCaptureAudio(AudioBuffer* audio) {
// TODO(ajm): ensure this is called under kAdaptiveAnalog.
int GainControlImpl::set_stream_analog_level(int level) {
CriticalSectionScoped crit_scoped(crit_);
was_analog_level_set_ = true;
if (level < minimum_capture_level_ || level > maximum_capture_level_) {
return apm_->kBadParameterError;
}
if (mode_ == kAdaptiveAnalog) {
if (level != analog_capture_level_) {
// The analog level has been changed; update our internal levels.
capture_levels_.assign(num_handles(), level);
}
}
analog_capture_level_ = level;
return apm_->kNoError;
@ -226,7 +197,7 @@ int GainControlImpl::stream_analog_level() {
}
int GainControlImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
@ -235,7 +206,7 @@ bool GainControlImpl::is_enabled() const {
}
int GainControlImpl::set_mode(Mode mode) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(mode) == -1) {
return apm_->kBadParameterError;
}
@ -250,7 +221,7 @@ GainControl::Mode GainControlImpl::mode() const {
int GainControlImpl::set_analog_level_limits(int minimum,
int maximum) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (minimum < 0) {
return apm_->kBadParameterError;
}
@ -282,7 +253,7 @@ bool GainControlImpl::stream_is_saturated() const {
}
int GainControlImpl::set_target_level_dbfs(int level) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (level > 31 || level < 0) {
return apm_->kBadParameterError;
}
@ -296,7 +267,7 @@ int GainControlImpl::target_level_dbfs() const {
}
int GainControlImpl::set_compression_gain_db(int gain) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (gain < 0 || gain > 90) {
return apm_->kBadParameterError;
}
@ -310,7 +281,7 @@ int GainControlImpl::compression_gain_db() const {
}
int GainControlImpl::enable_limiter(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
limiter_enabled_ = enable;
return Configure();
}
@ -325,35 +296,16 @@ int GainControlImpl::Initialize() {
return err;
}
analog_capture_level_ =
(maximum_capture_level_ - minimum_capture_level_) >> 1;
capture_levels_.assign(num_handles(), analog_capture_level_);
was_analog_level_set_ = false;
return apm_->kNoError;
}
int GainControlImpl::get_version(char* version, int version_len_bytes) const {
if (WebRtcAgc_Version(version, version_len_bytes) != 0) {
return apm_->kBadParameterError;
}
return apm_->kNoError;
}
void* GainControlImpl::CreateHandle() const {
Handle* handle = NULL;
if (WebRtcAgc_Create(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
return WebRtcAgc_Create();
}
int GainControlImpl::DestroyHandle(void* handle) const {
return WebRtcAgc_Free(static_cast<Handle*>(handle));
void GainControlImpl::DestroyHandle(void* handle) const {
WebRtcAgc_Free(static_cast<Handle*>(handle));
}
int GainControlImpl::InitializeHandle(void* handle) const {
@ -361,18 +313,18 @@ int GainControlImpl::InitializeHandle(void* handle) const {
minimum_capture_level_,
maximum_capture_level_,
MapSetting(mode_),
apm_->sample_rate_hz());
apm_->proc_sample_rate_hz());
}
int GainControlImpl::ConfigureHandle(void* handle) const {
WebRtcAgc_config_t config;
WebRtcAgcConfig config;
// TODO(ajm): Flip the sign here (since AGC expects a positive value) if we
// change the interface.
//assert(target_level_dbfs_ <= 0);
//config.targetLevelDbfs = static_cast<WebRtc_Word16>(-target_level_dbfs_);
config.targetLevelDbfs = static_cast<WebRtc_Word16>(target_level_dbfs_);
//config.targetLevelDbfs = static_cast<int16_t>(-target_level_dbfs_);
config.targetLevelDbfs = static_cast<int16_t>(target_level_dbfs_);
config.compressionGaindB =
static_cast<WebRtc_Word16>(compression_gain_db_);
static_cast<int16_t>(compression_gain_db_);
config.limiterEnable = limiter_enabled_;
return WebRtcAgc_set_config(static_cast<Handle*>(handle), config);
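The WebRtcAgcConfig filled in above is driven entirely by the public GainControl setters, so a typical client never touches it directly. A sketch with hypothetical values (target level 0-31 dBFS and gain 0-90 dB, per the range checks earlier in this file):

// Hypothetical caller configuring AGC through the public interface.
webrtc::GainControl* agc = apm->gain_control();
agc->set_mode(webrtc::GainControl::kAdaptiveDigital);
agc->set_target_level_dbfs(3);   // Passed through positive; see TODO above.
agc->set_compression_gain_db(9);
agc->enable_limiter(true);
agc->Enable(true);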


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,22 +8,24 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_
#include <vector>
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class GainControlImpl : public GainControl,
public ProcessingComponent {
public:
explicit GainControlImpl(const AudioProcessingImpl* apm);
GainControlImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~GainControlImpl();
int ProcessRenderAudio(AudioBuffer* audio);
@ -31,39 +33,39 @@ class GainControlImpl : public GainControl,
int ProcessCaptureAudio(AudioBuffer* audio);
// ProcessingComponent implementation.
virtual int Initialize();
virtual int get_version(char* version, int version_len_bytes) const;
int Initialize() override;
// GainControl implementation.
virtual bool is_enabled() const;
virtual int stream_analog_level();
bool is_enabled() const override;
int stream_analog_level() override;
bool is_limiter_enabled() const override;
Mode mode() const override;
private:
// GainControl implementation.
virtual int Enable(bool enable);
virtual int set_stream_analog_level(int level);
virtual int set_mode(Mode mode);
virtual Mode mode() const;
virtual int set_target_level_dbfs(int level);
virtual int target_level_dbfs() const;
virtual int set_compression_gain_db(int gain);
virtual int compression_gain_db() const;
virtual int enable_limiter(bool enable);
virtual bool is_limiter_enabled() const;
virtual int set_analog_level_limits(int minimum, int maximum);
virtual int analog_level_minimum() const;
virtual int analog_level_maximum() const;
virtual bool stream_is_saturated() const;
int Enable(bool enable) override;
int set_stream_analog_level(int level) override;
int set_mode(Mode mode) override;
int set_target_level_dbfs(int level) override;
int target_level_dbfs() const override;
int set_compression_gain_db(int gain) override;
int compression_gain_db() const override;
int enable_limiter(bool enable) override;
int set_analog_level_limits(int minimum, int maximum) override;
int analog_level_minimum() const override;
int analog_level_maximum() const override;
bool stream_is_saturated() const override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
Mode mode_;
int minimum_capture_level_;
int maximum_capture_level_;
@ -77,4 +79,4 @@ class GainControlImpl : public GainControl,
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_GAIN_CONTROL_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_GAIN_CONTROL_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,35 +8,34 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "high_pass_filter_impl.h"
#include "webrtc/modules/audio_processing/high_pass_filter_impl.h"
#include <cassert>
#include <assert.h>
#include "critical_section_wrapper.h"
#include "typedefs.h"
#include "signal_processing_library.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "webrtc/typedefs.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
namespace webrtc {
namespace {
const WebRtc_Word16 kFilterCoefficients8kHz[5] =
const int16_t kFilterCoefficients8kHz[5] =
{3798, -7596, 3798, 7807, -3733};
const WebRtc_Word16 kFilterCoefficients[5] =
const int16_t kFilterCoefficients[5] =
{4012, -8024, 4012, 8002, -3913};
struct FilterState {
WebRtc_Word16 y[4];
WebRtc_Word16 x[2];
const WebRtc_Word16* ba;
int16_t y[4];
int16_t x[2];
const int16_t* ba;
};
int InitializeFilter(FilterState* hpf, int sample_rate_hz) {
assert(hpf != NULL);
if (sample_rate_hz == AudioProcessingImpl::kSampleRate8kHz) {
if (sample_rate_hz == AudioProcessing::kSampleRate8kHz) {
hpf->ba = kFilterCoefficients8kHz;
} else {
hpf->ba = kFilterCoefficients;
@ -48,32 +47,28 @@ int InitializeFilter(FilterState* hpf, int sample_rate_hz) {
return AudioProcessing::kNoError;
}
int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
int Filter(FilterState* hpf, int16_t* data, size_t length) {
assert(hpf != NULL);
WebRtc_Word32 tmp_int32 = 0;
WebRtc_Word16* y = hpf->y;
WebRtc_Word16* x = hpf->x;
const WebRtc_Word16* ba = hpf->ba;
int32_t tmp_int32 = 0;
int16_t* y = hpf->y;
int16_t* x = hpf->x;
const int16_t* ba = hpf->ba;
for (int i = 0; i < length; i++) {
for (size_t i = 0; i < length; i++) {
// y[i] = b[0] * x[i] + b[1] * x[i-1] + b[2] * x[i-2]
// + -a[1] * y[i-1] + -a[2] * y[i-2];
tmp_int32 =
WEBRTC_SPL_MUL_16_16(y[1], ba[3]); // -a[1] * y[i-1] (low part)
tmp_int32 +=
WEBRTC_SPL_MUL_16_16(y[3], ba[4]); // -a[2] * y[i-2] (low part)
tmp_int32 = y[1] * ba[3]; // -a[1] * y[i-1] (low part)
tmp_int32 += y[3] * ba[4]; // -a[2] * y[i-2] (low part)
tmp_int32 = (tmp_int32 >> 15);
tmp_int32 +=
WEBRTC_SPL_MUL_16_16(y[0], ba[3]); // -a[1] * y[i-1] (high part)
tmp_int32 +=
WEBRTC_SPL_MUL_16_16(y[2], ba[4]); // -a[2] * y[i-2] (high part)
tmp_int32 += y[0] * ba[3]; // -a[1] * y[i-1] (high part)
tmp_int32 += y[2] * ba[4]; // -a[2] * y[i-2] (high part)
tmp_int32 = (tmp_int32 << 1);
tmp_int32 += WEBRTC_SPL_MUL_16_16(data[i], ba[0]); // b[0]*x[0]
tmp_int32 += WEBRTC_SPL_MUL_16_16(x[0], ba[1]); // b[1]*x[i-1]
tmp_int32 += WEBRTC_SPL_MUL_16_16(x[1], ba[2]); // b[2]*x[i-2]
tmp_int32 += data[i] * ba[0]; // b[0]*x[0]
tmp_int32 += x[0] * ba[1]; // b[1]*x[i-1]
tmp_int32 += x[1] * ba[2]; // b[2]*x[i-2]
// Update state (input part)
x[1] = x[0];
@ -82,21 +77,20 @@ int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
// Update state (filtered part)
y[2] = y[0];
y[3] = y[1];
y[0] = static_cast<WebRtc_Word16>(tmp_int32 >> 13);
y[1] = static_cast<WebRtc_Word16>((tmp_int32 -
WEBRTC_SPL_LSHIFT_W32(static_cast<WebRtc_Word32>(y[0]), 13)) << 2);
y[0] = static_cast<int16_t>(tmp_int32 >> 13);
y[1] = static_cast<int16_t>(
(tmp_int32 - (static_cast<int32_t>(y[0]) << 13)) << 2);
// Rounding in Q12, i.e. add 2^11
tmp_int32 += 2048;
// Saturate (to 2^27) so that the HP filtered signal does not overflow
tmp_int32 = WEBRTC_SPL_SAT(static_cast<WebRtc_Word32>(134217727),
tmp_int32 = WEBRTC_SPL_SAT(static_cast<int32_t>(134217727),
tmp_int32,
static_cast<WebRtc_Word32>(-134217728));
// Convert back to Q0 and use rounding
data[i] = (WebRtc_Word16)WEBRTC_SPL_RSHIFT_W32(tmp_int32, 12);
static_cast<int32_t>(-134217728));
// Convert back to Q0 and use rounding.
data[i] = (int16_t)(tmp_int32 >> 12);
}
return AudioProcessing::kNoError;
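For readability: the fixed-point loop above is a direct-form biquad whose coefficients are Q12 (divide by 4096 to get the float values). A plain floating-point model of the same recurrence — a sketch only, omitting the split high/low state words and the saturation step — looks like this:

#include <cstddef>

// Floating-point model of the high-pass biquad above. |b| holds b[0..2] and
// |minus_a| holds -a[1], -a[2] (the Q12 coefficients divided by 4096).
void HighPassFloat(const float b[3], const float minus_a[2],
                   float x_state[2], float y_state[2],
                   float* data, size_t length) {
  for (size_t i = 0; i < length; ++i) {
    const float y = b[0] * data[i] + b[1] * x_state[0] + b[2] * x_state[1] +
                    minus_a[0] * y_state[0] + minus_a[1] * y_state[1];
    x_state[1] = x_state[0];  // x[i-2] <- x[i-1]
    x_state[0] = data[i];     // x[i-1] <- x[i]
    y_state[1] = y_state[0];  // y[i-2] <- y[i-1]
    y_state[0] = y;           // y[i-1] <- y[i]
    data[i] = y;
  }
}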
@ -105,9 +99,11 @@ int Filter(FilterState* hpf, WebRtc_Word16* data, int length) {
typedef FilterState Handle;
HighPassFilterImpl::HighPassFilterImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
apm_(apm) {}
HighPassFilterImpl::HighPassFilterImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit) {}
HighPassFilterImpl::~HighPassFilterImpl() {}
@ -118,13 +114,13 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
err = Filter(my_handle,
audio->low_pass_split_data(i),
audio->samples_per_split_channel());
audio->split_bands(i)[kBand0To8kHz],
audio->num_frames_per_band());
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
@ -135,7 +131,7 @@ int HighPassFilterImpl::ProcessCaptureAudio(AudioBuffer* audio) {
}
int HighPassFilterImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
@ -143,25 +139,17 @@ bool HighPassFilterImpl::is_enabled() const {
return is_component_enabled();
}
int HighPassFilterImpl::get_version(char* version,
int version_len_bytes) const {
// An empty string is used to indicate no version information.
memset(version, 0, version_len_bytes);
return apm_->kNoError;
}
void* HighPassFilterImpl::CreateHandle() const {
return new FilterState;
}
int HighPassFilterImpl::DestroyHandle(void* handle) const {
void HighPassFilterImpl::DestroyHandle(void* handle) const {
delete static_cast<Handle*>(handle);
return apm_->kNoError;
}
int HighPassFilterImpl::InitializeHandle(void* handle) const {
return InitializeFilter(static_cast<Handle*>(handle),
apm_->sample_rate_hz());
apm_->proc_sample_rate_hz());
}
int HighPassFilterImpl::ConfigureHandle(void* /*handle*/) const {


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,44 +8,43 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class HighPassFilterImpl : public HighPassFilter,
public ProcessingComponent {
public:
explicit HighPassFilterImpl(const AudioProcessingImpl* apm);
HighPassFilterImpl(const AudioProcessing* apm, CriticalSectionWrapper* crit);
virtual ~HighPassFilterImpl();
int ProcessCaptureAudio(AudioBuffer* audio);
// HighPassFilter implementation.
virtual bool is_enabled() const;
// ProcessingComponent implementation.
virtual int get_version(char* version, int version_len_bytes) const;
bool is_enabled() const override;
private:
// HighPassFilter implementation.
virtual int Enable(bool enable);
int Enable(bool enable) override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_HIGH_PASS_FILTER_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_HIGH_PASS_FILTER_IMPL_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,17 +8,31 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_INTERFACE_AUDIO_PROCESSING_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_
#include <stddef.h> // size_t
#include <stddef.h> // size_t
#include <stdio.h> // FILE
#include <vector>
#include "typedefs.h"
#include "module.h"
#include "webrtc/base/arraysize.h"
#include "webrtc/base/platform_file.h"
#include "webrtc/common.h"
#include "webrtc/modules/audio_processing/beamformer/array_util.h"
#include "webrtc/typedefs.h"
struct AecCore;
namespace webrtc {
class AudioFrame;
template<typename T>
class Beamformer;
class StreamConfig;
class ProcessingConfig;
class EchoCancellation;
class EchoControlMobile;
class GainControl;
@ -27,6 +41,94 @@ class LevelEstimator;
class NoiseSuppression;
class VoiceDetection;
// Use to enable the extended filter mode in the AEC, along with robustness
// measures around the reported system delays. It comes with a significant
// increase in AEC complexity, but is much more robust to unreliable reported
// delays.
//
// Detailed changes to the algorithm:
// - The filter length is changed from 48 to 128 ms. This comes with tuning of
// several parameters: i) filter adaptation stepsize and error threshold;
// ii) non-linear processing smoothing and overdrive.
// - Option to ignore the reported delays on platforms which we deem
// sufficiently unreliable. See WEBRTC_UNTRUSTED_DELAY in echo_cancellation.c.
// - Faster startup times by removing the excessive "startup phase" processing
// of reported delays.
// - Much more conservative adjustments to the far-end read pointer. We smooth
// the delay difference more heavily, and back off from the difference more.
// Adjustments force a readaptation of the filter, so they should be avoided
// except when really necessary.
struct ExtendedFilter {
ExtendedFilter() : enabled(false) {}
explicit ExtendedFilter(bool enabled) : enabled(enabled) {}
bool enabled;
};
// Enables delay-agnostic echo cancellation. This feature relies on internally
// estimated delays between the process and reverse streams, thus not relying
// on reported system delays. This configuration only applies to
// EchoCancellation and not EchoControlMobile. It can be set in the constructor
// or using AudioProcessing::SetExtraOptions().
struct DelayAgnostic {
DelayAgnostic() : enabled(false) {}
explicit DelayAgnostic(bool enabled) : enabled(enabled) {}
bool enabled;
};
// Use to enable experimental gain control (AGC). At startup the experimental
// AGC moves the microphone volume up to |startup_min_volume| if the current
// microphone volume is set too low. The value is clamped to its operating range
// [12, 255]. Here, 255 maps to 100%.
//
// Must be provided through AudioProcessing::Create(Config&).
#if defined(WEBRTC_CHROMIUM_BUILD)
static const int kAgcStartupMinVolume = 85;
#else
static const int kAgcStartupMinVolume = 0;
#endif // defined(WEBRTC_CHROMIUM_BUILD)
struct ExperimentalAgc {
ExperimentalAgc() : enabled(true), startup_min_volume(kAgcStartupMinVolume) {}
explicit ExperimentalAgc(bool enabled)
: enabled(enabled), startup_min_volume(kAgcStartupMinVolume) {}
ExperimentalAgc(bool enabled, int startup_min_volume)
: enabled(enabled), startup_min_volume(startup_min_volume) {}
bool enabled;
int startup_min_volume;
};
// Use to enable experimental noise suppression. It can be set in the
// constructor or using AudioProcessing::SetExtraOptions().
struct ExperimentalNs {
ExperimentalNs() : enabled(false) {}
explicit ExperimentalNs(bool enabled) : enabled(enabled) {}
bool enabled;
};
// Use to enable beamforming. Must be provided through the constructor. It will
// have no impact if used with AudioProcessing::SetExtraOptions().
struct Beamforming {
Beamforming()
: enabled(false),
array_geometry() {}
Beamforming(bool enabled, const std::vector<Point>& array_geometry)
: enabled(enabled),
array_geometry(array_geometry) {}
const bool enabled;
const std::vector<Point> array_geometry;
};
// Use to enable the intelligibility enhancer in audio processing. Must be
// provided through the constructor. It will have no impact if used with
// AudioProcessing::SetExtraOptions().
//
// Note: If enabled and the reverse stream has more than one output channel,
// the reverse stream will become an upmixed mono signal.
struct Intelligibility {
Intelligibility() : enabled(false) {}
explicit Intelligibility(bool enabled) : enabled(enabled) {}
bool enabled;
};
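Note the split among these options: ExperimentalAgc and Beamforming only take effect when passed to Create(config), while ExtendedFilter, DelayAgnostic and ExperimentalNs can also be applied later via SetExtraOptions(). A create-time sketch (the two-mic geometry is hypothetical):

// Hypothetical setup passing create-time-only options to the factory.
webrtc::Config config;
config.Set<webrtc::ExperimentalAgc>(new webrtc::ExperimentalAgc(true, 85));
std::vector<webrtc::Point> geometry;
geometry.push_back(webrtc::Point(-0.05f, 0.f, 0.f));  // 10 cm mic spacing.
geometry.push_back(webrtc::Point(0.05f, 0.f, 0.f));
config.Set<webrtc::Beamforming>(new webrtc::Beamforming(true, geometry));
rtc::scoped_ptr<webrtc::AudioProcessing> apm(
    webrtc::AudioProcessing::Create(config));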
// The Audio Processing Module (APM) provides a collection of voice processing
// components designed for real-time communications software.
//
@ -56,16 +158,12 @@ class VoiceDetection;
// 2. Parameter getters are never called concurrently with the corresponding
// setter.
//
// APM accepts only 16-bit linear PCM audio data in frames of 10 ms. Multiple
// channels should be interleaved.
// APM accepts only linear PCM audio data in chunks of 10 ms. The int16
// interfaces use interleaved data, while the float interfaces use deinterleaved
// data.
//
// Usage example, omitting error checking:
// AudioProcessing* apm = AudioProcessing::Create(0);
// apm->set_sample_rate_hz(32000); // Super-wideband processing.
//
// // Mono capture and stereo render.
// apm->set_num_channels(1, 1);
// apm->set_num_reverse_channels(2);
//
// apm->high_pass_filter()->Enable(true);
//
@ -102,44 +200,84 @@ class VoiceDetection;
// apm->Initialize();
//
// // Close the application...
// AudioProcessing::Destroy(apm);
// apm = NULL;
// delete apm;
//
class AudioProcessing : public Module {
class AudioProcessing {
public:
// Creates a APM instance, with identifier |id|. Use one instance for every
// primary audio stream requiring processing. On the client-side, this would
// typically be one instance for the near-end stream, and additional instances
// for each far-end stream which requires processing. On the server-side,
// this would typically be one instance for every incoming stream.
static AudioProcessing* Create(int id);
// TODO(mgraczyk): Remove once all methods that use ChannelLayout are gone.
enum ChannelLayout {
kMono,
// Left, right.
kStereo,
// Mono, keyboard mic.
kMonoAndKeyboard,
// Left, right, keyboard mic.
kStereoAndKeyboard
};
// Destroys a |apm| instance.
static void Destroy(AudioProcessing* apm);
// Creates an APM instance. Use one instance for every primary audio stream
// requiring processing. On the client-side, this would typically be one
// instance for the near-end stream, and additional instances for each far-end
// stream which requires processing. On the server-side, this would typically
// be one instance for every incoming stream.
static AudioProcessing* Create();
// Allows passing in an optional configuration at create-time.
static AudioProcessing* Create(const Config& config);
// Only for testing.
static AudioProcessing* Create(const Config& config,
Beamformer<float>* beamformer);
virtual ~AudioProcessing() {}
// Initializes internal states, while retaining all user settings. This
// should be called before beginning to process a new audio stream. However,
// it is not necessary to call before processing the first stream after
// creation.
//
// It is also not necessary to call if the audio parameters (sample
// rate and number of channels) have changed. Passing updated parameters
// directly to |ProcessStream()| and |AnalyzeReverseStream()| is permissible.
// If the parameters are known at init-time though, they may be provided.
virtual int Initialize() = 0;
// Sets the sample |rate| in Hz for both the primary and reverse audio
// streams. 8000, 16000 or 32000 Hz are permitted.
virtual int set_sample_rate_hz(int rate) = 0;
virtual int sample_rate_hz() const = 0;
// The int16 interfaces require:
// - that only |NativeRate|s be used
// - that the input, output and reverse rates match
// - that |processing_config.output_stream()| matches
// |processing_config.input_stream()|.
//
// The float interfaces accept arbitrary rates and support differing input and
// output layouts, but the output must have either one channel or the same
// number of channels as the input.
virtual int Initialize(const ProcessingConfig& processing_config) = 0;
// Sets the number of channels for the primary audio stream. Input frames must
// contain a number of channels given by |input_channels|, while output frames
// will be returned with number of channels given by |output_channels|.
virtual int set_num_channels(int input_channels, int output_channels) = 0;
// Initialize with unpacked parameters. See Initialize() above for details.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int Initialize(int input_sample_rate_hz,
int output_sample_rate_hz,
int reverse_sample_rate_hz,
ChannelLayout input_layout,
ChannelLayout output_layout,
ChannelLayout reverse_layout) = 0;
// Pass down additional options which don't have explicit setters. This
// ensures the options are applied immediately.
virtual void SetExtraOptions(const Config& config) = 0;
// TODO(ajm): Only intended for internal use. Make private and friend the
// necessary classes?
virtual int proc_sample_rate_hz() const = 0;
virtual int proc_split_sample_rate_hz() const = 0;
virtual int num_input_channels() const = 0;
virtual int num_output_channels() const = 0;
virtual int num_reverse_channels() const = 0;
// Set to true when the output of AudioProcessing will be muted or in some
// other way not used. Ideally, the captured audio would still be processed,
// but some components may change behavior based on this information.
// Default false.
virtual void set_output_will_be_muted(bool muted) = 0;
// Processes a 10 ms |frame| of the primary audio stream. On the client-side,
// this is the near-end (or captured) audio.
//
@ -147,11 +285,40 @@ class AudioProcessing : public Module {
// must be called prior to processing the current frame. Any getter function
// with the stream_ tag which is needed should be called after processing.
//
// The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
// members of |frame| must be valid. If changed from the previous call to this
// method, it will trigger an initialization.
virtual int ProcessStream(AudioFrame* frame) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |src| points to a channel buffer, arranged according to
// |input_layout|. At output, the channels will be arranged according to
// |output_layout| at |output_sample_rate_hz| in |dest|.
//
// The output layout must have one channel or as many channels as the input.
// |src| and |dest| may use the same memory, if desired.
//
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int ProcessStream(const float* const* src,
size_t samples_per_channel,
int input_sample_rate_hz,
ChannelLayout input_layout,
int output_sample_rate_hz,
ChannelLayout output_layout,
float* const* dest) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |src| points to a channel buffer, arranged according to |input_stream|. At
// output, the channels will be arranged according to |output_stream| in
// |dest|.
//
// The output must have one channel or as many channels as the input. |src|
// and |dest| may use the same memory, if desired.
virtual int ProcessStream(const float* const* src,
const StreamConfig& input_config,
const StreamConfig& output_config,
float* const* dest) = 0;
// Analyzes a 10 ms |frame| of the reverse direction audio stream. The frame
// will not be modified. On the client-side, this is the far-end (or to be
// rendered) audio.
@ -162,12 +329,34 @@ class AudioProcessing : public Module {
// typically will not be used. If you're not sure what to pass in here,
// chances are you don't need to use it.
//
// The |sample_rate_hz_|, |num_channels_|, and |samples_per_channel_|
// members of |frame| must be valid. |sample_rate_hz_| must correspond to
// |input_sample_rate_hz()|
//
// TODO(ajm): add const to input; requires an implementation fix.
// DEPRECATED: Use |ProcessReverseStream| instead.
// TODO(ekm): Remove once all users have updated to |ProcessReverseStream|.
virtual int AnalyzeReverseStream(AudioFrame* frame) = 0;
// Same as |AnalyzeReverseStream|, but may modify |frame| if intelligibility
// is enabled.
virtual int ProcessReverseStream(AudioFrame* frame) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element
// of |data| points to a channel buffer, arranged according to |layout|.
// TODO(mgraczyk): Remove once clients are updated to use the new interface.
virtual int AnalyzeReverseStream(const float* const* data,
size_t samples_per_channel,
int rev_sample_rate_hz,
ChannelLayout layout) = 0;
// Accepts deinterleaved float audio with the range [-1, 1]. Each element of
// |src| points to a channel buffer, arranged according to
// |reverse_input_config|.
virtual int ProcessReverseStream(const float* const* src,
const StreamConfig& reverse_input_config,
const StreamConfig& reverse_output_config,
float* const* dest) = 0;
// This must be called if and only if echo processing is enabled.
//
// Sets the |delay| in ms between AnalyzeReverseStream() receiving a far-end
@ -183,18 +372,46 @@ class AudioProcessing : public Module {
// ProcessStream().
virtual int set_stream_delay_ms(int delay) = 0;
virtual int stream_delay_ms() const = 0;
virtual bool was_stream_delay_set() const = 0;
// Call to signal that a key press occurred (true) or did not occur (false)
// with this chunk of audio.
virtual void set_stream_key_pressed(bool key_pressed) = 0;
// Sets a delay |offset| in ms to add to the values passed in through
// set_stream_delay_ms(). May be positive or negative.
//
// Note that this could cause an otherwise valid value passed to
// set_stream_delay_ms() to return an error.
virtual void set_delay_offset_ms(int offset) = 0;
virtual int delay_offset_ms() const = 0;
// Starts recording debugging information to a file specified by |filename|,
// a NULL-terminated string. If there is an ongoing recording, the old file
// will be closed, and recording will continue in the newly specified file.
// An already existing file will be overwritten without warning.
static const size_t kMaxFilenameSize = 1024;
virtual int StartDebugRecording(const char filename[kMaxFilenameSize]) = 0;
// Same as above but uses an existing file handle. Takes ownership
// of |handle| and closes it at StopDebugRecording().
virtual int StartDebugRecording(FILE* handle) = 0;
// Same as above but uses an existing PlatformFile handle. Takes ownership
// of |handle| and closes it at StopDebugRecording().
// TODO(xians): Make this interface pure virtual.
virtual int StartDebugRecordingForPlatformFile(rtc::PlatformFile handle) {
return -1;
}
// Stops recording debugging information, and closes the file. Recording
// cannot be resumed in the same file (without overwriting it).
virtual int StopDebugRecording() = 0;
// Use to send UMA histograms at end of a call. Note that all histogram
// specific member variables are reset.
virtual void UpdateHistogramsOnCallEnd() = 0;
// These provide access to the component interfaces and should never return
// NULL. The pointers will be valid for the lifetime of the APM instance.
// The memory for these objects is entirely managed internally.
@ -213,8 +430,8 @@ class AudioProcessing : public Module {
int minimum; // Long-term minimum.
};
enum Error {
  // Fatal errors.
kNoError = 0,
kUnspecifiedError = -1,
kCreationFailedError = -2,
@ -227,22 +444,134 @@ class AudioProcessing : public Module {
kBadNumberChannelsError = -9,
kFileError = -10,
kStreamParameterNotSetError = -11,
  kNotEnabledError = -12,

  // Warnings are non-fatal.
  // This results when a set_stream_ parameter is out of range. Processing
  // will continue, but the parameter may have been truncated.
  kBadStreamParameterWarning = -13
};
enum NativeRate {
kSampleRate8kHz = 8000,
kSampleRate16kHz = 16000,
kSampleRate32kHz = 32000,
kSampleRate48kHz = 48000
};
static const int kNativeSampleRatesHz[];
static const size_t kNumNativeSampleRates;
static const int kMaxNativeSampleRateHz;
static const int kMaxAECMSampleRateHz;
static const int kChunkSizeMs = 10;
};
class StreamConfig {
public:
// sample_rate_hz: The sampling rate of the stream.
//
// num_channels: The number of audio channels in the stream, excluding the
// keyboard channel if it is present. When passing a
// StreamConfig with an array of arrays T*[N],
//
// N == {num_channels + 1 if has_keyboard
// {num_channels if !has_keyboard
//
// has_keyboard: True if the stream has a keyboard channel. When has_keyboard
// is true, the last channel in any corresponding list of
// channels is the keyboard channel.
StreamConfig(int sample_rate_hz = 0,
int num_channels = 0,
bool has_keyboard = false)
: sample_rate_hz_(sample_rate_hz),
num_channels_(num_channels),
has_keyboard_(has_keyboard),
num_frames_(calculate_frames(sample_rate_hz)) {}
void set_sample_rate_hz(int value) {
sample_rate_hz_ = value;
num_frames_ = calculate_frames(value);
}
void set_num_channels(int value) { num_channels_ = value; }
void set_has_keyboard(bool value) { has_keyboard_ = value; }
int sample_rate_hz() const { return sample_rate_hz_; }
// The number of channels in the stream, not including the keyboard channel if
// present.
int num_channels() const { return num_channels_; }
bool has_keyboard() const { return has_keyboard_; }
size_t num_frames() const { return num_frames_; }
size_t num_samples() const { return num_channels_ * num_frames_; }
bool operator==(const StreamConfig& other) const {
return sample_rate_hz_ == other.sample_rate_hz_ &&
num_channels_ == other.num_channels_ &&
has_keyboard_ == other.has_keyboard_;
}
bool operator!=(const StreamConfig& other) const { return !(*this == other); }
private:
static size_t calculate_frames(int sample_rate_hz) {
return static_cast<size_t>(
AudioProcessing::kChunkSizeMs * sample_rate_hz / 1000);
}
int sample_rate_hz_;
int num_channels_;
bool has_keyboard_;
size_t num_frames_;
};
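// Example (illustrative, not upstream code): a 10 ms chunk of 48 kHz stereo
// audio without a keyboard channel is described by
//
//   StreamConfig config(48000, 2);  // has_keyboard defaults to false.
//
// for which num_frames() == 480 (kChunkSizeMs * 48000 / 1000) and
// num_samples() == 960 (2 channels * 480 frames).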
class ProcessingConfig {
public:
enum StreamName {
kInputStream,
kOutputStream,
kReverseInputStream,
kReverseOutputStream,
kNumStreamNames,
};
const StreamConfig& input_stream() const {
return streams[StreamName::kInputStream];
}
const StreamConfig& output_stream() const {
return streams[StreamName::kOutputStream];
}
const StreamConfig& reverse_input_stream() const {
return streams[StreamName::kReverseInputStream];
}
const StreamConfig& reverse_output_stream() const {
return streams[StreamName::kReverseOutputStream];
}
StreamConfig& input_stream() { return streams[StreamName::kInputStream]; }
StreamConfig& output_stream() { return streams[StreamName::kOutputStream]; }
StreamConfig& reverse_input_stream() {
return streams[StreamName::kReverseInputStream];
}
StreamConfig& reverse_output_stream() {
return streams[StreamName::kReverseOutputStream];
}
bool operator==(const ProcessingConfig& other) const {
for (int i = 0; i < StreamName::kNumStreamNames; ++i) {
if (this->streams[i] != other.streams[i]) {
return false;
}
}
return true;
}
bool operator!=(const ProcessingConfig& other) const {
return !(*this == other);
}
StreamConfig streams[StreamName::kNumStreamNames];
};
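// Example (illustrative, not upstream code; error handling omitted): driving
// the float interface with the configs above, assuming |apm|, |src| and
// |dest| are provided by the caller:
//
//   ProcessingConfig config;
//   config.input_stream() = StreamConfig(48000, 2);
//   config.output_stream() = StreamConfig(48000, 1);
//   config.reverse_input_stream() = StreamConfig(16000, 1);
//   config.reverse_output_stream() = StreamConfig(16000, 1);
//   apm->Initialize(config);
//
//   // Then, for every 10 ms chunk: |src| holds two channels of 480 float
//   // samples in [-1, 1], and |dest| receives one channel at the same rate.
//   apm->ProcessStream(src, config.input_stream(), config.output_stream(),
//                      dest);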
// The acoustic echo cancellation (AEC) component provides better performance
@ -263,20 +592,14 @@ class EchoCancellation {
// render and capture devices are used, particularly with webcams.
//
// This enables a compensation mechanism, and requires that
// set_stream_drift_samples() be called.
virtual int enable_drift_compensation(bool enable) = 0;
virtual bool is_drift_compensation_enabled() const = 0;
// Sets the difference between the number of samples rendered and captured by
// the audio devices since the last call to |ProcessStream()|. Must be called
// if drift compensation is enabled, prior to |ProcessStream()|.
virtual void set_stream_drift_samples(int drift) = 0;
virtual int stream_drift_samples() const = 0;
enum SuppressionLevel {
@ -328,12 +651,26 @@ class EchoCancellation {
virtual bool is_delay_logging_enabled() const = 0;
// The delay metrics consists of the delay |median| and the delay standard
// deviation |std|. It also consists of the fraction of delay estimates
// |fraction_poor_delays| that can make the echo cancellation perform poorly.
// The values are aggregated until the first call to |GetDelayMetrics()| and
// afterwards aggregated and updated every second.
// Note that if there are several clients pulling metrics from
// |GetDelayMetrics()| during a session the first call from any of them will
// change to one second aggregation window for all.
// TODO(bjornv): Deprecated, remove.
virtual int GetDelayMetrics(int* median, int* std) = 0;
virtual int GetDelayMetrics(int* median, int* std,
float* fraction_poor_delays) = 0;
// Returns a pointer to the low level AEC component. In case of multiple
// channels, the pointer to the first one is returned. A NULL pointer is
// returned when the AEC component is disabled or has not been initialized
// successfully.
virtual struct AecCore* aec_core() const = 0;
protected:
virtual ~EchoCancellation() {}
};
// The acoustic echo control for mobile (AECM) component is a low complexity
@ -389,7 +726,7 @@ class EchoControlMobile {
static size_t echo_path_size_bytes();
protected:
virtual ~EchoControlMobile() {}
};
// The automatic gain control (AGC) component brings the signal to an
@ -481,7 +818,7 @@ class GainControl {
virtual bool stream_is_saturated() const = 0;
protected:
virtual ~GainControl() {}
};
// A filtering component which removes DC offset and low-frequency noise.
@ -492,34 +829,29 @@ class HighPassFilter {
virtual bool is_enabled() const = 0;
protected:
virtual ~HighPassFilter() {}
};
// An estimation component used to retrieve level metrics.
class LevelEstimator {
public:
virtual int Enable(bool enable) = 0;
virtual bool is_enabled() const = 0;
// Returns the root mean square (RMS) level in dBFs (decibels from digital
// full-scale), or alternately dBov. It is computed over all primary stream
// frames since the last call to RMS(). The returned value is positive but
// should be interpreted as negative. It is constrained to [0, 127].
//
// The computation follows: https://tools.ietf.org/html/rfc6465
// with the intent that it can provide the RTP audio level indication.
//
// Frames passed to ProcessStream() with an |_energy| of zero are considered
// to have been muted. The RMS of the frame will be interpreted as -127.
virtual int RMS() = 0;
protected:
virtual ~LevelEstimator() {}
};
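// A worked example of the convention above (illustrative, not upstream text):
// a signal whose RMS amplitude is one tenth of full scale sits at -20 dBFS,
// so RMS() reports 20; a full-scale square wave sits at 0 dBFS and reports 0;
// muted frames are reported as 127.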
// The noise suppression (NS) component attempts to remove noise while
@ -543,8 +875,13 @@ class NoiseSuppression {
virtual int set_level(Level level) = 0;
virtual Level level() const = 0;
// Returns the internally computed prior speech probability of current frame
// averaged over output channels. This is not supported in fixed point, for
// which |kUnsupportedFunctionError| is returned.
virtual float speech_probability() const = 0;
protected:
virtual ~NoiseSuppression() {}
};
// The voice activity detection (VAD) component analyzes the stream to
@ -552,7 +889,7 @@ class NoiseSuppression {
// external VAD decision.
//
// In addition to |stream_has_voice()| the VAD decision is provided through the
// |AudioFrame| passed to |ProcessStream()|. The |vad_activity_| member will be
// modified to reflect the current decision.
class VoiceDetection {
public:
@ -594,8 +931,8 @@ class VoiceDetection {
virtual int frame_size_ms() const = 0;
protected:
virtual ~VoiceDetection() {}
};
} // namespace webrtc
#endif  // WEBRTC_MODULES_AUDIO_PROCESSING_INCLUDE_AUDIO_PROCESSING_H_

View File

@ -0,0 +1,381 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Implements core class for intelligibility enhancer.
//
// Details of the model and algorithm can be found in the original paper:
// http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6882788
//
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
#include <math.h>
#include <stdlib.h>
#include <algorithm>
#include <numeric>
#include "webrtc/base/checks.h"
#include "webrtc/common_audio/include/audio_util.h"
#include "webrtc/common_audio/window_generator.h"
namespace webrtc {
namespace {
const size_t kErbResolution = 2;
const int kWindowSizeMs = 2;
const int kChunkSizeMs = 10; // Size provided by APM.
const float kClipFreq = 200.0f;
const float kConfigRho = 0.02f; // Default production and interpretation SNR.
const float kKbdAlpha = 1.5f;
const float kLambdaBot = -1.0f; // Extreme values in bisection
const float kLambdaTop = -10e-18f; // search for lambda.
} // namespace
using std::complex;
using std::max;
using std::min;
using VarianceType = intelligibility::VarianceArray::StepType;
IntelligibilityEnhancer::TransformCallback::TransformCallback(
IntelligibilityEnhancer* parent,
IntelligibilityEnhancer::AudioSource source)
: parent_(parent), source_(source) {
}
void IntelligibilityEnhancer::TransformCallback::ProcessAudioBlock(
const complex<float>* const* in_block,
int in_channels,
size_t frames,
int /* out_channels */,
complex<float>* const* out_block) {
RTC_DCHECK_EQ(parent_->freqs_, frames);
for (int i = 0; i < in_channels; ++i) {
parent_->DispatchAudio(source_, in_block[i], out_block[i]);
}
}
IntelligibilityEnhancer::IntelligibilityEnhancer()
: IntelligibilityEnhancer(IntelligibilityEnhancer::Config()) {
}
IntelligibilityEnhancer::IntelligibilityEnhancer(const Config& config)
: freqs_(RealFourier::ComplexLength(
RealFourier::FftOrder(config.sample_rate_hz * kWindowSizeMs / 1000))),
window_size_(static_cast<size_t>(1 << RealFourier::FftOrder(freqs_))),
chunk_length_(
static_cast<size_t>(config.sample_rate_hz * kChunkSizeMs / 1000)),
bank_size_(GetBankSize(config.sample_rate_hz, kErbResolution)),
sample_rate_hz_(config.sample_rate_hz),
erb_resolution_(kErbResolution),
num_capture_channels_(config.num_capture_channels),
num_render_channels_(config.num_render_channels),
analysis_rate_(config.analysis_rate),
active_(true),
clear_variance_(freqs_,
config.var_type,
config.var_window_size,
config.var_decay_rate),
noise_variance_(freqs_,
config.var_type,
config.var_window_size,
config.var_decay_rate),
filtered_clear_var_(new float[bank_size_]),
filtered_noise_var_(new float[bank_size_]),
filter_bank_(bank_size_),
center_freqs_(new float[bank_size_]),
rho_(new float[bank_size_]),
gains_eq_(new float[bank_size_]),
gain_applier_(freqs_, config.gain_change_limit),
temp_render_out_buffer_(chunk_length_, num_render_channels_),
temp_capture_out_buffer_(chunk_length_, num_capture_channels_),
kbd_window_(new float[window_size_]),
render_callback_(this, AudioSource::kRenderStream),
capture_callback_(this, AudioSource::kCaptureStream),
block_count_(0),
analysis_step_(0) {
RTC_DCHECK_LE(config.rho, 1.0f);
CreateErbBank();
// Assumes all rho equal.
for (size_t i = 0; i < bank_size_; ++i) {
rho_[i] = config.rho * config.rho;
}
float freqs_khz = kClipFreq / 1000.0f;
size_t erb_index = static_cast<size_t>(ceilf(
11.17f * logf((freqs_khz + 0.312f) / (freqs_khz + 14.6575f)) + 43.0f));
start_freq_ = std::max(static_cast<size_t>(1), erb_index * erb_resolution_);
WindowGenerator::KaiserBesselDerived(kKbdAlpha, window_size_,
kbd_window_.get());
render_mangler_.reset(new LappedTransform(
num_render_channels_, num_render_channels_, chunk_length_,
kbd_window_.get(), window_size_, window_size_ / 2, &render_callback_));
capture_mangler_.reset(new LappedTransform(
num_capture_channels_, num_capture_channels_, chunk_length_,
kbd_window_.get(), window_size_, window_size_ / 2, &capture_callback_));
}
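// A worked example of the |start_freq_| computation in the constructor above
// (illustrative, not upstream text): with kClipFreq = 200 Hz, freqs_khz = 0.2
// and
//   erb_index = ceil(11.17 * ln((0.2 + 0.312) / (0.2 + 14.6575)) + 43.0)
//             = ceil(11.17 * (-3.368) + 43.0) = ceil(5.38) = 6,
// so with kErbResolution = 2, the first start_freq_ = 12 ERB bands are left
// at unity gain in SolveForGainsGivenLambda().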
void IntelligibilityEnhancer::ProcessRenderAudio(float* const* audio,
int sample_rate_hz,
int num_channels) {
RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
RTC_CHECK_EQ(num_render_channels_, num_channels);
if (active_) {
render_mangler_->ProcessChunk(audio, temp_render_out_buffer_.channels());
}
if (active_) {
for (int i = 0; i < num_render_channels_; ++i) {
memcpy(audio[i], temp_render_out_buffer_.channels()[i],
chunk_length_ * sizeof(**audio));
}
}
}
void IntelligibilityEnhancer::AnalyzeCaptureAudio(float* const* audio,
int sample_rate_hz,
int num_channels) {
RTC_CHECK_EQ(sample_rate_hz_, sample_rate_hz);
RTC_CHECK_EQ(num_capture_channels_, num_channels);
capture_mangler_->ProcessChunk(audio, temp_capture_out_buffer_.channels());
}
void IntelligibilityEnhancer::DispatchAudio(
IntelligibilityEnhancer::AudioSource source,
const complex<float>* in_block,
complex<float>* out_block) {
switch (source) {
case kRenderStream:
ProcessClearBlock(in_block, out_block);
break;
case kCaptureStream:
ProcessNoiseBlock(in_block, out_block);
break;
}
}
void IntelligibilityEnhancer::ProcessClearBlock(const complex<float>* in_block,
complex<float>* out_block) {
if (block_count_ < 2) {
memset(out_block, 0, freqs_ * sizeof(*out_block));
++block_count_;
return;
}
// TODO(ekm): Use VAD to |Step| and |AnalyzeClearBlock| only if necessary.
if (true) {
clear_variance_.Step(in_block, false);
if (block_count_ % analysis_rate_ == analysis_rate_ - 1) {
const float power_target = std::accumulate(
clear_variance_.variance(), clear_variance_.variance() + freqs_, 0.f);
AnalyzeClearBlock(power_target);
++analysis_step_;
}
++block_count_;
}
if (active_) {
gain_applier_.Apply(in_block, out_block);
}
}
void IntelligibilityEnhancer::AnalyzeClearBlock(float power_target) {
FilterVariance(clear_variance_.variance(), filtered_clear_var_.get());
FilterVariance(noise_variance_.variance(), filtered_noise_var_.get());
SolveForGainsGivenLambda(kLambdaTop, start_freq_, gains_eq_.get());
const float power_top =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
SolveForGainsGivenLambda(kLambdaBot, start_freq_, gains_eq_.get());
const float power_bot =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
if (power_target >= power_bot && power_target <= power_top) {
SolveForLambda(power_target, power_bot, power_top);
UpdateErbGains();
} // Else experiencing variance underflow, so do nothing.
}
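// A note on SolveForLambda() below (editorial summary, not upstream text): as
// coded, the achieved power grows monotonically with |lambda|, so a plain
// bisection applies. Each iteration halves [lambda_bot, lambda_top], keeping
// the half that still brackets |power_target|, and stops once the achieved
// power is within kConvergeThresh of the target or kMaxIters is reached.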
void IntelligibilityEnhancer::SolveForLambda(float power_target,
float power_bot,
float power_top) {
const float kConvergeThresh = 0.001f; // TODO(ekmeyerson): Find best values
const int kMaxIters = 100; // for these, based on experiments.
const float reciprocal_power_target = 1.f / power_target;
float lambda_bot = kLambdaBot;
float lambda_top = kLambdaTop;
float power_ratio = 2.0f; // Ratio of achieved power to target power.
int iters = 0;
while (std::fabs(power_ratio - 1.0f) > kConvergeThresh &&
iters <= kMaxIters) {
const float lambda = lambda_bot + (lambda_top - lambda_bot) / 2.0f;
SolveForGainsGivenLambda(lambda, start_freq_, gains_eq_.get());
const float power =
DotProduct(gains_eq_.get(), filtered_clear_var_.get(), bank_size_);
if (power < power_target) {
lambda_bot = lambda;
} else {
lambda_top = lambda;
}
power_ratio = std::fabs(power * reciprocal_power_target);
++iters;
}
}
void IntelligibilityEnhancer::UpdateErbGains() {
// (ERB gain) = filterbank' * (freq gain)
float* gains = gain_applier_.target();
for (size_t i = 0; i < freqs_; ++i) {
gains[i] = 0.0f;
for (size_t j = 0; j < bank_size_; ++j) {
gains[i] = fmaf(filter_bank_[j][i], gains_eq_[j], gains[i]);
}
}
}
void IntelligibilityEnhancer::ProcessNoiseBlock(const complex<float>* in_block,
complex<float>* /*out_block*/) {
noise_variance_.Step(in_block);
}
size_t IntelligibilityEnhancer::GetBankSize(int sample_rate,
size_t erb_resolution) {
float freq_limit = sample_rate / 2000.0f;
size_t erb_scale = static_cast<size_t>(ceilf(
11.17f * logf((freq_limit + 0.312f) / (freq_limit + 14.6575f)) + 43.0f));
return erb_scale * erb_resolution;
}
void IntelligibilityEnhancer::CreateErbBank() {
size_t lf = 1, rf = 4;
for (size_t i = 0; i < bank_size_; ++i) {
float abs_temp = fabsf((i + 1.0f) / static_cast<float>(erb_resolution_));
center_freqs_[i] = 676170.4f / (47.06538f - expf(0.08950404f * abs_temp));
center_freqs_[i] -= 14678.49f;
}
float last_center_freq = center_freqs_[bank_size_ - 1];
for (size_t i = 0; i < bank_size_; ++i) {
center_freqs_[i] *= 0.5f * sample_rate_hz_ / last_center_freq;
}
for (size_t i = 0; i < bank_size_; ++i) {
filter_bank_[i].resize(freqs_);
}
for (size_t i = 1; i <= bank_size_; ++i) {
size_t lll, ll, rr, rrr;
static const size_t kOne = 1; // Avoids repeated static_cast<>s below.
lll = static_cast<size_t>(round(
center_freqs_[max(kOne, i - lf) - 1] * freqs_ /
(0.5f * sample_rate_hz_)));
ll = static_cast<size_t>(round(
center_freqs_[max(kOne, i) - 1] * freqs_ / (0.5f * sample_rate_hz_)));
lll = min(freqs_, max(lll, kOne)) - 1;
ll = min(freqs_, max(ll, kOne)) - 1;
rrr = static_cast<size_t>(round(
center_freqs_[min(bank_size_, i + rf) - 1] * freqs_ /
(0.5f * sample_rate_hz_)));
rr = static_cast<size_t>(round(
center_freqs_[min(bank_size_, i + 1) - 1] * freqs_ /
(0.5f * sample_rate_hz_)));
rrr = min(freqs_, max(rrr, kOne)) - 1;
rr = min(freqs_, max(rr, kOne)) - 1;
float step, element;
step = 1.0f / (ll - lll);
element = 0.0f;
for (size_t j = lll; j <= ll; ++j) {
filter_bank_[i - 1][j] = element;
element += step;
}
step = 1.0f / (rrr - rr);
element = 1.0f;
for (size_t j = rr; j <= rrr; ++j) {
filter_bank_[i - 1][j] = element;
element -= step;
}
for (size_t j = ll; j <= rr; ++j) {
filter_bank_[i - 1][j] = 1.0f;
}
}
float sum;
for (size_t i = 0; i < freqs_; ++i) {
sum = 0.0f;
for (size_t j = 0; j < bank_size_; ++j) {
sum += filter_bank_[j][i];
}
for (size_t j = 0; j < bank_size_; ++j) {
filter_bank_[j][i] /= sum;
}
}
}
void IntelligibilityEnhancer::SolveForGainsGivenLambda(float lambda,
size_t start_freq,
float* sols) {
bool quadratic = (kConfigRho < 1.0f);
const float* var_x0 = filtered_clear_var_.get();
const float* var_n0 = filtered_noise_var_.get();
for (size_t n = 0; n < start_freq; ++n) {
sols[n] = 1.0f;
}
// Analytic solution for optimal gains. See paper for derivation.
for (size_t n = start_freq - 1; n < bank_size_; ++n) {
float alpha0, beta0, gamma0;
gamma0 = 0.5f * rho_[n] * var_x0[n] * var_n0[n] +
lambda * var_x0[n] * var_n0[n] * var_n0[n];
beta0 = lambda * var_x0[n] * (2 - rho_[n]) * var_x0[n] * var_n0[n];
if (quadratic) {
alpha0 = lambda * var_x0[n] * (1 - rho_[n]) * var_x0[n] * var_x0[n];
sols[n] =
(-beta0 - sqrtf(beta0 * beta0 - 4 * alpha0 * gamma0)) / (2 * alpha0);
} else {
sols[n] = -gamma0 / beta0;
}
sols[n] = fmax(0, sols[n]);
}
}
void IntelligibilityEnhancer::FilterVariance(const float* var, float* result) {
RTC_DCHECK_GT(freqs_, 0u);
for (size_t i = 0; i < bank_size_; ++i) {
result[i] = DotProduct(&filter_bank_[i][0], var, freqs_);
}
}
float IntelligibilityEnhancer::DotProduct(const float* a,
const float* b,
size_t length) {
float ret = 0.0f;
for (size_t i = 0; i < length; ++i) {
ret = fmaf(a[i], b[i], ret);
}
return ret;
}
bool IntelligibilityEnhancer::active() const {
return active_;
}
} // namespace webrtc

View File

@ -0,0 +1,182 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Specifies core class for intelligibility enhancement.
//
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_
#include <complex>
#include <vector>
#include "webrtc/base/scoped_ptr.h"
#include "webrtc/common_audio/lapped_transform.h"
#include "webrtc/common_audio/channel_buffer.h"
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
namespace webrtc {
// Speech intelligibility enhancement module. Reads render and capture
// audio streams and modifies the render stream with a set of gains per
// frequency bin to enhance speech against the noise background.
// Note: assumes speech and noise streams are already separated.
class IntelligibilityEnhancer {
public:
struct Config {
// |var_*| are parameters for the VarianceArray constructor for the
// clear speech stream.
// TODO(bercic): the |var_*|, |*_rate| and |gain_limit| parameters should
// probably go away once fine tuning is done.
Config()
: sample_rate_hz(16000),
num_capture_channels(1),
num_render_channels(1),
var_type(intelligibility::VarianceArray::kStepDecaying),
var_decay_rate(0.9f),
var_window_size(10),
analysis_rate(800),
gain_change_limit(0.1f),
rho(0.02f) {}
int sample_rate_hz;
int num_capture_channels;
int num_render_channels;
intelligibility::VarianceArray::StepType var_type;
float var_decay_rate;
size_t var_window_size;
int analysis_rate;
float gain_change_limit;
float rho;
};
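// Example (illustrative, not upstream code): a 48 kHz single-channel setup
// that keeps the remaining defaults:
//
//   IntelligibilityEnhancer::Config config;
//   config.sample_rate_hz = 48000;
//   IntelligibilityEnhancer enhancer(config);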
explicit IntelligibilityEnhancer(const Config& config);
IntelligibilityEnhancer(); // Initialize with default config.
// Reads and processes chunk of noise stream in time domain.
void AnalyzeCaptureAudio(float* const* audio,
int sample_rate_hz,
int num_channels);
// Reads chunk of speech in time domain and updates with modified signal.
void ProcessRenderAudio(float* const* audio,
int sample_rate_hz,
int num_channels);
bool active() const;
private:
enum AudioSource {
kRenderStream = 0, // Clear speech stream.
kCaptureStream, // Noise stream.
};
// Provides access point to the frequency domain.
class TransformCallback : public LappedTransform::Callback {
public:
TransformCallback(IntelligibilityEnhancer* parent, AudioSource source);
// All in frequency domain, receives input |in_block|, applies
// intelligibility enhancement, and writes result to |out_block|.
void ProcessAudioBlock(const std::complex<float>* const* in_block,
int in_channels,
size_t frames,
int out_channels,
std::complex<float>* const* out_block) override;
private:
IntelligibilityEnhancer* parent_;
AudioSource source_;
};
friend class TransformCallback;
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestErbCreation);
FRIEND_TEST_ALL_PREFIXES(IntelligibilityEnhancerTest, TestSolveForGains);
// Sends streams to ProcessClearBlock or ProcessNoiseBlock based on source.
void DispatchAudio(AudioSource source,
const std::complex<float>* in_block,
std::complex<float>* out_block);
// Updates variance computation and analysis with |in_block|,
// and writes modified speech to |out_block|.
void ProcessClearBlock(const std::complex<float>* in_block,
std::complex<float>* out_block);
// Computes and sets modified gains.
void AnalyzeClearBlock(float power_target);
// Bisection search for optimal |lambda|.
void SolveForLambda(float power_target, float power_bot, float power_top);
// Transforms freq gains to ERB gains.
void UpdateErbGains();
// Updates variance calculation for noise input with |in_block|.
void ProcessNoiseBlock(const std::complex<float>* in_block,
std::complex<float>* out_block);
// Returns number of ERB filters.
static size_t GetBankSize(int sample_rate, size_t erb_resolution);
// Initializes ERB filterbank.
void CreateErbBank();
// Analytically solves quadratic for optimal gains given |lambda|.
// Negative gains are set to 0. Stores the results in |sols|.
void SolveForGainsGivenLambda(float lambda, size_t start_freq, float* sols);
// Computes variance across ERB filters from freq variance |var|.
// Stores in |result|.
void FilterVariance(const float* var, float* result);
// Returns dot product of vectors specified by size |length| arrays |a|,|b|.
static float DotProduct(const float* a, const float* b, size_t length);
const size_t freqs_; // Num frequencies in frequency domain.
const size_t window_size_; // Window size in samples; also the block size.
const size_t chunk_length_; // Chunk size in samples.
const size_t bank_size_; // Num ERB filters.
const int sample_rate_hz_;
const int erb_resolution_;
const int num_capture_channels_;
const int num_render_channels_;
const int analysis_rate_; // Num blocks before gains recalculated.
const bool active_; // Whether render gains are being updated.
// TODO(ekm): Add logic for updating |active_|.
intelligibility::VarianceArray clear_variance_;
intelligibility::VarianceArray noise_variance_;
rtc::scoped_ptr<float[]> filtered_clear_var_;
rtc::scoped_ptr<float[]> filtered_noise_var_;
std::vector<std::vector<float>> filter_bank_;
rtc::scoped_ptr<float[]> center_freqs_;
size_t start_freq_;
rtc::scoped_ptr<float[]> rho_; // Production and interpretation SNR.
// for each ERB band.
rtc::scoped_ptr<float[]> gains_eq_; // Pre-filter modified gains.
intelligibility::GainApplier gain_applier_;
// Destination buffers used to reassemble blocked chunks before overwriting
// the original input array with modifications.
ChannelBuffer<float> temp_render_out_buffer_;
ChannelBuffer<float> temp_capture_out_buffer_;
rtc::scoped_ptr<float[]> kbd_window_;
TransformCallback render_callback_;
TransformCallback capture_callback_;
rtc::scoped_ptr<LappedTransform> render_mangler_;
rtc::scoped_ptr<LappedTransform> capture_mangler_;
int block_count_;
int analysis_step_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_ENHANCER_H_

View File

@ -0,0 +1,314 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Implements helper functions and classes for intelligibility enhancement.
//
#include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <algorithm>
using std::complex;
using std::min;
namespace webrtc {
namespace intelligibility {
float UpdateFactor(float target, float current, float limit) {
float delta = fabsf(target - current);
float sign = copysign(1.0f, target - current);
return current + sign * fminf(delta, limit);
}
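// For example, UpdateFactor(1.0f, 0.2f, 0.1f) returns 0.3f, while
// UpdateFactor(0.25f, 0.2f, 0.1f) returns 0.25f: |current| moves toward
// |target| by at most |limit| per call.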
float AddDitherIfZero(float value) {
return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value;
}
complex<float> zerofudge(complex<float> c) {
return complex<float>(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag()));
}
complex<float> NewMean(complex<float> mean, complex<float> data, size_t count) {
return mean + (data - mean) / static_cast<float>(count);
}
void AddToMean(complex<float> data, size_t count, complex<float>* mean) {
(*mean) = NewMean(*mean, data, count);
}
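// For example, folding the series {2, 4, 9} into a running mean:
//   NewMean(2, 4, 2) == 2 + (4 - 2) / 2 == 3
//   NewMean(3, 9, 3) == 3 + (9 - 3) / 3 == 5,
// the arithmetic mean of the three values.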
static const size_t kWindowBlockSize = 10;
VarianceArray::VarianceArray(size_t num_freqs,
StepType type,
size_t window_size,
float decay)
: running_mean_(new complex<float>[num_freqs]()),
running_mean_sq_(new complex<float>[num_freqs]()),
sub_running_mean_(new complex<float>[num_freqs]()),
sub_running_mean_sq_(new complex<float>[num_freqs]()),
variance_(new float[num_freqs]()),
conj_sum_(new float[num_freqs]()),
num_freqs_(num_freqs),
window_size_(window_size),
decay_(decay),
history_cursor_(0),
count_(0),
array_mean_(0.0f),
buffer_full_(false) {
history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
for (size_t i = 0; i < num_freqs_; ++i) {
history_[i].reset(new complex<float>[window_size_]());
}
subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
for (size_t i = 0; i < num_freqs_; ++i) {
subhistory_[i].reset(new complex<float>[window_size_]());
}
subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]());
for (size_t i = 0; i < num_freqs_; ++i) {
subhistory_sq_[i].reset(new complex<float>[window_size_]());
}
switch (type) {
case kStepInfinite:
step_func_ = &VarianceArray::InfiniteStep;
break;
case kStepDecaying:
step_func_ = &VarianceArray::DecayStep;
break;
case kStepWindowed:
step_func_ = &VarianceArray::WindowedStep;
break;
case kStepBlocked:
step_func_ = &VarianceArray::BlockedStep;
break;
case kStepBlockBasedMovingAverage:
step_func_ = &VarianceArray::BlockBasedMovingAverage;
break;
}
}
// Compute the variance with Welford's algorithm, adding some fudge to
// the input in case of all-zeroes.
void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) {
array_mean_ = 0.0f;
++count_;
for (size_t i = 0; i < num_freqs_; ++i) {
complex<float> sample = data[i];
if (!skip_fudge) {
sample = zerofudge(sample);
}
if (count_ == 1) {
running_mean_[i] = sample;
variance_[i] = 0.0f;
} else {
float old_sum = conj_sum_[i];
complex<float> old_mean = running_mean_[i];
running_mean_[i] =
old_mean + (sample - old_mean) / static_cast<float>(count_);
conj_sum_[i] =
(old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i]))
.real();
variance_[i] =
conj_sum_[i] / (count_ - 1);
}
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
}
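// For reference, the same recurrence on a plain real-valued stream (an
// illustrative sketch, not upstream code; equivalent to the per-bin complex
// update above):
//
//   float mean = 0.f, m2 = 0.f;  // m2 plays the role of conj_sum_[i].
//   size_t n = 0;
//   for (float x : samples) {
//     ++n;
//     const float old_mean = mean;
//     mean += (x - mean) / n;
//     m2 += (x - old_mean) * (x - mean);
//   }
//   const float variance = (n > 1) ? m2 / (n - 1) : 0.f;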
// Compute the variance from the beginning, with exponential decaying of the
// series data.
void VarianceArray::DecayStep(const complex<float>* data, bool /*dummy*/) {
array_mean_ = 0.0f;
++count_;
for (size_t i = 0; i < num_freqs_; ++i) {
complex<float> sample = data[i];
sample = zerofudge(sample);
if (count_ == 1) {
running_mean_[i] = sample;
running_mean_sq_[i] = sample * std::conj(sample);
variance_[i] = 0.0f;
} else {
complex<float> prev = running_mean_[i];
complex<float> prev2 = running_mean_sq_[i];
running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample;
running_mean_sq_[i] =
decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample);
variance_[i] = (running_mean_sq_[i] -
running_mean_[i] * std::conj(running_mean_[i])).real();
}
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
}
// Windowed variance computation. On each step, the variances for the
// window are recomputed from scratch, using Welford's algorithm.
void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) {
size_t num = min(count_ + 1, window_size_);
array_mean_ = 0.0f;
for (size_t i = 0; i < num_freqs_; ++i) {
complex<float> mean;
float conj_sum = 0.0f;
history_[i][history_cursor_] = data[i];
mean = history_[i][history_cursor_];
variance_[i] = 0.0f;
for (size_t j = 1; j < num; ++j) {
complex<float> sample =
zerofudge(history_[i][(history_cursor_ + j) % window_size_]);
sample = history_[i][(history_cursor_ + j) % window_size_];
float old_sum = conj_sum;
complex<float> old_mean = mean;
mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1);
conj_sum =
(old_sum + std::conj(sample - old_mean) * (sample - mean)).real();
variance_[i] = conj_sum / (j);
}
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
history_cursor_ = (history_cursor_ + 1) % window_size_;
++count_;
}
// Variance with a window of blocks. Within each block, the variances are
// recomputed from scratch at every step, using |Var(X) = E(X^2) - E^2(X)|.
// Once a block is filled with kWindowBlockSize samples, it is added to the
// history window and a new block is started. The variances for the window
// are recomputed from scratch at each of these transitions.
void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) {
size_t blocks = min(window_size_, history_cursor_ + 1);
for (size_t i = 0; i < num_freqs_; ++i) {
AddToMean(data[i], count_ + 1, &sub_running_mean_[i]);
AddToMean(data[i] * std::conj(data[i]), count_ + 1,
&sub_running_mean_sq_[i]);
subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i];
subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i];
variance_[i] =
(NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) -
NewMean(running_mean_[i], sub_running_mean_[i], blocks) *
std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks)))
.real();
if (count_ == kWindowBlockSize - 1) {
sub_running_mean_[i] = complex<float>(0.0f, 0.0f);
sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
running_mean_[i] = complex<float>(0.0f, 0.0f);
running_mean_sq_[i] = complex<float>(0.0f, 0.0f);
for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) {
AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]);
AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]);
}
++history_cursor_;
}
}
++count_;
if (count_ == kWindowBlockSize) {
count_ = 0;
}
}
// Recomputes variances for each window from scratch based on previous window.
void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data,
bool /*dummy*/) {
// TODO(ekmeyerson) To mitigate potential divergence, add a counter so that
// every so often the sums are computed from scratch by summing over all
// elements instead of subtracting oldest and adding newest.
for (size_t i = 0; i < num_freqs_; ++i) {
sub_running_mean_[i] += data[i];
sub_running_mean_sq_[i] += data[i] * std::conj(data[i]);
}
++count_;
// TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow
// experimentation with different block size, window size pairs.
if (count_ >= kWindowBlockSize) {
count_ = 0;
for (size_t i = 0; i < num_freqs_; ++i) {
running_mean_[i] -= subhistory_[i][history_cursor_];
running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_];
float scale = 1.f / kWindowBlockSize;
subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale;
subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale;
sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f);
sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f);
running_mean_[i] += subhistory_[i][history_cursor_];
running_mean_sq_[i] += subhistory_sq_[i][history_cursor_];
scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1);
variance_[i] = std::real(running_mean_sq_[i] * scale -
running_mean_[i] * scale *
std::conj(running_mean_[i]) * scale);
}
++history_cursor_;
if (history_cursor_ >= window_size_) {
buffer_full_ = true;
history_cursor_ = 0;
}
}
}
void VarianceArray::Clear() {
memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * num_freqs_);
memset(running_mean_sq_.get(), 0,
sizeof(*running_mean_sq_.get()) * num_freqs_);
memset(variance_.get(), 0, sizeof(*variance_.get()) * num_freqs_);
memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * num_freqs_);
history_cursor_ = 0;
count_ = 0;
array_mean_ = 0.0f;
}
void VarianceArray::ApplyScale(float scale) {
array_mean_ = 0.0f;
for (size_t i = 0; i < num_freqs_; ++i) {
variance_[i] *= scale * scale;
array_mean_ += (variance_[i] - array_mean_) / (i + 1);
}
}
GainApplier::GainApplier(size_t freqs, float change_limit)
: num_freqs_(freqs),
change_limit_(change_limit),
target_(new float[freqs]()),
current_(new float[freqs]()) {
for (size_t i = 0; i < freqs; ++i) {
target_[i] = 1.0f;
current_[i] = 1.0f;
}
}
void GainApplier::Apply(const complex<float>* in_block,
complex<float>* out_block) {
for (size_t i = 0; i < num_freqs_; ++i) {
float factor = sqrtf(fabsf(current_[i]));
if (!std::isnormal(factor)) {
factor = 1.0f;
}
out_block[i] = factor * in_block[i];
current_[i] = UpdateFactor(target_[i], current_[i], change_limit_);
}
}
} // namespace intelligibility
} // namespace webrtc

View File

@ -0,0 +1,160 @@
/*
* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
//
// Specifies helper classes for intelligibility enhancement.
//
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_
#include <complex>
#include "webrtc/base/scoped_ptr.h"
namespace webrtc {
namespace intelligibility {
// Return |current| changed towards |target|, with the change being at most
// |limit|.
float UpdateFactor(float target, float current, float limit);
// Apply a small fudge to degenerate complex values. The numbers in the array
// were chosen randomly, so that even a series of all zeroes has some small
// variability.
std::complex<float> zerofudge(std::complex<float> c);
// Incremental mean computation. Returns the mean of the series after |data|
// is added to a series that has mean |mean|; |count| is the new length.
std::complex<float> NewMean(std::complex<float> mean,
std::complex<float> data,
size_t count);
// Updates |mean| with added |data|.
void AddToMean(std::complex<float> data,
size_t count,
std::complex<float>* mean);
// Internal helper for computing the variances of a stream of arrays.
// The result is an array of variances per position: the i-th variance
// is the variance of the stream of data on the i-th positions in the
// input arrays.
// There are five methods of computation:
//  * kStepInfinite computes variances from the beginning onwards
//  * kStepDecaying uses a recursive exponential decay formula with a
//    settable forgetting factor
//  * kStepWindowed computes variances within a moving window
//  * kStepBlocked is similar to kStepWindowed, but history is kept
//    as a rolling window of blocks: multiple input elements are used for
//    one block and the history then consists of the variances of these blocks
//    with the same effect as kStepWindowed, but less storage, so the window
//    can be longer
//  * kStepBlockBasedMovingAverage keeps running sums over a rolling window
//    of fixed-size blocks, subtracting the oldest block and adding the
//    newest (see BlockBasedMovingAverage in the .cc file)
class VarianceArray {
public:
enum StepType {
kStepInfinite = 0,
kStepDecaying,
kStepWindowed,
kStepBlocked,
kStepBlockBasedMovingAverage
};
// Construct an instance for the given input array length (|freqs|) and
// computation algorithm (|type|), with the appropriate parameters.
// |window_size| is the number of samples for kStepWindowed and
// the number of blocks for kStepBlocked. |decay| is the forgetting factor
// for kStepDecaying.
VarianceArray(size_t freqs, StepType type, size_t window_size, float decay);
// Add a new data point to the series and compute the new variances.
// TODO(bercic) |skip_fudge| is a flag for kStepWindowed and kStepDecaying,
// whether they should skip adding some small dummy values to the input
// to prevent problems with all-zero inputs. Can probably be removed.
void Step(const std::complex<float>* data, bool skip_fudge = false) {
(this->*step_func_)(data, skip_fudge);
}
// Reset variances to zero and forget all history.
void Clear();
// Scale the input data by |scale|. Effectively multiply variances
// by |scale^2|.
void ApplyScale(float scale);
// The current set of variances.
const float* variance() const { return variance_.get(); }
// The mean value of the current set of variances.
float array_mean() const { return array_mean_; }
private:
void InfiniteStep(const std::complex<float>* data, bool dummy);
void DecayStep(const std::complex<float>* data, bool dummy);
void WindowedStep(const std::complex<float>* data, bool dummy);
void BlockedStep(const std::complex<float>* data, bool dummy);
void BlockBasedMovingAverage(const std::complex<float>* data, bool dummy);
// TODO(ekmeyerson): Switch the following running means
// and histories from rtc::scoped_ptr to std::vector.
// The current average X and X^2.
rtc::scoped_ptr<std::complex<float>[]> running_mean_;
rtc::scoped_ptr<std::complex<float>[]> running_mean_sq_;
// Average X and X^2 for the current block in kStepBlocked.
rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_;
rtc::scoped_ptr<std::complex<float>[]> sub_running_mean_sq_;
// Sample history for the rolling window in kStepWindowed and block-wise
// histories for kStepBlocked.
rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> history_;
rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_;
rtc::scoped_ptr<rtc::scoped_ptr<std::complex<float>[]>[]> subhistory_sq_;
// The current set of variances and sums for Welford's algorithm.
rtc::scoped_ptr<float[]> variance_;
rtc::scoped_ptr<float[]> conj_sum_;
const size_t num_freqs_;
const size_t window_size_;
const float decay_;
size_t history_cursor_;
size_t count_;
float array_mean_;
bool buffer_full_;
void (VarianceArray::*step_func_)(const std::complex<float>*, bool);
};
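// Example (illustrative, not upstream code): tracking per-bin variance of a
// stream of FFT blocks with exponential forgetting, where NextBlock() is a
// hypothetical source of |freqs| complex bins:
//
//   VarianceArray variance(freqs, VarianceArray::kStepDecaying,
//                          10,     // window_size; unused by kStepDecaying.
//                          0.9f);  // decay: forgetting factor.
//   while (const std::complex<float>* block = NextBlock()) {
//     variance.Step(block);
//     const float* var = variance.variance();  // |freqs| variances.
//   }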
// Helper class for smoothing gain changes. On each application step, the
// currently used gains are changed towards a set of settable target gains,
// constrained by a limit on the magnitude of the changes.
class GainApplier {
public:
GainApplier(size_t freqs, float change_limit);
// Copy |in_block| to |out_block|, multiplied by the current set of gains,
// and step the current set of gains towards the target set.
void Apply(const std::complex<float>* in_block,
std::complex<float>* out_block);
// Return the current target gain set. Modify this array to set the targets.
float* target() const { return target_.get(); }
private:
const size_t num_freqs_;
const float change_limit_;
rtc::scoped_ptr<float[]> target_;
rtc::scoped_ptr<float[]> current_;
};
} // namespace intelligibility
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_INTELLIGIBILITY_INTELLIGIBILITY_UTILS_H_

View File

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,175 +8,79 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "level_estimator_impl.h"
#include "webrtc/modules/audio_processing/level_estimator_impl.h"
#include <cassert>
#include <cstring>
#include "critical_section_wrapper.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
// TODO(ajm): implement the underlying level estimator component.
#include "webrtc/modules/audio_processing/audio_buffer.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/rms_level.h"
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
namespace webrtc {
typedef void Handle;
namespace {
/*int EstimateLevel(AudioBuffer* audio, Handle* my_handle) {
assert(audio->samples_per_split_channel() <= 160);
WebRtc_Word16* mixed_data = audio->low_pass_split_data(0);
if (audio->num_channels() > 1) {
audio->CopyAndMixLowPass(1);
mixed_data = audio->mixed_low_pass_data(0);
}
int err = UpdateLvlEst(my_handle,
mixed_data,
audio->samples_per_split_channel());
if (err != AudioProcessing::kNoError) {
return GetHandleError(my_handle);
}
return AudioProcessing::kNoError;
}
int GetMetricsLocal(Handle* my_handle, LevelEstimator::Metrics* metrics) {
level_t levels;
memset(&levels, 0, sizeof(levels));
int err = ExportLevels(my_handle, &levels, 2);
if (err != AudioProcessing::kNoError) {
return err;
}
metrics->signal.instant = levels.instant;
metrics->signal.average = levels.average;
metrics->signal.maximum = levels.max;
metrics->signal.minimum = levels.min;
err = ExportLevels(my_handle, &levels, 1);
if (err != AudioProcessing::kNoError) {
return err;
}
metrics->speech.instant = levels.instant;
metrics->speech.average = levels.average;
metrics->speech.maximum = levels.max;
metrics->speech.minimum = levels.min;
err = ExportLevels(my_handle, &levels, 0);
if (err != AudioProcessing::kNoError) {
return err;
}
metrics->noise.instant = levels.instant;
metrics->noise.average = levels.average;
metrics->noise.maximum = levels.max;
metrics->noise.minimum = levels.min;
return AudioProcessing::kNoError;
}*/
} // namespace
LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
apm_(apm) {}
LevelEstimatorImpl::LevelEstimatorImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
crit_(crit) {}
LevelEstimatorImpl::~LevelEstimatorImpl() {}
int LevelEstimatorImpl::AnalyzeReverseStream(AudioBuffer* /*audio*/) {
return apm_->kUnsupportedComponentError;
/*if (!is_component_enabled()) {
return apm_->kNoError;
int LevelEstimatorImpl::ProcessStream(AudioBuffer* audio) {
if (!is_component_enabled()) {
return AudioProcessing::kNoError;
}
return EstimateLevel(audio, static_cast<Handle*>(handle(1)));*/
}
int LevelEstimatorImpl::ProcessCaptureAudio(AudioBuffer* /*audio*/) {
return apm_->kUnsupportedComponentError;
/*if (!is_component_enabled()) {
return apm_->kNoError;
RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
for (int i = 0; i < audio->num_channels(); ++i) {
rms_level->Process(audio->channels_const()[i],
audio->num_frames());
}
return EstimateLevel(audio, static_cast<Handle*>(handle(0)));*/
return AudioProcessing::kNoError;
}
int LevelEstimatorImpl::Enable(bool /*enable*/) {
CriticalSectionScoped crit_scoped(*apm_->crit());
return apm_->kUnsupportedComponentError;
//return EnableComponent(enable);
int LevelEstimatorImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
bool LevelEstimatorImpl::is_enabled() const {
return is_component_enabled();
}
int LevelEstimatorImpl::GetMetrics(LevelEstimator::Metrics* /*metrics*/,
LevelEstimator::Metrics* /*reverse_metrics*/) {
return apm_->kUnsupportedComponentError;
/*if (!is_component_enabled()) {
return apm_->kNotEnabledError;
int LevelEstimatorImpl::RMS() {
if (!is_component_enabled()) {
return AudioProcessing::kNotEnabledError;
}
int err = GetMetricsLocal(static_cast<Handle*>(handle(0)), metrics);
if (err != apm_->kNoError) {
return err;
}
err = GetMetricsLocal(static_cast<Handle*>(handle(1)), reverse_metrics);
if (err != apm_->kNoError) {
return err;
}
return apm_->kNoError;*/
}
int LevelEstimatorImpl::get_version(char* version,
int version_len_bytes) const {
// An empty string is used to indicate no version information.
memset(version, 0, version_len_bytes);
return apm_->kNoError;
RMSLevel* rms_level = static_cast<RMSLevel*>(handle(0));
return rms_level->RMS();
}
// The ProcessingComponent implementation is pretty weird in this class since
// we have only a single instance of the trivial underlying component.
void* LevelEstimatorImpl::CreateHandle() const {
Handle* handle = NULL;
/*if (CreateLvlEst(&handle) != apm_->kNoError) {
handle = NULL;
} else {
assert(handle != NULL);
}*/
return handle;
return new RMSLevel;
}
int LevelEstimatorImpl::DestroyHandle(void* /*handle*/) const {
return apm_->kUnsupportedComponentError;
//return FreeLvlEst(static_cast<Handle*>(handle));
void LevelEstimatorImpl::DestroyHandle(void* handle) const {
delete static_cast<RMSLevel*>(handle);
}
int LevelEstimatorImpl::InitializeHandle(void* /*handle*/) const {
return apm_->kUnsupportedComponentError;
/*const double kIntervalSeconds = 1.5;
return InitLvlEst(static_cast<Handle*>(handle),
apm_->sample_rate_hz(),
kIntervalSeconds);*/
int LevelEstimatorImpl::InitializeHandle(void* handle) const {
static_cast<RMSLevel*>(handle)->Reset();
return AudioProcessing::kNoError;
}
int LevelEstimatorImpl::ConfigureHandle(void* /*handle*/) const {
return apm_->kUnsupportedComponentError;
//return apm_->kNoError;
return AudioProcessing::kNoError;
}
int LevelEstimatorImpl::num_handles_required() const {
return apm_->kUnsupportedComponentError;
//return 2;
return 1;
}
int LevelEstimatorImpl::GetHandleError(void* handle) const {
// The component has no detailed errors.
assert(handle != NULL);
return apm_->kUnspecifiedError;
int LevelEstimatorImpl::GetHandleError(void* /*handle*/) const {
return AudioProcessing::kUnspecifiedError;
}
} // namespace webrtc

View File

@ -1,5 +1,5 @@
/*
 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,46 +8,46 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_

#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
#include "webrtc/modules/audio_processing/rms_level.h"

namespace webrtc {

class AudioBuffer;
class CriticalSectionWrapper;

class LevelEstimatorImpl : public LevelEstimator,
                           public ProcessingComponent {
 public:
  LevelEstimatorImpl(const AudioProcessing* apm,
                     CriticalSectionWrapper* crit);
  virtual ~LevelEstimatorImpl();

  int ProcessStream(AudioBuffer* audio);

  // LevelEstimator implementation.
  bool is_enabled() const override;

 private:
  // LevelEstimator implementation.
  int Enable(bool enable) override;
  int RMS() override;

  // ProcessingComponent implementation.
  void* CreateHandle() const override;
  int InitializeHandle(void* handle) const override;
  int ConfigureHandle(void* handle) const override;
  void DestroyHandle(void* handle) const override;
  int num_handles_required() const override;
  int GetHandleError(void* handle) const override;

  CriticalSectionWrapper* crit_;
};

}  // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_LEVEL_ESTIMATOR_IMPL_H_


@ -0,0 +1,86 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
#include <stdio.h>
#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
// To enable AEC logging, invoke GYP with -Daec_debug_dump=1.
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Dumps wav data to file.
#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
do { \
rtc_WavWriteSamples(file, data, num_samples); \
} while (0)
// (Re)opens a wav file for writing using the specified sample rate.
#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
sample_rate, wav_file) \
do { \
WebRtcAec_ReopenWav(name, instance_index, process_rate, sample_rate, \
wav_file); \
} while (0)
// Closes a wav file.
#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
do { \
rtc_WavClose(wav_file); \
} while (0)
// Dumps raw data to file.
#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
do { \
(void) fwrite(data, data_size, 1, file); \
} while (0)
// Opens a raw data file for writing with an instance-specific filename.
#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
do { \
WebRtcAec_RawFileOpen(name, instance_counter, file); \
} while (0)
// Closes a raw data file.
#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
do { \
fclose(file); \
} while (0)
#else // WEBRTC_AEC_DEBUG_DUMP
#define RTC_AEC_DEBUG_WAV_WRITE(file, data, num_samples) \
do { \
} while (0)
#define RTC_AEC_DEBUG_WAV_REOPEN(name, instance_index, process_rate, \
sample_rate, wav_file) \
do { \
} while (0)
#define RTC_AEC_DEBUG_WAV_CLOSE(wav_file) \
do { \
} while (0)
#define RTC_AEC_DEBUG_RAW_WRITE(file, data, data_size) \
do { \
} while (0)
#define RTC_AEC_DEBUG_RAW_OPEN(name, instance_counter, file) \
do { \
} while (0)
#define RTC_AEC_DEBUG_RAW_CLOSE(file) \
do { \
} while (0)
#endif // WEBRTC_AEC_DEBUG_DUMP
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_
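A hedged sketch (not upstream code) of how a call site uses these macros; the dump-file handle and function names below are illustrative. When WEBRTC_AEC_DEBUG_DUMP is undefined, both calls compile to empty do/while bodies:

#include "webrtc/modules/audio_processing/logging/aec_logging.h"

static rtc_WavWriter* g_far_wav = NULL;  /* Illustrative dump-file handle. */

static void DumpFarendFrame(const float* far_buf, size_t num_samples,
                            int instance_index, int sample_rate_hz) {
  /* Lazily (re)opens "aec_far<index>-<rate>.wav" when the rate changes. */
  RTC_AEC_DEBUG_WAV_REOPEN("aec_far", instance_index, sample_rate_hz,
                           sample_rate_hz, &g_far_wav);
  RTC_AEC_DEBUG_WAV_WRITE(g_far_wav, far_buf, num_samples);
}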


@ -0,0 +1,57 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/logging/aec_logging_file_handling.h"
#include <stdint.h>
#include <stdio.h>
#include "webrtc/base/checks.h"
#include "webrtc/base/stringutils.h"
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/typedefs.h"
#ifdef WEBRTC_AEC_DEBUG_DUMP
void WebRtcAec_ReopenWav(const char* name,
int instance_index,
int process_rate,
int sample_rate,
rtc_WavWriter** wav_file) {
if (*wav_file) {
if (rtc_WavSampleRate(*wav_file) == sample_rate)
return;
rtc_WavClose(*wav_file);
}
char filename[64];
int written = rtc::sprintfn(filename, sizeof(filename), "%s%d-%d.wav", name,
instance_index, process_rate);
// Ensure there was no buffer output error.
RTC_DCHECK_GE(written, 0);
// Ensure that the buffer size was sufficient.
RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
*wav_file = rtc_WavOpen(filename, sample_rate, 1);
}
void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file) {
char filename[64];
int written = rtc::sprintfn(filename, sizeof(filename), "%s_%d.dat", name,
instance_index);
// Ensure there was no buffer output error.
RTC_DCHECK_GE(written, 0);
// Ensure that the buffer size was sufficient.
RTC_DCHECK_LT(static_cast<size_t>(written), sizeof(filename));
*file = fopen(filename, "wb");
}
#endif // WEBRTC_AEC_DEBUG_DUMP


@ -0,0 +1,41 @@
/*
* Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_
#include <stdio.h>
#include "webrtc/common_audio/wav_file.h"
#include "webrtc/typedefs.h"
#ifdef __cplusplus
extern "C" {
#endif
#ifdef WEBRTC_AEC_DEBUG_DUMP
// Opens a new Wav file for writing. If it was already open with a different
// sample frequency, it closes it first.
void WebRtcAec_ReopenWav(const char* name,
int instance_index,
int process_rate,
int sample_rate,
rtc_WavWriter** wav_file);
// Opens a dump file with an instance-specific filename.
void WebRtcAec_RawFileOpen(const char* name, int instance_index, FILE** file);
#endif // WEBRTC_AEC_DEBUG_DUMP
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AEC_AEC_LOGGING_FILE_HANDLING_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,19 +8,18 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "noise_suppression_impl.h"
#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
#include <cassert>
#include <assert.h>
#include "critical_section_wrapper.h"
#include "webrtc/modules/audio_processing/audio_buffer.h"
#if defined(WEBRTC_NS_FLOAT)
#include "noise_suppression.h"
#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
#elif defined(WEBRTC_NS_FIXED)
#include "noise_suppression_x.h"
#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
#endif
#include "webrtc/system_wrappers/interface/critical_section_wrapper.h"
#include "audio_processing_impl.h"
#include "audio_buffer.h"
namespace webrtc {
@ -41,54 +40,64 @@ int MapSetting(NoiseSuppression::Level level) {
return 2;
case NoiseSuppression::kVeryHigh:
return 3;
default:
return -1;
}
assert(false);
return -1;
}
} // namespace
NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessingImpl* apm)
: ProcessingComponent(apm),
NoiseSuppressionImpl::NoiseSuppressionImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit)
: ProcessingComponent(),
apm_(apm),
crit_(crit),
level_(kModerate) {}
NoiseSuppressionImpl::~NoiseSuppressionImpl() {}
int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
int err = apm_->kNoError;
int NoiseSuppressionImpl::AnalyzeCaptureAudio(AudioBuffer* audio) {
#if defined(WEBRTC_NS_FLOAT)
if (!is_component_enabled()) {
return apm_->kNoError;
}
assert(audio->samples_per_split_channel() <= 160);
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
for (int i = 0; i < num_handles(); i++) {
for (int i = 0; i < num_handles(); ++i) {
Handle* my_handle = static_cast<Handle*>(handle(i));
WebRtcNs_Analyze(my_handle, audio->split_bands_const_f(i)[kBand0To8kHz]);
}
#endif
return apm_->kNoError;
}
int NoiseSuppressionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
if (!is_component_enabled()) {
return apm_->kNoError;
}
assert(audio->num_frames_per_band() <= 160);
assert(audio->num_channels() == num_handles());
for (int i = 0; i < num_handles(); ++i) {
Handle* my_handle = static_cast<Handle*>(handle(i));
#if defined(WEBRTC_NS_FLOAT)
err = WebRtcNs_Process(static_cast<Handle*>(handle(i)),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i));
WebRtcNs_Process(my_handle,
audio->split_bands_const_f(i),
audio->num_bands(),
audio->split_bands_f(i));
#elif defined(WEBRTC_NS_FIXED)
err = WebRtcNsx_Process(static_cast<Handle*>(handle(i)),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i),
audio->low_pass_split_data(i),
audio->high_pass_split_data(i));
WebRtcNsx_Process(my_handle,
audio->split_bands_const(i),
audio->num_bands(),
audio->split_bands(i));
#endif
if (err != apm_->kNoError) {
return GetHandleError(my_handle);
}
}
return apm_->kNoError;
}
int NoiseSuppressionImpl::Enable(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
return EnableComponent(enable);
}
@ -97,7 +106,7 @@ bool NoiseSuppressionImpl::is_enabled() const {
}
int NoiseSuppressionImpl::set_level(Level level) {
CriticalSectionScoped crit_scoped(*apm_->crit());
CriticalSectionScoped crit_scoped(crit_);
if (MapSetting(level) == -1) {
return apm_->kBadParameterError;
}
@ -110,49 +119,43 @@ NoiseSuppression::Level NoiseSuppressionImpl::level() const {
return level_;
}
int NoiseSuppressionImpl::get_version(char* version,
int version_len_bytes) const {
#if defined(WEBRTC_NS_FLOAT)
if (WebRtcNs_get_version(version, version_len_bytes) != 0)
#elif defined(WEBRTC_NS_FIXED)
if (WebRtcNsx_get_version(version, version_len_bytes) != 0)
#endif
{
return apm_->kBadParameterError;
}
return apm_->kNoError;
}
float NoiseSuppressionImpl::speech_probability() const {
#if defined(WEBRTC_NS_FLOAT)
float probability_average = 0.0f;
for (int i = 0; i < num_handles(); i++) {
Handle* my_handle = static_cast<Handle*>(handle(i));
probability_average += WebRtcNs_prior_speech_probability(my_handle);
}
return probability_average / num_handles();
#elif defined(WEBRTC_NS_FIXED)
// Currently not available for the fixed point implementation.
return apm_->kUnsupportedFunctionError;
#endif
}
void* NoiseSuppressionImpl::CreateHandle() const {
Handle* handle = NULL;
#if defined(WEBRTC_NS_FLOAT)
if (WebRtcNs_Create(&handle) != apm_->kNoError)
#elif defined(WEBRTC_NS_FIXED)
if (WebRtcNsx_Create(&handle) != apm_->kNoError)
#endif
{
handle = NULL;
} else {
assert(handle != NULL);
}
return handle;
}
void* NoiseSuppressionImpl::CreateHandle() const {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_Create();
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_Create();
#endif
}
int NoiseSuppressionImpl::DestroyHandle(void* handle) const {
void NoiseSuppressionImpl::DestroyHandle(void* handle) const {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_Free(static_cast<Handle*>(handle));
WebRtcNs_Free(static_cast<Handle*>(handle));
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_Free(static_cast<Handle*>(handle));
WebRtcNsx_Free(static_cast<Handle*>(handle));
#endif
}
int NoiseSuppressionImpl::InitializeHandle(void* handle) const {
#if defined(WEBRTC_NS_FLOAT)
return WebRtcNs_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz());
return WebRtcNs_Init(static_cast<Handle*>(handle),
apm_->proc_sample_rate_hz());
#elif defined(WEBRTC_NS_FIXED)
return WebRtcNsx_Init(static_cast<Handle*>(handle), apm_->sample_rate_hz());
return WebRtcNsx_Init(static_cast<Handle*>(handle),
apm_->proc_sample_rate_hz());
#endif
}
@ -176,4 +179,3 @@ int NoiseSuppressionImpl::GetHandleError(void* handle) const {
return apm_->kUnspecifiedError;
}
} // namespace webrtc


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,47 +8,50 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_
#include "audio_processing.h"
#include "processing_component.h"
#include "webrtc/modules/audio_processing/include/audio_processing.h"
#include "webrtc/modules/audio_processing/processing_component.h"
namespace webrtc {
class AudioProcessingImpl;
class AudioBuffer;
class CriticalSectionWrapper;
class NoiseSuppressionImpl : public NoiseSuppression,
public ProcessingComponent {
public:
explicit NoiseSuppressionImpl(const AudioProcessingImpl* apm);
NoiseSuppressionImpl(const AudioProcessing* apm,
CriticalSectionWrapper* crit);
virtual ~NoiseSuppressionImpl();
int AnalyzeCaptureAudio(AudioBuffer* audio);
int ProcessCaptureAudio(AudioBuffer* audio);
// NoiseSuppression implementation.
virtual bool is_enabled() const;
// ProcessingComponent implementation.
virtual int get_version(char* version, int version_len_bytes) const;
bool is_enabled() const override;
float speech_probability() const override;
Level level() const override;
private:
// NoiseSuppression implementation.
virtual int Enable(bool enable);
virtual int set_level(Level level);
virtual Level level() const;
int Enable(bool enable) override;
int set_level(Level level) override;
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
virtual int InitializeHandle(void* handle) const;
virtual int ConfigureHandle(void* handle) const;
virtual int DestroyHandle(void* handle) const;
virtual int num_handles_required() const;
virtual int GetHandleError(void* handle) const;
void* CreateHandle() const override;
int InitializeHandle(void* handle) const override;
int ConfigureHandle(void* handle) const override;
void DestroyHandle(void* handle) const override;
int num_handles_required() const override;
int GetHandleError(void* handle) const override;
const AudioProcessingImpl* apm_;
const AudioProcessing* apm_;
CriticalSectionWrapper* crit_;
Level level_;
};
} // namespace webrtc
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_NOISE_SUPPRESSION_IMPL_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NOISE_SUPPRESSION_IMPL_H_


@ -1,20 +0,0 @@
noinst_LTLIBRARIES = libns.la libns_fix.la
libns_la_SOURCES = interface/noise_suppression.h \
noise_suppression.c \
windows_private.h \
defines.h \
ns_core.c \
ns_core.h
libns_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility
libns_fix_la_SOURCES = interface/noise_suppression_x.h \
noise_suppression_x.c \
nsx_defines.h \
nsx_core.c \
nsx_core.h
libns_fix_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility


@ -11,13 +11,10 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_DEFINES_H_
//#define PROCESS_FLOW_0 // Use the traditional method.
//#define PROCESS_FLOW_1 // Use traditional with DD estimate of prior SNR.
#define PROCESS_FLOW_2 // Use the new method of speech/noise classification.
#define BLOCKL_MAX 160 // max processing block length: 160
#define ANAL_BLOCKL_MAX 256 // max analysis block length: 256
#define HALF_ANAL_BLOCKL 129 // half max analysis block length + 1
#define NUM_HIGH_BANDS_MAX 2 // max number of high bands: 2
#define QUANTILE (float)0.25
@ -27,7 +24,6 @@
#define FACTOR (float)40.0
#define WIDTH (float)0.01
#define SMOOTH (float)0.75 // filter smoothing
// Length of fft work arrays.
#define IP_LENGTH (ANAL_BLOCKL_MAX >> 1) // must be at least ceil(2 + sqrt(ANAL_BLOCKL_MAX/2))
#define W_LENGTH (ANAL_BLOCKL_MAX >> 1)
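// Illustrative check (not in the original file): for ANAL_BLOCKL_MAX = 256
// the bound above is ceil(2 + sqrt(128)) = 14, so IP_LENGTH = 128 satisfies
// it with a wide margin.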


@ -0,0 +1,116 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
#include <stddef.h>
#include "webrtc/typedefs.h"
typedef struct NsHandleT NsHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function creates an instance of the floating point Noise Suppression.
*/
NsHandle* WebRtcNs_Create();
/*
* This function frees the dynamic memory of a specified noise suppression
* instance.
*
* Input:
* - NS_inst : Pointer to NS instance that should be freed
*/
void WebRtcNs_Free(NsHandle* NS_inst);
/*
* This function initializes a NS instance and has to be called before any other
* processing is performed.
*
* Input:
* - NS_inst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - NS_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - NS_inst : Noise suppression instance.
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - NS_inst : Updated instance.
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
/*
* This function estimates the background noise for the inserted speech frame.
* The input signal should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for L band
*
* Output:
* - NS_inst : Updated NS instance
*/
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe);
/*
* This function does Noise Suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : Noise suppression instance.
* - spframe : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - NS_inst : Updated NS instance
* - outframe : Pointer to output frame for each band
*/
void WebRtcNs_Process(NsHandle* NS_inst,
const float* const* spframe,
size_t num_bands,
float* const* outframe);
/* Returns the internally used prior speech probability of the current frame.
* There is a frequency bin based one as well, with which this should not be
* confused.
*
* Input
* - handle : Noise suppression instance.
*
* Return value : Prior speech probability in interval [0.0, 1.0].
* -1 - NULL pointer or uninitialized instance.
*/
float WebRtcNs_prior_speech_probability(NsHandle* handle);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_H_
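A hedged usage sketch (not part of the header): the typical call sequence for one 10 ms, single-band frame at 16 kHz. In real use the instance is created once and reused across frames; buffer names are illustrative and error handling is elided:

#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"

void SuppressFrame(const float* near_band0, float* out_band0) {
  NsHandle* ns = WebRtcNs_Create();
  WebRtcNs_Init(ns, 16000);          /* 16 kHz -> 160-sample 10 ms frames. */
  WebRtcNs_set_policy(ns, 2);        /* 0: Mild, 1: Medium, 2: Aggressive. */
  WebRtcNs_Analyze(ns, near_band0);  /* Update the noise estimate. */
  const float* in_bands[1] = { near_band0 };
  float* out_bands[1] = { out_band0 };
  WebRtcNs_Process(ns, in_bands, 1, out_bands);
  float prior = WebRtcNs_prior_speech_probability(ns);
  (void)prior;  /* In [0.0, 1.0] once initialized. */
  WebRtcNs_Free(ns);
}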


@ -0,0 +1,88 @@
/*
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
#include "webrtc/typedefs.h"
typedef struct NsxHandleT NsxHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function creates an instance of the fixed point Noise Suppression.
*/
NsxHandle* WebRtcNsx_Create();
/*
* This function frees the dynamic memory of a specified Noise Suppression
* instance.
*
* Input:
* - nsxInst : Pointer to NS instance that should be freed
*/
void WebRtcNsx_Free(NsxHandle* nsxInst);
/*
* This function initializes a NS instance
*
* Input:
* - nsxInst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - nsxInst : Instance that should be initialized
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
/*
* This function does noise suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - nsxInst : NSx instance. Needs to be initialized before the call.
* - speechFrame : Pointer to speech frame buffer for each band
* - num_bands : Number of bands
*
* Output:
* - nsxInst : Updated NSx instance
* - outFrame : Pointer to output frame for each band
*/
void WebRtcNsx_Process(NsxHandle* nsxInst,
const short* const* speechFrame,
int num_bands,
short* const* outFrame);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_INCLUDE_NOISE_SUPPRESSION_X_H_
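The fixed-point counterpart, as a hedged sketch under the same assumptions (one reusable instance, illustrative buffers):

#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"

void SuppressFrameFixed(const short* near_band0, short* out_band0) {
  NsxHandle* nsx = WebRtcNsx_Create();
  WebRtcNsx_Init(nsx, 16000);
  WebRtcNsx_set_policy(nsx, 1);
  const short* in_bands[1] = { near_band0 };
  short* out_bands[1] = { out_band0 };
  WebRtcNsx_Process(nsx, in_bands, 1, out_bands);
  WebRtcNsx_Free(nsx);
}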


@ -1,124 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_
#include "typedefs.h"
typedef struct NsHandleT NsHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function returns the version number of the code.
*
* Input:
* - version : Pointer to a character array where the version
* info is stored.
* - length : Length of version.
*
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNs_get_version(char* version, short length);
/*
* This function creates an instance of the noise reduction structure
*
* Input:
* - NS_inst : Pointer to noise reduction instance that should be
* created
*
* Output:
* - NS_inst : Pointer to created noise reduction instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Create(NsHandle** NS_inst);
/*
* This function frees the dynamic memory of a specified Noise Reduction
* instance.
*
* Input:
* - NS_inst : Pointer to NS instance that should be freed
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Free(NsHandle* NS_inst);
/*
* This function initializes a NS instance
*
* Input:
* - NS_inst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - NS_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - NS_inst : Instance that should be initialized
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - NS_inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
/*
* This function does Noise Suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - NS_inst : NS instance. Needs to be initialized before the call.
* - spframe : Pointer to speech frame buffer for L band
* - spframe_H : Pointer to speech frame buffer for H band
* - fs : sampling frequency
*
* Output:
* - NS_inst : Updated NS instance
* - outframe : Pointer to output frame for L band
* - outframe_H : Pointer to output frame for H band
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNs_Process(NsHandle* NS_inst,
short* spframe,
short* spframe_H,
short* outframe,
short* outframe_H);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_H_


@ -1,123 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#include "typedefs.h"
typedef struct NsxHandleT NsxHandle;
#ifdef __cplusplus
extern "C" {
#endif
/*
* This function returns the version number of the code.
*
* Input:
* - version : Pointer to a character array where the version
* info is stored.
* - length : Length of version.
*
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNsx_get_version(char* version, short length);
/*
* This function creates an instance of the noise reduction structure
*
* Input:
* - nsxInst : Pointer to noise reduction instance that should be
* created
*
* Output:
* - nsxInst : Pointer to created noise reduction instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Create(NsxHandle** nsxInst);
/*
* This function frees the dynamic memory of a specified Noise Suppression
* instance.
*
* Input:
* - nsxInst : Pointer to NS instance that should be freed
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Free(NsxHandle* nsxInst);
/*
* This function initializes a NS instance
*
* Input:
* - nsxInst : Instance that should be initialized
* - fs : sampling frequency
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - nsxInst : Instance that should be initialized
* - mode : 0: Mild, 1: Medium , 2: Aggressive
*
* Output:
* - nsxInst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
/*
* This function does noise suppression for the inserted speech frame. The
* input and output signals should always be 10ms (80 or 160 samples).
*
* Input
* - nsxInst : NSx instance. Needs to be initialized before the call.
* - speechFrame : Pointer to speech frame buffer for L band
* - speechFrameHB : Pointer to speech frame buffer for H band
* - fs : sampling frequency
*
* Output:
* - nsxInst : Updated NSx instance
* - outFrame : Pointer to output frame for L band
* - outFrameHB : Pointer to output frame for H band
*
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNsx_Process(NsxHandle* nsxInst,
short* speechFrame,
short* speechFrameHB,
short* outFrame,
short* outFrameHB);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,58 +8,52 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/ns/include/noise_suppression.h"
#include <stdlib.h>
#include <string.h>
#include "noise_suppression.h"
#include "ns_core.h"
#include "defines.h"
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/ns/defines.h"
#include "webrtc/modules/audio_processing/ns/ns_core.h"
int WebRtcNs_get_version(char* versionStr, short length) {
const char version[] = "NS 2.2.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL) {
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
return 0;
NsHandle* WebRtcNs_Create() {
NoiseSuppressionC* self = malloc(sizeof(NoiseSuppressionC));
self->initFlag = 0;
return (NsHandle*)self;
}
int WebRtcNs_Create(NsHandle** NS_inst) {
*NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
if (*NS_inst != NULL) {
(*(NSinst_t**)NS_inst)->initFlag = 0;
return 0;
} else {
return -1;
}
}
int WebRtcNs_Free(NsHandle* NS_inst) {
void WebRtcNs_Free(NsHandle* NS_inst) {
free(NS_inst);
return 0;
}
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
int WebRtcNs_Init(NsHandle* NS_inst, uint32_t fs) {
return WebRtcNs_InitCore((NoiseSuppressionC*)NS_inst, fs);
}
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
return WebRtcNs_set_policy_core((NoiseSuppressionC*)NS_inst, mode);
}
int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
short* outframe, short* outframe_H) {
return WebRtcNs_ProcessCore(
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
void WebRtcNs_Analyze(NsHandle* NS_inst, const float* spframe) {
WebRtcNs_AnalyzeCore((NoiseSuppressionC*)NS_inst, spframe);
}
void WebRtcNs_Process(NsHandle* NS_inst,
const float* const* spframe,
size_t num_bands,
float* const* outframe) {
WebRtcNs_ProcessCore((NoiseSuppressionC*)NS_inst, spframe, num_bands,
outframe);
}
float WebRtcNs_prior_speech_probability(NsHandle* handle) {
NoiseSuppressionC* self = (NoiseSuppressionC*)handle;
if (handle == NULL) {
return -1;
}
if (self->initFlag == 0) {
return -1;
}
return self->priorSpeechProb;
}
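For orientation, a hedged before/after sketch of the creation idiom this rewrite implies (comment form only; error handling elided):

/* Old API (removed above): the handle came back through an out-parameter.
 *   NsHandle* ns = NULL;
 *   if (WebRtcNs_Create(&ns) != 0) { ... }
 * New API: the handle is returned directly and WebRtcNs_Free() returns void.
 *   NsHandle* ns = WebRtcNs_Create();
 *   WebRtcNs_Free(ns);
 */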


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -8,58 +8,39 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include "webrtc/modules/audio_processing/ns/include/noise_suppression_x.h"
#include <stdlib.h>
#include <string.h>
#include "noise_suppression_x.h"
#include "nsx_core.h"
#include "nsx_defines.h"
#include "webrtc/common_audio/signal_processing/include/real_fft.h"
#include "webrtc/modules/audio_processing/ns/nsx_core.h"
#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
int WebRtcNsx_get_version(char* versionStr, short length) {
const char version[] = "NS\t3.1.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL) {
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
return 0;
NsxHandle* WebRtcNsx_Create() {
NoiseSuppressionFixedC* self = malloc(sizeof(NoiseSuppressionFixedC));
WebRtcSpl_Init();
self->real_fft = NULL;
self->initFlag = 0;
return (NsxHandle*)self;
}
int WebRtcNsx_Create(NsxHandle** nsxInst) {
*nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t));
if (*nsxInst != NULL) {
(*(NsxInst_t**)nsxInst)->initFlag = 0;
return 0;
} else {
return -1;
}
}
int WebRtcNsx_Free(NsxHandle* nsxInst) {
void WebRtcNsx_Free(NsxHandle* nsxInst) {
WebRtcSpl_FreeRealFFT(((NoiseSuppressionFixedC*)nsxInst)->real_fft);
free(nsxInst);
return 0;
}
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs) {
return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs);
int WebRtcNsx_Init(NsxHandle* nsxInst, uint32_t fs) {
return WebRtcNsx_InitCore((NoiseSuppressionFixedC*)nsxInst, fs);
}
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
return WebRtcNsx_set_policy_core((NoiseSuppressionFixedC*)nsxInst, mode);
}
int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
short* speechFrameHB, short* outFrame,
short* outFrameHB) {
return WebRtcNsx_ProcessCore(
(NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
void WebRtcNsx_Process(NsxHandle* nsxInst,
const short* const* speechFrame,
int num_bands,
short* const* outFrame) {
WebRtcNsx_ProcessCore((NoiseSuppressionFixedC*)nsxInst, speechFrame,
num_bands, outFrame);
}


@ -1,58 +0,0 @@
# Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
#
# Use of this source code is governed by a BSD-style license
# that can be found in the LICENSE file in the root of the source
# tree. An additional intellectual property rights grant can be found
# in the file PATENTS. All contributing project authors may
# be found in the AUTHORS file in the root of the source tree.
{
'targets': [
{
'target_name': 'ns',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'apm_util'
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/noise_suppression.h',
'noise_suppression.c',
'windows_private.h',
'defines.h',
'ns_core.c',
'ns_core.h',
],
},
{
'target_name': 'ns_fix',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
'include_dirs': [
'interface',
],
'direct_dependent_settings': {
'include_dirs': [
'interface',
],
},
'sources': [
'interface/noise_suppression_x.h',
'noise_suppression_x.c',
'nsx_defines.h',
'nsx_core.c',
'nsx_core.h',
],
},
],
}

File diff suppressed because it is too large.


@ -8,105 +8,110 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_
#include "defines.h"
#include "webrtc/modules/audio_processing/ns/defines.h"
typedef struct NSParaExtract_t_ {
//bin size of histogram
typedef struct NSParaExtract_ {
// Bin size of histogram.
float binSizeLrt;
float binSizeSpecFlat;
float binSizeSpecDiff;
//range of histogram over which lrt threshold is computed
// Range of histogram over which LRT threshold is computed.
float rangeAvgHistLrt;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
//thresholds for prior model
float factor1ModelPars; //for lrt and spectral difference
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
//peak limit for spectral flatness (varies between 0 and 1)
// Scale parameters: multiply dominant peaks of the histograms by scale factor
// to obtain thresholds for prior model.
float factor1ModelPars; // For LRT and spectral difference.
float factor2ModelPars; // For spectral_flatness: used when noise is flatter
// than speech.
// Peak limit for spectral flatness (varies between 0 and 1).
float thresPosSpecFlat;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
// Limit on spacing of two highest peaks in histogram: spacing determined by
// bin size.
float limitPeakSpacingSpecFlat;
float limitPeakSpacingSpecDiff;
//limit on relevance of second peak:
// Limit on relevance of second peak.
float limitPeakWeightsSpecFlat;
float limitPeakWeightsSpecDiff;
//limit on fluctuation of lrt feature
// Limit on fluctuation of LRT feature.
float thresFluctLrt;
//limit on the max and min values for the feature thresholds
// Limit on the max and min values for the feature thresholds.
float maxLrt;
float minLrt;
float maxSpecFlat;
float minSpecFlat;
float maxSpecDiff;
float minSpecDiff;
//criteria of weight of histogram peak to accept/reject feature
// Criteria of weight of histogram peak to accept/reject feature.
int thresWeightSpecFlat;
int thresWeightSpecDiff;
} NSParaExtract_t;
} NSParaExtract;
typedef struct NSinst_t_ {
typedef struct NoiseSuppressionC_ {
uint32_t fs;
size_t blockLen;
size_t windShift;
size_t anaLen;
size_t magnLen;
int aggrMode;
const float* window;
float analyzeBuf[ANAL_BLOCKL_MAX];
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
WebRtc_UWord32 fs;
int blockLen;
int blockLen10ms;
int windShift;
int outLen;
int anaLen;
int magnLen;
int aggrMode;
const float* window;
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
float outBuf[3 * BLOCKL_MAX];
int initFlag;
// Parameters for quantile noise estimation.
float density[SIMULT * HALF_ANAL_BLOCKL];
float lquantile[SIMULT * HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// Parameters for Wiener filter.
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// FFT work arrays.
size_t ip[IP_LENGTH];
float wfft[W_LENGTH];
int initFlag;
// parameters for quantile noise estimation
float density[SIMULT* HALF_ANAL_BLOCKL];
float lquantile[SIMULT* HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// parameters for Wiener filter
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// fft work arrays.
int ip[IP_LENGTH];
float wfft[W_LENGTH];
// parameters for new method: some not needed, will reduce/cleanup later
WebRtc_Word32 blockInd; //frame index counter
int modelUpdatePars[4]; //parameters for updating or estimating
// thresholds/weights for prior model
float priorModelPars[7]; //parameters for prior model
float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
float priorSpeechProb; //prior speech/noise probability
float featureData[7]; //data for features
float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
float signalEnergy; //energy of magn
float sumMagn; //sum of magn
float whiteNoiseLevel; //initial noise estimate
float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
float pinkNoiseNumerator; //pink noise parameter: numerator
float pinkNoiseExp; //pink noise parameter: power of freq
NSParaExtract_t featureExtractionParams; //parameters for feature extraction
//histograms for parameter estimation
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB
} NSinst_t;
// Parameters for new method: some not needed, will reduce/cleanup later.
int32_t blockInd; // Frame index counter.
int modelUpdatePars[4]; // Parameters for updating or estimating.
// Thresholds/weights for prior model.
float priorModelPars[7]; // Parameters for prior model.
float noise[HALF_ANAL_BLOCKL]; // Noise spectrum from current frame.
float noisePrev[HALF_ANAL_BLOCKL]; // Noise spectrum from previous frame.
// Magnitude spectrum of previous analyze frame.
float magnPrevAnalyze[HALF_ANAL_BLOCKL];
// Magnitude spectrum of previous process frame.
float magnPrevProcess[HALF_ANAL_BLOCKL];
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; // Log LRT factor with time-smoothing.
float priorSpeechProb; // Prior speech/noise probability.
float featureData[7];
// Conservative noise spectrum estimate.
float magnAvgPause[HALF_ANAL_BLOCKL];
float signalEnergy; // Energy of |magn|.
float sumMagn;
float whiteNoiseLevel; // Initial noise estimate.
float initMagnEst[HALF_ANAL_BLOCKL]; // Initial magnitude spectrum estimate.
float pinkNoiseNumerator; // Pink noise parameter: numerator.
float pinkNoiseExp; // Pink noise parameter: power of frequencies.
float parametricNoise[HALF_ANAL_BLOCKL];
// Parameters for feature extraction.
NSParaExtract featureExtractionParams;
// Histograms for parameter estimation.
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
// Quantities for high band estimate.
float speechProb[HALF_ANAL_BLOCKL]; // Final speech/noise prob: prior + LRT.
// Buffering data for HB.
float dataBufHB[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
} NoiseSuppressionC;
#ifdef __cplusplus
extern "C" {
@ -118,16 +123,16 @@ extern "C" {
* This function initializes a noise suppression instance
*
* Input:
* - inst : Instance that should be initialized
* - self : Instance that should be initialized
* - fs : Sampling frequency
*
* Output:
* - inst : Initialized instance
* - self : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs);
int WebRtcNs_InitCore(NoiseSuppressionC* self, uint32_t fs);
/****************************************************************************
* WebRtcNs_set_policy_core(...)
@ -135,16 +140,30 @@ int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs);
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
* - self : Instance that should be initialized
* - mode : 0: Mild (6dB), 1: Medium (10dB), 2: Aggressive (15dB)
*
* Output:
* - NS_inst : Initialized instance
* - self : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy_core(NSinst_t* inst, int mode);
int WebRtcNs_set_policy_core(NoiseSuppressionC* self, int mode);
/****************************************************************************
* WebRtcNs_AnalyzeCore
*
* Estimate the background noise.
*
* Input:
* - self : Instance that should be initialized
* - speechFrame : Input speech frame for lower band
*
* Output:
* - self : Updated instance
*/
void WebRtcNs_AnalyzeCore(NoiseSuppressionC* self, const float* speechFrame);
/****************************************************************************
* WebRtcNs_ProcessCore
@ -152,28 +171,20 @@ int WebRtcNs_set_policy_core(NSinst_t* inst, int mode);
* Do noise suppression.
*
* Input:
* - inst : Instance that should be initialized
* - inFrameLow : Input speech frame for lower band
* - inFrameHigh : Input speech frame for higher band
* - self : Instance that should be initialized
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - inst : Updated instance
* - outFrameLow : Output speech frame for lower band
* - outFrameHigh : Output speech frame for higher band
*
* Return value : 0 - OK
* -1 - Error
* - self : Updated instance
* - outFrame : Output speech frame for each band
*/
int WebRtcNs_ProcessCore(NSinst_t* inst,
short* inFrameLow,
short* inFrameHigh,
short* outFrameLow,
short* outFrameHigh);
void WebRtcNs_ProcessCore(NoiseSuppressionC* self,
const float* const* inFrame,
size_t num_bands,
float* const* outFrame);
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NS_CORE_H_
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_NS_CORE_H_

File diff suppressed because it is too large.


@ -1,5 +1,5 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
* Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
@ -11,95 +11,103 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
#include "typedefs.h"
#include "signal_processing_library.h"
#include "nsx_defines.h"
#ifdef NS_FILEDEBUG
#include <stdio.h>
#endif
typedef struct NsxInst_t_ {
WebRtc_UWord32 fs;
#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/ns/nsx_defines.h"
#include "webrtc/typedefs.h"
const WebRtc_Word16* window;
WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX];
WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX];
WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL];
WebRtc_UWord16 overdrive; /* Q8 */
WebRtc_UWord16 denoiseBound; /* Q14 */
const WebRtc_Word16* factor2Table;
WebRtc_Word16 noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstCounter[SIMULT];
WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL];
typedef struct NoiseSuppressionFixedC_ {
uint32_t fs;
WebRtc_Word16 anaLen;
int anaLen2;
int magnLen;
const int16_t* window;
int16_t analysisBuffer[ANAL_BLOCKL_MAX];
int16_t synthesisBuffer[ANAL_BLOCKL_MAX];
uint16_t noiseSupFilter[HALF_ANAL_BLOCKL];
uint16_t overdrive; /* Q8 */
uint16_t denoiseBound; /* Q14 */
const int16_t* factor2Table;
int16_t noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
int16_t noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
int16_t noiseEstCounter[SIMULT];
int16_t noiseEstQuantile[HALF_ANAL_BLOCKL];
size_t anaLen;
size_t anaLen2;
size_t magnLen;
int aggrMode;
int stages;
int initFlag;
int gainMap;
WebRtc_Word32 maxLrt;
WebRtc_Word32 minLrt;
WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8
WebRtc_Word32 featureLogLrt;
WebRtc_Word32 thresholdLogLrt;
WebRtc_Word16 weightLogLrt;
int32_t maxLrt;
int32_t minLrt;
// Log LRT factor with time-smoothing in Q8.
int32_t logLrtTimeAvgW32[HALF_ANAL_BLOCKL];
int32_t featureLogLrt;
int32_t thresholdLogLrt;
int16_t weightLogLrt;
WebRtc_UWord32 featureSpecDiff;
WebRtc_UWord32 thresholdSpecDiff;
WebRtc_Word16 weightSpecDiff;
uint32_t featureSpecDiff;
uint32_t thresholdSpecDiff;
int16_t weightSpecDiff;
WebRtc_UWord32 featureSpecFlat;
WebRtc_UWord32 thresholdSpecFlat;
WebRtc_Word16 weightSpecFlat;
uint32_t featureSpecFlat;
uint32_t thresholdSpecFlat;
int16_t weightSpecFlat;
WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum
WebRtc_UWord32 magnEnergy;
WebRtc_UWord32 sumMagn;
WebRtc_UWord32 curAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergyTmp;
// Conservative estimate of noise spectrum.
int32_t avgMagnPause[HALF_ANAL_BLOCKL];
uint32_t magnEnergy;
uint32_t sumMagn;
uint32_t curAvgMagnEnergy;
uint32_t timeAvgMagnEnergy;
uint32_t timeAvgMagnEnergyTmp;
WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate
WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate
WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator
WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq
int minNorm; //smallest normalization factor
int zeroInputSignal; //zero input signal flag
uint32_t whiteNoiseLevel; // Initial noise estimate.
// Initial magnitude spectrum estimate.
uint32_t initMagnEst[HALF_ANAL_BLOCKL];
// Pink noise parameters:
int32_t pinkNoiseNumerator; // Numerator.
int32_t pinkNoiseExp; // Power of freq.
int minNorm; // Smallest normalization factor.
int zeroInputSignal; // Zero input signal flag.
WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame
WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14
// Noise spectrum from previous frame.
uint32_t prevNoiseU32[HALF_ANAL_BLOCKL];
// Magnitude spectrum from previous frame.
uint16_t prevMagnU16[HALF_ANAL_BLOCKL];
// Prior speech/noise probability in Q14.
int16_t priorNonSpeechProb;
int blockIndex; //frame index counter
int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model
int blockIndex; // Frame index counter.
// Parameter for updating or estimating thresholds/weights for prior model.
int modelUpdate;
int cntThresUpdate;
//histograms for parameter estimation
WebRtc_Word16 histLrt[HIST_PAR_EST];
WebRtc_Word16 histSpecFlat[HIST_PAR_EST];
WebRtc_Word16 histSpecDiff[HIST_PAR_EST];
// Histograms for parameter estimation.
int16_t histLrt[HIST_PAR_EST];
int16_t histSpecFlat[HIST_PAR_EST];
int16_t histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */
// Quantities for high band estimate.
int16_t dataBufHBFX[NUM_HIGH_BANDS_MAX][ANAL_BLOCKL_MAX];
int qNoise;
int prevQNoise;
int prevQMagn;
int blockLen10ms;
size_t blockLen10ms;
WebRtc_Word16 real[ANAL_BLOCKL_MAX];
WebRtc_Word16 imag[ANAL_BLOCKL_MAX];
WebRtc_Word32 energyIn;
int16_t real[ANAL_BLOCKL_MAX];
int16_t imag[ANAL_BLOCKL_MAX];
int32_t energyIn;
int scaleEnergyIn;
int normData;
} NsxInst_t;
struct RealFFT* real_fft;
} NoiseSuppressionFixedC;
#ifdef __cplusplus
extern "C"
@ -121,7 +129,7 @@ extern "C"
* Return value : 0 - Ok
* -1 - Error
*/
WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
int32_t WebRtcNsx_InitCore(NoiseSuppressionFixedC* inst, uint32_t fs);
/****************************************************************************
* WebRtcNsx_set_policy_core(...)
@ -129,16 +137,16 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
* This changes the aggressiveness of the noise suppression method.
*
* Input:
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
* - inst : Instance that should be initialized
* - mode : 0: Mild (6 dB), 1: Medium (10 dB), 2: Aggressive (15 dB)
*
* Output:
* - NS_inst : Initialized instance
* - inst : Initialized instance
*
* Return value : 0 - Ok
* -1 - Error
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
int WebRtcNsx_set_policy_core(NoiseSuppressionFixedC* inst, int mode);
/****************************************************************************
* WebRtcNsx_ProcessCore
@ -147,34 +155,109 @@ int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
*
* Input:
* - inst : Instance that should be initialized
* - inFrameLow : Input speech frame for lower band
* - inFrameHigh : Input speech frame for higher band
* - inFrame : Input speech frame for each band
* - num_bands : Number of bands
*
* Output:
* - inst : Updated instance
* - outFrameLow : Output speech frame for lower band
* - outFrameHigh : Output speech frame for higher band
*
* Return value : 0 - OK
* -1 - Error
* - outFrame : Output speech frame for each band
*/
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh,
short* outFrameLow, short* outFrameHigh);
void WebRtcNsx_ProcessCore(NoiseSuppressionFixedC* inst,
const short* const* inFrame,
int num_bands,
short* const* outFrame);
/****************************************************************************
* Internal functions and variable declarations shared with optimized code.
* Some function pointers, for internal functions shared by ARM NEON and
* generic C code.
*/
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset);
// Noise Estimation.
typedef void (*NoiseEstimation)(NoiseSuppressionFixedC* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise);
extern NoiseEstimation WebRtcNsx_NoiseEstimation;
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise);
// Filter the data in the frequency domain, and create spectrum.
typedef void (*PrepareSpectrum)(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
extern PrepareSpectrum WebRtcNsx_PrepareSpectrum;
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];
extern const WebRtc_Word16 WebRtcNsx_kCounterDiv[201];
// For the noise suppression process: synthesis, read out the fully processed
// segment, and update the synthesis buffer.
typedef void (*SynthesisUpdate)(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
extern SynthesisUpdate WebRtcNsx_SynthesisUpdate;
// Update analysis buffer for lower band, and window data before FFT.
typedef void (*AnalysisUpdate)(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
extern AnalysisUpdate WebRtcNsx_AnalysisUpdate;
// Denormalize the real-valued signal |in|, the output from inverse FFT.
typedef void (*Denormalize)(NoiseSuppressionFixedC* inst,
int16_t* in,
int factor);
extern Denormalize WebRtcNsx_Denormalize;
// Normalize the real-valued signal |in|, the input to forward FFT.
typedef void (*NormalizeRealBuffer)(NoiseSuppressionFixedC* inst,
const int16_t* in,
int16_t* out);
extern NormalizeRealBuffer WebRtcNsx_NormalizeRealBuffer;
// Compute speech/noise probability.
// Intended to be private.
void WebRtcNsx_SpeechNoiseProb(NoiseSuppressionFixedC* inst,
uint16_t* nonSpeechProbFinal,
uint32_t* priorLocSnr,
uint32_t* postLocSnr);
#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for ARM Neon platforms
// are declared below and defined in file nsx_core_neon.c.
void WebRtcNsx_NoiseEstimationNeon(NoiseSuppressionFixedC* inst,
uint16_t* magn,
uint32_t* noise,
int16_t* q_noise);
void WebRtcNsx_SynthesisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
void WebRtcNsx_AnalysisUpdateNeon(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
void WebRtcNsx_PrepareSpectrumNeon(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
#endif
#if defined(MIPS32_LE)
// For the above function pointers, functions for generic platforms are declared
// and defined as static in file nsx_core.c, while those for MIPS platforms
// are declared below and defined in file nsx_core_mips.c.
void WebRtcNsx_SynthesisUpdate_mips(NoiseSuppressionFixedC* inst,
int16_t* out_frame,
int16_t gain_factor);
void WebRtcNsx_AnalysisUpdate_mips(NoiseSuppressionFixedC* inst,
int16_t* out,
int16_t* new_speech);
void WebRtcNsx_PrepareSpectrum_mips(NoiseSuppressionFixedC* inst,
int16_t* freq_buff);
void WebRtcNsx_NormalizeRealBuffer_mips(NoiseSuppressionFixedC* inst,
const int16_t* in,
int16_t* out);
#if defined(MIPS_DSP_R1_LE)
void WebRtcNsx_Denormalize_mips(NoiseSuppressionFixedC* inst,
int16_t* in,
int factor);
#endif
#endif
#ifdef __cplusplus
}
#endif
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_SOURCE_NSX_CORE_H_
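A hedged sketch (not the actual nsx_core.c code) of the dispatch pattern these declarations support: the generic C routine is installed first and an optimized variant overrides it where available. NoiseEstimationC and InitFunctionPointers are illustrative names:

#include "webrtc/modules/audio_processing/ns/nsx_core.h"

static void NoiseEstimationC(NoiseSuppressionFixedC* inst, uint16_t* magn,
                             uint32_t* noise, int16_t* q_noise) {
  /* Stand-in for the generic implementation, normally static in nsx_core.c. */
  (void)inst; (void)magn; (void)noise; (void)q_noise;
}

static void InitFunctionPointers(void) {
  WebRtcNsx_NoiseEstimation = NoiseEstimationC;
#if (defined WEBRTC_DETECT_NEON || defined WEBRTC_HAS_NEON)
  /* Override with the NEON variant declared above. */
  WebRtcNsx_NoiseEstimation = WebRtcNsx_NoiseEstimationNeon;
#endif
}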

Some files were not shown because too many files have changed in this diff.