Update code to upstream revision r766

Removes matlab tests, adds delay estimation logging, and some other
minor fixes/improvements.
This commit is contained in:
Arun Raghavan 2011-10-20 13:10:08 +05:30
parent 139f0b6dc0
commit 7e71fffb59
79 changed files with 6238 additions and 8599 deletions

View File

@ -1,5 +1,5 @@
# Revision changelog (version - date, svn rev. from upstream that was merged)
# 0.1 - 15 Sep 2011, r597
# 0.1 - 19 Oct 2011, r766
AC_INIT([webrtc-audio-processing], [0.1])
AM_INIT_AUTOMAKE([tar-ustar])

View File

@ -1,3 +0,0 @@
bjornv@webrtc.org
tina.legrand@webrtc.org
jan.skoglund@webrtc.org

View File

@ -1659,6 +1659,30 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// - out_data : Super-wideband speech signal, 0-16 kHz
//
// WebRtc_Word16 WebRtcSpl_SatW32ToW16(...)
//
// This function saturates a 32-bit word into a 16-bit word.
//
// Input:
// - value32 : The value of a 32-bit word.
//
// Output:
// - out16 : the saturated 16-bit word.
//
// int32_t WebRtc_MulAccumW16(...)
//
// This function multiplies a 16-bit word by a 16-bit word, and accumulates
// the result into a 32-bit integer.
//
// Input:
// - a : The value of the first 16-bit word.
// - b : The value of the second 16-bit word.
// - c : The value of a 32-bit integer.
//
// Return Value: The value of a * b + c.
//
// WebRtc_Word16 WebRtcSpl_get_version(...)
//
// This function gives the version string of the Signal Processing Library.

View File

@ -19,16 +19,20 @@
#include "spl_inl_armv7.h"
#else
// Saturates a 32-bit value into the 16-bit range [-32768, 32767].
//
// Input:
//  - value32 : 32-bit value to saturate.
// Return: value32 clamped to WebRtc_Word16 range.
static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) {
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }
  return (WebRtc_Word16) value32;
}
// Saturated addition of two 16-bit values: widens both operands to 32 bits,
// adds, then clamps the sum back to the 16-bit range via
// WebRtcSpl_SatW32ToW16.
//
// Fix: the block contained two complete implementations (the old explicit
// clamping body followed by the new one-line version), leaving the second
// return statement unreachable. Only the single-expression version is kept;
// behavior is identical.
static __inline WebRtc_Word16 WebRtcSpl_AddSatW16(WebRtc_Word16 a,
                                                  WebRtc_Word16 b) {
  return WebRtcSpl_SatW32ToW16((WebRtc_Word32) a + (WebRtc_Word32) b);
}
static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1,
@ -54,24 +58,7 @@ static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1,
// Saturated subtraction of two 16-bit values: computes var1 - var2 in 32-bit
// arithmetic, then clamps the difference back to the 16-bit range via
// WebRtcSpl_SatW32ToW16.
//
// Fix: the block contained two complete implementations (the old explicit
// overflow/underflow checks followed by the new one-line version), leaving
// the second return statement unreachable. Only the single-expression
// version is kept; behavior is identical.
static __inline WebRtc_Word16 WebRtcSpl_SubSatW16(WebRtc_Word16 var1,
                                                  WebRtc_Word16 var2) {
  return WebRtcSpl_SatW32ToW16((WebRtc_Word32) var1 - (WebRtc_Word32) var2);
}
static __inline WebRtc_Word32 WebRtcSpl_SubSatW32(WebRtc_Word32 l_var1,
@ -161,6 +148,12 @@ static __inline int WebRtcSpl_NormW16(WebRtc_Word16 a) {
return zeros;
}
// Multiply-accumulate (portable C fallback): returns a * b + c.
//
// Input:
//  - a : first 16-bit factor.
//  - b : second 16-bit factor.
//  - c : 32-bit accumulator value.
// Return: a * b + c. The 16x16 product itself cannot overflow the 32-bit
// intermediate; overflow is only possible when adding c.
static __inline int32_t WebRtc_MulAccumW16(int16_t a,
                                           int16_t b,
                                           int32_t c) {
  const int32_t product = (int32_t) a * (int32_t) b;
  return product + c;
}
#endif // WEBRTC_ARCH_ARM_V7A
#endif // WEBRTC_SPL_SPL_INL_H_

View File

@ -45,6 +45,14 @@ static __inline WebRtc_Word32 WEBRTC_SPL_MUL_16_16(WebRtc_Word16 a,
return tmp;
}
// Multiply-accumulate, ARMv7 version: returns a * b + c.
// Uses the SMLABB instruction, which multiplies the bottom 16-bit halves of
// its first two register operands and adds the third operand.
static __inline int32_t WebRtc_MulAccumW16(int16_t a,
int16_t b,
int32_t c) {
int32_t tmp = 0;
// tmp = (bottom16 of a) * (bottom16 of b) + c.
__asm__("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c));
return tmp;
}
static __inline WebRtc_Word16 WebRtcSpl_AddSatW16(WebRtc_Word16 a,
WebRtc_Word16 b) {
WebRtc_Word32 s_sum;
@ -119,4 +127,11 @@ static __inline int WebRtcSpl_NormW16(WebRtc_Word16 a) {
return tmp - 17;
}
// Saturates a 32-bit value into the 16-bit range, ARMv7 version.
// Uses SSAT to signed-saturate value32 to 16 bits in a single instruction.
// NOTE(review): the "%r0"/"%r1" operand spelling is unusual — plain "%0"/"%1"
// is the conventional GCC extended-asm form; confirm this assembles as
// intended on the target toolchain.
static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) {
WebRtc_Word16 out16;
__asm__("ssat %r0, #16, %r1" : "=r"(out16) : "r"(value32));
return out16;
}
#endif // WEBRTC_SPL_SPL_INL_ARMV7_H_

View File

@ -52,7 +52,7 @@ int WebRtcSpl_DownsampleFast(WebRtc_Word16 *in_ptr, WebRtc_Word16 in_length,
// If output is higher than 32768, saturate it. Same with negative side
*downsampled_ptr++ = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, o, -32768);
*downsampled_ptr++ = WebRtcSpl_SatW32ToW16(o);
}
return 0;

View File

@ -17,154 +17,165 @@
#include "signal_processing_library.h"
#ifdef WEBRTC_ARCH_ARM_V7A
// allpass filter coefficients.
static const WebRtc_UWord32 kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
static const WebRtc_UWord32 kResampleAllpass2[3] =
{12199, 37471 << 15, 60255 << 15};
// Multiply two 32-bit values and accumulate to another input value.
// Return: state + ((diff * tbl_value) >> 16)
// ARMv7 multiply-accumulate used by the resampler allpass sections.
// Per the description above: returns state + ((diff * tbl_value) >> 16).
// SMLAWB computes acc + ((Rn * bottom16(Rm)) >> 16), so tbl_value is assumed
// to fit in the bottom 16 bits of its register (true for the Q15-style
// coefficients in kResampleAllpass*).
static __inline WebRtc_Word32 MUL_ACCUM_1(WebRtc_Word32 tbl_value,
WebRtc_Word32 diff,
WebRtc_Word32 state) {
WebRtc_Word32 result;
// result = state + ((diff * bottom16(tbl_value)) >> 16).
__asm__("smlawb %r0, %r1, %r2, %r3": "=r"(result): "r"(diff),
"r"(tbl_value), "r"(state));
return result;
}
// Multiply two 32-bit values and accumulate to another input value.
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// The reason to introduce this function is that, in case we can't use smlawb
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// smmla to save some cycles.
// ARMv7 multiply-accumulate variant for coefficients too large for SMLAWB.
// Per the description above: returns state + (((diff << 1) * tbl_value) >> 32).
// SMMLA computes acc + ((Rn * Rm) >> 32) on the full 64-bit product; the
// pre-shift of diff by 1 compensates so the net scaling matches MUL_ACCUM_1
// (note the corresponding kResampleAllpass* entries are pre-shifted << 15).
static __inline WebRtc_Word32 MUL_ACCUM_2(WebRtc_Word32 tbl_value,
WebRtc_Word32 diff,
WebRtc_Word32 state) {
WebRtc_Word32 result;
// result = state + (top 32 bits of the 64-bit product (diff << 1) * tbl_value).
__asm__("smmla %r0, %r1, %r2, %r3": "=r"(result): "r"(diff << 1),
"r"(tbl_value), "r"(state));
return result;
}
#else
// allpass filter coefficients.
static const WebRtc_UWord16 kResampleAllpass1[3] = {3284, 24441, 49528};
static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255};
// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#endif // WEBRTC_ARCH_ARM_V7A
// decimator
void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len,
WebRtc_Word16* out, WebRtc_Word32* filtState)
{
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
WebRtc_Word16* out, WebRtc_Word32* filtState) {
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
for (i = (len >> 1); i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
for (i = (len >> 1); i > 0; i--) {
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1" : "=r"(*out) : "r"(out32));
out++;
#else
if (out32 > 32767)
*out++ = 32767;
else if (out32 < -32768)
*out++ = -32768;
else
*out++ = (WebRtc_Word16)out32;
#endif
}
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
}
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, WebRtc_Word16* out,
WebRtc_Word32* filtState)
{
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len,
WebRtc_Word16* out, WebRtc_Word32* filtState) {
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
for (i = len; i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[2], diff, state2);
state2 = tmp2;
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state3 + 512) >> 10;
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1":"=r"(*out): "r"(out32));
out++;
#else
if (out32 > 32767)
*out++ = 32767;
else if (out32 < -32768)
*out++ = -32768;
else
*out++ = (WebRtc_Word16)out32;
#endif
for (i = len; i > 0; i--) {
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2);
state2 = tmp2;
// upper allpass filter
diff = in32 - state5;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[2], diff, state6);
state6 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state3 + 512) >> 10;
*out++ = WebRtcSpl_SatW32ToW16(out32);
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state7 + 512) >> 10;
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1":"=r"(*out): "r"(out32));
out++;
#else
if (out32 > 32767)
*out++ = 32767;
else if (out32 < -32768)
*out++ = -32768;
else
*out++ = (WebRtc_Word16)out32;
#endif
}
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
// upper allpass filter
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6);
state6 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state7 + 512) >> 10;
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
}

View File

@ -147,13 +147,11 @@ void WebRtcSpl_AnalysisQMF(const WebRtc_Word16* in_data, WebRtc_Word16* low_band
{
tmp = filter1[i] + filter2[i] + 1024;
tmp = WEBRTC_SPL_RSHIFT_W32(tmp, 11);
low_band[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp, WEBRTC_SPL_WORD16_MIN);
low_band[i] = WebRtcSpl_SatW32ToW16(tmp);
tmp = filter1[i] - filter2[i] + 1024;
tmp = WEBRTC_SPL_RSHIFT_W32(tmp, 11);
high_band[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp, WEBRTC_SPL_WORD16_MIN);
high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
}
}
@ -191,10 +189,10 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, const WebRtc_Word16*
for (i = 0, k = 0; i < kBandFrameLength; i++)
{
tmp = WEBRTC_SPL_RSHIFT_W32(filter2[i] + 512, 10);
out_data[k++] = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmp, -32768);
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
tmp = WEBRTC_SPL_RSHIFT_W32(filter1[i] + 512, 10);
out_data[k++] = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmp, -32768);
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
}
}

View File

@ -125,7 +125,7 @@ void WebRtcSpl_ScaleVectorWithSat(G_CONST WebRtc_Word16 *in_vector, WebRtc_Word1
for (i = 0; i < in_vector_length; i++)
{
tmpW32 = WEBRTC_SPL_MUL_16_16_RSFT(*inptr++, gain, right_shifts);
( *outptr++) = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmpW32, -32768);
(*outptr++) = WebRtcSpl_SatW32ToW16(tmpW32);
}
}

View File

@ -2,11 +2,9 @@ noinst_LTLIBRARIES = libvad.la
libvad_la_SOURCES = main/interface/webrtc_vad.h \
main/source/webrtc_vad.c \
main/source/vad_const.c \
main/source/vad_const.h \
main/source/vad_defines.h \
main/source/vad_core.c \
main/source/vad_core.h \
main/source/vad_defines.h \
main/source/vad_filterbank.c \
main/source/vad_filterbank.h \
main/source/vad_gmm.c \

View File

@ -1,2 +0,0 @@
bjornv@webrtc.org
jan.skoglund@webrtc.org

View File

@ -25,11 +25,9 @@
'sources': [
'../interface/webrtc_vad.h',
'webrtc_vad.c',
'vad_const.c',
'vad_const.h',
'vad_defines.h',
'vad_core.c',
'vad_core.h',
'vad_defines.h',
'vad_filterbank.c',
'vad_filterbank.h',
'vad_gmm.c',

View File

@ -1,80 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file includes the constant values used internally in VAD.
*/
#include "vad_const.h"
// Spectrum Weighting
const WebRtc_Word16 kSpectrumWeight[6] = {6, 8, 10, 12, 14, 16};
const WebRtc_Word16 kCompVar = 22005;
// Constant 160*log10(2) in Q9
const WebRtc_Word16 kLogConst = 24660;
// Constant log2(exp(1)) in Q12
const WebRtc_Word16 kLog10Const = 5909;
// Q15
const WebRtc_Word16 kNoiseUpdateConst = 655;
const WebRtc_Word16 kSpeechUpdateConst = 6554;
// Q8
const WebRtc_Word16 kBackEta = 154;
// Coefficients used by WebRtcVad_HpOutput, Q14
const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631};
const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620};
// Allpass filter coefficients, upper and lower, in Q15
// Upper: 0.64, Lower: 0.17
const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571};
const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
// Minimum difference between the two models, Q5
const WebRtc_Word16 kMinimumDifference[6] = {544, 544, 576, 576, 576, 576};
// Upper limit of mean value for speech model, Q7
const WebRtc_Word16 kMaximumSpeech[6] = {11392, 11392, 11520, 11520, 11520, 11520};
// Minimum value for mean value
const WebRtc_Word16 kMinimumMean[2] = {640, 768};
// Upper limit of mean value for noise model, Q7
const WebRtc_Word16 kMaximumNoise[6] = {9216, 9088, 8960, 8832, 8704, 8576};
// Adjustment for division with two in WebRtcVad_SplitFilter
const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
// Start values for the Gaussian models, Q7
// Weights for the two Gaussians for the six channels (noise)
const WebRtc_Word16 kNoiseDataWeights[12] = {34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103};
// Weights for the two Gaussians for the six channels (speech)
const WebRtc_Word16 kSpeechDataWeights[12] = {48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81};
// Means for the two Gaussians for the six channels (noise)
const WebRtc_Word16 kNoiseDataMeans[12] = {6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863,
7820, 7266, 5020, 4362};
// Means for the two Gaussians for the six channels (speech)
const WebRtc_Word16 kSpeechDataMeans[12] = {8306, 10085, 10078, 11823, 11843, 6309, 9473,
9571, 10879, 7581, 8180, 7483};
// Stds for the two Gaussians for the six channels (noise)
const WebRtc_Word16 kNoiseDataStds[12] = {378, 1064, 493, 582, 688, 593, 474, 697, 475, 688,
421, 455};
// Stds for the two Gaussians for the six channels (speech)
const WebRtc_Word16 kSpeechDataStds[12] = {555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540,
1079, 850};

View File

@ -1,59 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the declarations of the internally used constants.
*/
#ifndef WEBRTC_VAD_CONST_H_
#define WEBRTC_VAD_CONST_H_
#include "typedefs.h"
// TODO(ajm): give these internal-linkage by moving to the appropriate file
// where possible, and otherwise tag with WebRtcVad_.
// Spectrum Weighting
extern const WebRtc_Word16 kSpectrumWeight[];
extern const WebRtc_Word16 kCompVar;
// Logarithm constant
extern const WebRtc_Word16 kLogConst;
extern const WebRtc_Word16 kLog10Const;
// Q15
extern const WebRtc_Word16 kNoiseUpdateConst;
extern const WebRtc_Word16 kSpeechUpdateConst;
// Q8
extern const WebRtc_Word16 kBackEta;
// Coefficients used by WebRtcVad_HpOutput, Q14
extern const WebRtc_Word16 kHpZeroCoefs[];
extern const WebRtc_Word16 kHpPoleCoefs[];
// Allpass filter coefficients, upper and lower, in Q15 resp. Q13
extern const WebRtc_Word16 kAllPassCoefsQ15[];
extern const WebRtc_Word16 kAllPassCoefsQ13[];
// Minimum difference between the two models, Q5
extern const WebRtc_Word16 kMinimumDifference[];
// Maximum value when updating the speech model, Q7
extern const WebRtc_Word16 kMaximumSpeech[];
// Minimum value for mean value
extern const WebRtc_Word16 kMinimumMean[];
// Upper limit of mean value for noise model, Q7
extern const WebRtc_Word16 kMaximumNoise[];
// Adjustment for division with two in WebRtcVad_SplitFilter
extern const WebRtc_Word16 kOffsetVector[];
// Start values for the Gaussian models, Q7
extern const WebRtc_Word16 kNoiseDataWeights[];
extern const WebRtc_Word16 kSpeechDataWeights[];
extern const WebRtc_Word16 kNoiseDataMeans[];
extern const WebRtc_Word16 kSpeechDataMeans[];
extern const WebRtc_Word16 kNoiseDataStds[];
extern const WebRtc_Word16 kSpeechDataStds[];
#endif // WEBRTC_VAD_CONST_H_

View File

@ -15,12 +15,50 @@
*/
#include "vad_core.h"
#include "vad_const.h"
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
#include "vad_filterbank.h"
#include "vad_gmm.h"
#include "vad_sp.h"
#include "signal_processing_library.h"
// Spectrum Weighting
static const WebRtc_Word16 kSpectrumWeight[6] = { 6, 8, 10, 12, 14, 16 };
static const WebRtc_Word16 kNoiseUpdateConst = 655; // Q15
static const WebRtc_Word16 kSpeechUpdateConst = 6554; // Q15
static const WebRtc_Word16 kBackEta = 154; // Q8
// Minimum difference between the two models, Q5
static const WebRtc_Word16 kMinimumDifference[6] = {
544, 544, 576, 576, 576, 576 };
// Upper limit of mean value for speech model, Q7
static const WebRtc_Word16 kMaximumSpeech[6] = {
11392, 11392, 11520, 11520, 11520, 11520 };
// Minimum value for mean value
static const WebRtc_Word16 kMinimumMean[2] = { 640, 768 };
// Upper limit of mean value for noise model, Q7
static const WebRtc_Word16 kMaximumNoise[6] = {
9216, 9088, 8960, 8832, 8704, 8576 };
// Start values for the Gaussian models, Q7
// Weights for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataWeights[12] = {
34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 };
// Weights for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataWeights[12] = {
48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 };
// Means for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataMeans[12] = {
6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 };
// Means for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataMeans[12] = {
8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483
};
// Stds for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataStds[12] = {
378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 };
// Stds for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataStds[12] = {
555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 };
static const int kInitCheck = 42;

View File

@ -15,9 +15,21 @@
*/
#include "vad_filterbank.h"
#include "vad_defines.h"
#include "vad_const.h"
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
// Constant 160*log10(2) in Q9
static const WebRtc_Word16 kLogConst = 24660;
// Coefficients used by WebRtcVad_HpOutput, Q14
static const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631};
static const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620};
// Allpass filter coefficients, upper and lower, in Q15
// Upper: 0.64, Lower: 0.17
static const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571};
// Adjustment for division with two in WebRtcVad_SplitFilter
static const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
void WebRtcVad_HpOutput(WebRtc_Word16 *in_vector,
WebRtc_Word16 in_vector_length,

View File

@ -15,8 +15,13 @@
*/
#include "vad_gmm.h"
#include "signal_processing_library.h"
#include "vad_const.h"
#include "typedefs.h"
static const WebRtc_Word32 kCompVar = 22005;
// Constant log2(exp(1)) in Q12
static const WebRtc_Word16 kLog10Const = 5909;
WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
WebRtc_Word16 mean,

View File

@ -10,15 +10,20 @@
/*
* This file includes the implementation of the VAD internal calls for Downsampling and
* FindMinimum.
* This file includes the implementation of the VAD internal calls for
* Downsampling and FindMinimum.
* For function call descriptions; See vad_sp.h.
*/
#include "vad_sp.h"
#include "vad_defines.h"
#include "vad_const.h"
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
// Allpass filter coefficients, upper and lower, in Q13
// Upper: 0.64, Lower: 0.17
static const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
// Downsampling filter based on the splitting filter and the allpass functions
// in vad_filterbank.c

View File

@ -485,6 +485,7 @@ enum RawVideoType
// Video codec
enum { kConfigParameterSize = 128};
enum { kPayloadNameSize = 32};
enum { kMaxSimulcastStreams = 4};
// H.263 specific
struct VideoCodecH263
@ -530,9 +531,10 @@ struct VideoCodecH264
// VP8 specific
struct VideoCodecVP8
{
bool pictureLossIndicationOn;
bool feedbackModeOn;
VideoCodecComplexity complexity;
bool pictureLossIndicationOn;
bool feedbackModeOn;
VideoCodecComplexity complexity;
unsigned char numberOfTemporalLayers;
};
// MPEG-4 specific
@ -570,6 +572,19 @@ union VideoCodecUnion
VideoCodecGeneric Generic;
};
/*
* Simulcast is when the same stream is encoded multiple times with different
* settings such as resolution.
*/
struct SimulcastStream
{
unsigned short width;                  // Frame width of this simulcast layer.
unsigned short height;                 // Frame height of this simulcast layer.
unsigned char numberOfTemporalLayers;  // Temporal layers within this stream.
unsigned int maxBitrate;               // Maximum bitrate cap — units not shown
                                       // here; presumably kbit/s, TODO confirm.
unsigned int qpMax;                    // Highest allowed QP, i.e. the minimum
                                       // quality floor for this stream.
};
// Common video codec properties
struct VideoCodec
{
@ -588,8 +603,8 @@ struct VideoCodec
VideoCodecUnion codecSpecific;
unsigned int qpMax;
unsigned char numberOfSimulcastStreams;
SimulcastStream simulcastStream[kMaxSimulcastStreams];
};
} // namespace webrtc
#endif // WEBRTC_COMMON_TYPES_H

View File

@ -38,6 +38,7 @@ typedef struct {
WebRtc_Word16 nlpMode; // default kAecNlpModerate
WebRtc_Word16 skewMode; // default kAecFalse
WebRtc_Word16 metricsMode; // default kAecFalse
int delay_logging; // default kAecFalse
//float realSkew;
} AecConfig;
@ -66,7 +67,7 @@ extern "C" {
* Inputs Description
* -------------------------------------------------------------------
* void **aecInst Pointer to the AEC instance to be created
* and initilized
* and initialized
*
* Outputs Description
* -------------------------------------------------------------------
@ -225,6 +226,23 @@ WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status);
*/
WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics);
/*
* Gets the current delay metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* median Delay median value.
* int* std Delay standard deviation.
*
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std);
/*
* Gets the last error code.
*

View File

@ -1,953 +0,0 @@
% Partitioned block frequency domain adaptive filtering NLMS and
% standard time-domain sample-based NLMS
%fid=fopen('aecFar-samsung.pcm', 'rb'); % Load far end
fid=fopen('aecFar.pcm', 'rb'); % Load far end
%fid=fopen(farFile, 'rb'); % Load far end
rrin=fread(fid,inf,'int16');
fclose(fid);
%rrin=loadsl('data/far_me2.pcm'); % Load far end
%fid=fopen('aecNear-samsung.pcm', 'rb'); % Load near end
fid=fopen('aecNear.pcm', 'rb'); % Load near end
%fid=fopen(nearFile, 'rb'); % Load near end
ssin=fread(fid,inf,'int16');
%ssin = [zeros(1024,1) ; ssin(1:end-1024)];
fclose(fid);
rand('state',13);
fs=16000;
mult=fs/8000;
%rrin=rrin(fs*0+1:round(fs*120));
%ssin=ssin(fs*0+1:round(fs*120));
if fs == 8000
cohRange = 2:3;
elseif fs==16000
cohRange = 2;
end
% Flags
NLPon=1; % NLP
CNon=1; % Comfort noise
PLTon=1; % Plotting
M = 16; % Number of partitions
N = 64; % Partition length
L = M*N; % Filter length
if fs == 8000
mufb = 0.6;
else
mufb = 0.5;
end
%mufb=1;
VADtd=48;
alp = 0.1; % Power estimation factor alc = 0.1; % Coherence estimation factor
beta = 0.9; % Plotting factor
%% Changed a little %%
step = 0.3;%0.1875; % Downward step size
%%
if fs == 8000
threshold=2e-6; % DTrob threshold
else
%threshold=0.7e-6;
threshold=1.5e-6; end
if fs == 8000
echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N);
%echoBandRange = ceil(1500*2/fs*N):floor(2500*2/fs*N);
else
echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N);
%echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N);
end
%echoBandRange = ceil(1600*2/fs*N):floor(1900*2/fs*N);
%echoBandRange = ceil(2000*2/fs*N):floor(4000*2/fs*N);
suppState = 1;
transCtr = 0;
Nt=1;
vt=1;
ramp = 1.0003; % Upward ramp
rampd = 0.999; % Downward ramp
cvt = 20; % Subband VAD threshold;
nnthres = 20; % Noise threshold
shh=logspace(-1.3,-2.2,N+1)';
sh=[shh;flipud(shh(2:end-1))]; % Suppression profile
len=length(ssin);
w=zeros(L,1); % Sample-based TD NLMS
WFb=zeros(N+1,M); % Block-based FD NLMS
WFbOld=zeros(N+1,M); % Block-based FD NLMS
YFb=zeros(N+1,M);
erfb=zeros(len,1);
erfb3=zeros(len,1);
ercn=zeros(len,1);
zm=zeros(N,1);
XFm=zeros(N+1,M);
YFm=zeros(N+1,M);
pn0=10*ones(N+1,1);
pn=zeros(N+1,1);
NN=len;
Nb=floor(NN/N)-M;
erifb=zeros(Nb+1,1)+0.1;
erifb3=zeros(Nb+1,1)+0.1;
ericn=zeros(Nb+1,1)+0.1;
dri=zeros(Nb+1,1)+0.1;
start=1;
xo=zeros(N,1);
do=xo;
eo=xo;
echoBands=zeros(Nb+1,1);
cohxdAvg=zeros(Nb+1,1);
cohxdSlow=zeros(Nb+1,N+1);
cohedSlow=zeros(Nb+1,N+1);
%overdriveM=zeros(Nb+1,N+1);
cohxdFastAvg=zeros(Nb+1,1);
cohxdAvgBad=zeros(Nb+1,1);
cohedAvg=zeros(Nb+1,1);
cohedFastAvg=zeros(Nb+1,1);
hnledAvg=zeros(Nb+1,1);
hnlxdAvg=zeros(Nb+1,1);
ovrdV=zeros(Nb+1,1);
dIdxV=zeros(Nb+1,1);
SLxV=zeros(Nb+1,1);
hnlSortQV=zeros(Nb+1,1);
hnlPrefAvgV=zeros(Nb+1,1);
mutInfAvg=zeros(Nb+1,1);
%overdrive=zeros(Nb+1,1);
hnled = zeros(N+1, 1);
weight=zeros(N+1,1);
hnlMax = zeros(N+1, 1);
hnl = zeros(N+1, 1);
overdrive = ones(1, N+1);
xfwm=zeros(N+1,M);
dfm=zeros(N+1,M);
WFbD=ones(N+1,1);
fbSupp = 0;
hnlLocalMin = 1;
cohxdLocalMin = 1;
hnlLocalMinV=zeros(Nb+1,1);
cohxdLocalMinV=zeros(Nb+1,1);
hnlMinV=zeros(Nb+1,1);
dkEnV=zeros(Nb+1,1);
ekEnV=zeros(Nb+1,1);
ovrd = 2;
ovrdPos = floor((N+1)/4);
ovrdSm = 2;
hnlMin = 1;
minCtr = 0;
SeMin = 0;
SdMin = 0;
SeLocalAvg = 0;
SeMinSm = 0;
divergeFact = 1;
dIdx = 1;
hnlMinCtr = 0;
hnlNewMin = 0;
divergeState = 0;
Sy=ones(N+1,1);
Sym=1e7*ones(N+1,1);
wins=[0;sqrt(hanning(2*N-1))];
ubufn=zeros(2*N,1);
ebuf=zeros(2*N,1);
ebuf2=zeros(2*N,1);
ebuf4=zeros(2*N,1);
mbuf=zeros(2*N,1);
cohedFast = zeros(N+1,1);
cohxdFast = zeros(N+1,1);
cohxd = zeros(N+1,1);
Se = zeros(N+1,1);
Sd = zeros(N+1,1);
Sx = zeros(N+1,1);
SxBad = zeros(N+1,1);
Sed = zeros(N+1,1);
Sxd = zeros(N+1,1);
SxdBad = zeros(N+1,1);
hnledp=[];
cohxdMax = 0;
%hh=waitbar(0,'Please wait...');
progressbar(0);
%spaces = ' ';
%spaces = repmat(spaces, 50, 1);
%spaces = ['[' ; spaces ; ']'];
%fprintf(1, spaces);
%fprintf(1, '\n');
for kk=1:Nb
pos = N * (kk-1) + start;
% FD block method
% ---------------------- Organize data
xk = rrin(pos:pos+N-1);
dk = ssin(pos:pos+N-1);
xx = [xo;xk];
xo = xk;
tmp = fft(xx);
XX = tmp(1:N+1);
dd = [do;dk]; % Overlap
do = dk;
tmp = fft(dd); % Frequency domain
DD = tmp(1:N+1);
% ------------------------ Power estimation
pn0 = (1 - alp) * pn0 + alp * real(XX.* conj(XX));
pn = pn0;
%pn = (1 - alp) * pn + alp * M * pn0;
if (CNon)
Yp = real(conj(DD).*DD); % Instantaneous power
Sy = (1 - alp) * Sy + alp * Yp; % Averaged power
mm = min(Sy,Sym);
diff = Sym - mm;
if (kk>50)
Sym = (mm + step*diff) * ramp; % Estimated background noise power
end
end
% ---------------------- Filtering
XFm(:,1) = XX;
for mm=0:(M-1)
m=mm+1;
YFb(:,m) = XFm(:,m) .* WFb(:,m);
end
yfk = sum(YFb,2);
tmp = [yfk ; flipud(conj(yfk(2:N)))];
ykt = real(ifft(tmp));
ykfb = ykt(end-N+1:end);
% ---------------------- Error estimation
ekfb = dk - ykfb;
%if sum(abs(ekfb)) < sum(abs(dk))
%ekfb = dk - ykfb;
% erfb(pos:pos+N-1) = ekfb;
%else
%ekfb = dk;
% erfb(pos:pos+N-1) = dk;
%end
%(kk-1)*(N*2)+1
erfb(pos:pos+N-1) = ekfb;
tmp = fft([zm;ekfb]); % FD version for cancelling part (overlap-save)
Ek = tmp(1:N+1);
% ------------------------ Adaptation
Ek2 = Ek ./(M*pn + 0.001); % Normalized error
%Ek2 = Ek ./(pn + 0.001); % Normalized error
%Ek2 = Ek ./(100*pn + 0.001); % Normalized error
absEf = max(abs(Ek2), threshold);
absEf = ones(N+1,1)*threshold./absEf;
Ek2 = Ek2.*absEf;
mEk = mufb.*Ek2;
PP = conj(XFm).*(ones(M,1) * mEk')';
tmp = [PP ; flipud(conj(PP(2:N,:)))];
IFPP = real(ifft(tmp));
PH = IFPP(1:N,:);
tmp = fft([PH;zeros(N,M)]);
FPH = tmp(1:N+1,:);
WFb = WFb + FPH;
if mod(kk, 10*mult) == 0
WFbEn = sum(real(WFb.*conj(WFb)));
%WFbEn = sum(abs(WFb));
[tmp, dIdx] = max(WFbEn);
WFbD = sum(abs(WFb(:, dIdx)),2);
%WFbD = WFbD / (mean(WFbD) + 1e-10);
WFbD = min(max(WFbD, 0.5), 4);
end
dIdxV(kk) = dIdx;
% NLP
if (NLPon)
ee = [eo;ekfb];
eo = ekfb;
window = wins;
if fs == 8000
%gamma = 0.88;
gamma = 0.9;
else
%gamma = 0.92;
gamma = 0.93;
end
%gamma = 0.9;
tmp = fft(xx.*window);
xf = tmp(1:N+1);
tmp = fft(dd.*window);
df = tmp(1:N+1);
tmp = fft(ee.*window);
ef = tmp(1:N+1);
xfwm(:,1) = xf;
xf = xfwm(:,dIdx);
%fprintf(1,'%d: %f\n', kk, xf(4));
dfm(:,1) = df;
SxOld = Sx;
Se = gamma*Se + (1-gamma)*real(ef.*conj(ef));
Sd = gamma*Sd + (1-gamma)*real(df.*conj(df));
Sx = gamma*Sx + (1 - gamma)*real(xf.*conj(xf));
%xRatio = real(xfwm(:,1).*conj(xfwm(:,1))) ./ ...
% (real(xfwm(:,2).*conj(xfwm(:,2))) + 1e-10);
%xRatio = Sx ./ (SxOld + 1e-10);
%SLx = log(1/(N+1)*sum(xRatio)) - 1/(N+1)*sum(log(xRatio));
%SLxV(kk) = SLx;
%freqSm = 0.9;
%Sx = filter(freqSm, [1 -(1-freqSm)], Sx);
%Sx(end:1) = filter(freqSm, [1 -(1-freqSm)], Sx(end:1));
%Se = filter(freqSm, [1 -(1-freqSm)], Se);
%Se(end:1) = filter(freqSm, [1 -(1-freqSm)], Se(end:1));
%Sd = filter(freqSm, [1 -(1-freqSm)], Sd);
%Sd(end:1) = filter(freqSm, [1 -(1-freqSm)], Sd(end:1));
%SeFast = ef.*conj(ef);
%SdFast = df.*conj(df);
%SxFast = xf.*conj(xf);
%cohedFast = 0.9*cohedFast + 0.1*SeFast ./ (SdFast + 1e-10);
%cohedFast(find(cohedFast > 1)) = 1;
%cohedFast(find(cohedFast > 1)) = 1 ./ cohedFast(find(cohedFast>1));
%cohedFastAvg(kk) = mean(cohedFast(echoBandRange));
%cohedFastAvg(kk) = min(cohedFast);
%cohxdFast = 0.8*cohxdFast + 0.2*log(SdFast ./ (SxFast + 1e-10));
%cohxdFastAvg(kk) = mean(cohxdFast(echoBandRange));
% coherence
Sxd = gamma*Sxd + (1 - gamma)*xf.*conj(df);
Sed = gamma*Sed + (1-gamma)*ef.*conj(df);
%Sxd = filter(freqSm, [1 -(1-freqSm)], Sxd);
%Sxd(end:1) = filter(freqSm, [1 -(1-freqSm)], Sxd(end:1));
%Sed = filter(freqSm, [1 -(1-freqSm)], Sed);
%Sed(end:1) = filter(freqSm, [1 -(1-freqSm)], Sed(end:1));
cohed = real(Sed.*conj(Sed))./(Se.*Sd + 1e-10);
%cohedAvg(kk) = mean(cohed(echoBandRange));
%cohedAvg(kk) = cohed(6);
%cohedAvg(kk) = min(cohed);
cohxd = real(Sxd.*conj(Sxd))./(Sx.*Sd + 1e-10);
%freqSm = 0.5;
%cohxd(3:end) = filter(freqSm, [1 -(1-freqSm)], cohxd(3:end));
%cohxd(end:3) = filter(freqSm, [1 -(1-freqSm)], cohxd(end:3));
%cohxdAvg(kk) = mean(cohxd(echoBandRange));
%cohxdAvg(kk) = (cohxd(32));
%cohxdAvg(kk) = max(cohxd);
%xf = xfm(:,dIdx);
%SxBad = gamma*SxBad + (1 - gamma)*real(xf.*conj(xf));
%SxdBad = gamma*SxdBad + (1 - gamma)*xf.*conj(df);
%cohxdBad = real(SxdBad.*conj(SxdBad))./(SxBad.*Sd + 0.01);
%cohxdAvgBad(kk) = mean(cohxdBad);
%for j=1:N+1
% mutInf(j) = 0.9*mutInf(j) + 0.1*information(abs(xfm(j,:)), abs(dfm(j,:)));
%end
%mutInfAvg(kk) = mean(mutInf);
%hnled = cohedFast;
%xIdx = find(cohxd > 1 - cohed);
%hnled(xIdx) = 1 - cohxd(xIdx);
%hnled = 1 - max(cohxd, 1-cohedFast);
hnled = min(1 - cohxd, cohed);
%hnled = 1 - cohxd;
%hnled = max(1 - (cohxd + (1-cohedFast)), 0);
%hnled = 1 - max(cohxd, 1-cohed);
if kk > 1
cohxdSlow(kk,:) = 0.99*cohxdSlow(kk-1,:) + 0.01*cohxd';
cohedSlow(kk,:) = 0.99*cohedSlow(kk-1,:) + 0.01*(1-cohed)';
end
if 0
%if kk > 50
%idx = find(hnled > 0.3);
hnlMax = hnlMax*0.9999;
%hnlMax(idx) = max(hnlMax(idx), hnled(idx));
hnlMax = max(hnlMax, hnled);
%overdrive(idx) = max(log(hnlMax(idx))/log(0.99), 1);
avgHnl = mean(hnlMax(echoBandRange));
if avgHnl > 0.3
overdrive = max(log(avgHnl)/log(0.99), 1);
end
weight(4:end) = max(hnlMax) - hnlMax(4:end);
end
%[hg, gidx] = max(hnled);
%fnrg = Sx(gidx) / (Sd(gidx) + 1e-10);
%[tmp, bidx] = find((Sx / Sd + 1e-10) > fnrg);
%hnled(bidx) = hg;
%cohed1 = mean(cohed(cohRange)); % range depends on bandwidth
%cohed1 = cohed1^2;
%echoBands(kk) = length(find(cohed(echoBandRange) < 0.25))/length(echoBandRange);
%if (fbSupp == 0)
% if (echoBands(kk) > 0.8)
% fbSupp = 1;
% end
%else
% if (echoBands(kk) < 0.6)
% fbSupp = 0;
% end
%end
%overdrive(kk) = 7.5*echoBands(kk) + 0.5;
% Factor by which to weight other bands
%if (cohed1 < 0.1)
% w = 0.8 - cohed1*10*0.4;
%else
% w = 0.4;
%end
% Weight coherence subbands
%hnled = w*cohed1 + (1 - w)*cohed;
%hnled = (hnled).^2;
%cohed(floor(N/2):end) = cohed(floor(N/2):end).^2;
%if fbSupp == 1
% cohed = zeros(size(cohed));
%end
%cohed = cohed.^overdrive(kk);
%hnled = gamma*hnled + (1 - gamma)*cohed;
% Additional hf suppression
%hnledp = [hnledp ; mean(hnled)];
%hnled(floor(N/2):end) = hnled(floor(N/2):end).^2;
%ef = ef.*((weight*(min(1 - hnled)).^2 + (1 - weight).*(1 - hnled)).^2);
cohedMean = mean(cohed(echoBandRange));
%aggrFact = 4*(1-mean(hnled(echoBandRange))) + 1;
%[hnlSort, hnlSortIdx] = sort(hnled(echoBandRange));
[hnlSort, hnlSortIdx] = sort(1-cohxd(echoBandRange));
[xSort, xSortIdx] = sort(Sx);
%aggrFact = (1-mean(hnled(echoBandRange)));
%hnlSortQ = hnlSort(qIdx);
hnlSortQ = mean(1 - cohxd(echoBandRange));
%hnlSortQ = mean(1 - cohxd);
[hnlSort2, hnlSortIdx2] = sort(hnled(echoBandRange));
%[hnlSort2, hnlSortIdx2] = sort(hnled);
hnlQuant = 0.75;
hnlQuantLow = 0.5;
qIdx = floor(hnlQuant*length(hnlSort2));
qIdxLow = floor(hnlQuantLow*length(hnlSort2));
hnlPrefAvg = hnlSort2(qIdx);
hnlPrefAvgLow = hnlSort2(qIdxLow);
%hnlPrefAvgLow = mean(hnled);
%hnlPrefAvg = max(hnlSort2);
%hnlPrefAvgLow = min(hnlSort2);
%hnlPref = hnled(echoBandRange);
%hnlPrefAvg = mean(hnlPref(xSortIdx((0.5*length(xSortIdx)):end)));
%hnlPrefAvg = min(hnlPrefAvg, hnlSortQ);
%hnlSortQIdx = hnlSortIdx(qIdx);
%SeQ = Se(qIdx + echoBandRange(1) - 1);
%SdQ = Sd(qIdx + echoBandRange(1) - 1);
%SeQ = Se(qIdxLow + echoBandRange(1) - 1);
%SdQ = Sd(qIdxLow + echoBandRange(1) - 1);
%propLow = length(find(hnlSort < 0.1))/length(hnlSort);
%aggrFact = min((1 - hnlSortQ)/2, 0.5);
%aggrTerm = 1/aggrFact;
%hnlg = mean(hnled(echoBandRange));
%hnlg = hnlSortQ;
%if suppState == 0
% if hnlg < 0.05
% suppState = 2;
% transCtr = 0;
% elseif hnlg < 0.75
% suppState = 1;
% transCtr = 0;
% end
%elseif suppState == 1
% if hnlg > 0.8
% suppState = 0;
% transCtr = 0;
% elseif hnlg < 0.05
% suppState = 2;
% transCtr = 0;
% end
%else
% if hnlg > 0.8
% suppState = 0;
% transCtr = 0;
% elseif hnlg > 0.25
% suppState = 1;
% transCtr = 0;
% end
%end
%if kk > 50
if cohedMean > 0.98 & hnlSortQ > 0.9
%if suppState == 1
% hnled = 0.5*hnled + 0.5*cohed;
% %hnlSortQ = 0.5*hnlSortQ + 0.5*cohedMean;
% hnlPrefAvg = 0.5*hnlPrefAvg + 0.5*cohedMean;
%else
% hnled = cohed;
% %hnlSortQ = cohedMean;
% hnlPrefAvg = cohedMean;
%end
suppState = 0;
elseif cohedMean < 0.95 | hnlSortQ < 0.8
%if suppState == 0
% hnled = 0.5*hnled + 0.5*cohed;
% %hnlSortQ = 0.5*hnlSortQ + 0.5*cohedMean;
% hnlPrefAvg = 0.5*hnlPrefAvg + 0.5*cohedMean;
%end
suppState = 1;
end
if hnlSortQ < cohxdLocalMin & hnlSortQ < 0.75
cohxdLocalMin = hnlSortQ;
end
if cohxdLocalMin == 1
ovrd = 3;
hnled = 1-cohxd;
hnlPrefAvg = hnlSortQ;
hnlPrefAvgLow = hnlSortQ;
end
if suppState == 0
hnled = cohed;
hnlPrefAvg = cohedMean;
hnlPrefAvgLow = cohedMean;
end
%if hnlPrefAvg < hnlLocalMin & hnlPrefAvg < 0.6
if hnlPrefAvgLow < hnlLocalMin & hnlPrefAvgLow < 0.6
%hnlLocalMin = hnlPrefAvg;
%hnlMin = hnlPrefAvg;
hnlLocalMin = hnlPrefAvgLow;
hnlMin = hnlPrefAvgLow;
hnlNewMin = 1;
hnlMinCtr = 0;
%if hnlMinCtr == 0
% hnlMinCtr = hnlMinCtr + 1;
%else
% hnlMinCtr = 0;
% hnlMin = hnlLocalMin;
%SeLocalMin = SeQ;
%SdLocalMin = SdQ;
%SeLocalAvg = 0;
%minCtr = 0;
% ovrd = max(log(0.0001)/log(hnlMin), 2);
%divergeFact = hnlLocalMin;
end
if hnlNewMin == 1
hnlMinCtr = hnlMinCtr + 1;
end
if hnlMinCtr == 2
hnlNewMin = 0;
hnlMinCtr = 0;
%ovrd = max(log(0.0001)/log(hnlMin), 2);
ovrd = max(log(0.00001)/(log(hnlMin + 1e-10) + 1e-10), 3);
%ovrd = max(log(0.00000001)/(log(hnlMin + 1e-10) + 1e-10), 5);
%ovrd = max(log(0.0001)/log(hnlPrefAvg), 2);
%ovrd = max(log(0.001)/log(hnlMin), 2);
end
hnlLocalMin = min(hnlLocalMin + 0.0008/mult, 1);
cohxdLocalMin = min(cohxdLocalMin + 0.0004/mult, 1);
%divergeFact = hnlSortQ;
%if minCtr > 0 & hnlLocalMin < 1
% hnlMin = hnlLocalMin;
% %SeMin = 0.9*SeMin + 0.1*sqrt(SeLocalMin);
% SdMin = sqrt(SdLocalMin);
% %SeMin = sqrt(SeLocalMin)*hnlSortQ;
% SeMin = sqrt(SeLocalMin);
% %ovrd = log(100/SeMin)/log(hnlSortQ);
% %ovrd = log(100/SeMin)/log(hnlSortQ);
% ovrd = log(0.01)/log(hnlMin);
% ovrd = max(ovrd, 2);
% ovrdPos = hnlSortQIdx;
% %ovrd = max(ovrd, 1);
% %SeMin = sqrt(SeLocalAvg/5);
% minCtr = 0;
%else
% %SeLocalMin = 0.9*SeLocalMin +0.1*SeQ;
% SeLocalAvg = SeLocalAvg + SeQ;
% minCtr = minCtr + 1;
%end
if ovrd < ovrdSm
ovrdSm = 0.99*ovrdSm + 0.01*ovrd;
else
ovrdSm = 0.9*ovrdSm + 0.1*ovrd;
end
%end
%ekEn = sum(real(ekfb.^2));
%dkEn = sum(real(dk.^2));
ekEn = sum(Se);
dkEn = sum(Sd);
if divergeState == 0
if ekEn > dkEn
ef = df;
divergeState = 1;
%hnlPrefAvg = hnlSortQ;
%hnled = (1 - cohxd);
end
else
%if ekEn*1.1 < dkEn
%if ekEn*1.26 < dkEn
if ekEn*1.05 < dkEn
divergeState = 0;
else
ef = df;
end
end
if ekEn > dkEn*19.95
WFb=zeros(N+1,M); % Block-based FD NLMS
end
ekEnV(kk) = ekEn;
dkEnV(kk) = dkEn;
hnlLocalMinV(kk) = hnlLocalMin;
cohxdLocalMinV(kk) = cohxdLocalMin;
hnlMinV(kk) = hnlMin;
%cohxdMaxLocal = max(cohxdSlow(kk,:));
%if kk > 50
%cohxdMaxLocal = 1-hnlSortQ;
%if cohxdMaxLocal > 0.5
% %if cohxdMaxLocal > cohxdMax
% odScale = max(log(cohxdMaxLocal)/log(0.95), 1);
% %overdrive(7:end) = max(log(cohxdSlow(kk,7:end))/log(0.9), 1);
% cohxdMax = cohxdMaxLocal;
% end
%end
%end
%cohxdMax = cohxdMax*0.999;
%overdriveM(kk,:) = max(overdrive, 1);
%aggrFact = 0.25;
aggrFact = 0.3;
%aggrFact = 0.5*propLow;
%if fs == 8000
% wCurve = [0 ; 0 ; aggrFact*sqrt(linspace(0,1,N-1))' + 0.1];
%else
% wCurve = [0; 0; 0; aggrFact*sqrt(linspace(0,1,N-2))' + 0.1];
%end
wCurve = [0; aggrFact*sqrt(linspace(0,1,N))' + 0.1];
% For sync with C
%if fs == 8000
% wCurve = wCurve(2:end);
%else
% wCurve = wCurve(1:end-1);
%end
%weight = aggrFact*(sqrt(linspace(0,1,N+1)'));
%weight = aggrFact*wCurve;
weight = wCurve;
%weight = aggrFact*ones(N+1,1);
%weight = zeros(N+1,1);
%hnled = weight.*min(hnled) + (1 - weight).*hnled;
%hnled = weight.*min(mean(hnled(echoBandRange)), hnled) + (1 - weight).*hnled;
%hnled = weight.*min(hnlSortQ, hnled) + (1 - weight).*hnled;
%hnlSortQV(kk) = mean(hnled);
%hnlPrefAvgV(kk) = mean(hnled(echoBandRange));
hnled = weight.*min(hnlPrefAvg, hnled) + (1 - weight).*hnled;
%od = aggrFact*(sqrt(linspace(0,1,N+1)') + aggrTerm);
%od = 4*(sqrt(linspace(0,1,N+1)') + 1/4);
%ovrdFact = (ovrdSm - 1) / sqrt(ovrdPos/(N+1));
%ovrdFact = ovrdSm / sqrt(echoBandRange(floor(length(echoBandRange)/2))/(N+1));
%od = ovrdFact*sqrt(linspace(0,1,N+1))' + 1;
%od = ovrdSm*ones(N+1,1).*abs(WFb(:,dIdx))/(max(abs(WFb(:,dIdx)))+1e-10);
%od = ovrdSm*ones(N+1,1);
%od = ovrdSm*WFbD.*(sqrt(linspace(0,1,N+1))' + 1);
od = ovrdSm*(sqrt(linspace(0,1,N+1))' + 1);
%od = 4*(sqrt(linspace(0,1,N+1))' + 1);
%od = 2*ones(N+1,1);
%od = 2*ones(N+1,1);
%sshift = ((1-hnled)*2-1).^3+1;
sshift = ones(N+1,1);
hnled = hnled.^(od.*sshift);
%if hnlg > 0.75
%if (suppState ~= 0)
% transCtr = 0;
%end
% suppState = 0;
%elseif hnlg < 0.6 & hnlg > 0.2
% suppState = 1;
%elseif hnlg < 0.1
%hnled = zeros(N+1, 1);
%if (suppState ~= 2)
% transCtr = 0;
%end
% suppState = 2;
%else
% if (suppState ~= 2)
% transCtr = 0;
% end
% suppState = 2;
%end
%if suppState == 0
% hnled = ones(N+1, 1);
%elseif suppState == 2
% hnled = zeros(N+1, 1);
%end
%hnled(find(hnled < 0.1)) = 0;
%hnled = hnled.^2;
%if transCtr < 5
%hnl = 0.75*hnl + 0.25*hnled;
% transCtr = transCtr + 1;
%else
hnl = hnled;
%end
%hnled(find(hnled < 0.05)) = 0;
ef = ef.*(hnl);
%ef = ef.*(min(1 - cohxd, cohed).^2);
%ef = ef.*((1-cohxd).^2);
ovrdV(kk) = ovrdSm;
%ovrdV(kk) = dIdx;
%ovrdV(kk) = divergeFact;
%hnledAvg(kk) = 1-mean(1-cohedFast(echoBandRange));
hnledAvg(kk) = 1-mean(1-cohed(echoBandRange));
hnlxdAvg(kk) = 1-mean(cohxd(echoBandRange));
%hnlxdAvg(kk) = cohxd(5);
%hnlSortQV(kk) = mean(hnled);
hnlSortQV(kk) = hnlPrefAvgLow;
hnlPrefAvgV(kk) = hnlPrefAvg;
%hnlAvg(kk) = propLow;
%ef(N/2:end) = 0;
%ner = (sum(Sd) ./ (sum(Se.*(hnl.^2)) + 1e-10));
% Comfort noise
if (CNon)
snn=sqrt(Sym);
snn(1)=0; % Reject LF noise
Un=snn.*exp(j*2*pi.*[0;rand(N-1,1);0]);
% Weight comfort noise by suppression
Un = sqrt(1-hnled.^2).*Un;
Fmix = ef + Un;
else
Fmix = ef;
end
% Overlap and add in time domain for smoothness
tmp = [Fmix ; flipud(conj(Fmix(2:N)))];
mixw = wins.*real(ifft(tmp));
mola = mbuf(end-N+1:end) + mixw(1:N);
mbuf = mixw;
ercn(pos:pos+N-1) = mola;
end % NLPon
% Filter update
%Ek2 = Ek ./(12*pn + 0.001); % Normalized error
%Ek2 = Ek2 * divergeFact;
%Ek2 = Ek ./(pn + 0.001); % Normalized error
%Ek2 = Ek ./(100*pn + 0.001); % Normalized error
%divergeIdx = find(abs(Ek) > abs(DD));
%divergeIdx = find(Se > Sd);
%threshMod = threshold*ones(N+1,1);
%if length(divergeIdx) > 0
%if sum(abs(Ek)) > sum(abs(DD))
%WFb(divergeIdx,:) = WFb(divergeIdx,:) .* repmat(sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10))),1,M);
%Ek2(divergeIdx) = Ek2(divergeIdx) .* sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10));
%Ek2(divergeIdx) = Ek2(divergeIdx) .* abs(DD(divergeIdx))./(abs(Ek(divergeIdx))+1e-10);
%WFb(divergeIdx,:) = WFbOld(divergeIdx,:);
%WFb = WFbOld;
%threshMod(divergeIdx) = threshMod(divergeIdx) .* abs(DD(divergeIdx))./(abs(Ek(divergeIdx))+1e-10);
% threshMod(divergeIdx) = threshMod(divergeIdx) .* sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10));
%end
%absEf = max(abs(Ek2), threshold);
%absEf = ones(N+1,1)*threshold./absEf;
%absEf = max(abs(Ek2), threshMod);
%absEf = threshMod./absEf;
%Ek2 = Ek2.*absEf;
%if sum(Se) <= sum(Sd)
% mEk = mufb.*Ek2;
% PP = conj(XFm).*(ones(M,1) * mEk')';
% tmp = [PP ; flipud(conj(PP(2:N,:)))];
% IFPP = real(ifft(tmp));
% PH = IFPP(1:N,:);
% tmp = fft([PH;zeros(N,M)]);
% FPH = tmp(1:N+1,:);
% %WFbOld = WFb;
% WFb = WFb + FPH;
%else
% WF = WFbOld;
%end
% Shift old FFTs
%for m=M:-1:2
% XFm(:,m) = XFm(:,m-1);
% YFm(:,m) = YFm(:,m-1);
%end
XFm(:,2:end) = XFm(:,1:end-1);
YFm(:,2:end) = YFm(:,1:end-1);
xfwm(:,2:end) = xfwm(:,1:end-1);
dfm(:,2:end) = dfm(:,1:end-1);
%if mod(kk, floor(Nb/50)) == 0
% fprintf(1, '.');
%end
if mod(kk, floor(Nb/100)) == 0
%if mod(kk, floor(Nb/500)) == 0
progressbar(kk/Nb);
%figure(5)
%plot(abs(WFb));
%legend('1','2','3','4','5','6','7','8','9','10','11','12');
%title(kk*N/fs);
%figure(6)
%plot(WFbD);
%figure(6)
%plot(threshMod)
%if length(divergeIdx) > 0
% plot(abs(DD))
% hold on
% plot(abs(Ek), 'r')
% hold off
%plot(min(sqrt(Sd./(Se+1e-10)),1))
%axis([0 N 0 1]);
%end
%figure(6)
%plot(cohedFast);
%axis([1 N+1 0 1]);
%plot(WFbEn);
%figure(7)
%plot(weight);
%plot([cohxd 1-cohed]);
%plot([cohxd 1-cohed 1-cohedFast hnled]);
%plot([cohxd cohxdFast/max(cohxdFast)]);
%legend('cohxd', '1-cohed', '1-cohedFast');
%axis([1 65 0 1]);
%pause(0.5);
%overdrive
end
end
progressbar(1);
%figure(2);
%plot([feat(:,1) feat(:,2)+1 feat(:,3)+2 mfeat+3]);
%plot([feat(:,1) mfeat+1]);
%figure(3);
%plot(10*log10([dri erifb erifb3 ericn]));
%legend('Near-end','Error','Post NLP','Final',4);
% Compensate for delay
%ercn=[ercn(N+1:end);zeros(N,1)];
%ercn_=[ercn_(N+1:end);zeros(N,1)];
%figure(11);
%plot(cohxdSlow);
%figure(12);
%surf(cohxdSlow);
%shading interp;
%figure(13);
%plot(overdriveM);
%figure(14);
%surf(overdriveM);
%shading interp;
figure(10);
t = (0:Nb)*N/fs;
rrinSubSamp = rrin(N*(1:(Nb+1)));
plot(t, rrinSubSamp/max(abs(rrinSubSamp)),'b');
hold on
plot(t, hnledAvg, 'r');
plot(t, hnlxdAvg, 'g');
plot(t, hnlSortQV, 'y');
plot(t, hnlLocalMinV, 'k');
plot(t, cohxdLocalMinV, 'c');
plot(t, hnlPrefAvgV, 'm');
%plot(t, cohxdAvg, 'r');
%plot(cohxdFastAvg, 'r');
%plot(cohxdAvgBad, 'k');
%plot(t, cohedAvg, 'k');
%plot(t, 1-cohedFastAvg, 'k');
%plot(ssin(N*(1:floor(length(ssin)/N)))/max(abs(ssin)));
%plot(echoBands,'r');
%plot(overdrive, 'g');
%plot(erfb(N*(1:floor(length(erfb)/N)))/max(abs(erfb)));
hold off
tightx;
figure(11)
plot(t, ovrdV);
tightx;
%plot(mfeat,'r');
%plot(1-cohxyp_,'r');
%plot(Hnlxydp,'y');
%plot(hnledp,'k');
%plot(Hnlxydp, 'c');
%plot(ccohpd_,'k');
%plot(supplot_, 'g');
%plot(ones(length(mfeat),1)*rr1_, 'k');
%plot(ones(length(mfeat),1)*rr2_, 'k');
%plot(N*(1:length(feat)), feat);
%plot(Sep_,'r');
%axis([1 floor(length(erfb)/N) -1 1])
%hold off
%plot(10*log10([Se_, Sx_, Seu_, real(sf_.*conj(sf_))]));
%legend('Se','Sx','Seu','S');
%figure(5)
%plot([ercn ercn_]);
figure(12)
plot(t, dIdxV);
%plot(t, SLxV);
tightx;
%figure(13)
%plot(t, [ekEnV dkEnV]);
%plot(t, dkEnV./(ekEnV+1e-10));
%tightx;
%close(hh);
%spclab(fs,ssin,erfb,ercn,'outxd.pcm');
%spclab(fs,rrin,ssin,erfb,1.78*ercn,'vqeOut-1.pcm');
%spclab(fs,erfb,'aecOutLp.pcm');
%spclab(fs,rrin,ssin,erfb,1.78*ercn,'aecOut25.pcm','vqeOut-1.pcm');
%spclab(fs,rrin,ssin,erfb,ercn,'aecOut-mba.pcm');
%spclab(fs,rrin,ssin,erfb,ercn,'aecOut.pcm');
%spclab(fs, ssin, erfb, ercn, 'out0.pcm');

View File

@ -12,12 +12,14 @@
* The core AEC algorithm, which is presented with time-aligned signals.
*/
#include "aec_core.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "aec_core.h"
#include "aec_rdft.h"
#include "delay_estimator_float.h"
#include "ring_buffer.h"
#include "system_wrappers/interface/cpu_features_wrapper.h"
@ -34,26 +36,9 @@ static const float cnScaleHband = (float)0.4; // scale for comfort noise in H ba
// Initial bin for averaging nlp gain in low band
static const int freqAvgIc = PART_LEN / 2;
/* Matlab code to produce table:
win = sqrt(hanning(63)); win = [0 ; win(1:32)];
fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
*/
/*
static const float sqrtHanning[33] = {
0.00000000000000, 0.04906767432742, 0.09801714032956,
0.14673047445536, 0.19509032201613, 0.24298017990326,
0.29028467725446, 0.33688985339222, 0.38268343236509,
0.42755509343028, 0.47139673682600, 0.51410274419322,
0.55557023301960, 0.59569930449243, 0.63439328416365,
0.67155895484702, 0.70710678118655, 0.74095112535496,
0.77301045336274, 0.80320753148064, 0.83146961230255,
0.85772861000027, 0.88192126434835, 0.90398929312344,
0.92387953251129, 0.94154406518302, 0.95694033573221,
0.97003125319454, 0.98078528040323, 0.98917650996478,
0.99518472667220, 0.99879545620517, 1.00000000000000
};
*/
// Matlab code to produce table:
// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
static const float sqrtHanning[65] = {
0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
@ -79,10 +64,9 @@ static const float sqrtHanning[65] = {
0.99969881869620f, 1.00000000000000f
};
/* Matlab code to produce table:
weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
*/
// Matlab code to produce table:
// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
const float WebRtcAec_weightCurve[65] = {
0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
@ -97,10 +81,9 @@ const float WebRtcAec_weightCurve[65] = {
0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
};
/* Matlab code to produce table:
overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
*/
// Matlab code to produce table:
// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
const float WebRtcAec_overDriveCurve[65] = {
1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
@ -193,6 +176,15 @@ int WebRtcAec_CreateAec(aec_t **aecInst)
return -1;
}
if (WebRtc_CreateDelayEstimatorFloat(&aec->delay_estimator,
PART_LEN1,
kMaxDelay,
0) == -1) {
WebRtcAec_FreeAec(aec);
aec = NULL;
return -1;
}
return 0;
}
@ -209,6 +201,8 @@ int WebRtcAec_FreeAec(aec_t *aec)
WebRtcApm_FreeBuffer(aec->nearFrBufH);
WebRtcApm_FreeBuffer(aec->outFrBufH);
WebRtc_FreeDelayEstimatorFloat(aec->delay_estimator);
free(aec);
return 0;
}
@ -255,6 +249,32 @@ static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
}
}
// Time-unconstrained filter adaptation.
// TODO(andrew): consider for a low-complexity mode.
//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
// float ef[2][PART_LEN1]) {
// int i, j;
// for (i = 0; i < NR_PART; i++) {
// int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
// int pos;
// // Check for wrap
// if (i + aec->xfBufBlockPos >= NR_PART) {
// xPos -= NR_PART * PART_LEN1;
// }
//
// pos = i * PART_LEN1;
//
// for (j = 0; j < PART_LEN1; j++) {
// aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
// -aec->xfBuf[xPos + j][1],
// ef[j][0], ef[j][1]);
// aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
// -aec->xfBuf[xPos + j][1],
// ef[j][0], ef[j][1]);
// }
// }
//}
static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
int i, j;
for (i = 0; i < NR_PART; i++) {
@ -267,16 +287,6 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
pos = i * PART_LEN1;
#ifdef UNCONSTR
for (j = 0; j < PART_LEN1; j++) {
aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
}
#else
for (j = 0; j < PART_LEN; j++) {
fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
@ -309,7 +319,6 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
aec->wfBuf[0][pos + j] += fft[2 * j];
aec->wfBuf[1][pos + j] += fft[2 * j + 1];
}
#endif // UNCONSTR
}
}
@ -375,6 +384,12 @@ int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
return -1;
}
if (WebRtc_InitDelayEstimatorFloat(aec->delay_estimator) != 0) {
return -1;
}
aec->delay_logging_enabled = 0;
memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
// Default target suppression level
aec->targetSupp = -11.5;
aec->minOverDrive = 2.0;
@ -565,6 +580,10 @@ static void ProcessBlock(aec_t *aec, const short *farend,
float fft[PART_LEN2];
float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
complex_t df[PART_LEN1];
float far_spectrum = 0.0f;
float near_spectrum = 0.0f;
float abs_far_spectrum[PART_LEN1];
float abs_near_spectrum[PART_LEN1];
const float gPow[2] = {0.9f, 0.1f};
@ -629,10 +648,15 @@ static void ProcessBlock(aec_t *aec, const short *farend,
// Power smoothing
for (i = 0; i < PART_LEN1; i++) {
aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART *
(xf[0][i] * xf[0][i] + xf[1][i] * xf[1][i]);
aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] *
(df[i][0] * df[i][0] + df[i][1] * df[i][1]);
far_spectrum = xf[0][i] * xf[0][i] + xf[1][i] * xf[1][i];
aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
// Calculate absolute spectra
abs_far_spectrum[i] = sqrtf(far_spectrum);
near_spectrum = df[i][0] * df[i][0] + df[i][1] * df[i][1];
aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
// Calculate absolute spectra
abs_near_spectrum[i] = sqrtf(near_spectrum);
}
// Estimate noise power. Wait until dPow is more stable.
@ -667,6 +691,20 @@ static void ProcessBlock(aec_t *aec, const short *farend,
aec->noisePow = aec->dMinPow;
}
// Block-wise delay estimation used for logging
if (aec->delay_logging_enabled) {
int delay_estimate = 0;
// Estimate the delay
delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
abs_far_spectrum,
abs_near_spectrum,
PART_LEN1,
aec->echoState);
if (delay_estimate >= 0) {
// Update delay estimate buffer
aec->delay_histogram[delay_estimate]++;
}
}
// Update the xfBuf block position.
aec->xfBufBlockPos--;
@ -720,9 +758,7 @@ static void ProcessBlock(aec_t *aec, const short *farend,
// Scale error signal inversely with far power.
WebRtcAec_ScaleErrorSignal(aec, ef);
// Filter adaptation
WebRtcAec_FilterAdaptation(aec, fft, ef);
NonLinearProcessing(aec, output, outputH);
#ifdef AEC_DEBUG

View File

@ -20,7 +20,6 @@
#include "signal_processing_library.h"
#include "typedefs.h"
//#define UNCONSTR // time-unconstrained filter
//#define AEC_DEBUG // for recording files
#define FRAME_LEN 80
@ -34,6 +33,8 @@
#define PREF_BAND_SIZE 24
#define BLOCKL_MAX FRAME_LEN
// Maximum delay in fixed point delay estimator, used for logging
enum {kMaxDelay = 100};
typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
@ -142,6 +143,10 @@ typedef struct {
int flag_Hband_cn; //for comfort noise
float cn_scale_Hband; //scale for comfort noise in H band
int delay_histogram[kMaxDelay];
int delay_logging_enabled;
void* delay_estimator;
#ifdef AEC_DEBUG
FILE *farFile;
FILE *nearFile;

View File

@ -138,16 +138,6 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
xPos -= NR_PART * PART_LEN1;
}
#ifdef UNCONSTR
for (j = 0; j < PART_LEN1; j++) {
aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
}
#else
// Process the whole array...
for (j = 0; j < PART_LEN; j+= 4) {
// Load xfBuf and ef.
@ -208,7 +198,6 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
}
aec->wfBuf[1][pos] = wt1;
}
#endif // UNCONSTR
}
}
@ -246,10 +235,9 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
{0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
static const int shift_exponent_into_top_mantissa = 8;
const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask));
const __m128 n_1 = (__m128)_mm_srli_epi32((__m128i)two_n,
shift_exponent_into_top_mantissa);
const __m128 n_0 = _mm_or_ps(
(__m128)n_1, *((__m128 *)eight_biased_exponent));
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(two_n),
shift_exponent_into_top_mantissa));
const __m128 n_0 = _mm_or_ps(n_1, *((__m128 *)eight_biased_exponent));
const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one));
// Compute y.
@ -328,8 +316,8 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
static const int float_exponent_shift = 23;
const __m128i two_n_exponent = _mm_add_epi32(
x_minus_half_floor, *((__m128i *)float_exponent_bias));
const __m128 two_n = (__m128)_mm_slli_epi32(
two_n_exponent, float_exponent_shift);
const __m128 two_n = _mm_castsi128_ps(_mm_slli_epi32(
two_n_exponent, float_exponent_shift));
// Compute y.
const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
// Approximate 2^y ~= C2 * y^2 + C1 * y + C0.

View File

@ -11,6 +11,14 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
// These intrinsics were unavailable before VS 2008.
// TODO(andrew): move to a common file.
#if defined(_MSC_VER) && _MSC_VER < 1500
#include <emmintrin.h>
static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
#endif
#ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END

View File

@ -42,27 +42,33 @@ static void cft1st_128_SSE2(float *a) {
const __m128 x1v = _mm_sub_ps(a01v, a23v);
const __m128 x2v = _mm_add_ps(a45v, a67v);
const __m128 x3v = _mm_sub_ps(a45v, a67v);
__m128 x0w;
a01v = _mm_add_ps(x0v, x2v);
x0v = _mm_sub_ps(x0v, x2v);
__m128 x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
a45v = _mm_add_ps(a45_0v, a45_1v);
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
x0v = _mm_add_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 a23_0v = _mm_mul_ps(wk1rv, x0v);
const __m128 a23_1v = _mm_mul_ps(wk1iv, x0w);
a23v = _mm_add_ps(a23_0v, a23_1v);
{
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
a45v = _mm_add_ps(a45_0v, a45_1v);
}
{
__m128 a23_0v, a23_1v;
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
x0v = _mm_add_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
a23_0v = _mm_mul_ps(wk1rv, x0v);
a23_1v = _mm_mul_ps(wk1iv, x0w);
a23v = _mm_add_ps(a23_0v, a23_1v);
x0v = _mm_sub_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
a67v = _mm_add_ps(a67_0v, a67_1v);
x0v = _mm_sub_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
}
{
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
a67v = _mm_add_ps(a67_0v, a67_1v);
}
a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1 ,0));
a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1 ,0));
@ -78,7 +84,7 @@ static void cft1st_128_SSE2(float *a) {
static void cftmdl_128_SSE2(float *a) {
const int l = 8;
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j0, k, k1, k2;
int j0;
__m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
for (j0 = 0; j0 < l; j0 += 2) {
@ -86,9 +92,11 @@ static void cftmdl_128_SSE2(float *a) {
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps((__m128)a_00, (__m128)a_32,
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
_mm_castsi128_ps(a_32),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_08_40 = _mm_shuffle_ps((__m128)a_08, (__m128)a_40,
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
_mm_castsi128_ps(a_40),
_MM_SHUFFLE(1, 0, 1 ,0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
@ -97,30 +105,24 @@ static void cftmdl_128_SSE2(float *a) {
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps((__m128)a_16, (__m128)a_48,
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_24_56 = _mm_shuffle_ps((__m128)a_24, (__m128)a_56,
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
_mm_storel_epi64((__m128i*)&a[j0 + 0], (__m128i)xx0);
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32((__m128i)xx0, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
_mm_storel_epi64((__m128i*)&a[j0 + 16], (__m128i)xx1);
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32((__m128i)xx1, _MM_SHUFFLE(2, 3, 2, 3)));
a[j0 + 48] = -a[j0 + 48];
const __m128 x3i0_3r0_3i1_x3r1 = (__m128)
_mm_shuffle_epi32((__m128i)x3r0_3i0_3r1_x3i1, _MM_SHUFFLE(2, 3, 0, 1));
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(
_mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1),
_MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
_mm_storel_epi64((__m128i*)&a[j0 + 8], (__m128i)x1_x3_add);
_mm_storel_epi64((__m128i*)&a[j0 + 24], (__m128i)x1_x3_sub);
const __m128 yy0 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub,
_MM_SHUFFLE(2, 2, 2 ,2));
@ -129,79 +131,111 @@ static void cftmdl_128_SSE2(float *a) {
const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
const __m128 yy3 = _mm_add_ps(yy0, yy2);
const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
_mm_storel_epi64((__m128i*)&a[j0 + 40], (__m128i)yy4);
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx0),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx1),
_MM_SHUFFLE(2, 3, 2, 3)));
a[j0 + 48] = -a[j0 + 48];
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
_mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
_mm_storel_epi64((__m128i*)&a[j0 + 56],
_mm_shuffle_epi32((__m128i)yy4, _MM_SHUFFLE(2, 3, 2, 3)));
_mm_shuffle_epi32(_mm_castps_si128(yy4),
_MM_SHUFFLE(2, 3, 2, 3)));
}
k1 = 0;
k = 64;
k1 += 2;
k2 = 2 * k1;
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2+0]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2+0]);
wk1rv = _mm_load_ps(&rdft_wk1r[k2+0]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2+0]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2+0]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2+0]);
for (j0 = k; j0 < l + k; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps((__m128)a_00, (__m128)a_32,
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_08_40 = _mm_shuffle_ps((__m128)a_08, (__m128)a_40,
_MM_SHUFFLE(1, 0, 1 ,0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
{
int k = 64;
int k1 = 2;
int k2 = 2 * k1;
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2+0]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2+0]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2+0]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2+0]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2+0]);
wk1rv = _mm_load_ps(&rdft_wk1r[k2+0]);
for (j0 = k; j0 < l + k; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
_mm_castsi128_ps(a_32),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
_mm_castsi128_ps(a_40),
_MM_SHUFFLE(1, 0, 1 ,0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps((__m128)a_16, (__m128)a_48,
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_24_56 = _mm_shuffle_ps((__m128)a_24, (__m128)a_56,
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
_mm_storel_epi64((__m128i*)&a[j0 + 0], (__m128i)xx);
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32((__m128i)xx, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx2 = _mm_mul_ps(xx1 , wk2rv);
const __m128 xx3 = _mm_mul_ps(wk2iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx4 = _mm_add_ps(xx2, xx3);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx2 = _mm_mul_ps(xx1 , wk2rv);
const __m128 xx3 = _mm_mul_ps(wk2iv,
(__m128)_mm_shuffle_epi32((__m128i)xx1, _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 xx4 = _mm_add_ps(xx2, xx3);
_mm_storel_epi64((__m128i*)&a[j0 + 16], (__m128i)xx4);
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32((__m128i)xx4, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(
_mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1),
_MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x3i0_3r0_3i1_x3r1 = (__m128)
_mm_shuffle_epi32((__m128i)x3r0_3i0_3r1_x3i1, _MM_SHUFFLE(2, 3, 0, 1));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
const __m128 xx11 = _mm_mul_ps(wk1iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx12 = _mm_add_ps(xx10, xx11);
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
const __m128 xx11 = _mm_mul_ps(wk1iv,
(__m128)_mm_shuffle_epi32((__m128i)x1_x3_add, _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 xx12 = _mm_add_ps(xx10, xx11);
_mm_storel_epi64((__m128i*)&a[j0 + 8], (__m128i)xx12);
_mm_storel_epi64((__m128i*)&a[j0 + 40],
_mm_shuffle_epi32((__m128i)xx12, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
const __m128 xx21 = _mm_mul_ps(wk3iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx22 = _mm_add_ps(xx20, xx21);
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
const __m128 xx21 = _mm_mul_ps(wk3iv,
(__m128)_mm_shuffle_epi32((__m128i)x1_x3_sub, _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 xx22 = _mm_add_ps(xx20, xx21);
_mm_storel_epi64((__m128i*)&a[j0 + 24], (__m128i)xx22);
_mm_storel_epi64((__m128i*)&a[j0 + 56],
_mm_shuffle_epi32((__m128i)xx22, _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx4),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
_mm_storel_epi64((__m128i*)&a[j0 + 40],
_mm_shuffle_epi32(_mm_castps_si128(xx12),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
_mm_storel_epi64((__m128i*)&a[j0 + 56],
_mm_shuffle_epi32(_mm_castps_si128(xx22),
_MM_SHUFFLE(3, 2, 3, 2)));
}
}
}

View File

@ -11,16 +11,18 @@
/*
* Contains the API functions for the AEC.
*/
#include "echo_cancellation.h"
#include <math.h>
#ifdef AEC_DEBUG
#include <stdio.h>
#endif
#include <stdlib.h>
#include <string.h>
#include "echo_cancellation.h"
#include "aec_core.h"
#include "ring_buffer.h"
#include "resampler.h"
#ifdef AEC_DEBUG
#include <stdio.h>
#endif
#include "ring_buffer.h"
#define BUF_SIZE_FRAMES 50 // buffer size (frames)
// Maximum length of resampled signal. Must be an integer multiple of frames
@ -215,7 +217,7 @@ WebRtc_Word32 WebRtcAec_Init(void *aecInst, WebRtc_Word32 sampFreq, WebRtc_Word3
return -1;
}
aecpc->initFlag = initCheck; // indicates that initilisation has been done
aecpc->initFlag = initCheck; // indicates that initialization has been done
if (aecpc->sampFreq == 32000) {
aecpc->splitSampFreq = 16000;
@ -254,6 +256,7 @@ WebRtc_Word32 WebRtcAec_Init(void *aecInst, WebRtc_Word32 sampFreq, WebRtc_Word3
aecConfig.nlpMode = kAecNlpModerate;
aecConfig.skewMode = kAecFalse;
aecConfig.metricsMode = kAecFalse;
aecConfig.delay_logging = kAecFalse;
if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
aecpc->lastError = AEC_UNSPECIFIED_ERROR;
@ -566,6 +569,15 @@ WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config)
WebRtcAec_InitMetrics(aecpc->aec);
}
if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) {
aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
return -1;
}
aecpc->aec->delay_logging_enabled = config.delay_logging;
if (aecpc->aec->delay_logging_enabled == kAecTrue) {
memset(aecpc->aec->delay_histogram, 0, sizeof(aecpc->aec->delay_histogram));
}
return 0;
}
@ -590,6 +602,7 @@ WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config)
config->nlpMode = aecpc->nlpMode;
config->skewMode = aecpc->skewMode;
config->metricsMode = aecpc->aec->metricsMode;
config->delay_logging = aecpc->aec->delay_logging_enabled;
return 0;
}
@ -717,6 +730,69 @@ WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics)
return 0;
}
// Reports the median and spread of the delays logged by the AEC since the
// previous call, then resets the histogram so a new measurement interval
// starts. The spread is an L1 norm around the median, rounded to the
// nearest integer (a cheap stand-in for a standard deviation).
// Returns 0 on success (with |*median| = |*std| = -1 when no data was
// logged), or -1 on error with the reason stored in |lastError| (except
// when |handle| itself is NULL).
int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) {
  aecpc_t* self = handle;
  int k = 0;
  int count_down = 0;     // Median count-down over the histogram bins.
  int total = 0;          // Number of delay values since the last update.
  int median_value = 0;
  float l1_norm = 0;

  if (self == NULL) {
    return -1;
  }
  // Both output pointers must be valid.
  if (median == NULL || std == NULL) {
    self->lastError = AEC_NULL_POINTER_ERROR;
    return -1;
  }
  if (self->initFlag != initCheck) {
    self->lastError = AEC_UNINITIALIZED_ERROR;
    return -1;
  }
  if (self->aec->delay_logging_enabled == 0) {
    // Delay logging is disabled; there is nothing to report.
    self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
    return -1;
  }

  // Total number of delay values collected since the last update.
  for (k = 0; k < kMaxDelay; k++) {
    total += self->aec->delay_histogram[k];
  }
  if (total == 0) {
    // No new delay data; flag this with -1 in both outputs.
    *median = -1;
    *std = -1;
    return 0;
  }

  // Median: walk the histogram until half of the samples are consumed.
  count_down = total >> 1;
  for (k = 0; k < kMaxDelay; k++) {
    count_down -= self->aec->delay_histogram[k];
    if (count_down < 0) {
      median_value = k;
      break;
    }
  }
  *median = median_value;

  // L1 norm with the median as the central moment.
  for (k = 0; k < kMaxDelay; k++) {
    l1_norm += (float) (fabs(k - median_value) * self->aec->delay_histogram[k]);
  }
  *std = (int) (l1_norm / (float) total + 0.5f);

  // Reset the histogram for the next measurement interval.
  memset(self->aec->delay_histogram, 0, sizeof(self->aec->delay_histogram));

  return 0;
}
WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len)
{
const char version[] = "AEC 2.5.0";

View File

@ -3,9 +3,7 @@ noinst_LTLIBRARIES = libaecm.la
libaecm_la_SOURCES = main/interface/echo_control_mobile.h \
main/source/echo_control_mobile.c \
main/source/aecm_core.c \
main/source/aecm_core.h \
main/source/aecm_delay_estimator.c \
main/source/aecm_delay_estimator.h
main/source/aecm_core.h
libaecm_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility

View File

@ -1,447 +0,0 @@
function [emicrophone,aaa]=compsup(microphone,TheFarEnd,avtime,samplingfreq);
% microphone = microphone signal
% aaa = nonlinearity input variable
% TheFarEnd = far end signal
% avtime = interval to compute suppression from (seconds)
% samplingfreq = sampling frequency
%
% Offline overlap-add frequency-domain echo suppressor:
%   1) binary-spectrum delay estimation between far end and microphone,
%   2) covariance-based estimation of an echo gain matrix G,
%   3) Wiener-style suppression filter with a nonlinear output gain.
% NOTE(review): relies on external helpers designwindow, getBspectrum,
% hisser2 and waitbar_j -- confirm they are on the MATLAB path.
%if(nargin==6)
% fprintf(1,'suppress has received a delay sequence\n');
%end
% Fixed 5th-order IIR smoothing filters used by the delay estimator.
Ap500=[ 1.00, -4.95, 9.801, -9.70299, 4.80298005, -0.9509900499];
Bp500=[ 0.662743088639636, -2.5841655608125, 3.77668102146288, -2.45182477425154, 0.596566274575251, 0.0];
Ap200=[ 1.00, -4.875, 9.50625, -9.26859375, 4.518439453125, -0.881095693359375];
Bp200=[ 0.862545460994275, -3.2832804496114, 4.67892032308828, -2.95798023879133, 0.699796870041299, 0.0];
maxDelay=0.4; %[s]
histLen=1; %[s]
% CONSTANTS THAT YOU CAN EXPERIMENT WITH
A_GAIN=10.0; % for the suppress case
oversampling = 2; % must be power of 2; minimum is 2; 4 works
% fine for support=64, but for support=128,
% 8 gives better results.
support=64; %512 % fft support (frequency resolution; at low
% settings you can hear more distortion
% (e.g. pitch that is left-over from far-end))
% 128 works well, 64 is ok)
lowlevel = mean(abs(microphone))*0.0001;
G_ol = 0; % Use overlapping sets of estimates
% ECHO SUPPRESSION SPECIFIC PARAMETERS
suppress_overdrive=1.0; % overdrive factor for suppression 1.4 is good
gamma_echo=1.0; % same as suppress_overdrive but at different place
de_echo_bound=0.0;
mLim=10; % rank of matrix G
%limBW = 1; % use bandwidth-limited response for G
if mLim > (support/2+1)
error('mLim in suppress.m too large\n');
end
dynrange=1.0000e-004;
% other, constants
hsupport = support/2;
hsupport1 = hsupport+1;
factor = 2 / oversampling;
updatel = support/oversampling;
win=sqrt(designwindow(0,support));
% NOTE(review): missing semicolon echoes estLen to the console --
% presumably intentional progress output.
estLen = round(avtime * samplingfreq/updatel)
runningfmean =0.0;
mLim = floor(hsupport1/2);
V = sqrt(2/hsupport1)*cos(pi/hsupport1*(repmat((0:hsupport1-1) + 0.5, mLim, 1).* ...
repmat((0:mLim-1)' + 0.5, 1, hsupport1)));
fprintf(1,'updatel is %5.3f s\n', updatel/samplingfreq);
bandfirst=8; bandlast=25;
dosmooth=0; % to get rid of wavy bin counts (can be worse or better)
% compute some constants
blockLen = support/oversampling;
maxDelayb = floor(samplingfreq*maxDelay/updatel); % in blocks
histLenb = floor(samplingfreq*histLen/updatel); % in blocks
x0=TheFarEnd;
y0=microphone;
%input
tlength=min([length(microphone),length(TheFarEnd)]);
updateno=floor(tlength/updatel);
tlength=updatel*updateno;
updateno = updateno - oversampling + 1;
TheFarEnd =TheFarEnd(1:tlength);
microphone =microphone(1:tlength);
TheFarEnd =[zeros(hsupport,1);TheFarEnd(1:tlength)];
microphone =[zeros(hsupport,1);microphone(1:tlength)];
% signal length
n = min([floor(length(x0)/support)*support,floor(length(y0)/support)*support]);
nb = n/blockLen - oversampling + 1; % in blocks
% initialize space
win = sqrt([0 ; hanning(support-1)]);
sxAll2 = zeros(hsupport1,nb);
syAll2 = zeros(hsupport1,nb);
z500=zeros(5,maxDelayb+1);
z200=zeros(5,hsupport1);
bxspectrum=uint32(zeros(nb,1));
bxhist=uint32(zeros(maxDelayb+1,1));
byspectrum=uint32(zeros(nb,1));
bcount=zeros(1+maxDelayb,nb);
fcount=zeros(1+maxDelayb,nb);
fout=zeros(1+maxDelayb,nb);
delay=zeros(nb,1);
tdelay=zeros(nb,1);
nlgains=zeros(nb,1);
% create space (mainly for debugging)
emicrophone=zeros(tlength,1);
femicrophone=complex(zeros(hsupport1,updateno));
thefilter=zeros(hsupport1,updateno);
thelimiter=ones(hsupport1,updateno);
fTheFarEnd=complex(zeros(hsupport1,updateno));
afTheFarEnd=zeros(hsupport1,updateno);
fmicrophone=complex(zeros(hsupport1,updateno));
afmicrophone=zeros(hsupport1,updateno);
G = zeros(hsupport1, hsupport1);
zerovec = zeros(hsupport1,1);
zeromat = zeros(hsupport1);
% Reset sums
mmxs_a = zerovec;
mmys_a = zerovec;
s2xs_a = zerovec;
s2ys_a = zerovec;
Rxxs_a = zeromat;
Ryxs_a = zeromat;
count_a = 1;
mmxs_b = zerovec;
mmys_b = zerovec;
s2xs_b = zerovec;
s2ys_b = zerovec;
Rxxs_b = zeromat;
Ryxs_b = zeromat;
count_b = 1;
nog=0;
aaa=zeros(size(TheFarEnd));
% loop over signal blocks
fprintf(1,'.. Suppression; averaging G over %5.1f seconds; file length %5.1f seconds ..\n',avtime, length(microphone)/samplingfreq);
fprintf(1,'.. SUPPRESSING ONLY AFTER %5.1f SECONDS! ..\n',avtime);
fprintf(1,'.. 20 seconds is good ..\n');
hh = waitbar_j(0,'Please wait...');
for i=1:updateno
sb = (i-1)*updatel + 1;
se=sb+support-1;
% analysis FFTs
temp=fft(win .* TheFarEnd(sb:se));
fTheFarEnd(:,i)=temp(1:hsupport1);
xf=fTheFarEnd(:,i);
afTheFarEnd(:,i)= abs(fTheFarEnd(:,i));
% NOTE(review): the next assignment is immediately overwritten by the
% fft() line below -- this line appears to be redundant.
temp=win .* microphone(sb:se);
temp=fft(win .* microphone(sb:se));
fmicrophone(:,i)=temp(1:hsupport1);
yf=fmicrophone(:,i);
afmicrophone(:,i)= abs(fmicrophone(:,i));
ener_orig = afmicrophone(:,i)'*afmicrophone(:,i);
if( ener_orig == 0)
afmicrophone(:,i)=lowlevel*ones(size(afmicrophone(:,i)));
end
% use log domain (showed improved performance)
xxf= sqrt(real(xf.*conj(xf))+1e-20);
yyf= sqrt(real(yf.*conj(yf))+1e-20);
sxAll2(:,i) = 20*log10(xxf);
syAll2(:,i) = 20*log10(yyf);
% Binary-spectrum delay estimation (see getBspectrum/hisser2).
mD=min(i-1,maxDelayb);
xthreshold = sum(sxAll2(:,i-mD:i),2)/(maxDelayb+1);
[yout, z200] = filter(Bp200,Ap200,syAll2(:,i),z200,2);
yout=yout/(maxDelayb+1);
ythreshold = mean(syAll2(:,i-mD:i),2);
bxspectrum(i)=getBspectrum(sxAll2(:,i),xthreshold,bandfirst,bandlast);
byspectrum(i)=getBspectrum(syAll2(:,i),yout,bandfirst,bandlast);
bxhist(end-mD:end)=bxspectrum(i-mD:i);
bcount(:,i)=hisser2( ...
byspectrum(i),flipud(bxhist),bandfirst,bandlast);
[fout(:,i), z500] = filter(Bp500,Ap500,bcount(:,i),z500,2);
fcount(:,i)=sum(bcount(:,max(1,i-histLenb+1):i),2); % using the history range
fout(:,i)=round(fout(:,i));
[value,delay(i)]=min(fout(:,i),[],1);
tdelay(i)=(delay(i)-1)*support/(samplingfreq*oversampling);
% compensate
idel = max(i - delay(i) + 1,1);
% echo suppression
noisyspec = afmicrophone(:,i);
% Estimate G using covariance matrices
% Cumulative estimates
xx = afTheFarEnd(:,idel);
yy = afmicrophone(:,i);
% Means
mmxs_a = mmxs_a + xx;
mmys_a = mmys_a + yy;
if (G_ol)
mmxs_b = mmxs_b + xx;
mmys_b = mmys_b + yy;
mmy = mean([mmys_a/count_a mmys_b/count_b],2);
mmx = mean([mmxs_a/count_a mmxs_b/count_b],2);
else
mmx = mmxs_a/count_a;
mmy = mmys_a/count_a;
end
count_a = count_a + 1;
count_b = count_b + 1;
% Mean removal
xxm = xx - mmx;
yym = yy - mmy;
% Variances
s2xs_a = s2xs_a + xxm .* xxm;
s2ys_a = s2ys_a + yym .* yym;
s2xs_b = s2xs_b + xxm .* xxm;
s2ys_b = s2ys_b + yym .* yym;
% Correlation matrices
Rxxs_a = Rxxs_a + xxm * xxm';
Ryxs_a = Ryxs_a + yym * xxm';
Rxxs_b = Rxxs_b + xxm * xxm';
Ryxs_b = Ryxs_b + yym * xxm';
% Gain matrix A
if mod(i, estLen) == 0
% Cumulative based estimates
Rxxf = Rxxs_a / (estLen - 1);
Ryxf = Ryxs_a / (estLen - 1);
% Variance normalization
s2x2 = s2xs_a / (estLen - 1);
s2x2 = sqrt(s2x2);
% Sx = diag(max(s2x2,dynrange*max(s2x2)));
Sx = diag(s2x2);
if (sum(s2x2) > 0)
iSx = inv(Sx);
else
iSx= Sx + 0.01;
end
s2y2 = s2ys_a / (estLen - 1);
s2y2 = sqrt(s2y2);
% Sy = diag(max(s2y2,dynrange*max(s2y2)));
Sy = diag(s2y2);
iSy = inv(Sy);
rx = iSx * Rxxf * iSx;
ryx = iSy * Ryxf * iSx;
dbd= 7; % Use less than the full matrix
% k x m
% Bandlimited structure on G
LSEon = 0; % Default is using MMSE
if (LSEon)
ryx = ryx*rx;
rx = rx*rx;
end
p = dbd-1;
gaj = min(min(hsupport1,2*p+1),min([p+(1:hsupport1); hsupport1+p+1-(1:hsupport1)]));
cgaj = [0 cumsum(gaj)];
G3 = zeros(hsupport1);
for kk=1:hsupport1
ki = max(0,kk-p-1);
if (sum(sum(rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk))))>0)
G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk))/rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk));
else
G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk));
end
end
% End Bandlimited structure
G = G3;
G(abs(G)<0.01)=0;
G = suppress_overdrive * Sy * G * iSx;
if 1
figure(32); mi=2;
surf(max(min(G,mi),-mi)); view(2)
title('Unscaled Masked Limited-bandwidth G');
end
pause(0.05);
% Reset sums
mmxs_a = zerovec;
mmys_a = zerovec;
s2xs_a = zerovec;
s2ys_a = zerovec;
Rxxs_a = zeromat;
Ryxs_a = zeromat;
count_a = 1;
end
if (G_ol)
% Gain matrix B
if ((mod((i-estLen/2), estLen) == 0) & i>estLen)
% Cumulative based estimates
Rxxf = Rxxs_b / (estLen - 1);
Ryxf = Ryxs_b / (estLen - 1);
% Variance normalization
s2x2 = s2xs_b / (estLen - 1);
s2x2 = sqrt(s2x2);
Sx = diag(max(s2x2,dynrange*max(s2x2)));
iSx = inv(Sx);
s2y2 = s2ys_b / (estLen - 1);
s2y2 = sqrt(s2y2);
Sy = diag(max(s2y2,dynrange*max(s2y2)));
iSy = inv(Sy);
rx = iSx * Rxxf * iSx;
ryx = iSy * Ryxf * iSx;
% Bandlimited structure on G
LSEon = 0; % Default is using MMSE
if (LSEon)
ryx = ryx*rx;
rx = rx*rx;
end
p = dbd-1;
gaj = min(min(hsupport1,2*p+1),min([p+(1:hsupport1); hsupport1+p+1-(1:hsupport1)]));
cgaj = [0 cumsum(gaj)];
G3 = zeros(hsupport1);
for kk=1:hsupport1
ki = max(0,kk-p-1);
G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk))/rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk));
end
% End Bandlimited structure
G = G3;
G(abs(G)<0.01)=0;
G = suppress_overdrive * Sy * G * iSx;
if 1
figure(32); mi=2;
surf(max(min(G,mi),-mi)); view(2)
title('Unscaled Masked Limited-bandwidth G');
end
pause(0.05);
% Reset sums
mmxs_b = zerovec;
mmys_b = zerovec;
s2xs_b = zerovec;
s2ys_b = zerovec;
Rxxs_b = zeromat;
Ryxs_b = zeromat;
count_b = 1;
end
end
% Echo magnitude estimate from delayed far end through gain matrix G.
FECestimate2 = G*afTheFarEnd(:,idel);
% compute Wiener filter and suppressor function
thefilter(:,i) = (noisyspec - gamma_echo*FECestimate2) ./ noisyspec;
ix0 = find(thefilter(:,i)<de_echo_bound); % bounding trick 1
thefilter(ix0,i) = de_echo_bound; % bounding trick 2
ix0 = find(thefilter(:,i)>1); % bounding in reasonable range
thefilter(ix0,i) = 1;
% NONLINEARITY
nl_alpha=0.8; % memory; seems not very critical
nlSeverity=0.3; % nonlinearity severity: 0 does nothing; 1 suppresses all
thefmean=mean(thefilter(8:16,i));
if (thefmean<1)
disp('');
end
runningfmean = nl_alpha*runningfmean + (1-nl_alpha)*thefmean;
aaa(sb+20+1:sb+20+updatel)=10000*runningfmean* ones(updatel,1); % debug
slope0=1.0/(1.0-nlSeverity); %
thegain = max(0.0,min(1.0,slope0*(runningfmean-nlSeverity)));
% END NONLINEARITY
thefilter(:,i) = thegain*thefilter(:,i);
% Wiener filtering
femicrophone(:,i) = fmicrophone(:,i) .* thefilter(:,i);
thelimiter(:,i) = (noisyspec - A_GAIN*FECestimate2) ./ noisyspec;
index = find(thelimiter(:,i)>1.0);
thelimiter(index,i) = 1.0;
index = find(thelimiter(:,i)<0.0);
thelimiter(index,i) = 0.0;
if (rem(i,floor(updateno/20))==0)
fprintf(1,'.');
end
if mod(i,50)==0
waitbar_j(i/updateno,hh);
end
% reconstruction; first make spectrum odd
temp=[femicrophone(:,i);flipud(conj(femicrophone(2:hsupport,i)))];
emicrophone(sb:se) = emicrophone(sb:se) + factor * win .* real(ifft(temp));
end
fprintf(1,'\n');
close(hh);

View File

@ -1,22 +0,0 @@
function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast)
% Build a binary spectrum: one bit per band, set whenever the power
% spectrum exceeds the threshold (pivot) spectrum in that band.
%   bspectrum = binary spectrum (uint32 bit mask)
%   ps        = current power spectrum (float vector)
%   threshold = threshold spectrum (float vector, same length as ps)
%   bandfirst = first band considered
%   bandlast  = last band considered (at most 32; one bit per band)
% Sanity-check dimensions before touching any bits.
if( length(ps)<bandlast | bandlast>32 | length(ps)~=length(threshold))
error('BinDelayEst:spectrum:invalid','Dimensionality error');
end
% Set bit 'band' for every band whose power exceeds the pivot.
bspectrum=uint32(0);
above = ps - threshold;
for band=bandfirst:bandlast
if( above(band)>0 )
bspectrum = bitset(bspectrum,band);
end
end

View File

@ -1,21 +0,0 @@
function bcount=hisser2(bs,bsr,bandfirst,bandlast)
% Bit-difference histogram over candidate delays for binary spectra.
%   bcount    = per-delay count of mismatching band bits
%   bs        = current binary spectrum (one uint32)
%   bsr       = reference binary spectra (one uint32 per delay)
%   bandfirst = first band considered
%   bandlast  = last band considered
% All delays are weighted equally: a good delay candidate yields a low
% count, i.e. few bands where near end and delayed far end disagree.
numDelays = length(bsr);
bcount = zeros(numDelays,1);
for delay=1:numDelays
% XOR exposes the bands in which the two binary spectra differ.
mismatch = bitxor(bs,bsr(delay));
bcount(delay) = sum(bitget(mismatch,bandfirst:bandlast));
end

View File

@ -1,19 +0,0 @@
% Test driver: runs the compsup() echo suppressor on recorded far-end and
% near-end PCM captures and visualizes the result.
% NOTE(review): aecfar.pcm / aecnear.pcm are read as 16-bit samples from
% the working directory; spclab is an external viewer -- confirm both are
% available before running.
fid=fopen('aecfar.pcm'); far=fread(fid,'short'); fclose(fid);
fid=fopen('aecnear.pcm'); mic=fread(fid,'short'); fclose(fid);
%fid=fopen('QA1far.pcm'); far=fread(fid,'short'); fclose(fid);
%fid=fopen('QA1near.pcm'); mic=fread(fid,'short'); fclose(fid);
% Analyze the first 30 seconds (index arithmetic assumes 8 kHz material).
start=0 * 8000+1;
stop= 30 * 8000;
microphone=mic(start:stop);
TheFarEnd=far(start:stop);
avtime=1;
% 16000 to make it compatible with the C-version
[emicrophone,tdel]=compsup(microphone,TheFarEnd,avtime,16000);
spclab(8000,TheFarEnd,microphone,emicrophone);

View File

@ -1,269 +0,0 @@
function [femicrophone, aecmStructNew, enerNear, enerFar] = AECMobile(fmicrophone, afTheFarEnd, setupStruct, aecmStruct)
global NEARENDFFT;
global F;
aecmStructNew = aecmStruct;
% Magnitude spectrum of near end signal
afmicrophone = abs(fmicrophone);
%afmicrophone = NEARENDFFT(setupStruct.currentBlock,:)'/2^F(setupStruct.currentBlock,end);
% Near end energy level
ener_orig = afmicrophone'*afmicrophone;
if( ener_orig == 0)
lowlevel = 0.01;
afmicrophone = lowlevel*ones(size(afmicrophone));
end
%adiff = max(abs(afmicrophone - afTheFarEnd));
%if (adiff > 0)
% disp([setupStruct.currentBlock adiff])
%end
% Store the near end energy
%aecmStructNew.enerNear(setupStruct.currentBlock) = log(afmicrophone'*afmicrophone);
aecmStructNew.enerNear(setupStruct.currentBlock) = log(sum(afmicrophone));
% Store the far end energy
%aecmStructNew.enerFar(setupStruct.currentBlock) = log(afTheFarEnd'*afTheFarEnd);
aecmStructNew.enerFar(setupStruct.currentBlock) = log(sum(afTheFarEnd));
% Update subbands (We currently use all frequency bins, hence .useSubBand is turned off)
if aecmStructNew.useSubBand
internalIndex = 1;
for kk=1:setupStruct.subBandLength+1
ySubBand(kk) = mean(afmicrophone(internalIndex:internalIndex+setupStruct.numInBand(kk)-1).^aecmStructNew.bandFactor);
xSubBand(kk) = mean(afTheFarEnd(internalIndex:internalIndex+setupStruct.numInBand(kk)-1).^aecmStructNew.bandFactor);
internalIndex = internalIndex + setupStruct.numInBand(kk);
end
else
ySubBand = afmicrophone.^aecmStructNew.bandFactor;
xSubBand = afTheFarEnd.^aecmStructNew.bandFactor;
end
% Estimated echo energy
if (aecmStructNew.bandFactor == 1)
%aecmStructNew.enerEcho(setupStruct.currentBlock) = log((aecmStructNew.H.*xSubBand)'*(aecmStructNew.H.*xSubBand));
%aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log((aecmStructNew.HStored.*xSubBand)'*(aecmStructNew.HStored.*xSubBand));
aecmStructNew.enerEcho(setupStruct.currentBlock) = log(sum(aecmStructNew.H.*xSubBand));
aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log(sum(aecmStructNew.HStored.*xSubBand));
elseif (aecmStructNew.bandFactor == 2)
aecmStructNew.enerEcho(setupStruct.currentBlock) = log(aecmStructNew.H'*xSubBand);
aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log(aecmStructNew.HStored'*xSubBand);
end
% Last 100 blocks of data, used for plotting
n100 = max(1,setupStruct.currentBlock-99):setupStruct.currentBlock;
enerError = aecmStructNew.enerNear(n100)-aecmStructNew.enerEcho(n100);
enerErrorStored = aecmStructNew.enerNear(n100)-aecmStructNew.enerEchoStored(n100);
% Store the far end sub band. This is needed if we use LSE instead of NLMS
aecmStructNew.X = [xSubBand aecmStructNew.X(:,1:end-1)];
% Update energy levels, which control the VAD
if ((aecmStructNew.enerFar(setupStruct.currentBlock) < aecmStructNew.energyMin) & (aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStruct.FAR_ENERGY_MIN))
aecmStructNew.energyMin = aecmStructNew.enerFar(setupStruct.currentBlock);
%aecmStructNew.energyMin = max(aecmStructNew.energyMin,12);
aecmStructNew.energyMin = max(aecmStructNew.energyMin,aecmStruct.FAR_ENERGY_MIN);
aecmStructNew.energyLevel = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThres+aecmStructNew.energyMin;
aecmStructNew.energyLevelMSE = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThresMSE+aecmStructNew.energyMin;
end
if (aecmStructNew.enerFar(setupStruct.currentBlock) > aecmStructNew.energyMax)
aecmStructNew.energyMax = aecmStructNew.enerFar(setupStruct.currentBlock);
aecmStructNew.energyLevel = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThres+aecmStructNew.energyMin;
aecmStructNew.energyLevelMSE = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThresMSE+aecmStructNew.energyMin;
end
% Calculate current energy error in near end (estimated echo vs. near end)
dE = aecmStructNew.enerNear(setupStruct.currentBlock)-aecmStructNew.enerEcho(setupStruct.currentBlock);
%%%%%%%%
% Calculate step size used in LMS algorithm, based on current far end energy and near end energy error (dE)
%%%%%%%%
if setupStruct.stepSize_flag
[mu, aecmStructNew] = calcStepSize(aecmStructNew.enerFar(setupStruct.currentBlock), dE, aecmStructNew, setupStruct.currentBlock, 1);
else
mu = 0.25;
end
aecmStructNew.muLog(setupStruct.currentBlock) = mu; % Store the step size
% Estimate Echo Spectral Shape
[U, aecmStructNew.H] = fallerEstimator(ySubBand,aecmStructNew.X,aecmStructNew.H,mu);
%%%%%
% Determine if we should store or restore the channel
%%%%%
if ((setupStruct.currentBlock <= aecmStructNew.convLength) | (~setupStruct.channelUpdate_flag))
aecmStructNew.HStored = aecmStructNew.H; % Store what you have after startup
elseif ((setupStruct.currentBlock > aecmStructNew.convLength) & (setupStruct.channelUpdate_flag))
if ((aecmStructNew.enerFar(setupStruct.currentBlock) < aecmStructNew.energyLevelMSE) & (aecmStructNew.enerFar(setupStruct.currentBlock-1) >= aecmStructNew.energyLevelMSE))
xxx = aecmStructNew.countMseH;
if (xxx > 20)
mseStored = mean(abs(aecmStructNew.enerEchoStored(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)-aecmStructNew.enerNear(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)));
mseLatest = mean(abs(aecmStructNew.enerEcho(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)-aecmStructNew.enerNear(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)));
%fprintf('Stored: %4f Latest: %4f\n', mseStored, mseLatest) % Uncomment if you want to display the MSE values
if ((mseStored < 0.8*mseLatest) & (aecmStructNew.mseHStoredOld < 0.8*aecmStructNew.mseHLatestOld))
aecmStructNew.H = aecmStructNew.HStored;
fprintf('Restored H at block %d\n',setupStruct.currentBlock)
elseif (((0.8*mseStored > mseLatest) & (mseLatest < aecmStructNew.mseHThreshold) & (aecmStructNew.mseHLatestOld < aecmStructNew.mseHThreshold)) | (mseStored == Inf))
aecmStructNew.HStored = aecmStructNew.H;
fprintf('Stored new H at block %d\n',setupStruct.currentBlock)
end
aecmStructNew.mseHStoredOld = mseStored;
aecmStructNew.mseHLatestOld = mseLatest;
end
elseif ((aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStructNew.energyLevelMSE) & (aecmStructNew.enerFar(setupStruct.currentBlock-1) < aecmStructNew.energyLevelMSE))
aecmStructNew.countMseH = 1;
elseif (aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStructNew.energyLevelMSE)
aecmStructNew.countMseH = aecmStructNew.countMseH + 1;
end
end
%%%%%
% Check delay (calculate the delay offset (if we can))
% The algorithm is not tuned and should be used with care. It runs separately from Bastiaan's algorithm.
%%%%%
yyy = 31; % Correlation buffer length (currently unfortunately hard coded)
dxxx = 25; % Maximum offset (currently unfortunately hard coded)
if (setupStruct.currentBlock > aecmStructNew.convLength)
if (aecmStructNew.enerFar(setupStruct.currentBlock-(yyy+2*dxxx-1):setupStruct.currentBlock) > aecmStructNew.energyLevelMSE)
for xxx = -dxxx:dxxx
aecmStructNew.delayLatestS(xxx+dxxx+1) = sum(sign(aecmStructNew.enerEcho(setupStruct.currentBlock-(yyy+dxxx-xxx)+1:setupStruct.currentBlock+xxx-dxxx)-mean(aecmStructNew.enerEcho(setupStruct.currentBlock-(yyy++dxxx-xxx)+1:setupStruct.currentBlock+xxx-dxxx))).*sign(aecmStructNew.enerNear(setupStruct.currentBlock-yyy-dxxx+1:setupStruct.currentBlock-dxxx)-mean(aecmStructNew.enerNear(setupStruct.currentBlock-yyy-dxxx+1:setupStruct.currentBlock-dxxx))));
end
aecmStructNew.newDelayCurve = 1;
end
end
if ((setupStruct.currentBlock > 2*aecmStructNew.convLength) & ~rem(setupStruct.currentBlock,yyy*2) & aecmStructNew.newDelayCurve)
[maxV,maxP] = max(aecmStructNew.delayLatestS);
if ((maxP > 2) & (maxP < 2*dxxx))
maxVLeft = aecmStructNew.delayLatestS(max(1,maxP-4));
maxVRight = aecmStructNew.delayLatestS(min(2*dxxx+1,maxP+4));
%fprintf('Max %d, Left %d, Right %d\n',maxV,maxVLeft,maxVRight) % Uncomment if you want to see max value
if ((maxV > 24) & (maxVLeft < maxV - 10) & (maxVRight < maxV - 10))
aecmStructNew.feedbackDelay = maxP-dxxx-1;
aecmStructNew.newDelayCurve = 0;
aecmStructNew.feedbackDelayUpdate = 1;
fprintf('Feedback Update at block %d\n',setupStruct.currentBlock)
end
end
end
% End of "Check delay"
%%%%%%%%
%%%%%
% Calculate suppression gain, based on far end energy and near end energy error (dE)
if (setupStruct.supGain_flag)
[gamma_echo, aecmStructNew.cntIn, aecmStructNew.cntOut] = calcFilterGain(aecmStructNew.enerFar(setupStruct.currentBlock), dE, aecmStructNew, setupStruct.currentBlock, aecmStructNew.convLength, aecmStructNew.cntIn, aecmStructNew.cntOut);
else
gamma_echo = 1;
end
aecmStructNew.gammaLog(setupStruct.currentBlock) = gamma_echo; % Store the gain
gamma_use = gamma_echo;
% Use the stored channel
U = aecmStructNew.HStored.*xSubBand;
% compute Wiener filter and suppressor function
Iy = find(ySubBand);
subBandFilter = zeros(size(ySubBand));
if (aecmStructNew.bandFactor == 2)
subBandFilter(Iy) = (1 - gamma_use*sqrt(U(Iy)./ySubBand(Iy))); % For Faller
else
subBandFilter(Iy) = (1 - gamma_use*(U(Iy)./ySubBand(Iy))); % For COV
end
ix0 = find(subBandFilter < 0); % bounding trick 1
subBandFilter(ix0) = 0;
ix0 = find(subBandFilter > 1); % bounding trick 1
subBandFilter(ix0) = 1;
% Interpolate back to normal frequency bins if we use sub bands
if aecmStructNew.useSubBand
thefilter = interp1(setupStruct.centerFreq,subBandFilter,linspace(0,setupStruct.samplingfreq/2,setupStruct.hsupport1)','nearest');
testfilter = interp1(setupStruct.centerFreq,subBandFilter,linspace(0,setupStruct.samplingfreq/2,1000),'nearest');
thefilter(end) = subBandFilter(end);
internalIndex = 1;
for kk=1:setupStruct.subBandLength+1
internalIndex:internalIndex+setupStruct.numInBand(kk)-1;
thefilter(internalIndex:internalIndex+setupStruct.numInBand(kk)-1) = subBandFilter(kk);
internalIndex = internalIndex + setupStruct.numInBand(kk);
end
else
thefilter = subBandFilter;
testfilter = subBandFilter;
end
% Bound the filter
ix0 = find(thefilter < setupStruct.de_echo_bound); % bounding trick 1
thefilter(ix0) = setupStruct.de_echo_bound; % bounding trick 2
ix0 = find(thefilter > 1); % bounding in reasonable range
thefilter(ix0) = 1;
%%%%
% NLP
%%%%
thefmean = mean(thefilter(8:16));
if (thefmean < 1)
disp('');
end
aecmStructNew.runningfmean = setupStruct.nl_alpha*aecmStructNew.runningfmean + (1-setupStruct.nl_alpha)*thefmean;
slope0 = 1.0/(1.0 - setupStruct.nlSeverity); %
thegain = max(0.0, min(1.0, slope0*(aecmStructNew.runningfmean - setupStruct.nlSeverity)));
if ~setupStruct.nlp_flag
thegain = 1;
end
% END NONLINEARITY
thefilter = thegain*thefilter;
%%%%
% The suppression
%%%%
femicrophone = fmicrophone .* thefilter;
% Store the output energy (used for plotting)
%aecmStructNew.enerOut(setupStruct.currentBlock) = log(abs(femicrophone)'*abs(femicrophone));
aecmStructNew.enerOut(setupStruct.currentBlock) = log(sum(abs(femicrophone)));
if aecmStructNew.plotIt
figure(13)
subplot(311)
%plot(n100,enerFar(n100),'b-',n100,enerNear(n100),'k--',n100,enerEcho(n100),'r-',[n100(1) n100(end)],[1 1]*vadThNew,'b:',[n100(1) n100(end)],[1 1]*((energyMax-energyMin)/4+energyMin),'r-.',[n100(1) n100(end)],[1 1]*vadNearThNew,'g:',[n100(1) n100(end)],[1 1]*energyMax,'r-.',[n100(1) n100(end)],[1 1]*energyMin,'r-.','LineWidth',2)
plot(n100,aecmStructNew.enerFar(n100),'b-',n100,aecmStructNew.enerNear(n100),'k--',n100,aecmStructNew.enerOut(n100),'r-.',n100,aecmStructNew.enerEcho(n100),'r-',n100,aecmStructNew.enerEchoStored(n100),'c-',[n100(1) n100(end)],[1 1]*((aecmStructNew.energyMax-aecmStructNew.energyMin)/4+aecmStructNew.energyMin),'g-.',[n100(1) n100(end)],[1 1]*aecmStructNew.energyMax,'g-.',[n100(1) n100(end)],[1 1]*aecmStructNew.energyMin,'g-.','LineWidth',2)
%title(['Frame ',int2str(i),' av ',int2str(setupStruct.updateno),' State = ',int2str(speechState),' \mu = ',num2str(mu)])
title(['\gamma = ',num2str(gamma_echo),' \mu = ',num2str(mu)])
subplot(312)
%plot(n100,enerError,'b-',[n100(1) n100(end)],[1 1]*vadNearTh,'r:',[n100(1) n100(end)],[-1.5 -1.5]*vadNearTh,'r:','LineWidth',2)
%plot(n100,enerError,'b-',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2)
plot(n100,enerError,'b-',n100,enerErrorStored,'c-',[n100(1) n100(end)],[1 1]*aecmStructNew.varMean,'k--',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2)
% Plot mu
%plot(n100,log2(aecmStructNew.muLog(n100)),'b-','LineWidth',2)
%plot(n100,log2(aecmStructNew.HGain(n100)),'b-',[n100(1) n100(end)],[1 1]*log2(sum(aecmStructNew.HStored)),'r:','LineWidth',2)
title(['Block ',int2str(setupStruct.currentBlock),' av ',int2str(setupStruct.updateno)])
subplot(313)
%plot(n100,enerVar(n100),'b-',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2)
%plot(n100,enerVar(n100),'b-','LineWidth',2)
% Plot correlation curve
%plot(-25:25,aecmStructNew.delayStored/max(aecmStructNew.delayStored),'c-',-25:25,aecmStructNew.delayLatest/max(aecmStructNew.delayLatest),'r-',-25:25,(max(aecmStructNew.delayStoredS)-aecmStructNew.delayStoredS)/(max(aecmStructNew.delayStoredS)-min(aecmStructNew.delayStoredS)),'c:',-25:25,(max(aecmStructNew.delayLatestS)-aecmStructNew.delayLatestS)/(max(aecmStructNew.delayLatestS)-min(aecmStructNew.delayLatestS)),'r:','LineWidth',2)
%plot(-25:25,aecmStructNew.delayStored,'c-',-25:25,aecmStructNew.delayLatest,'r-',-25:25,(max(aecmStructNew.delayStoredS)-aecmStructNew.delayStoredS)/(max(aecmStructNew.delayStoredS)-min(aecmStructNew.delayStoredS)),'c:',-25:25,(max(aecmStructNew.delayLatestS)-aecmStructNew.delayLatestS)/(max(aecmStructNew.delayLatestS)-min(aecmStructNew.delayLatestS)),'r:','LineWidth',2)
%plot(-25:25,aecmStructNew.delayLatest,'r-',-25:25,(50-aecmStructNew.delayLatestS)/100,'r:','LineWidth',2)
plot(-25:25,aecmStructNew.delayLatestS,'r:','LineWidth',2)
%plot(-25:25,aecmStructNew.delayStored,'c-',-25:25,aecmStructNew.delayLatest,'r-','LineWidth',2)
plot(0:32,aecmStruct.HStored,'bo-','LineWidth',2)
%title(['\gamma | In = ',int2str(aecmStructNew.muStruct.countInInterval),' | Out High = ',int2str(aecmStructNew.muStruct.countOutHighInterval),' | Out Low = ',int2str(aecmStructNew.muStruct.countOutLowInterval)])
pause(1)
%if ((setupStruct.currentBlock == 860) | (setupStruct.currentBlock == 420) | (setupStruct.currentBlock == 960))
if 0%(setupStruct.currentBlock == 960)
figure(60)
plot(n100,aecmStructNew.enerNear(n100),'k--',n100,aecmStructNew.enerEcho(n100),'k:','LineWidth',2)
legend('Near End','Estimated Echo')
title('Signal Energy witH offset compensation')
figure(61)
subplot(211)
stem(sign(aecmStructNew.enerNear(n100)-mean(aecmStructNew.enerNear(n100))))
title('Near End Energy Pattern (around mean value)')
subplot(212)
stem(sign(aecmStructNew.enerEcho(n100)-mean(aecmStructNew.enerEcho(n100))))
title('Estimated Echo Energy Pattern (around mean value)')
pause
end
drawnow%,pause
elseif ~rem(setupStruct.currentBlock,100)
fprintf('Block %d of %d\n',setupStruct.currentBlock,setupStruct.updateno)
end

View File

@ -1,98 +0,0 @@
function [delayStructNew] = align(xf, yf, delayStruct, i, trueDelay);
% ALIGN  Estimate the echo-path delay (in blocks) between far end and near end.
%
% Converts the current far-end (xf) and near-end (yf) frequency-domain frames
% to log magnitude, thresholds them into binary spectra, and matches the
% near-end binary spectrum against a history of far-end binary spectra
% (hisser2 bit-error counts). The delay with the lowest smoothed bit-error
% count becomes the estimate.
%
% Inputs:
%   xf          - far-end frequency-domain frame (complex column vector)
%   yf          - near-end frequency-domain frame (complex column vector)
%   delayStruct - state struct: spectral history, filter states, delay track
%   i           - current block index
%   trueDelay   - known delay; only used by the disabled plotting code below
%
% Output:
%   delayStructNew - updated state; delayStructNew.delay(i) is the estimate
%%%%%%%
% Bastiaan's algorithm copied
%%%%%%%
% Fixed IIR coefficient sets used for temporal smoothing. Ap500/Bp500 smooth
% the bit-count curve; Ap200/Bp200 smooth the near-end threshold (new method).
Ap500 = [1.00, -4.95, 9.801, -9.70299, 4.80298005, -0.9509900499];
Bp500 = [0.662743088639636, -2.5841655608125, 3.77668102146288, -2.45182477425154, 0.596566274575251, 0.0];
Ap200 = [1.00, -4.875, 9.50625, -9.26859375, 4.518439453125, -0.881095693359375];
Bp200 = [0.862545460994275, -3.2832804496114, 4.67892032308828, -2.95798023879133, 0.699796870041299, 0.0];
oldMethod = 1; % Turn on or off the old method. The new one is Bastiaan's August 2008 updates
THReSHoLD = 2.0; % ADJUSTABLE threshold factor; 4.0 seems good
%%%%%%%%%%%%%%%%%%%
% use log domain (showed improved performance)
% 1e-20 keeps log10 finite for zero-magnitude bins.
xxf = sqrt(real(xf.*conj(xf))+1e-20);
yyf = sqrt(real(yf.*conj(yf))+1e-20);
delayStruct.sxAll2(:,i) = 20*log10(xxf);
delayStruct.syAll2(:,i) = 20*log10(yyf);
% mD limits the history window while i is still smaller than the max delay.
mD = min(i-1,delayStruct.maxDelayb);
if oldMethod
factor = 1.0;
histLenb = 250;
% Median over the recent history acts as the binarization pivot.
xthreshold = factor*median(delayStruct.sxAll2(:,i-mD:i),2);
ythreshold = factor*median(delayStruct.syAll2(:,i-mD:i),2);
else
xthreshold = sum(delayStruct.sxAll2(:,i-mD:i),2)/(delayStruct.maxDelayb+1);
% IIR-smoothed near-end threshold; filter state z200 persists across calls.
[yout, delayStruct.z200] = filter(Bp200, Ap200, delayStruct.syAll2(:,i), delayStruct.z200, 2);
yout = yout/(delayStruct.maxDelayb+1);
% NOTE(review): the mean-based ythreshold on the next line is immediately
% overwritten by the filtered value, i.e. it is dead code.
ythreshold = mean(delayStruct.syAll2(:,i-mD:i),2);
ythreshold = yout;
end
% Binarize both spectra (one uint32 bit mask per block).
delayStruct.bxspectrum(i) = getBspectrum(delayStruct.sxAll2(:,i), xthreshold, delayStruct.bandfirst, delayStruct.bandlast);
delayStruct.byspectrum(i) = getBspectrum(delayStruct.syAll2(:,i), ythreshold, delayStruct.bandfirst, delayStruct.bandlast);
delayStruct.bxhist(end-mD:end) = delayStruct.bxspectrum(i-mD:i);
% Bit-error count of the near-end spectrum against each candidate delay.
delayStruct.bcount(:,i) = hisser2(delayStruct.byspectrum(i), flipud(delayStruct.bxhist), delayStruct.bandfirst, delayStruct.bandlast);
% Temporal smoothing of the count curve; filter state z500 persists.
[delayStruct.fout(:,i), delayStruct.z500] = filter(Bp500, Ap500, delayStruct.bcount(:,i), delayStruct.z500, 2);
if oldMethod
%delayStruct.new(:,i) = sum(delayStruct.bcount(:,max(1,i-histLenb+1):i),2); % using the history range
% Pad both ends, then 5-tap moving average across the delay axis.
tmpVec = [delayStruct.fout(1,i)*ones(2,1); delayStruct.fout(:,i); delayStruct.fout(end,i)*ones(2,1)]; % using the history range
tmpVec = filter(ones(1,5), 1, tmpVec);
delayStruct.new(:,i) = tmpVec(5:end);
%delayStruct.new(:,i) = delayStruct.fout(:,i); % using the history range
else
[delayStruct.fout(:,i), delayStruct.z500] = filter(Bp500, Ap500, delayStruct.bcount(:,i), delayStruct.z500, 2);
% NEW CODE
delayStruct.new(:,i) = filter([-1,-2,1,4,1,-2,-1], 1, delayStruct.fout(:,i)); %remv smth component
delayStruct.new(1:end-3,i) = delayStruct.new(1+3:end,i);
delayStruct.new(1:6,i) = 0.0;
delayStruct.new(end-6:end,i) = 0.0; % ends are no good
end
% The best delay candidate is the minimum of the (smoothed) count curve.
[valuen, tempdelay] = min(delayStruct.new(:,i)); % find minimum
if oldMethod
% Accept the new minimum only if it is "significant": the set of
% near-minimum candidates must not be one contiguous run, and the jump
% from the previous estimate must be small once one good estimate exists.
threshold = valuen + (max(delayStruct.new(:,i)) - valuen)/4;
thIndex = find(delayStruct.new(:,i) <= threshold);
if (i > 1)
delayDiff = abs(delayStruct.delay(i-1)-tempdelay+1);
if (delayStruct.oneGoodEstimate & (max(diff(thIndex)) > 1) & (delayDiff < 10))
% We consider this minimum to be significant, hence update the delay
delayStruct.delay(i) = tempdelay;
elseif (~delayStruct.oneGoodEstimate & (max(diff(thIndex)) > 1))
delayStruct.delay(i) = tempdelay;
if (i > histLenb)
delayStruct.oneGoodEstimate = 1;
end
else
delayStruct.delay(i) = delayStruct.delay(i-1);
end
else
delayStruct.delay(i) = tempdelay;
end
else
threshold = THReSHoLD*std(delayStruct.new(:,i)); % set updata threshold
if ((-valuen > threshold) | (i < delayStruct.smlength)) % see if you want to update delay
delayStruct.delay(i) = tempdelay;
else
delayStruct.delay(i) = delayStruct.delay(i-1);
end
% END NEW CODE
end
delayStructNew = delayStruct;
% administrative and plotting stuff
if( 0)
figure(10);
plot([1:length(delayStructNew.new(:,i))],delayStructNew.new(:,i),trueDelay*[1 1],[min(delayStructNew.new(:,i)),max(delayStructNew.new(:,i))],'r',[1 length(delayStructNew.new(:,i))],threshold*[1 1],'r:', 'LineWidth',2);
%plot([1:length(delayStructNew.bcount(:,i))],delayStructNew.bcount(:,i),trueDelay*[1 1],[min(delayStructNew.bcount(:,i)),max(delayStructNew.bcount(:,i))],'r','LineWidth',2);
%plot([thedelay,thedelay],[min(fcount(:,i)),max(fcount(:,i))],'r');
%title(sprintf('bin count and known delay at time %5.1f s\n',(i-1)*(support/(fs*oversampling))));
title(delayStructNew.oneGoodEstimate)
xlabel('delay in frames');
%hold off;
drawnow
end

View File

@ -1,88 +0,0 @@
function [gam, cntIn2, cntOut2] = calcFilterGain(energy, dE, aecmStruct, t, T, cntIn, cntOut)
% calcFilterGain  Suppression gain from far-end energy and energy error dE.
%
% Inputs:
%   energy     - current far-end (log) energy
%   dE         - near-end minus echo energy error for the current block
%   aecmStruct - AECM state (energyMin/Max/Level thresholds are read)
%   t, T       - current block index and convergence length
%   cntIn/Out  - hysteresis counters for dE inside/outside the tolerance band
% Outputs:
%   gam            - suppression gain
%   cntIn2/cntOut2 - updated counters
%
% NOTE(review): this function contains several experimental gain formulas
% that are computed and then discarded — the unconditional "gam = 1;" below
% resets everything computed before it. Only the final
% "if (energy < aecmStruct.energyLevel) ... else ..." branch determines the
% returned gain (the earlier code still matters for the isempty debug dump).
defaultLevel = 1.2;
cntIn2 = cntIn;
cntOut2 = cntOut;
if (t < T)
% During initial convergence: no suppression shaping.
gam = 1;
else
dE1 = -5;
dE2 = 1;
gamMid = 0.2;
% --- Experimental formula #1 (overwritten below) ---
gam = max(0,min((energy - aecmStruct.energyMin)/(aecmStruct.energyLevel - aecmStruct.energyMin), 1-(1-gamMid)*(aecmStruct.energyMax-energy)/(aecmStruct.energyMax-aecmStruct.energyLevel)));
dEOffset = -0.5;
dEWidth = 1.5;
%gam2 = max(1,2-((dE-dEOffset)/(dE2-dEOffset)).^2);
gam2 = 1+(abs(dE-dEOffset)<(dE2-dEOffset));
gam = gam*gam2;
if (energy < aecmStruct.energyLevel)
gam = 0;
else
gam = defaultLevel;
end
% Zero-crossing count of the energy error over the last 64 blocks,
% used here experimentally and echoed by the isempty debug dump below.
dEVec = aecmStruct.enerNear(t-63:t)-aecmStruct.enerEcho(t-63:t);
%dEVec = aecmStruct.enerNear(t-20:t)-aecmStruct.enerEcho(t-20:t);
numCross = 0;
currentState = 0;
for ii=1:64
if (currentState == 0)
% NOTE(review): the lower bound is the literal -2, not dE1 = -5;
% dE1 is defined above but never used — presumably intentional tuning.
currentState = (dEVec(ii) > dE2) - (dEVec(ii) < -2);
elseif ((currentState == 1) & (dEVec(ii) < -2))
numCross = numCross + 1;
currentState = -1;
elseif ((currentState == -1) & (dEVec(ii) > dE2))
numCross = numCross + 1;
currentState = 1;
end
end
gam = max(0, gam - numCross/25);
% --- Reset: everything above this line is discarded ---
gam = 1;
% --- Active formula: energy ramp (gam2) times dE proximity factor (gam3),
% blended in over timeFactorLength blocks of in-band dE. ---
ener_A = 1;
ener_B = 0.8;
ener_C = aecmStruct.energyLevel + (aecmStruct.energyMax-aecmStruct.energyLevel)/5;
dE_A = 4;%2;
dE_B = 3.6;%1.8;
dE_C = 0.9*dEWidth;
dE_D = 1;
timeFactorLength = 10;
ddE = abs(dE-dEOffset);
if (energy < aecmStruct.energyLevel)
% Far end too quiet: no echo expected, no suppression gain.
gam = 0;
else
gam = 1;
gam2 = max(0, min(ener_B*(energy-aecmStruct.energyLevel)/(ener_C-aecmStruct.energyLevel), ener_B+(ener_A-ener_B)*(energy-ener_C)/(aecmStruct.energyMax-ener_C)));
if (ddE < dEWidth)
% Update counters
cntIn2 = cntIn2 + 1;
if (cntIn2 > 2)
cntOut2 = 0;
end
% NOTE(review): gam3 is recomputed to the constant dE_A right after;
% the piecewise formula on the previous line is dead.
gam3 = max(dE_D, min(dE_A-(dE_A-dE_B)*(ddE/dE_C), dE_D+(dE_B-dE_D)*(dEWidth-ddE)/(dEWidth-dE_C)));
gam3 = dE_A;
else
% Update counters
cntOut2 = cntOut2 + 1;
if (cntOut2 > 2)
cntIn2 = 0;
end
%gam2 = 1;
gam3 = dE_D;
end
% Fade from gam = 1 toward gam2*gam3 as in-band blocks accumulate.
timeFactor = min(1, cntIn2/timeFactorLength);
gam = gam*(1-timeFactor) + timeFactor*gam2*gam3;
end
%gam = gam/floor(numCross/2+1);
end
% Debug dump if gam somehow ended up empty (should not happen).
if isempty(gam)
numCross
timeFactor
cntIn2
cntOut2
gam2
gam3
end

View File

@ -1,105 +0,0 @@
function [mu, aecmStructNew] = calcStepSize(energy, dE, aecmStruct, t, logscale)
% calcStepSize  Adaptive NLMS step size for the AECM channel update.
%
% The base step grows with far-end energy (between energyMin and energyMax)
% and with time (up to convLength T), optionally on a log2 scale. It is then
% scaled down by muDT (large energy error => probable double talk) and by
% muVar (2^-numCross, where numCross counts sign alternations of the energy
% error over the last 64 blocks).
%
% Inputs:
%   energy     - current far-end (log) energy
%   dE         - near-end minus echo energy error
%   aecmStruct - AECM state (MU_MIN/MAX, energy stats, muStruct counters)
%   t          - current block index (default 1)
%   logscale   - nonzero => interpolate the exponent and use mu = 2^mu
% Outputs:
%   mu            - step size, capped at 0.25
%   aecmStructNew - state with updated counters and varMean
if (nargin < 4)
t = 1;
logscale = 1;
elseif (nargin == 4)
logscale = 1;
end
T = aecmStruct.convLength;
if logscale
% Interpolate the exponent between MU_MIN and a time-ramped maximum.
currentMuMax = aecmStruct.MU_MIN + (aecmStruct.MU_MAX-aecmStruct.MU_MIN)*min(t,T)/T;
if (aecmStruct.energyMin >= aecmStruct.energyMax)
mu = aecmStruct.MU_MIN;
else
mu = (energy - aecmStruct.energyMin)/(aecmStruct.energyMax - aecmStruct.energyMin)*(currentMuMax-aecmStruct.MU_MIN) + aecmStruct.MU_MIN;
end
mu = 2^mu;
if (energy < aecmStruct.energyLevel)
% Far end too weak to adapt on.
mu = 0;
end
else
% Linear-scale variant.
muMin = 0;
muMax = 0.5;
currentMuMax = muMin + (muMax-muMin)*min(t,T)/T;
if (aecmStruct.energyMin >= aecmStruct.energyMax)
mu = muMin;
else
mu = (energy - aecmStruct.energyMin)/(aecmStruct.energyMax - aecmStruct.energyMin)*(currentMuMax-muMin) + muMin;
end
end
dE2 = 1;
dEOffset = -0.5;
offBoost = 5;
% Track how long dE stays inside / below / above the tolerance band.
if (mu > 0)
if (abs(dE-aecmStruct.ENERGY_DEV_OFFSET) > aecmStruct.ENERGY_DEV_TOL)
aecmStruct.muStruct.countInInterval = 0;
else
aecmStruct.muStruct.countInInterval = aecmStruct.muStruct.countInInterval + 1;
end
if (dE < aecmStruct.ENERGY_DEV_OFFSET - aecmStruct.ENERGY_DEV_TOL)
aecmStruct.muStruct.countOutLowInterval = aecmStruct.muStruct.countOutLowInterval + 1;
else
aecmStruct.muStruct.countOutLowInterval = 0;
end
if (dE > aecmStruct.ENERGY_DEV_OFFSET + aecmStruct.ENERGY_DEV_TOL)
aecmStruct.muStruct.countOutHighInterval = aecmStruct.muStruct.countOutHighInterval + 1;
else
aecmStruct.muStruct.countOutHighInterval = 0;
end
end
% NOTE(review): muVar here and muOff are computed but overwritten/unused —
% muVar is reset to 1 two lines down and muOff only appears in the
% commented-out product at the end. Experimental leftovers.
muVar = 2^min(-3,5/50*aecmStruct.muStruct.countInInterval-3);
muOff = 2^max(offBoost,min(0,offBoost*(aecmStruct.muStruct.countOutLowInterval-aecmStruct.muStruct.minOutLowInterval)/(aecmStruct.muStruct.maxOutLowInterval-aecmStruct.muStruct.minOutLowInterval)));
muLow = 1/64;
muVar = 1;
if (t < 2*T)
% Early phase: no double-talk attenuation.
muDT = 1;
muVar = 1;
mdEVec = 0;
numCross = 0;
else
% Shrink the step when dE deviates from the expected offset.
muDT = min(1,max(muLow,1-(1-muLow)*(dE-aecmStruct.ENERGY_DEV_OFFSET)/aecmStruct.ENERGY_DEV_TOL));
dEVec = aecmStruct.enerNear(t-63:t)-aecmStruct.enerEcho(t-63:t);
%dEVec = aecmStruct.enerNear(t-20:t)-aecmStruct.enerEcho(t-20:t);
% Count sign alternations (crossings) of the energy error.
numCross = 0;
currentState = 0;
for ii=1:64
if (currentState == 0)
currentState = (dEVec(ii) > dE2) - (dEVec(ii) < -2);
elseif ((currentState == 1) & (dEVec(ii) < -2))
numCross = numCross + 1;
currentState = -1;
elseif ((currentState == -1) & (dEVec(ii) > dE2))
numCross = numCross + 1;
currentState = 1;
end
end
%logicDEVec = (dEVec > dE2) - (dEVec < -2);
%numCross = sum(abs(diff(logicDEVec)));
%mdEVec = mean(abs(dEVec-dEOffset));
%mdEVec = mean(abs(dEVec-mean(dEVec)));
%mdEVec = max(dEVec)-min(dEVec);
%if (mdEVec > 4)%1.5)
% muVar = 0;
%end
% NOTE(review): the first muVar is immediately overwritten; the effective
% attenuation is 2^(-numCross).
muVar = 2^(-floor(numCross/2));
muVar = 2^(-numCross);
end
%muVar = 1;
% if (eStd > (dE2-dEOffset))
% muVar = 1/8;
% else
% muVar = 1;
% end
%mu = mu*muDT*muVar*muOff;
mu = mu*muDT*muVar;
mu = min(mu,0.25);
aecmStructNew = aecmStruct;
%aecmStructNew.varMean = mdEVec;
aecmStructNew.varMean = numCross;

View File

@ -1,42 +0,0 @@
function [U, Hnew] = fallerEstimator(Y, X, H, mu)
% fallerEstimator  One NLMS update of the frequency-domain echo channel.
%
% Near end signal is stacked frame by frame columnwise in matrix Y and far
% end in X; the channel is adapted independently per frequency bin.
%
% Inputs:
%   Y  - near-end spectra, one row per frequency bin (column 1 is used)
%   X  - far-end spectra, numFreqs x Q (Q = time diversity of the channel)
%   H  - current channel estimate, numFreqs x Q
%   mu - NLMS step size
% Outputs:
%   U    - echo estimate per frequency bin, H(kk,:)*x after the update
%   Hnew - updated channel estimate
%
% Fix vs. previous revision: the nargin bookkeeping was broken — nargin
% could never be 5 with four declared parameters, and nargin == 4 assigned
% the channel matrix H to the unused double-talk flag dtd. That code, and
% the unused Emax/dEH/nu leakage experiment, have been removed; the NLMS
% update itself is unchanged.
regParam = 1; % regularization: keeps the normalization finite for quiet bins
[numFreqs, numFrames] = size(Y);
[numFreqs, Q] = size(X);
U = zeros(numFreqs, 1);
for kk = 1:numFreqs
x = X(kk,1:Q)';
y = Y(kk,1);
% Normalized LMS: step proportional to the a priori error y - H*x,
% normalized by the far-end power plus regParam.
Htmp = mu*(y-H(kk,:)*x)/(x'*x+regParam)*x;
H(kk,:) = H(kk,:) + Htmp';
U(kk,1) = H(kk,:)*x;
end
Hnew = H;

View File

@ -1,22 +0,0 @@
function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast)
% getBspectrum  Binarize a power spectrum against a pivot spectrum.
%
% Bit k of the returned uint32 is set when the power spectrum exceeds the
% threshold spectrum in band k, for k in bandfirst:bandlast.
%   ps        - current power spectrum (float vector)
%   threshold - threshold/pivot spectrum (float vector, same length as ps)
%   bandfirst - first band considered
%   bandlast  - last band considered (at most 32, at most length(ps))
if (length(ps) < bandlast | bandlast > 32 | length(ps) ~= length(threshold))
error('BinDelayEst:spectrum:invalid','Dimensionality error');
end
% Set one bit per band where the spectrum lies strictly above the pivot.
% ("excess" rather than "diff" — avoids shadowing the builtin diff.)
bspectrum = uint32(0);
excess = ps - threshold;
for band = bandfirst:bandlast
if (excess(band) > 0)
bspectrum = bitset(bspectrum, band);
end
end

View File

@ -1,21 +0,0 @@
function bcount=hisser2(bs,bsr,bandfirst,bandlast)
% hisser2  Bit-error histogram between a binary spectrum and references.
%
% For every candidate delay i, counts how many bits (within bands
% bandfirst:bandlast) differ between the current binary spectrum bs and the
% reference binary spectrum bsr(i). A low count indicates a good match, so
% all delays are weighted equally.
%   bs        - current binary spectrum (one uint32)
%   bsr       - reference binary spectra, one uint32 per candidate delay
%   bandfirst - first band considered
%   bandlast  - last band considered
% Returns bcount, a column vector of bit counts (one per delay).
numDelays = length(bsr);
bcount = zeros(numDelays, 1);
for delayIdx = 1:numDelays
% XOR marks the bands where the two binary spectra disagree;
% the delay should have a low count for matching near/far patterns.
mismatch = bitxor(bs, bsr(delayIdx));
bcount(delayIdx) = sum(bitget(mismatch, bandfirst:bandlast));
end

View File

@ -1,283 +0,0 @@
% --- mainProgram: input configuration and signal loading ---
% Loads the far-end (xFar) and near-end (yNear) test signals, either from
% raw 16-bit PCM files (useHTC path) or from per-scenario WAV files whose
% names are built from speakerType/scenario (set by simEnvironment).
useHTC = 1; % Set this if you want to run a single file and set file names below. Otherwise use simEnvironment to run from several scenarios in a row
delayCompensation_flag = 0; % Set this flag to one if you want to turn on the delay compensation/enhancement
global FARENDFFT;
global NEARENDFFT;
global F;
if useHTC
% fid=fopen('./htcTouchHd/nb/aecFar.pcm'); xFar=fread(fid,'short'); fclose(fid);
% fid=fopen('./htcTouchHd/nb/aecNear.pcm'); yNear=fread(fid,'short'); fclose(fid);
% fid=fopen('./samsungBlackjack/nb/aecFar.pcm'); xFar=fread(fid,'short'); fclose(fid);
% fid=fopen('./samsungBlackjack/nb/aecNear.pcm'); yNear=fread(fid,'short'); fclose(fid);
% fid=fopen('aecFarPoor.pcm'); xFar=fread(fid,'short'); fclose(fid);
% fid=fopen('aecNearPoor.pcm'); yNear=fread(fid,'short'); fclose(fid);
% fid=fopen('out_aes.pcm'); outAES=fread(fid,'short'); fclose(fid);
fid=fopen('aecFar4.pcm'); xFar=fread(fid,'short'); fclose(fid);
fid=fopen('aecNear4.pcm'); yNear=fread(fid,'short'); fclose(fid);
% No separate near-end speech reference for the PCM inputs.
yNearSpeech = zeros(size(xFar));
% NOTE(review): fs = 8000 is immediately overridden to 16000 below while
% frameSize stays 64 — presumably intentional tuning, but worth confirming.
fs = 8000;
frameSize = 64;
% frameSize = 128;
fs = 16000;
% frameSize = 256;
%F = load('fftValues.txt');
%FARENDFFT = F(:,1:33);
%NEARENDFFT = F(:,34:66);
else
loadFileFar = [speakerType, '_s_',scenario,'_far_b.wav'];
[xFar,fs,nbits] = wavread(loadFileFar);
xFar = xFar*2^(nbits-1);
loadFileNear = [speakerType, '_s_',scenario,'_near_b.wav'];
[yNear,fs,nbits] = wavread(loadFileNear);
yNear = yNear*2^(nbits-1);
loadFileNearSpeech = [speakerType, '_s_',scenario,'_nearSpeech_b.wav'];
[yNearSpeech,fs,nbits] = wavread(loadFileNearSpeech);
yNearSpeech = yNearSpeech*2^(nbits-1);
frameSize = 256;
end
% --- mainProgram: AECM and delay-estimator configuration ---
% Builds setupStruct (framing, windows, feature flags) and the initial
% delayStruct, then fills in the derived fields via updateSettings.
dtRegions = [];
% General settings for the AECM
setupStruct = struct(...
'stepSize_flag', 1,... % This flag turns on the step size calculation. If turned off, mu = 0.25.
'supGain_flag', 0,... % This flag turns on the suppression gain calculation. If turned off, gam = 1.
'channelUpdate_flag', 0,... % This flag turns on the channel update. If turned off, H is updated for convLength and then kept constant.
'nlp_flag', 0,... % Turn on/off NLP
'withVAD_flag', 0,... % Turn on/off NLP
'useSubBand', 0,... % Set to 1 if to use subBands
'useDelayEstimation', 1,... % Set to 1 if to use delay estimation
'support', frameSize,... % # of samples per frame
'samplingfreq',fs,... % Sampling frequency
'oversampling', 2,... % Overlap between blocks/frames
'updatel', 0,... % # of samples between blocks
'hsupport1', 0,... % # of bins in frequency domain
'factor', 0,... % synthesis window amplification
'tlength', 0,... % # of samples of entire file
'updateno', 0,... % # of updates
'nb', 1,... % # of blocks
'currentBlock', 0,... %
'win', zeros(frameSize,1),...% Window to apply for fft and synthesis
'avtime', 1,... % Time (in sec.) to perform averaging
'estLen', 0,... % Averaging in # of blocks
'A_GAIN', 10.0,... %
'suppress_overdrive', 1.0,... % overdrive factor for suppression 1.4 is good
'gamma_echo', 1.0,... % same as suppress_overdrive but at different place
'de_echo_bound', 0.0,... %
'nl_alpha', 0.4,... % memory; seems not very critical
'nlSeverity', 0.2,... % nonlinearity severity: 0 does nothing; 1 suppresses all
'numInBand', [],... % # of frequency bins in resp. subBand
'centerFreq', [],... % Center frequency of resp. subBand
'dtRegions', dtRegions,... % Regions where we have DT
'subBandLength', frameSize/2);%All bins
%'subBandLength', 11); %Something's wrong when subBandLength even
%'nl_alpha', 0.8,... % memory; seems not very critical
delayStruct = struct(...
'bandfirst', 8,...
'bandlast', 25,...
'smlength', 600,...
'maxDelay', 0.4,...
'oneGoodEstimate', 0,...
'delayAdjust', 0,...
'maxDelayb', 0);
% More parameters in delayStruct are constructed in "updateSettings" below
% Make struct settings
[setupStruct, delayStruct] = updateSettings(yNear, xFar, setupStruct, delayStruct);
% Override the Bark filterbank weights: one bin per "band" (full resolution).
setupStruct.numInBand = ones(setupStruct.hsupport1,1);
Q = 1; % Time diversity in channel
% General settings for the step size calculation
% (hysteresis counters and bounds consumed by calcStepSize)
muStruct = struct(...
'countInInterval', 0,...
'countOutHighInterval', 0,...
'countOutLowInterval', 0,...
'minInInterval', 50,...
'minOutHighInterval', 10,...
'minOutLowInterval', 10,...
'maxOutLowInterval', 50);
% General settings for the AECM
% (channel estimates H/HStored, energy statistics, logs sized to the number
% of processing blocks, and the delay-feedback fields used by AECMobile)
aecmStruct = struct(...
'plotIt', 0,... % Set to 0 to turn off plotting
'useSubBand', 0,...
'bandFactor', 1,...
'H', zeros(setupStruct.subBandLength+1,Q),...
'HStored', zeros(setupStruct.subBandLength+1,Q),...
'X', zeros(setupStruct.subBandLength+1,Q),...
'energyThres', 0.28,...
'energyThresMSE', 0.4,...
'energyMin', inf,...
'energyMax', -inf,...
'energyLevel', 0,...
'energyLevelMSE', 0,...
'convLength', 100,...
'gammaLog', ones(setupStruct.updateno,1),...
'muLog', ones(setupStruct.updateno,1),...
'enerFar', zeros(setupStruct.updateno,1),...
'enerNear', zeros(setupStruct.updateno,1),...
'enerEcho', zeros(setupStruct.updateno,1),...
'enerEchoStored', zeros(setupStruct.updateno,1),...
'enerOut', zeros(setupStruct.updateno,1),...
'runningfmean', 0,...
'muStruct', muStruct,...
'varMean', 0,...
'countMseH', 0,...
'mseHThreshold', 1.1,...
'mseHStoredOld', inf,...
'mseHLatestOld', inf,...
'delayLatestS', zeros(1,51),...
'feedbackDelay', 0,...
'feedbackDelayUpdate', 0,...
'cntIn', 0,...
'cntOut', 0,...
'FAR_ENERGY_MIN', 1,...
'ENERGY_DEV_OFFSET', 0.5,...
'ENERGY_DEV_TOL', 1.5,...
'MU_MIN', -16,...
'MU_MAX', -2,...
'newDelayCurve', 0);
% Adjust speech signals
% Prepend hsupport1-1 zeros, then re-truncate to tlength samples.
xFar = [zeros(setupStruct.hsupport1-1,1);xFar(1:setupStruct.tlength)];
yNear = [zeros(setupStruct.hsupport1-1,1);yNear(1:setupStruct.tlength)];
yNearSpeech = [zeros(setupStruct.hsupport1-1,1);yNearSpeech(1:setupStruct.tlength)];
xFar = xFar(1:setupStruct.tlength);
yNear = yNear(1:setupStruct.tlength);
% Set figure settings
if aecmStruct.plotIt
figure(13)
set(gcf,'doublebuffer','on')
end
%%%%%%%%%%
% Here starts the algorithm
% Dividing into frames and then estimating the near end speech
%%%%%%%%%%
% Pre-allocate per-block spectra (f* complex frames, af* magnitudes) and
% the output buffer emicrophone for the overlap-add synthesis.
fTheFarEnd = complex(zeros(setupStruct.hsupport1,1));
afTheFarEnd = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
fFar = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
fmicrophone = complex(zeros(setupStruct.hsupport1,1));
afmicrophone = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
fNear = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
femicrophone = complex(zeros(setupStruct.hsupport1,1));
emicrophone = zeros(setupStruct.tlength,1);
% Hand-measured delay table for the "manual delay" mode (mode 2):
% delSamples/delStarts are converted from samples to block indices.
% NaN entries mark segments with no usable measurement.
if (setupStruct.useDelayEstimation == 2)
delSamples = [1641 1895 2032 1895 2311 2000 2350 2222 NaN 2332 2330 2290 2401 2415 NaN 2393 2305 2381 2398];
delBlocks = round(delSamples/setupStruct.updatel);
delStarts = floor([25138 46844 105991 169901 195739 218536 241803 333905 347703 362660 373753 745135 765887 788078 806257 823835 842443 860139 881869]/setupStruct.updatel);
else
delStarts = [];
end
% --- mainProgram: per-block processing loop ---
% For each overlapping block: windowed FFT analysis of far/near signals,
% delay estimation (align / manual table / fixed offset), AECM echo
% suppression, and overlap-add synthesis into emicrophone.
for i=1:setupStruct.updateno
setupStruct.currentBlock = i;
% Sample range of the current block (updatel hop, support-long frame).
sb = (i-1)*setupStruct.updatel + 1;
se = sb + setupStruct.support - 1;
%%%%%%%
% Analysis FFTs
%%%%%%%
% Far end signal
temp = fft(setupStruct.win .* xFar(sb:se))/frameSize;
fTheFarEnd = temp(1:setupStruct.hsupport1);
afTheFarEnd(:,i) = abs(fTheFarEnd);
fFar(:,i) = fTheFarEnd;
% Near end signal
temp = fft(setupStruct.win .* yNear(sb:se))/frameSize;%,pause
fmicrophone = temp(1:setupStruct.hsupport1);
afmicrophone(:,i) = abs(fmicrophone);
fNear(:,i) = fmicrophone;
%abs(fmicrophone),pause
% The true near end speaker (if we have such info)
temp = fft(setupStruct.win .* yNearSpeech(sb:se));
aftrueSpeech = abs(temp(1:setupStruct.hsupport1));
if(i == 1000)
%break;
end
% Perform delay estimation
if (setupStruct.useDelayEstimation == 1)
% Delay Estimation
delayStruct = align(fTheFarEnd, fmicrophone, delayStruct, i);
%delayStruct.delay(i) = 39;%19;
% idel: index of the far-end block matching the current near-end block.
idel = max(i - delayStruct.delay(i) + 1,1);
if delayCompensation_flag
% If we have a new delay estimate from Bastiaan's alg. update the offset
if (delayStruct.delay(i) ~= delayStruct.delay(max(1,i-1)))
delayStruct.delayAdjust = delayStruct.delayAdjust + delayStruct.delay(i) - delayStruct.delay(i-1);
end
% Store the compensated delay
delayStruct.delayNew(i) = delayStruct.delay(i) - delayStruct.delayAdjust;
if (delayStruct.delayNew(i) < 1)
% Something's wrong
pause,break
end
% Compensate with the offset estimate
idel = idel + delayStruct.delayAdjust;
end
if 0%aecmStruct.plotIt
figure(1)
plot(1:i,delayStruct.delay(1:i),'k:',1:i,delayStruct.delayNew(1:i),'k--','LineWidth',2),drawnow
end
elseif (setupStruct.useDelayEstimation == 2)
% Use "manual delay"
delIndex = find(delStarts<i);
if isempty(delIndex)
idel = i;
else
idel = i - delBlocks(max(delIndex));
if isnan(idel)
% Fall back to the previous table entry when the current one is NaN.
idel = i - delBlocks(max(delIndex)-1);
end
end
else
% No delay estimation
%idel = max(i - 18, 1);
idel = max(i - 50, 1);
end
%%%%%%%%
% This is the AECM algorithm
%
% Output is the new frequency domain signal (hopefully) echo compensated
%%%%%%%%
[femicrophone, aecmStruct] = AECMobile(fmicrophone, afTheFarEnd(:,idel), setupStruct, aecmStruct);
%[femicrophone, aecmStruct] = AECMobile(fmicrophone, FARENDFFT(idel,:)'/2^F(idel,end-1), setupStruct, aecmStruct);
if aecmStruct.feedbackDelayUpdate
% If the feedback tells us there is a new offset out there update the enhancement
delayStruct.delayAdjust = delayStruct.delayAdjust + aecmStruct.feedbackDelay;
aecmStruct.feedbackDelayUpdate = 0;
end
% reconstruction; first make spectrum odd
% (mirror the conjugate half so the IFFT is real-valued)
temp = [femicrophone; flipud(conj(femicrophone(2:(setupStruct.hsupport1-1))))];
emicrophone(sb:se) = emicrophone(sb:se) + setupStruct.factor * setupStruct.win .* real(ifft(temp))*frameSize;
if max(isnan(emicrophone(sb:se)))
% Something's wrong with the output at block i
i
break
end
end
% --- mainProgram: write / inspect results ---
% PCM path: dump the echo-compensated signal as 16-bit PCM. The commented
% lines dump intermediate FFT data for comparison with the C implementation.
if useHTC
fid=fopen('aecOutMatlabC.pcm','w');fwrite(fid,int16(emicrophone),'short');fclose(fid);
%fid=fopen('farendFFT.txt','w');fwrite(fid,int16(afTheFarEnd(:)),'short');fclose(fid);
%fid=fopen('farendFFTreal.txt','w');fwrite(fid,int16(imag(fFar(:))),'short');fclose(fid);
%fid=fopen('farendFFTimag.txt','w');fwrite(fid,int16(real(fFar(:))),'short');fclose(fid);
%fid=fopen('nearendFFT.txt','w');fwrite(fid,int16(afmicrophone(:)),'short');fclose(fid);
%fid=fopen('nearendFFTreal.txt','w');fwrite(fid,int16(real(fNear(:))),'short');fclose(fid);
%fid=fopen('nearendFFTimag.txt','w');fwrite(fid,int16(imag(fNear(:))),'short');fclose(fid);
end
% WAV path: open the signals in spclab for listening/inspection.
if useHTC
%spclab(setupStruct.samplingfreq,xFar,yNear,emicrophone)
else
spclab(setupStruct.samplingfreq,xFar,yNear,emicrophone,yNearSpeech)
end

View File

@ -1,15 +0,0 @@
% simEnvironment: batch driver for mainProgram.
% Sets speakerType/scenario (consumed by mainProgram's WAV-loading branch),
% runs mainProgram for each selected scenario k, and writes the resulting
% echo-compensated signal (emicrophone) as 16-bit PCM.
speakerType = 'fm';
%for k=2:5
%for k=[2 4 5]
for k=3
scenario = int2str(k);
fprintf('Current scenario: %d\n',k)
mainProgram
%saveFile = [speakerType, '_s_',scenario,'_delayEst_v2_vad_man.wav'];
%wavwrite(emic,fs,nbits,saveFile);
%saveFile = ['P:\Engineering_share\BjornV\AECM\',speakerType, '_s_',scenario,'_delayEst_v2_vad_man.pcm'];
%saveFile = [speakerType, '_s_',scenario,'_adaptMu_adaptGamma_withVar_gammFilt_HSt.pcm'];
saveFile = ['scenario_',scenario,'_090417_backupH_nlp.pcm'];
fid=fopen(saveFile,'w');fwrite(fid,int16(emicrophone),'short');fclose(fid);
%pause
end

View File

@ -1,94 +0,0 @@
function [setupStructNew, delayStructNew] = updateSettings(microphone, TheFarEnd, setupStruct, delayStruct);
% updateSettings  Derive framing constants, the analysis window, the
% Bark-scale sub-band filterbank, and the fully-populated delay-estimator
% state from the user-supplied setupStruct/delayStruct and the input signals.
%
% Inputs:
%   microphone  - near-end signal (used only for its length)
%   TheFarEnd   - far-end signal (used only for its length)
%   setupStruct - partially filled configuration (see mainProgram)
%   delayStruct - delay-estimator parameters (bandfirst/bandlast/maxDelay/..)
% Outputs:
%   setupStructNew - setupStruct with derived fields filled in
%   delayStructNew - fresh delay-estimator state with history buffers sized
%                    from the derived block counts
% other, constants
setupStruct.hsupport1 = setupStruct.support/2 + 1;
setupStruct.factor = 2 / setupStruct.oversampling;
setupStruct.updatel = setupStruct.support/setupStruct.oversampling;
setupStruct.estLen = round(setupStruct.avtime * setupStruct.samplingfreq/setupStruct.updatel);
% compute some constants
blockLen = setupStruct.support/setupStruct.oversampling;
delayStruct.maxDelayb = floor(setupStruct.samplingfreq*delayStruct.maxDelay/setupStruct.updatel); % in blocks
%input
% Trim to the shorter of the two signals, rounded to whole blocks.
tlength = min([length(microphone),length(TheFarEnd)]);
updateno = floor(tlength/setupStruct.updatel);
setupStruct.tlength = setupStruct.updatel*updateno;
setupStruct.updateno = updateno - setupStruct.oversampling + 1;
% signal length
n = floor(min([length(TheFarEnd), length(microphone)])/setupStruct.support)*setupStruct.support;
setupStruct.nb = n/blockLen - setupStruct.oversampling + 1; % in blocks
% sqrt-Hann analysis/synthesis window (first sample forced to 0).
setupStruct.win = sqrt([0 ; hanning(setupStruct.support-1)]);
% Construct filterbank in Bark-scale
K = setupStruct.subBandLength; %Something's wrong when K even
% ERB-spaced center frequencies from 0 to samplingfreq/2.
erbs = 21.4*log10(0.00437*setupStruct.samplingfreq/2+1);
fe = (10.^((0:K)'*erbs/K/21.4)-1)/0.00437;
setupStruct.centerFreq = fe;
% Solve for band edges x such that each band straddles its center frequency.
H = diag(ones(1,K-1))+diag(ones(1,K-2),-1);
Hinv = inv(H);
aty = 2*Hinv(end,:)*fe(2:end-1);
boundary = aty - (setupStruct.samplingfreq/2 + fe(end-1))/2;
if rem(K,2)
x1 = min([fe(2)/2, -boundary]);
else
x1 = max([0, boundary]);
end
%x1
g = fe(2:end-1);
g(1) = g(1) - x1/2;
x = 2*Hinv*g;
x = [x1;x];
%figure(42), clf
% xy/yy trace the triangular band shapes; only used by the disabled plots.
xy = zeros((K+1)*4,1);
yy = zeros((K+1)*4,1);
xy(1:4) = [fe(1) fe(1) x(1) x(1)]';
yy(1:4) = [0 1 1 0]'/x(1);
for kk=2:K
xy((kk-1)*4+(1:4)) = [x(kk-1) x(kk-1) x(kk) x(kk)]';
yy((kk-1)*4+(1:4)) = [0 1 1 0]'/(x(kk)-x(kk-1));
end
xy(end-3:end) = [x(K) x(K) fe(end) fe(end)]';
yy(end-3:end) = [0 1 1 0]'/(fe(end)*2-2*x(K));
%plot(xy,yy,'LineWidth',2)
%fill(xy,yy,'y')
x = [0;x];
% Convert band edges from Hz to FFT-bin units, then count bins per band.
xk = x*setupStruct.hsupport1/setupStruct.samplingfreq*2;
%setupStruct.erbBoundaries = xk;
numInBand = zeros(length(xk),1);
xh = (0:setupStruct.hsupport1-1);
for kk=1:length(xk)
if (kk==length(xk))
numInBand(kk) = length(find(xh>=xk(kk)));
else
numInBand(kk) = length(intersect(find(xh>=xk(kk)),find(xh<xk(kk+1))));
end
end
setupStruct.numInBand = numInBand;
setupStructNew = setupStruct;
% Fresh delay-estimator state: spectral histories, IIR filter states
% (z200/z500), binary-spectrum buffers and the per-block delay track.
delayStructNew = struct(...
'sxAll2',zeros(setupStructNew.hsupport1,setupStructNew.nb),...
'syAll2',zeros(setupStructNew.hsupport1,setupStructNew.nb),...
'z200',zeros(5,setupStructNew.hsupport1),...
'z500',zeros(5,delayStruct.maxDelayb+1),...
'bxspectrum',uint32(zeros(setupStructNew.nb,1)),...
'byspectrum',uint32(zeros(setupStructNew.nb,1)),...
'bandfirst',delayStruct.bandfirst,'bandlast',delayStruct.bandlast,...
'bxhist',uint32(zeros(delayStruct.maxDelayb+1,1)),...
'bcount',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),...
'fout',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),...
'new',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),...
'smlength',delayStruct.smlength,...
'maxDelay', delayStruct.maxDelay,...
'maxDelayb', delayStruct.maxDelayb,...
'oneGoodEstimate', 0,...
'delayAdjust', 0,...
'delayNew',zeros(setupStructNew.nb,1),...
'delay',zeros(setupStructNew.nb,1));

View File

@ -1,234 +0,0 @@
function fout = waitbar_j(x,whichbar, varargin)
%WAITBAR Display wait bar.
% H = WAITBAR(X,'title', property, value, property, value, ...)
% creates and displays a waitbar of fractional length X. The
% handle to the waitbar figure is returned in H.
% X should be between 0 and 1. Optional arguments property and
% value allow to set corresponding waitbar figure properties.
% Property can also be an action keyword 'CreateCancelBtn', in
% which case a cancel button will be added to the figure, and
% the passed value string will be executed upon clicking on the
% cancel button or the close figure button.
%
% WAITBAR(X) will set the length of the bar in the most recently
% created waitbar window to the fractional length X.
%
% WAITBAR(X,H) will set the length of the bar in waitbar H
% to the fractional length X.
%
% WAITBAR(X,H,'updated title') will update the title text in
% the waitbar figure, in addition to setting the fractional
% length to X.
%
% WAITBAR is typically used inside a FOR loop that performs a
% lengthy computation. A sample usage is shown below:
%
% h = waitbar(0,'Please wait...');
% for i=1:100,
% % computation here %
% waitbar(i/100,h)
% end
% close(h)
% Clay M. Thompson 11-9-92
% Vlad Kolesnikov 06-7-99
% Copyright 1984-2001 The MathWorks, Inc.
% $Revision: 1.22 $ $Date: 2001/04/15 12:03:29 $
if nargin>=2
if ischar(whichbar)
type=2; %we are initializing
name=whichbar;
elseif isnumeric(whichbar)
type=1; %we are updating, given a handle
f=whichbar;
else
error(['Input arguments of type ' class(whichbar) ' not valid.'])
end
elseif nargin==1
f = findobj(allchild(0),'flat','Tag','TMWWaitbar');
if isempty(f)
type=2;
name='Waitbar';
else
type=1;
f=f(1);
end
else
error('Input arguments not valid.');
end
x = max(0,min(100*x,100));
switch type
case 1, % waitbar(x) update
p = findobj(f,'Type','patch');
l = findobj(f,'Type','line');
if isempty(f) | isempty(p) | isempty(l),
error('Couldn''t find waitbar handles.');
end
xpatch = get(p,'XData');
xpatch = [0 x x 0];
set(p,'XData',xpatch)
xline = get(l,'XData');
set(l,'XData',xline);
if nargin>2,
% Update waitbar title:
hAxes = findobj(f,'type','axes');
hTitle = get(hAxes,'title');
set(hTitle,'string',varargin{1});
end
case 2, % waitbar(x,name) initialize
vertMargin = 0;
if nargin > 2,
% we have optional arguments: property-value pairs
if rem (nargin, 2 ) ~= 0
error( 'Optional initialization arguments must be passed in pairs' );
end
end
oldRootUnits = get(0,'Units');
set(0, 'Units', 'points');
screenSize = get(0,'ScreenSize');
axFontSize=get(0,'FactoryAxesFontSize');
pointsPerPixel = 72/get(0,'ScreenPixelsPerInch');
width = 360 * pointsPerPixel;
height = 75 * pointsPerPixel;
pos = [screenSize(3)/2-width/2 screenSize(4)/2-height/2 width height];
%pos= [501.75 589.5 393.75 52.5];
f = figure(...
'Units', 'points', ...
'BusyAction', 'queue', ...
'Position', pos, ...
'Resize','on', ...
'CreateFcn','', ...
'NumberTitle','off', ...
'IntegerHandle','off', ...
'MenuBar', 'none', ...
'Tag','TMWWaitbar',...
'Interruptible', 'off', ...
'Visible','on');
%%%%%%%%%%%%%%%%%%%%%
% set figure properties as passed to the fcn
% pay special attention to the 'cancel' request
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if nargin > 2,
propList = varargin(1:2:end);
valueList = varargin(2:2:end);
cancelBtnCreated = 0;
for ii = 1:length( propList )
try
if strcmp(lower(propList{ii}), 'createcancelbtn' ) & ~cancelBtnCreated
cancelBtnHeight = 23 * pointsPerPixel;
cancelBtnWidth = 60 * pointsPerPixel;
newPos = pos;
vertMargin = vertMargin + cancelBtnHeight;
newPos(4) = newPos(4)+vertMargin;
callbackFcn = [valueList{ii}];
set( f, 'Position', newPos, 'CloseRequestFcn', callbackFcn );
cancelButt = uicontrol('Parent',f, ...
'Units','points', ...
'Callback',callbackFcn, ...
'ButtonDownFcn', callbackFcn, ...
'Enable','on', ...
'Interruptible','off', ...
'Position', [pos(3)-cancelBtnWidth*1.4, 7, ...
cancelBtnWidth, cancelBtnHeight], ...
'String','Cancel', ...
'Tag','TMWWaitbarCancelButton');
cancelBtnCreated = 1;
else
% simply set the prop/value pair of the figure
set( f, propList{ii}, valueList{ii});
end
catch
disp ( ['Warning: could not set property ''' propList{ii} ''' with value ''' num2str(valueList{ii}) '''' ] );
end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
colormap([]);
axNorm=[.05 .3 .9 .2];
% axNorm=[1 1 1 1];
axPos=axNorm.*[pos(3:4),pos(3:4)] + [0 vertMargin 0 0];
h = axes('XLim',[0 100],...
'YLim',[0 1],...
'Box','on', ...
'Units','Points',...
'FontSize', axFontSize,...
'Position',axPos,...
'XTickMode','manual',...
'YTickMode','manual',...
'XTick',[],...
'YTick',[],...
'XTickLabelMode','manual',...
'XTickLabel',[],...
'YTickLabelMode','manual',...
'YTickLabel',[]);
tHandle=title(name);
tHandle=get(h,'title');
oldTitleUnits=get(tHandle,'Units');
set(tHandle,...
'Units', 'points',...
'String', name);
tExtent=get(tHandle,'Extent');
set(tHandle,'Units',oldTitleUnits);
titleHeight=tExtent(4)+axPos(2)+axPos(4)+5;
if titleHeight>pos(4)
pos(4)=titleHeight;
pos(2)=screenSize(4)/2-pos(4)/2;
figPosDirty=logical(1);
else
figPosDirty=logical(0);
end
if tExtent(3)>pos(3)*1.10;
pos(3)=min(tExtent(3)*1.10,screenSize(3));
pos(1)=screenSize(3)/2-pos(3)/2;
axPos([1,3])=axNorm([1,3])*pos(3);
set(h,'Position',axPos);
figPosDirty=logical(1);
end
if figPosDirty
set(f,'Position',pos);
end
xpatch = [0 x x 0];
ypatch = [0 0 1 1];
xline = [100 0 0 100 100];
yline = [0 0 1 1 0];
p = patch(xpatch,ypatch,'r','EdgeColor','r','EraseMode','none');
l = line(xline,yline,'EraseMode','none');
set(l,'Color',get(gca,'XColor'));
set(f,'HandleVisibility','callback','visible','on', 'resize','off');
set(0, 'Units', oldRootUnits);
end % case
drawnow;
if nargout==1,
fout = f;
end

View File

@ -28,8 +28,6 @@
'echo_control_mobile.c',
'aecm_core.c',
'aecm_core.h',
'aecm_delay_estimator.c',
'aecm_delay_estimator.h',
],
},
],

View File

@ -13,8 +13,8 @@
#include <assert.h>
#include <stdlib.h>
#include "aecm_delay_estimator.h"
#include "echo_control_mobile.h"
#include "delay_estimator.h"
#include "ring_buffer.h"
#include "typedefs.h"
@ -153,11 +153,13 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
return -1;
}
if (WebRtcAecm_CreateDelayEstimator(&aecm->delay_estimator, PART_LEN1, MAX_DELAY) == -1)
{
WebRtcAecm_FreeCore(aecm);
aecm = NULL;
return -1;
if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator,
PART_LEN1,
MAX_DELAY,
1) == -1) {
WebRtcAecm_FreeCore(aecm);
aecm = NULL;
return -1;
}
// Init some aecm pointers. 16 and 32 byte alignment is only necessary
@ -242,9 +244,8 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
aecm->seed = 666;
aecm->totCount = 0;
if (WebRtcAecm_InitDelayEstimator(aecm->delay_estimator) != 0)
{
return -1;
if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) {
return -1;
}
// Initialize to reasonable values
@ -339,7 +340,7 @@ int WebRtcAecm_FreeCore(AecmCore_t *aecm)
WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf);
WebRtcApm_FreeBuffer(aecm->outFrameBuf);
WebRtcAecm_FreeDelayEstimator(aecm->delay_estimator);
WebRtc_FreeDelayEstimator(aecm->delay_estimator);
free(aecm);
return 0;
@ -1161,6 +1162,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
WebRtc_Word16 supGain;
WebRtc_Word16 zeros32, zeros16;
WebRtc_Word16 zerosDBufNoisy, zerosDBufClean, zerosXBuf;
int far_q;
WebRtc_Word16 resolutionDiff, qDomainDiff;
const int kMinPrefBand = 4;
@ -1200,10 +1202,10 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
#endif
// Transform far end signal from time domain to frequency domain.
zerosXBuf = TimeToFrequencyDomain(aecm->xBuf,
dfw,
xfa,
&xfaSum);
far_q = TimeToFrequencyDomain(aecm->xBuf,
dfw,
xfa,
&xfaSum);
// Transform noisy near end signal from time domain to frequency domain.
zerosDBufNoisy = TimeToFrequencyDomain(aecm->dBufNoisy,
@ -1211,7 +1213,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
dfaNoisy,
&dfaNoisySum);
aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
aecm->dfaNoisyQDomain = zerosDBufNoisy;
aecm->dfaNoisyQDomain = (WebRtc_Word16)zerosDBufNoisy;
if (nearendClean == NULL)
@ -1228,7 +1230,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
dfaClean,
&dfaCleanSum);
aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
aecm->dfaCleanQDomain = zerosDBufClean;
aecm->dfaCleanQDomain = (WebRtc_Word16)zerosDBufClean;
}
#ifdef ARM_WINM_LOG_
@ -1243,12 +1245,12 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
// Get the delay
// Save far-end history and estimate delay
delay = WebRtcAecm_DelayEstimatorProcess(aecm->delay_estimator,
xfa,
dfaNoisy,
PART_LEN1,
zerosXBuf,
aecm->currentVADValue);
delay = WebRtc_DelayEstimatorProcess(aecm->delay_estimator,
xfa,
dfaNoisy,
PART_LEN1,
far_q,
aecm->currentVADValue);
if (delay < 0)
{
return -1;
@ -1272,16 +1274,21 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
QueryPerformanceCounter((LARGE_INTEGER*)&start);
#endif
// Get aligned far end spectrum
far_spectrum_ptr = WebRtcAecm_GetAlignedFarend(aecm->delay_estimator,
PART_LEN1,
&zerosXBuf);
far_spectrum_ptr = WebRtc_AlignedFarend(aecm->delay_estimator,
PART_LEN1,
&far_q);
zerosXBuf = (WebRtc_Word16) far_q;
if (far_spectrum_ptr == NULL)
{
return -1;
}
// Calculate log(energy) and update energy threshold levels
WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, echoEst32);
WebRtcAecm_CalcEnergies(aecm,
far_spectrum_ptr,
zerosXBuf,
dfaNoisySum,
echoEst32);
// Calculate stepsize
mu = WebRtcAecm_CalcStepSize(aecm);
@ -1923,4 +1930,3 @@ void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm)
}
#endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))

View File

@ -178,7 +178,7 @@ typedef struct
WebRtc_Word16 farEnergyMaxMin;
WebRtc_Word16 farEnergyVAD;
WebRtc_Word16 farEnergyMSE;
WebRtc_Word16 currentVADValue;
int currentVADValue;
WebRtc_Word16 vadUpdateCount;
WebRtc_Word16 startupState;

View File

@ -1,604 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "aecm_delay_estimator.h"
#include <assert.h>
#include <stdlib.h>
#include "signal_processing_library.h"
#include "typedefs.h"
typedef struct
{
// Pointers to mean values of spectrum and bit counts
WebRtc_Word32* mean_far_spectrum;
WebRtc_Word32* mean_near_spectrum;
WebRtc_Word32* mean_bit_counts;
// Arrays only used locally in DelayEstimatorProcess() but whose size
// is determined at run-time.
WebRtc_Word32* bit_counts;
WebRtc_Word32* far_spectrum_32;
WebRtc_Word32* near_spectrum_32;
// Binary history variables
WebRtc_UWord32* binary_far_history;
// Far end history variables
WebRtc_UWord16* far_history;
int far_history_position;
WebRtc_Word16* far_q_domains;
// Delay histogram variables
WebRtc_Word16* delay_histogram;
WebRtc_Word16 vad_counter;
// Delay memory
int last_delay;
// Buffer size parameters
int history_size;
int spectrum_size;
} DelayEstimator_t;
// Only bit |kBandFirst| through bit |kBandLast| are processed
// |kBandFirst| - |kBandLast| must be < 32
static const int kBandFirst = 12;
static const int kBandLast = 43;
static __inline WebRtc_UWord32 SetBit(WebRtc_UWord32 in,
WebRtc_Word32 pos)
{
WebRtc_UWord32 mask = WEBRTC_SPL_LSHIFT_W32(1, pos);
WebRtc_UWord32 out = (in | mask);
return out;
}
// Compares the binary vector |binary_vector| with all rows of the binary
// matrix |binary_matrix| and counts per row the number of times they have the
// same value.
// Input:
// - binary_vector : binary "vector" stored in a long
// - binary_matrix : binary "matrix" stored as a vector of long
// - matrix_size : size of binary "matrix"
// Output:
// - bit_counts : "Vector" stored as a long, containing for each
// row the number of times the matrix row and the
// input vector have the same value
//
static void BitCountComparison(const WebRtc_UWord32 binary_vector,
const WebRtc_UWord32* binary_matrix,
int matrix_size,
WebRtc_Word32* bit_counts)
{
int n = 0;
WebRtc_UWord32 a = binary_vector;
register WebRtc_UWord32 tmp;
// compare binary vector |binary_vector| with all rows of the binary matrix
// |binary_matrix|
for (; n < matrix_size; n++)
{
a = (binary_vector ^ binary_matrix[n]);
// Returns bit counts in tmp
tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111);
tmp = ((tmp + (tmp >> 3)) & 030707070707);
tmp = (tmp + (tmp >> 6));
tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077;
bit_counts[n] = (WebRtc_Word32)tmp;
}
}
// Computes the binary spectrum by comparing the input |spectrum| with a
// |threshold_spectrum|.
//
// Input:
// - spectrum : Spectrum of which the binary spectrum should
// be calculated.
// - threshold_spectrum : Threshold spectrum with which the input
// spectrum is compared.
// Return:
// - out : Binary spectrum
//
static WebRtc_UWord32 GetBinarySpectrum(WebRtc_Word32* spectrum,
WebRtc_Word32* threshold_spectrum)
{
int k = kBandFirst;
WebRtc_UWord32 out = 0;
for (; k <= kBandLast; k++)
{
if (spectrum[k] > threshold_spectrum[k])
{
out = SetBit(out, k - kBandFirst);
}
}
return out;
}
// Calculates the mean recursively.
//
// Input:
// - new_value : new additional value
// - factor : factor for smoothing
//
// Input/Output:
// - mean_value : pointer to the mean value that should be updated
//
static void MeanEstimator(const WebRtc_Word32 new_value,
int factor,
WebRtc_Word32* mean_value)
{
WebRtc_Word32 mean_new = *mean_value;
WebRtc_Word32 diff = new_value - mean_new;
// mean_new = mean_value + ((new_value - mean_value) >> factor);
if (diff < 0)
{
diff = -WEBRTC_SPL_RSHIFT_W32(-diff, factor);
}
else
{
diff = WEBRTC_SPL_RSHIFT_W32(diff, factor);
}
mean_new += diff;
*mean_value = mean_new;
}
// Moves the pointer to the next entry and inserts new far end spectrum and
// corresponding Q-domain in its buffer.
//
// Input:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum
// - far_q : Q-domain of far end spectrum
//
static void UpdateFarHistory(DelayEstimator_t* self,
WebRtc_UWord16* far_spectrum,
WebRtc_Word16 far_q)
{
// Get new buffer position
self->far_history_position++;
if (self->far_history_position >= self->history_size)
{
self->far_history_position = 0;
}
// Update Q-domain buffer
self->far_q_domains[self->far_history_position] = far_q;
// Update far end spectrum buffer
memcpy(&(self->far_history[self->far_history_position * self->spectrum_size]),
far_spectrum,
sizeof(WebRtc_UWord16) * self->spectrum_size);
}
int WebRtcAecm_FreeDelayEstimator(void* handle)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
if (self == NULL)
{
return -1;
}
if (self->mean_far_spectrum != NULL)
{
free(self->mean_far_spectrum);
self->mean_far_spectrum = NULL;
}
if (self->mean_near_spectrum != NULL)
{
free(self->mean_near_spectrum);
self->mean_near_spectrum = NULL;
}
if (self->mean_bit_counts != NULL)
{
free(self->mean_bit_counts);
self->mean_bit_counts = NULL;
}
if (self->bit_counts != NULL)
{
free(self->bit_counts);
self->bit_counts = NULL;
}
if (self->far_spectrum_32 != NULL)
{
free(self->far_spectrum_32);
self->far_spectrum_32 = NULL;
}
if (self->near_spectrum_32 != NULL)
{
free(self->near_spectrum_32);
self->near_spectrum_32 = NULL;
}
if (self->far_history != NULL)
{
free(self->far_history);
self->far_history = NULL;
}
if (self->binary_far_history != NULL)
{
free(self->binary_far_history);
self->binary_far_history = NULL;
}
if (self->far_q_domains != NULL)
{
free(self->far_q_domains);
self->far_q_domains = NULL;
}
if (self->delay_histogram != NULL)
{
free(self->delay_histogram);
self->delay_histogram = NULL;
}
free(self);
return 0;
}
int WebRtcAecm_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size)
{
DelayEstimator_t *self = NULL;
// Check if the sub band used in the delay estimation is small enough to
// fit in a Word32.
assert(kBandLast - kBandFirst < 32);
if (spectrum_size < kBandLast)
{
return -1;
}
if (history_size < 0)
{
return -1;
}
self = malloc(sizeof(DelayEstimator_t));
*handle = self;
if (self == NULL)
{
return -1;
}
self->mean_far_spectrum = NULL;
self->mean_near_spectrum = NULL;
self->bit_counts = NULL;
self->far_spectrum_32 = NULL;
self->near_spectrum_32 = NULL;
self->far_history = NULL;
self->mean_bit_counts = NULL;
self->binary_far_history = NULL;
self->far_q_domains = NULL;
self->delay_histogram = NULL;
// Allocate memory for spectrum buffers
self->mean_far_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->mean_far_spectrum == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->mean_near_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->mean_near_spectrum == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->mean_bit_counts = malloc(history_size * sizeof(WebRtc_Word32));
if (self->mean_bit_counts == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->bit_counts = malloc(history_size * sizeof(WebRtc_Word32));
if (self->bit_counts == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->far_spectrum_32 = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->far_spectrum_32 == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->near_spectrum_32 = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->near_spectrum_32 == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
// Allocate memory for history buffers
self->far_history = malloc(spectrum_size * history_size *
sizeof(WebRtc_UWord16));
if (self->far_history == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->binary_far_history = malloc(history_size * sizeof(WebRtc_UWord32));
if (self->binary_far_history == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->far_q_domains = malloc(history_size * sizeof(WebRtc_Word16));
if (self->far_q_domains == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->delay_histogram = malloc(history_size * sizeof(WebRtc_Word16));
if (self->delay_histogram == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->spectrum_size = spectrum_size;
self->history_size = history_size;
return 0;
}
int WebRtcAecm_InitDelayEstimator(void* handle)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
if (self == NULL)
{
return -1;
}
// Set averaged far and near end spectra to zero
memset(self->mean_far_spectrum,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
memset(self->mean_near_spectrum,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
// Set averaged bit counts to zero
memset(self->mean_bit_counts,
0,
sizeof(WebRtc_Word32) * self->history_size);
memset(self->bit_counts,
0,
sizeof(WebRtc_Word32) * self->history_size);
memset(self->far_spectrum_32,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
memset(self->near_spectrum_32,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
// Set far end histories to zero
memset(self->binary_far_history,
0,
sizeof(WebRtc_UWord32) * self->history_size);
memset(self->far_history,
0,
sizeof(WebRtc_UWord16) * self->spectrum_size *
self->history_size);
memset(self->far_q_domains,
0,
sizeof(WebRtc_Word16) * self->history_size);
self->far_history_position = self->history_size;
// Set delay histogram to zero
memset(self->delay_histogram,
0,
sizeof(WebRtc_Word16) * self->history_size);
// Set VAD counter to zero
self->vad_counter = 0;
// Set delay memory to zero
self->last_delay = 0;
return 0;
}
int WebRtcAecm_DelayEstimatorProcess(void* handle,
WebRtc_UWord16* far_spectrum,
WebRtc_UWord16* near_spectrum,
int spectrum_size,
WebRtc_Word16 far_q,
WebRtc_Word16 vad_value)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
WebRtc_UWord32 bxspectrum, byspectrum;
int i;
WebRtc_Word32 dtmp1;
WebRtc_Word16 maxHistLvl = 0;
WebRtc_Word16 minpos = -1;
const int kVadCountThreshold = 25;
const int kMaxHistogram = 600;
if (self == NULL)
{
return -1;
}
if (spectrum_size != self->spectrum_size)
{
// Data sizes don't match
return -1;
}
if (far_q > 15)
{
// If far_Q is larger than 15 we can not guarantee no wrap around
return -1;
}
// Update far end history
UpdateFarHistory(self, far_spectrum, far_q);
// Update the far and near end means
for (i = 0; i < self->spectrum_size; i++)
{
self->far_spectrum_32[i] = (WebRtc_Word32)far_spectrum[i];
MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i]));
self->near_spectrum_32[i] = (WebRtc_Word32)near_spectrum[i];
MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i]));
}
// Shift binary spectrum history
memmove(&(self->binary_far_history[1]),
&(self->binary_far_history[0]),
(self->history_size - 1) * sizeof(WebRtc_UWord32));
// Get binary spectra
bxspectrum = GetBinarySpectrum(self->far_spectrum_32, self->mean_far_spectrum);
byspectrum = GetBinarySpectrum(self->near_spectrum_32, self->mean_near_spectrum);
// Insert new binary spectrum
self->binary_far_history[0] = bxspectrum;
// Compare with delayed spectra
BitCountComparison(byspectrum,
self->binary_far_history,
self->history_size,
self->bit_counts);
// Smooth bit count curve
for (i = 0; i < self->history_size; i++)
{
// Update sum
// |bit_counts| is constrained to [0, 32], meaning we can smooth with a
// factor up to 2^26. We use Q9.
dtmp1 = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9); // Q9
MeanEstimator(dtmp1, 9, &(self->mean_bit_counts[i]));
}
// Find minimum position of bit count curve
minpos = WebRtcSpl_MinIndexW32(self->mean_bit_counts, self->history_size);
// If the farend has been active sufficiently long, begin accumulating a
// histogram of the minimum positions. Search for the maximum bin to
// determine the delay.
if (vad_value == 1)
{
if (self->vad_counter >= kVadCountThreshold)
{
// Increment the histogram at the current minimum position.
if (self->delay_histogram[minpos] < kMaxHistogram)
{
self->delay_histogram[minpos] += 3;
}
#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
// Decrement the entire histogram.
// Select the histogram index corresponding to the maximum bin as
// the delay.
self->last_delay = 0;
for (i = 0; i < self->history_size; i++)
{
if (self->delay_histogram[i] > 0)
{
self->delay_histogram[i]--;
}
if (self->delay_histogram[i] > maxHistLvl)
{
maxHistLvl = self->delay_histogram[i];
self->last_delay = i;
}
}
#else
self->last_delay = 0;
for (i = 0; i < self->history_size; i++)
{
WebRtc_Word16 tempVar = self->delay_histogram[i];
// Decrement the entire histogram.
if (tempVar > 0)
{
tempVar--;
self->delay_histogram[i] = tempVar;
// Select the histogram index corresponding to the maximum
// bin as the delay.
if (tempVar > maxHistLvl)
{
maxHistLvl = tempVar;
self->last_delay = i;
}
}
}
#endif
} else
{
self->vad_counter++;
}
} else
{
self->vad_counter = 0;
}
return self->last_delay;
}
const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle,
int far_spectrum_size,
WebRtc_Word16* far_q)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
int buffer_position = 0;
if (self == NULL)
{
return NULL;
}
if (far_spectrum_size != self->spectrum_size)
{
return NULL;
}
// Get buffer position
buffer_position = self->far_history_position - self->last_delay;
if (buffer_position < 0)
{
buffer_position += self->history_size;
}
// Get Q-domain
*far_q = self->far_q_domains[buffer_position];
// Return far end spectrum
return (self->far_history + (buffer_position * self->spectrum_size));
}
int WebRtcAecm_GetLastDelay(void* handle)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
if (self == NULL)
{
return -1;
}
// Return last calculated delay
return self->last_delay;
}

View File

@ -1,112 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_
#include "typedefs.h"
// Releases the memory allocated by WebRtcAecm_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtcAecm_FreeDelayEstimator(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtcAecm_InitDelayEstimator(...) function.
//
// Input:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and near
// end. Used to allocate memory for spectrum specific
// buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
//
// Output:
// - handle : Created instance
//
int WebRtcAecm_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size);
// Initializes the delay estimation instance created with
// WebRtcAecm_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtcAecm_InitDelayEstimator(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Input:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - far_q : The Q-domain of the far end data
// - vad_value : The VAD decision of the current block
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtcAecm_DelayEstimatorProcess(void* handle,
WebRtc_UWord16* far_spectrum,
WebRtc_UWord16* near_spectrum,
int spectrum_size,
WebRtc_Word16 far_q,
WebRtc_Word16 vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtcAecm_DelayEstimatorProcess(...) should
// have been called before WebRtcAecm_GetAlignedFarend(...). Otherwise, you get
// the pointer to the previous frame. The memory is only valid until the next
// call of WebRtcAecm_DelayEstimatorProcess(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - far_spectrum_size : Size of far_spectrum allocated by the caller
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle,
int far_spectrum_size,
WebRtc_Word16* far_q);
// Returns the last calculated delay updated by the function
// WebRtcAecm_DelayEstimatorProcess(...)
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtcAecm_GetLastDelay(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_

View File

@ -1,32 +0,0 @@
% Outputs a file for testing purposes.
%
% Adjust the following parameters to suit. Their purpose becomes more clear on
% viewing the gain plots.
% MaxGain: Max gain in dB
% MinGain: Min gain at overload (0 dBov) in dB
% CompRatio: Compression ratio, essentially determines the slope of the gain
% function between the max and min gains
% Knee: The smoothness of the transition to max gain (smaller is smoother)
MaxGain = 5; MinGain = 0; CompRatio = 3; Knee = 1;
% Compute gains
zeros = 0:31; lvl = 2.^(1-zeros);
A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
B = MaxGain - MinGain;
gains = round(2^16*10.^(0.05 * (MinGain + B * ( log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / log(1/(1+exp(Knee*B))))));
fprintf(1, '\t%i, %i, %i, %i,\n', gains);
% Save gains to file
fid = fopen('gains', 'wb');
if fid == -1
error(sprintf('Unable to open file %s', filename));
return
end
fwrite(fid, gains, 'int32');
fclose(fid);
% Plotting
in = 10*log10(lvl); out = 20*log10(gains/65536);
subplot(121); plot(in, out); axis([-60, 0, -5, 30]); grid on; xlabel('Input (dB)'); ylabel('Gain (dB)');
subplot(122); plot(in, in+out); axis([-60, 0, -60, 10]); grid on; xlabel('Input (dB)'); ylabel('Output (dB)');
zoom on;

View File

@ -27,6 +27,7 @@
'audio_processing',
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
'<(webrtc_root)/../test/test.gyp:test_support',
'<(webrtc_root)/../testing/gtest.gyp:gtest',
'<(webrtc_root)/../testing/gtest.gyp:gtest_main',
'<(webrtc_root)/../third_party/protobuf/protobuf.gyp:protobuf_lite',

View File

@ -322,6 +322,16 @@ class EchoCancellation {
// TODO(ajm): discuss the metrics update period.
virtual int GetMetrics(Metrics* metrics) = 0;
// Enables computation and logging of delay values. Statistics are obtained
// through |GetDelayMetrics()|.
virtual int enable_delay_logging(bool enable) = 0;
virtual bool is_delay_logging_enabled() const = 0;
// The delay metrics consists of the delay |median| and the delay standard
// deviation |std|. The values are averaged over the time period since the
// last call to |GetDelayMetrics()|.
virtual int GetDelayMetrics(int* median, int* std) = 0;
protected:
virtual ~EchoCancellation() {};
};
@ -486,6 +496,7 @@ class HighPassFilter {
};
// An estimation component used to retrieve level metrics.
// NOTE: currently unavailable. All methods return errors.
class LevelEstimator {
public:
virtual int Enable(bool enable) = 0;
@ -539,6 +550,10 @@ class NoiseSuppression {
// The voice activity detection (VAD) component analyzes the stream to
// determine if voice is present. A facility is also provided to pass in an
// external VAD decision.
//
// In addition to |stream_has_voice()| the VAD decision is provided through the
// |AudioFrame| passed to |ProcessStream()|. The |_vadActivity| member will be
// modified to reflect the current decision.
class VoiceDetection {
public:
virtual int Enable(bool enable) = 0;

View File

@ -10,8 +10,6 @@
#include "audio_buffer.h"
#include "module_common_types.h"
namespace webrtc {
namespace {
@ -64,21 +62,22 @@ struct SplitAudioChannel {
WebRtc_Word32 synthesis_filter_state2[6];
};
// TODO(am): check range of input parameters?
AudioBuffer::AudioBuffer(WebRtc_Word32 max_num_channels,
WebRtc_Word32 samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
reference_copied_(false),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
// TODO(andrew): check range of input parameters?
AudioBuffer::AudioBuffer(int max_num_channels,
int samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
if (max_num_channels_ > 1) {
channels_ = new AudioChannel[max_num_channels_];
mixed_low_pass_channels_ = new AudioChannel[max_num_channels_];
@ -109,7 +108,7 @@ AudioBuffer::~AudioBuffer() {
}
}
WebRtc_Word16* AudioBuffer::data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (data_ != NULL) {
return data_;
@ -118,7 +117,7 @@ WebRtc_Word16* AudioBuffer::data(WebRtc_Word32 channel) const {
return channels_[channel].data;
}
WebRtc_Word16* AudioBuffer::low_pass_split_data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::low_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return data(channel);
@ -127,7 +126,7 @@ WebRtc_Word16* AudioBuffer::low_pass_split_data(WebRtc_Word32 channel) const {
return split_channels_[channel].low_pass_data;
}
WebRtc_Word16* AudioBuffer::high_pass_split_data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::high_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return NULL;
@ -136,13 +135,13 @@ WebRtc_Word16* AudioBuffer::high_pass_split_data(WebRtc_Word32 channel) const {
return split_channels_[channel].high_pass_data;
}
WebRtc_Word16* AudioBuffer::mixed_low_pass_data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::mixed_low_pass_data(int channel) const {
assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
return mixed_low_pass_channels_[channel].data;
}
WebRtc_Word16* AudioBuffer::low_pass_reference(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::low_pass_reference(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (!reference_copied_) {
return NULL;
@ -151,58 +150,67 @@ WebRtc_Word16* AudioBuffer::low_pass_reference(WebRtc_Word32 channel) const {
return low_pass_reference_channels_[channel].data;
}
WebRtc_Word32* AudioBuffer::analysis_filter_state1(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::analysis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state1;
}
WebRtc_Word32* AudioBuffer::analysis_filter_state2(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::analysis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state2;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state1(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::synthesis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state1;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state2(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::synthesis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state2;
}
WebRtc_Word32 AudioBuffer::num_channels() const {
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
activity_ = activity;
}
AudioFrame::VADActivity AudioBuffer::activity() {
return activity_;
}
int AudioBuffer::num_channels() const {
return num_channels_;
}
WebRtc_Word32 AudioBuffer::samples_per_channel() const {
int AudioBuffer::samples_per_channel() const {
return samples_per_channel_;
}
WebRtc_Word32 AudioBuffer::samples_per_split_channel() const {
int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_;
}
// TODO(ajm): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* audioFrame) {
assert(audioFrame->_audioChannel <= max_num_channels_);
assert(audioFrame->_payloadDataLengthInSamples == samples_per_channel_);
// TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(frame->_audioChannel <= max_num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
num_channels_ = audioFrame->_audioChannel;
num_channels_ = frame->_audioChannel;
num_mixed_channels_ = 0;
num_mixed_low_pass_channels_ = 0;
reference_copied_ = false;
activity_ = frame->_vadActivity;
if (num_channels_ == 1) {
// We can get away with a pointer assignment in this case.
data_ = audioFrame->_payloadData;
data_ = frame->_payloadData;
return;
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
WebRtc_Word16* interleaved = audioFrame->_payloadData;
WebRtc_Word32 interleaved_idx = i;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
deinterleaved[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels_;
@ -210,27 +218,28 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* audioFrame) {
}
}
void AudioBuffer::InterleaveTo(AudioFrame* audioFrame) const {
assert(audioFrame->_audioChannel == num_channels_);
assert(audioFrame->_payloadDataLengthInSamples == samples_per_channel_);
void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
assert(frame->_audioChannel == num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
frame->_vadActivity = activity_;
if (num_channels_ == 1) {
if (num_mixed_channels_ == 1) {
memcpy(audioFrame->_payloadData,
memcpy(frame->_payloadData,
channels_[0].data,
sizeof(WebRtc_Word16) * samples_per_channel_);
} else {
// These should point to the same buffer in this case.
assert(data_ == audioFrame->_payloadData);
assert(data_ == frame->_payloadData);
}
return;
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
WebRtc_Word16* interleaved = audioFrame->_payloadData;
WebRtc_Word32 interleaved_idx = i;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
interleaved_idx += num_channels_;
@ -238,9 +247,10 @@ void AudioBuffer::InterleaveTo(AudioFrame* audioFrame) const {
}
}
// TODO(ajm): would be good to support the no-mix case with pointer assignment.
// TODO(ajm): handle mixing to multiple channels?
void AudioBuffer::Mix(WebRtc_Word32 num_mixed_channels) {
// TODO(andrew): would be good to support the no-mix case with pointer
// assignment.
// TODO(andrew): handle mixing to multiple channels?
void AudioBuffer::Mix(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
@ -254,7 +264,7 @@ void AudioBuffer::Mix(WebRtc_Word32 num_mixed_channels) {
num_mixed_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyAndMixLowPass(WebRtc_Word32 num_mixed_channels) {
void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);

View File

@ -11,55 +11,58 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#include "module_common_types.h"
#include "typedefs.h"
namespace webrtc {
struct AudioChannel;
struct SplitAudioChannel;
class AudioFrame;
class AudioBuffer {
public:
AudioBuffer(WebRtc_Word32 max_num_channels, WebRtc_Word32 samples_per_channel);
AudioBuffer(int max_num_channels, int samples_per_channel);
virtual ~AudioBuffer();
WebRtc_Word32 num_channels() const;
WebRtc_Word32 samples_per_channel() const;
WebRtc_Word32 samples_per_split_channel() const;
int num_channels() const;
int samples_per_channel() const;
int samples_per_split_channel() const;
WebRtc_Word16* data(WebRtc_Word32 channel) const;
WebRtc_Word16* low_pass_split_data(WebRtc_Word32 channel) const;
WebRtc_Word16* high_pass_split_data(WebRtc_Word32 channel) const;
WebRtc_Word16* mixed_low_pass_data(WebRtc_Word32 channel) const;
WebRtc_Word16* low_pass_reference(WebRtc_Word32 channel) const;
WebRtc_Word16* data(int channel) const;
WebRtc_Word16* low_pass_split_data(int channel) const;
WebRtc_Word16* high_pass_split_data(int channel) const;
WebRtc_Word16* mixed_low_pass_data(int channel) const;
WebRtc_Word16* low_pass_reference(int channel) const;
WebRtc_Word32* analysis_filter_state1(WebRtc_Word32 channel) const;
WebRtc_Word32* analysis_filter_state2(WebRtc_Word32 channel) const;
WebRtc_Word32* synthesis_filter_state1(WebRtc_Word32 channel) const;
WebRtc_Word32* synthesis_filter_state2(WebRtc_Word32 channel) const;
WebRtc_Word32* analysis_filter_state1(int channel) const;
WebRtc_Word32* analysis_filter_state2(int channel) const;
WebRtc_Word32* synthesis_filter_state1(int channel) const;
WebRtc_Word32* synthesis_filter_state2(int channel) const;
void set_activity(AudioFrame::VADActivity activity);
AudioFrame::VADActivity activity();
void DeinterleaveFrom(AudioFrame* audioFrame);
void InterleaveTo(AudioFrame* audioFrame) const;
void Mix(WebRtc_Word32 num_mixed_channels);
void CopyAndMixLowPass(WebRtc_Word32 num_mixed_channels);
void Mix(int num_mixed_channels);
void CopyAndMixLowPass(int num_mixed_channels);
void CopyLowPassToReference();
private:
const WebRtc_Word32 max_num_channels_;
WebRtc_Word32 num_channels_;
WebRtc_Word32 num_mixed_channels_;
WebRtc_Word32 num_mixed_low_pass_channels_;
const WebRtc_Word32 samples_per_channel_;
WebRtc_Word32 samples_per_split_channel_;
const int max_num_channels_;
int num_channels_;
int num_mixed_channels_;
int num_mixed_low_pass_channels_;
const int samples_per_channel_;
int samples_per_split_channel_;
bool reference_copied_;
AudioFrame::VADActivity activity_;
WebRtc_Word16* data_;
// TODO(ajm): Prefer to make these vectors if permitted...
// TODO(andrew): use vectors here.
AudioChannel* channels_;
SplitAudioChannel* split_channels_;
// TODO(ajm): improve this, we don't need the full 32 kHz space here.
// TODO(andrew): improve this, we don't need the full 32 kHz space here.
AudioChannel* mixed_low_pass_channels_;
AudioChannel* low_pass_reference_channels_;
};

View File

@ -66,7 +66,8 @@ EchoCancellationImpl::EchoCancellationImpl(const AudioProcessingImpl* apm)
device_sample_rate_hz_(48000),
stream_drift_samples_(0),
was_stream_drift_set_(false),
stream_has_echo_(false) {}
stream_has_echo_(false),
delay_logging_enabled_(false) {}
EchoCancellationImpl::~EchoCancellationImpl() {}
@ -283,6 +284,39 @@ bool EchoCancellationImpl::stream_has_echo() const {
return stream_has_echo_;
}
int EchoCancellationImpl::enable_delay_logging(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
delay_logging_enabled_ = enable;
return Configure();
}
bool EchoCancellationImpl::is_delay_logging_enabled() const {
return delay_logging_enabled_;
}
// TODO(bjornv): How should we handle the multi-channel case?
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
CriticalSectionScoped crit_scoped(*apm_->crit());
if (median == NULL) {
return apm_->kNullPointerError;
}
if (std == NULL) {
return apm_->kNullPointerError;
}
if (!is_component_enabled() || !delay_logging_enabled_) {
return apm_->kNotEnabledError;
}
Handle* my_handle = static_cast<Handle*>(handle(0));
if (WebRtcAec_GetDelayMetrics(my_handle, median, std) !=
apm_->kNoError) {
return GetHandleError(my_handle);
}
return apm_->kNoError;
}
int EchoCancellationImpl::Initialize() {
int err = ProcessingComponent::Initialize();
if (err != apm_->kNoError || !is_component_enabled()) {
@ -332,6 +366,7 @@ int EchoCancellationImpl::ConfigureHandle(void* handle) const {
config.metricsMode = metrics_enabled_;
config.nlpMode = MapSetting(suppression_level_);
config.skewMode = drift_compensation_enabled_;
config.delay_logging = delay_logging_enabled_;
return WebRtcAec_set_config(static_cast<Handle*>(handle), config);
}

View File

@ -49,6 +49,9 @@ class EchoCancellationImpl : public EchoCancellation,
virtual bool are_metrics_enabled() const;
virtual bool stream_has_echo() const;
virtual int GetMetrics(Metrics* metrics);
virtual int enable_delay_logging(bool enable);
virtual bool is_delay_logging_enabled() const;
virtual int GetDelayMetrics(int* median, int* std);
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
@ -66,6 +69,7 @@ class EchoCancellationImpl : public EchoCancellation,
int stream_drift_samples_;
bool was_stream_drift_set_;
bool stream_has_echo_;
bool delay_logging_enabled_;
};
} // namespace webrtc

View File

@ -74,16 +74,16 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
// TODO(ajm): concatenate data in frame buffer here.
int vad_ret_val;
vad_ret_val = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
apm_->split_sample_rate_hz(),
mixed_data,
frame_size_samples_);
if (vad_ret_val == 0) {
int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
apm_->split_sample_rate_hz(),
mixed_data,
frame_size_samples_);
if (vad_ret == 0) {
stream_has_voice_ = false;
} else if (vad_ret_val == 1) {
audio->set_activity(AudioFrame::kVadPassive);
} else if (vad_ret == 1) {
stream_has_voice_ = true;
audio->set_activity(AudioFrame::kVadActive);
} else {
return apm_->kUnspecifiedError;
}

View File

@ -28,6 +28,7 @@
using webrtc::AudioFrame;
using webrtc::AudioProcessing;
using webrtc::EchoCancellation;
using webrtc::GainControl;
using webrtc::NoiseSuppression;
using webrtc::TickInterval;
@ -61,6 +62,12 @@ bool ReadMessageFromFile(FILE* file,
return msg->ParseFromArray(array, usize);
}
void PrintStat(const AudioProcessing::Statistic& stat) {
printf("%d, %d, %d\n", stat.average,
stat.maximum,
stat.minimum);
}
void usage() {
printf(
"Usage: process_test [options] [-pb PROTOBUF_FILE]\n"
@ -86,6 +93,8 @@ void usage() {
printf("\n -aec Echo cancellation\n");
printf(" --drift_compensation\n");
printf(" --no_drift_compensation\n");
printf(" --no_echo_metrics\n");
printf(" --no_delay_logging\n");
printf("\n -aecm Echo control mobile\n");
printf(" --aecm_echo_path_in_file FILE\n");
printf(" --aecm_echo_path_out_file FILE\n");
@ -107,6 +116,7 @@ void usage() {
printf(" --vad_out_file FILE\n");
printf("\n");
printf("Modifiers:\n");
printf(" --noasm Disable SSE optimization.\n");
printf(" --perf Measure performance.\n");
printf(" --quiet Suppress text output.\n");
printf(" --no_progress Suppress progress.\n");
@ -156,7 +166,7 @@ void void_main(int argc, char* argv[]) {
//bool interleaved = true;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-pb") == 0) {
if (strcmp(argv[i], "-pb") == 0) {
i++;
ASSERT_LT(i, argc) << "Specify protobuf filename after -pb";
pb_filename = argv[i];
@ -208,9 +218,10 @@ void void_main(int argc, char* argv[]) {
} else if (strcmp(argv[i], "-aec") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
} else if (strcmp(argv[i], "-noasm") == 0) {
WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM;
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_metrics(true));
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_delay_logging(true));
} else if (strcmp(argv[i], "--drift_compensation") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
@ -223,6 +234,16 @@ void void_main(int argc, char* argv[]) {
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_drift_compensation(false));
} else if (strcmp(argv[i], "--no_echo_metrics") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_metrics(false));
} else if (strcmp(argv[i], "--no_delay_logging") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_delay_logging(false));
} else if (strcmp(argv[i], "-aecm") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true));
@ -316,6 +337,11 @@ void void_main(int argc, char* argv[]) {
ASSERT_LT(i, argc) << "Specify filename after --vad_out_file";
vad_out_filename = argv[i];
} else if (strcmp(argv[i], "--noasm") == 0) {
WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM;
// We need to reinitialize here if components have already been enabled.
ASSERT_EQ(apm->kNoError, apm->Initialize());
} else if (strcmp(argv[i], "--perf") == 0) {
perf_testing = true;
@ -460,13 +486,6 @@ void void_main(int argc, char* argv[]) {
<< aecm_echo_path_out_filename;
}
enum Events {
kInitializeEvent,
kRenderEvent,
kCaptureEvent,
kResetEventDeprecated
};
int16_t event = 0;
size_t read_count = 0;
int reverse_count = 0;
int primary_count = 0;
@ -642,9 +661,15 @@ void void_main(int argc, char* argv[]) {
}
ASSERT_TRUE(feof(pb_file));
printf("100%% complete\r");
} else {
enum Events {
kInitializeEvent,
kRenderEvent,
kCaptureEvent,
kResetEventDeprecated
};
int16_t event = 0;
while (simulating || feof(event_file) == 0) {
std::ostringstream trace_stream;
trace_stream << "Processed frames: " << reverse_count << " (reverse), "
@ -708,6 +733,10 @@ void void_main(int argc, char* argv[]) {
if (simulating) {
if (read_count != far_frame._payloadDataLengthInSamples) {
// Read an equal amount from the near file to avoid errors due to
// not reaching end-of-file.
EXPECT_EQ(0, fseek(near_file, read_count * sizeof(WebRtc_Word16),
SEEK_CUR));
break; // This is expected.
}
} else {
@ -828,6 +857,7 @@ void void_main(int argc, char* argv[]) {
}
}
}
printf("100%% complete\r");
if (aecm_echo_path_out_file != NULL) {
const size_t path_size =
@ -845,6 +875,27 @@ void void_main(int argc, char* argv[]) {
if (verbose) {
printf("\nProcessed frames: %d (primary), %d (reverse)\n",
primary_count, reverse_count);
if (apm->echo_cancellation()->are_metrics_enabled()) {
EchoCancellation::Metrics metrics;
apm->echo_cancellation()->GetMetrics(&metrics);
printf("\n--Echo metrics--\n");
printf("(avg, max, min)\n");
printf("ERL: ");
PrintStat(metrics.echo_return_loss);
printf("ERLE: ");
PrintStat(metrics.echo_return_loss_enhancement);
printf("ANLP: ");
PrintStat(metrics.a_nlp);
}
if (apm->echo_cancellation()->is_delay_logging_enabled()) {
int median = 0;
int std = 0;
apm->echo_cancellation()->GetDelayMetrics(&median, &std);
printf("\n--Delay metrics--\n");
printf("Median: %3d\n", median);
printf("Standard deviation: %3d\n", std);
}
}
if (!pb_file) {

View File

@ -10,12 +10,13 @@
#include <stdio.h>
#include <gtest/gtest.h>
#include "gtest/gtest.h"
#include "audio_processing.h"
#include "event_wrapper.h"
#include "module_common_types.h"
#include "signal_processing_library.h"
#include "testsupport/fileutils.h"
#include "thread_wrapper.h"
#include "trace.h"
#ifdef WEBRTC_ANDROID
@ -42,12 +43,6 @@ namespace {
// be set to true with the command-line switch --write_output_data.
bool write_output_data = false;
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
const char kOutputFileName[] = "output_data_fixed.pb";
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
const char kOutputFileName[] = "output_data_float.pb";
#endif
class ApmEnvironment : public ::testing::Environment {
public:
virtual void SetUp() {
@ -65,7 +60,9 @@ class ApmTest : public ::testing::Test {
ApmTest();
virtual void SetUp();
virtual void TearDown();
// Path to where the resource files to be used for this test are located.
const std::string kResourcePath;
const std::string kOutputFileName;
webrtc::AudioProcessing* apm_;
webrtc::AudioFrame* frame_;
webrtc::AudioFrame* revframe_;
@ -74,7 +71,14 @@ class ApmTest : public ::testing::Test {
};
ApmTest::ApmTest()
: apm_(NULL),
: kResourcePath(webrtc::test::GetProjectRootPath() +
"test/data/audio_processing/"),
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
kOutputFileName(kResourcePath + "output_data_fixed.pb"),
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
kOutputFileName(kResourcePath + "output_data_float.pb"),
#endif
apm_(NULL),
frame_(NULL),
revframe_(NULL),
far_file_(NULL),
@ -98,10 +102,14 @@ void ApmTest::SetUp() {
revframe_->_audioChannel = 2;
revframe_->_frequencyInHz = 32000;
far_file_ = fopen("aec_far.pcm", "rb");
ASSERT_TRUE(far_file_ != NULL) << "Could not open input file aec_far.pcm\n";
near_file_ = fopen("aec_near.pcm", "rb");
ASSERT_TRUE(near_file_ != NULL) << "Could not open input file aec_near.pcm\n";
std::string input_filename = kResourcePath + "aec_far.pcm";
far_file_ = fopen(input_filename.c_str(), "rb");
ASSERT_TRUE(far_file_ != NULL) << "Could not open input file " <<
input_filename << "\n";
input_filename = kResourcePath + "aec_near.pcm";
near_file_ = fopen(input_filename.c_str(), "rb");
ASSERT_TRUE(near_file_ != NULL) << "Could not open input file " <<
input_filename << "\n";
}
void ApmTest::TearDown() {
@ -177,11 +185,9 @@ void WriteStatsMessage(const AudioProcessing::Statistic& output,
message->set_minimum(output.minimum);
}
void WriteMessageLiteToFile(const char* filename,
void WriteMessageLiteToFile(const std::string filename,
const ::google::protobuf::MessageLite& message) {
assert(filename != NULL);
FILE* file = fopen(filename, "wb");
FILE* file = fopen(filename.c_str(), "wb");
ASSERT_TRUE(file != NULL) << "Could not open " << filename;
int size = message.ByteSize();
ASSERT_GT(size, 0);
@ -196,12 +202,11 @@ void WriteMessageLiteToFile(const char* filename,
fclose(file);
}
void ReadMessageLiteFromFile(const char* filename,
void ReadMessageLiteFromFile(const std::string filename,
::google::protobuf::MessageLite* message) {
assert(filename != NULL);
assert(message != NULL);
FILE* file = fopen(filename, "rb");
FILE* file = fopen(filename.c_str(), "rb");
ASSERT_TRUE(file != NULL) << "Could not open " << filename;
int size = 0;
ASSERT_EQ(1u, fread(&size, sizeof(int), 1, file));
@ -457,6 +462,8 @@ TEST_F(ApmTest, Process) {
apm_->echo_cancellation()->enable_drift_compensation(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_metrics(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(true));
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
EXPECT_EQ(apm_->kNoError,
@ -555,6 +562,7 @@ TEST_F(ApmTest, Process) {
&temp_data[0],
sizeof(WebRtc_Word16) * read_count);
}
frame_->_vadActivity = AudioFrame::kVadUnknown;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
@ -571,6 +579,9 @@ TEST_F(ApmTest, Process) {
}
if (apm_->voice_detection()->stream_has_voice()) {
has_voice_count++;
EXPECT_EQ(AudioFrame::kVadActive, frame_->_vadActivity);
} else {
EXPECT_EQ(AudioFrame::kVadPassive, frame_->_vadActivity);
}
frame_count++;
@ -587,6 +598,10 @@ TEST_F(ApmTest, Process) {
EchoCancellation::Metrics echo_metrics;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetMetrics(&echo_metrics));
int median = 0;
int std = 0;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
#endif
if (!write_output_data) {
@ -608,6 +623,11 @@ TEST_F(ApmTest, Process) {
reference.echo_return_loss_enhancement());
TestStats(echo_metrics.a_nlp,
reference.a_nlp());
webrtc::audioproc::Test::DelayMetrics reference_delay =
test->delay_metrics();
EXPECT_EQ(median, reference_delay.median());
EXPECT_EQ(std, reference_delay.std());
#endif
} else {
test->set_has_echo_count(has_echo_count);
@ -628,6 +648,11 @@ TEST_F(ApmTest, Process) {
message->mutable_echo_return_loss_enhancement());
WriteStatsMessage(echo_metrics.a_nlp,
message->mutable_a_nlp());
webrtc::audioproc::Test::DelayMetrics* message_delay =
test->mutable_delay_metrics();
message_delay->set_median(median);
message_delay->set_std(std);
#endif
}
@ -692,6 +717,18 @@ TEST_F(ApmTest, EchoCancellation) {
apm_->echo_cancellation()->enable_metrics(false));
EXPECT_FALSE(apm_->echo_cancellation()->are_metrics_enabled());
int median = 0;
int std = 0;
EXPECT_EQ(apm_->kNotEnabledError,
apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(true));
EXPECT_TRUE(apm_->echo_cancellation()->is_delay_logging_enabled());
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(false));
EXPECT_FALSE(apm_->echo_cancellation()->is_delay_logging_enabled());
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
EXPECT_TRUE(apm_->echo_cancellation()->is_enabled());
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false));
@ -966,27 +1003,27 @@ TEST_F(ApmTest, VoiceDetection) {
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
EXPECT_FALSE(apm_->voice_detection()->is_enabled());
// Test that AudioFrame activity is maintained when VAD is disabled.
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
AudioFrame::VADActivity activity[] = {
AudioFrame::kVadActive,
AudioFrame::kVadPassive,
AudioFrame::kVadUnknown
};
for (size_t i = 0; i < sizeof(activity)/sizeof(*activity); i++) {
frame_->_vadActivity = activity[i];
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(activity[i], frame_->_vadActivity);
}
// Test that AudioFrame activity is set when VAD is enabled.
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
frame_->_vadActivity = AudioFrame::kVadUnknown;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_NE(AudioFrame::kVadUnknown, frame_->_vadActivity);
// TODO(bjornv): Add tests for streamed voice; stream_has_voice()
}
// Below are some ideas for tests from VPM.
/*TEST_F(VideoProcessingModuleTest, GetVersionTest)
{
}
TEST_F(VideoProcessingModuleTest, HandleNullBuffer)
{
}
TEST_F(VideoProcessingModuleTest, HandleBadSize)
{
}
TEST_F(VideoProcessingModuleTest, IdenticalResultsAfterReset)
{
}
*/
} // namespace
int main(int argc, char** argv) {

View File

@ -35,6 +35,13 @@ message Test {
}
optional EchoMetrics echo_metrics = 11;
message DelayMetrics {
optional int32 median = 1;
optional int32 std = 2;
}
optional DelayMetrics delay_metrics = 12;
}
message OutputData {

View File

@ -30,7 +30,7 @@ extern "C" {
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNs_get_version(char *version, short length);
int WebRtcNs_get_version(char* version, short length);
/*
@ -46,7 +46,7 @@ int WebRtcNs_get_version(char *version, short length);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Create(NsHandle **NS_inst);
int WebRtcNs_Create(NsHandle** NS_inst);
/*
@ -59,7 +59,7 @@ int WebRtcNs_Create(NsHandle **NS_inst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Free(NsHandle *NS_inst);
int WebRtcNs_Free(NsHandle* NS_inst);
/*
@ -75,7 +75,7 @@ int WebRtcNs_Free(NsHandle *NS_inst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs);
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
@ -90,7 +90,7 @@ int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy(NsHandle *NS_inst, int mode);
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
/*
@ -111,11 +111,11 @@ int WebRtcNs_set_policy(NsHandle *NS_inst, int mode);
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNs_Process(NsHandle *NS_inst,
short *spframe,
short *spframe_H,
short *outframe,
short *outframe_H);
int WebRtcNs_Process(NsHandle* NS_inst,
short* spframe,
short* spframe_H,
short* outframe,
short* outframe_H);
#ifdef __cplusplus
}

View File

@ -11,7 +11,7 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#include "signal_processing_library.h"
#include "typedefs.h"
typedef struct NsxHandleT NsxHandle;
@ -30,7 +30,7 @@ extern "C" {
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNsx_get_version(char *version, short length);
int WebRtcNsx_get_version(char* version, short length);
/*
@ -46,7 +46,7 @@ int WebRtcNsx_get_version(char *version, short length);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Create(NsxHandle **nsxInst);
int WebRtcNsx_Create(NsxHandle** nsxInst);
/*
@ -59,7 +59,7 @@ int WebRtcNsx_Create(NsxHandle **nsxInst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Free(NsxHandle *nsxInst);
int WebRtcNsx_Free(NsxHandle* nsxInst);
/*
@ -75,7 +75,7 @@ int WebRtcNsx_Free(NsxHandle *nsxInst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs);
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
@ -90,7 +90,7 @@ int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode);
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
/*
* This functions does noise suppression for the inserted speech frame. The
@ -110,11 +110,11 @@ int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode);
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNsx_Process(NsxHandle *nsxInst,
short *speechFrame,
short *speechFrameHB,
short *outFrame,
short *outFrameHB);
int WebRtcNsx_Process(NsxHandle* nsxInst,
short* speechFrame,
short* speechFrameHB,
short* outFrame,
short* outFrameHB);
#ifdef __cplusplus
}

View File

@ -15,55 +15,51 @@
#include "ns_core.h"
#include "defines.h"
int WebRtcNs_get_version(char *versionStr, short length)
{
const char version[] = "NS 2.2.0";
const short versionLen = (short)strlen(version) + 1; // +1 for null-termination
int WebRtcNs_get_version(char* versionStr, short length) {
const char version[] = "NS 2.2.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL) {
return -1;
}
if (versionStr == NULL) {
return -1;
}
if (versionLen > length) {
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
strncpy(versionStr, version, versionLen);
return 0;
}
int WebRtcNs_Create(NsHandle** NS_inst) {
*NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
if (*NS_inst != NULL) {
(*(NSinst_t**)NS_inst)->initFlag = 0;
return 0;
}
int WebRtcNs_Create(NsHandle **NS_inst)
{
*NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
if (*NS_inst!=NULL) {
(*(NSinst_t**)NS_inst)->initFlag=0;
return 0;
} else {
return -1;
}
} else {
return -1;
}
}
int WebRtcNs_Free(NsHandle *NS_inst)
{
free(NS_inst);
return 0;
int WebRtcNs_Free(NsHandle* NS_inst) {
free(NS_inst);
return 0;
}
int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs)
{
return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
}
int WebRtcNs_set_policy(NsHandle *NS_inst, int mode)
{
return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
}
int WebRtcNs_Process(NsHandle *NS_inst, short *spframe, short *spframe_H, short *outframe, short *outframe_H)
{
return WebRtcNs_ProcessCore((NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
short* outframe, short* outframe_H) {
return WebRtcNs_ProcessCore(
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
}

View File

@ -15,60 +15,51 @@
#include "nsx_core.h"
#include "nsx_defines.h"
int WebRtcNsx_get_version(char *versionStr, short length)
{
const char version[] = "NS\t3.1.0";
const short versionLen = (short)strlen(version) + 1; // +1 for null-termination
int WebRtcNsx_get_version(char* versionStr, short length) {
const char version[] = "NS\t3.1.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL)
{
return -1;
}
if (versionStr == NULL) {
return -1;
}
if (versionLen > length)
{
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
strncpy(versionStr, version, versionLen);
return 0;
}
int WebRtcNsx_Create(NsxHandle** nsxInst) {
*nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t));
if (*nsxInst != NULL) {
(*(NsxInst_t**)nsxInst)->initFlag = 0;
return 0;
}
int WebRtcNsx_Create(NsxHandle **nsxInst)
{
*nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t));
if (*nsxInst != NULL)
{
(*(NsxInst_t**)nsxInst)->initFlag = 0;
return 0;
} else
{
return -1;
}
} else {
return -1;
}
}
int WebRtcNsx_Free(NsxHandle *nsxInst)
{
free(nsxInst);
return 0;
int WebRtcNsx_Free(NsxHandle* nsxInst) {
free(nsxInst);
return 0;
}
int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs)
{
return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs);
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs) {
return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs);
}
int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode)
{
return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
}
int WebRtcNsx_Process(NsxHandle *nsxInst, short *speechFrame, short *speechFrameHB,
short *outFrame, short *outFrameHB)
{
return WebRtcNsx_ProcessCore((NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame,
outFrameHB);
int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
short* speechFrameHB, short* outFrame,
short* outFrameHB) {
return WebRtcNsx_ProcessCore(
(NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
}

File diff suppressed because it is too large Load Diff

View File

@ -15,95 +15,95 @@
typedef struct NSParaExtract_t_ {
//bin size of histogram
float binSizeLrt;
float binSizeSpecFlat;
float binSizeSpecDiff;
//range of histogram over which lrt threshold is computed
float rangeAvgHistLrt;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
//thresholds for prior model
float factor1ModelPars; //for lrt and spectral difference
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
//peak limit for spectral flatness (varies between 0 and 1)
float thresPosSpecFlat;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
float limitPeakSpacingSpecFlat;
float limitPeakSpacingSpecDiff;
//limit on relevance of second peak:
float limitPeakWeightsSpecFlat;
float limitPeakWeightsSpecDiff;
//limit on fluctuation of lrt feature
float thresFluctLrt;
//limit on the max and min values for the feature thresholds
float maxLrt;
float minLrt;
float maxSpecFlat;
float minSpecFlat;
float maxSpecDiff;
float minSpecDiff;
//criteria of weight of histogram peak to accept/reject feature
int thresWeightSpecFlat;
int thresWeightSpecDiff;
//bin size of histogram
float binSizeLrt;
float binSizeSpecFlat;
float binSizeSpecDiff;
//range of histogram over which lrt threshold is computed
float rangeAvgHistLrt;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
//thresholds for prior model
float factor1ModelPars; //for lrt and spectral difference
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
//peak limit for spectral flatness (varies between 0 and 1)
float thresPosSpecFlat;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
float limitPeakSpacingSpecFlat;
float limitPeakSpacingSpecDiff;
//limit on relevance of second peak:
float limitPeakWeightsSpecFlat;
float limitPeakWeightsSpecDiff;
//limit on fluctuation of lrt feature
float thresFluctLrt;
//limit on the max and min values for the feature thresholds
float maxLrt;
float minLrt;
float maxSpecFlat;
float minSpecFlat;
float maxSpecDiff;
float minSpecDiff;
//criteria of weight of histogram peak to accept/reject feature
int thresWeightSpecFlat;
int thresWeightSpecDiff;
} NSParaExtract_t;
typedef struct NSinst_t_ {
WebRtc_UWord32 fs;
int blockLen;
int blockLen10ms;
int windShift;
int outLen;
int anaLen;
int magnLen;
int aggrMode;
const float* window;
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
float outBuf[3 * BLOCKL_MAX];
WebRtc_UWord32 fs;
int blockLen;
int blockLen10ms;
int windShift;
int outLen;
int anaLen;
int magnLen;
int aggrMode;
const float* window;
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
float outBuf[3 * BLOCKL_MAX];
int initFlag;
// parameters for quantile noise estimation
float density[SIMULT * HALF_ANAL_BLOCKL];
float lquantile[SIMULT * HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// parameters for Wiener filter
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// fft work arrays.
int ip[IP_LENGTH];
float wfft[W_LENGTH];
int initFlag;
// parameters for quantile noise estimation
float density[SIMULT* HALF_ANAL_BLOCKL];
float lquantile[SIMULT* HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// parameters for Wiener filter
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// fft work arrays.
int ip[IP_LENGTH];
float wfft[W_LENGTH];
// parameters for new method: some not needed, will reduce/cleanup later
WebRtc_Word32 blockInd; //frame index counter
int modelUpdatePars[4]; //parameters for updating or estimating
// thresholds/weights for prior model
float priorModelPars[7]; //parameters for prior model
float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
float priorSpeechProb; //prior speech/noise probability
float featureData[7]; //data for features
float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
float signalEnergy; //energy of magn
float sumMagn; //sum of magn
float whiteNoiseLevel; //initial noise estimate
float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
float pinkNoiseNumerator; //pink noise parameter: numerator
float pinkNoiseExp; //pink noise parameter: power of freq
NSParaExtract_t featureExtractionParams; //parameters for feature extraction
//histograms for parameter estimation
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB
// parameters for new method: some not needed, will reduce/cleanup later
WebRtc_Word32 blockInd; //frame index counter
int modelUpdatePars[4]; //parameters for updating or estimating
// thresholds/weights for prior model
float priorModelPars[7]; //parameters for prior model
float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
float priorSpeechProb; //prior speech/noise probability
float featureData[7]; //data for features
float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
float signalEnergy; //energy of magn
float sumMagn; //sum of magn
float whiteNoiseLevel; //initial noise estimate
float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
float pinkNoiseNumerator; //pink noise parameter: numerator
float pinkNoiseExp; //pink noise parameter: power of freq
NSParaExtract_t featureExtractionParams; //parameters for feature extraction
//histograms for parameter estimation
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB
} NSinst_t;
@ -127,7 +127,7 @@ extern "C" {
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_InitCore(NSinst_t *inst, WebRtc_UWord32 fs);
int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs);
/****************************************************************************
* WebRtcNs_set_policy_core(...)
@ -144,7 +144,7 @@ int WebRtcNs_InitCore(NSinst_t *inst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy_core(NSinst_t *inst, int mode);
int WebRtcNs_set_policy_core(NSinst_t* inst, int mode);
/****************************************************************************
* WebRtcNs_ProcessCore
@ -166,11 +166,11 @@ int WebRtcNs_set_policy_core(NSinst_t *inst, int mode);
*/
int WebRtcNs_ProcessCore(NSinst_t *inst,
short *inFrameLow,
short *inFrameHigh,
short *outFrameLow,
short *outFrameHigh);
int WebRtcNs_ProcessCore(NSinst_t* inst,
short* inFrameLow,
short* inFrameHigh,
short* outFrameLow,
short* outFrameHigh);
#ifdef __cplusplus

File diff suppressed because it is too large Load Diff

View File

@ -20,85 +20,84 @@
#include <stdio.h>
#endif
typedef struct NsxInst_t_
{
WebRtc_UWord32 fs;
typedef struct NsxInst_t_ {
WebRtc_UWord32 fs;
const WebRtc_Word16* window;
WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX];
WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX];
WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL];
WebRtc_UWord16 overdrive; /* Q8 */
WebRtc_UWord16 denoiseBound; /* Q14 */
const WebRtc_Word16* factor2Table;
WebRtc_Word16 noiseEstLogQuantile[SIMULT * HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstDensity[SIMULT * HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstCounter[SIMULT];
WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL];
const WebRtc_Word16* window;
WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX];
WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX];
WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL];
WebRtc_UWord16 overdrive; /* Q8 */
WebRtc_UWord16 denoiseBound; /* Q14 */
const WebRtc_Word16* factor2Table;
WebRtc_Word16 noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstCounter[SIMULT];
WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL];
WebRtc_Word16 anaLen;
int anaLen2;
int magnLen;
int aggrMode;
int stages;
int initFlag;
int gainMap;
WebRtc_Word16 anaLen;
int anaLen2;
int magnLen;
int aggrMode;
int stages;
int initFlag;
int gainMap;
WebRtc_Word32 maxLrt;
WebRtc_Word32 minLrt;
WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8
WebRtc_Word32 featureLogLrt;
WebRtc_Word32 thresholdLogLrt;
WebRtc_Word16 weightLogLrt;
WebRtc_Word32 maxLrt;
WebRtc_Word32 minLrt;
WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8
WebRtc_Word32 featureLogLrt;
WebRtc_Word32 thresholdLogLrt;
WebRtc_Word16 weightLogLrt;
WebRtc_UWord32 featureSpecDiff;
WebRtc_UWord32 thresholdSpecDiff;
WebRtc_Word16 weightSpecDiff;
WebRtc_UWord32 featureSpecDiff;
WebRtc_UWord32 thresholdSpecDiff;
WebRtc_Word16 weightSpecDiff;
WebRtc_UWord32 featureSpecFlat;
WebRtc_UWord32 thresholdSpecFlat;
WebRtc_Word16 weightSpecFlat;
WebRtc_UWord32 featureSpecFlat;
WebRtc_UWord32 thresholdSpecFlat;
WebRtc_Word16 weightSpecFlat;
WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum
WebRtc_UWord32 magnEnergy;
WebRtc_UWord32 sumMagn;
WebRtc_UWord32 curAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergyTmp;
WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum
WebRtc_UWord32 magnEnergy;
WebRtc_UWord32 sumMagn;
WebRtc_UWord32 curAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergyTmp;
WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate
WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate
WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator
WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq
int minNorm; //smallest normalization factor
int zeroInputSignal; //zero input signal flag
WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate
WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate
WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator
WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq
int minNorm; //smallest normalization factor
int zeroInputSignal; //zero input signal flag
WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame
WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14
WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame
WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14
int blockIndex; //frame index counter
int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model
int cntThresUpdate;
int blockIndex; //frame index counter
int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model
int cntThresUpdate;
//histograms for parameter estimation
WebRtc_Word16 histLrt[HIST_PAR_EST];
WebRtc_Word16 histSpecFlat[HIST_PAR_EST];
WebRtc_Word16 histSpecDiff[HIST_PAR_EST];
//histograms for parameter estimation
WebRtc_Word16 histLrt[HIST_PAR_EST];
WebRtc_Word16 histSpecFlat[HIST_PAR_EST];
WebRtc_Word16 histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */
//quantities for high band estimate
WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */
int qNoise;
int prevQNoise;
int prevQMagn;
int blockLen10ms;
int qNoise;
int prevQNoise;
int prevQMagn;
int blockLen10ms;
WebRtc_Word16 real[ANAL_BLOCKL_MAX];
WebRtc_Word16 imag[ANAL_BLOCKL_MAX];
WebRtc_Word32 energyIn;
int scaleEnergyIn;
int normData;
WebRtc_Word16 real[ANAL_BLOCKL_MAX];
WebRtc_Word16 imag[ANAL_BLOCKL_MAX];
WebRtc_Word32 energyIn;
int scaleEnergyIn;
int normData;
} NsxInst_t;
@ -122,7 +121,7 @@ extern "C"
* Return value : 0 - Ok
* -1 - Error
*/
WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t *inst, WebRtc_UWord32 fs);
WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
/****************************************************************************
* WebRtcNsx_set_policy_core(...)
@ -139,7 +138,7 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t *inst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode);
int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
/****************************************************************************
* WebRtcNsx_ProcessCore
@ -159,16 +158,16 @@ int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode);
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNsx_ProcessCore(NsxInst_t *inst, short *inFrameLow, short *inFrameHigh,
short *outFrameLow, short *outFrameHigh);
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh,
short* outFrameLow, short* outFrameHigh);
/****************************************************************************
* Internal functions and variable declarations shared with optimized code.
*/
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t *inst, int offset);
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset);
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
WebRtc_Word16 *qNoise);
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise);
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];

View File

@ -11,215 +11,230 @@
#if defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)
#include "nsx_core.h"
#include <arm_neon.h>
#include <assert.h>
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
WebRtc_Word16 *qNoise)
{
WebRtc_Word32 numerator;
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise) {
WebRtc_Word32 numerator;
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
WebRtc_Word16 log2Const = 22713;
WebRtc_Word16 widthFactor = 21845;
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
WebRtc_Word16 log2Const = 22713;
WebRtc_Word16 widthFactor = 21845;
int i, s, offset;
int i, s, offset;
numerator = FACTOR_Q16;
numerator = FACTOR_Q16;
tabind = inst->stages - inst->normData;
if (tabind < 0)
{
logval = -WebRtcNsx_kLogTable[-tabind];
} else
{
logval = WebRtcNsx_kLogTable[tabind];
tabind = inst->stages - inst->normData;
assert(tabind < 9);
assert(tabind > -9);
if (tabind < 0) {
logval = -WebRtcNsx_kLogTable[-tabind];
} else {
logval = WebRtcNsx_kLogTable[tabind];
}
int16x8_t logval_16x8 = vdupq_n_s16(logval);
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
// magn is in Q(-stages), and the real lmagn values are:
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++) {
if (magn[i]) {
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
assert(frac < 256);
// log2(magn(i))
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
// + log(2^stages)
lmagn[i] += logval;
} else {
lmagn[i] = logval;
}
}
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
// magn is in Q(-stages), and the real lmagn values are:
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++)
{
if (magn[i])
{
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
// log2(magn(i))
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
// + log(2^stages)
lmagn[i] += logval;
} else
{
lmagn[i] = logval;
int16x4_t Q3_16x4 = vdup_n_s16(3);
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor);
WebRtc_Word16 factor = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG)
factor = FACTOR_Q7_STARTUP;
// Loop over simultaneous estimates
for (s = 0; s < SIMULT; s++) {
offset = s * inst->magnLen;
// Get counter values from state
counter = inst->noiseEstCounter[s];
assert(counter < 201);
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
// quant_est(...)
WebRtc_Word16 deltaBuff[8];
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
int16x8_t tmp16x8_1;
int16x8_t tmp16x8_2;
int16x8_t tmp16x8_3;
int16x8_t tmp16x8_4;
int16x8_t tmp16x8_5;
int32x4_t tmp32x4;
for (i = 0; i < inst->magnLen - 7; i += 8) {
// Compute delta.
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
tmp16x8_0 = vdupq_n_s16(factor);
vst1q_s16(deltaBuff, tmp16x8_0);
int j;
for (j = 0; j < 8; j++) {
if (inst->noiseEstDensity[offset + i + j] > 512) {
deltaBuff[j] = WebRtcSpl_DivW32W16ResW16(
numerator, inst->noiseEstDensity[offset + i + j]);
}
}
}
int16x4_t Q3_16x4 = vdup_n_s16(3);
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor);
// Update log quantile estimate
// Loop over simultaneous estimates
for (s = 0; s < SIMULT; s++)
{
offset = s * inst->magnLen;
// tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
// Get counter values from state
counter = inst->noiseEstCounter[s];
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
// prepare for the "if" branch
// tmp16 += 2;
// tmp16_1 = (Word16)(tmp16>>2);
tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
// quant_est(...)
WebRtc_Word16 delta_[8];
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
int16x8_t tmp16x8_1;
int16x8_t tmp16x8_2;
int16x8_t tmp16x8_3;
int16x8_t tmp16x8_4;
int16x8_t tmp16x8_5;
int32x4_t tmp32x4;
// inst->noiseEstLogQuantile[offset+i] + tmp16_1;
tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
for (i = 0; i < inst->magnLen - 7; i += 8) {
// compute delta
tmp16x8_0 = vdupq_n_s16(FACTOR_Q7);
vst1q_s16(delta_, tmp16x8_0);
int j;
for (j = 0; j < 8; j++) {
if (inst->noiseEstDensity[offset + i + j] > 512)
delta_[j] = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i + j]);
}
// Prepare for the "else" branch
// tmp16 += 1;
// tmp16_1 = (Word16)(tmp16>>1);
tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
// Update log quantile estimate
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
// tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
tmp32x4 = vmull_s16(vld1_s16(&delta_[0]), countDiv_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
tmp32x4 = vmull_s16(vld1_s16(&delta_[4]), countDiv_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
// prepare for the "if" branch
// tmp16 += 2;
// tmp16_1 = (Word16)(tmp16>>2);
tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
// inst->noiseEstLogQuantile[offset + i] - tmp16_2;
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
// inst->noiseEstLogQuantile[offset+i] + tmp16_1;
tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
// logval is the smallest fixed point representation we can have. Values below
// that will correspond to values in the interval [0, 1], which can't possibly
// occur.
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
// Prepare for the "else" branch
// tmp16 += 1;
// tmp16_1 = (Word16)(tmp16>>1);
tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
// Do the if-else branches:
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
__asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
__asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
// Update density estimate
// tmp16_1 + tmp16_2
tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
// lmagn[i] - inst->noiseEstLogQuantile[offset + i]
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
tmp16x8_3 = vabsq_s16(tmp16x8_3);
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
} // End loop over magnitude spectrum
// inst->noiseEstLogQuantile[offset + i] - tmp16_2;
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
// Do the if-else branches:
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
__asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
__asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
// Update density estimate
// tmp16_1 + tmp16_2
tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
// lmagn[i] - inst->noiseEstLogQuantile[offset + i]
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
tmp16x8_3 = vabsq_s16(tmp16x8_3);
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
} // End loop over magnitude spectrum
for (; i < inst->magnLen; i++)
{
// compute delta
if (inst->noiseEstDensity[offset + i] > 512)
{
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else
{
delta = FACTOR_Q7;
}
// update log quantile estimate
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i])
{
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/inst->counter[s] in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else
{
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
}
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8)
{
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor,
countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
} // end loop over magnitude spectrum
if (counter >= END_STARTUP_LONG)
{
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG)
{
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
for (; i < inst->magnLen; i++) {
// compute delta
if (inst->noiseEstDensity[offset + i] > 512) {
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else {
delta = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG) {
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
delta = FACTOR_Q7_STARTUP;
}
inst->noiseEstCounter[s]++;
}
} // end loop over simultaneous estimates
// update log quantile estimate
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/(inst->counter[s]+1) in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else {
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
if (inst->noiseEstLogQuantile[offset + i] < logval) {
// logval is the smallest fixed point representation we can have.
// Values below that will correspond to values in the interval
// [0, 1], which can't possibly occur.
inst->noiseEstLogQuantile[offset + i] = logval;
}
}
// Sequentially update the noise during startup
if (inst->blockIndex < END_STARTUP_LONG)
{
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8) {
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
widthFactor, countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
} // end loop over magnitude spectrum
if (counter >= END_STARTUP_LONG) {
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
}
inst->noiseEstCounter[s]++;
for (i = 0; i < inst->magnLen; i++)
{
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*qNoise) = (WebRtc_Word16)inst->qNoise;
} // end loop over simultaneous estimates
// Sequentially update the noise during startup
if (inst->blockIndex < END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
for (i = 0; i < inst->magnLen; i++) {
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*qNoise) = (WebRtc_Word16)inst->qNoise;
}
#endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)

View File

@ -18,6 +18,7 @@
#define END_STARTUP_SHORT 50
#define FACTOR_Q16 (WebRtc_Word32)2621440 // 40 in Q16
#define FACTOR_Q7 (WebRtc_Word16)5120 // 40 in Q7
#define FACTOR_Q7_STARTUP (WebRtc_Word16)1024 // 8 in Q7
#define WIDTH_Q8 3 // 0.01 in Q8 (or 25 )
//PARAMETERS FOR NEW METHOD
#define DD_PR_SNR_Q11 2007 // ~= Q11(0.98) DD update of prior SNR

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,12 @@
noinst_LTLIBRARIES = libapm_util.la
libapm_util_la_SOURCES = ring_buffer.c \
ring_buffer.h \
libapm_util_la_SOURCES = delay_estimator_float.c \
delay_estimator_float.h \
delay_estimator.c \
delay_estimator.h \
fft4g.c \
fft4g.h
libapm_util_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS)
fft4g.h \
ring_buffer.c \
ring_buffer.h
libapm_util_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface

View File

@ -0,0 +1,550 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "delay_estimator.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "signal_processing_library.h"
typedef struct {
// Pointers to mean values of spectrum and bit counts
int32_t* mean_far_spectrum;
int32_t* mean_near_spectrum;
int32_t* mean_bit_counts;
// Arrays only used locally in DelayEstimatorProcess() but whose size
// is determined at run-time.
int32_t* bit_counts;
int32_t* far_spectrum_32;
int32_t* near_spectrum_32;
// Binary history variables
uint32_t* binary_far_history;
// Far end history variables
uint16_t* far_history;
int far_history_pos;
int* far_q_domains;
// Delay histogram variables
int* delay_histogram;
int vad_counter;
// Delay memory
int last_delay;
// Used to enable far end alignment. If it is disabled, only delay values are
// produced
int alignment_enabled;
// Buffer size parameters
int history_size;
int spectrum_size;
} DelayEstimator_t;
// Only bit |kBandFirst| through bit |kBandLast| are processed
// |kBandFirst| - |kBandLast| must be < 32
static const int kBandFirst = 12;
static const int kBandLast = 43;
static __inline uint32_t SetBit(uint32_t in, int32_t pos) {
uint32_t mask = WEBRTC_SPL_LSHIFT_W32(1, pos);
uint32_t out = (in | mask);
return out;
}
// For each of the |matrix_size| rows of |binary_matrix|, counts the number
// of bit positions in which the row and |binary_vector| differ (population
// count of the XOR), storing the counts in |bit_counts|. A small count
// therefore means a close match.
//
// Inputs:
//      - binary_vector     : binary "vector" stored in a uint32_t
//      - binary_matrix     : binary "matrix" stored as an array of uint32_t
//      - matrix_size       : number of rows in the binary "matrix"
//
// Output:
//      - bit_counts        : per-row count of differing bits
//
static void BitCountComparison(uint32_t binary_vector,
                               const uint32_t* binary_matrix,
                               int matrix_size,
                               int32_t* bit_counts) {
  int row;

  for (row = 0; row < matrix_size; row++) {
    // Parallel (SWAR) population count of the differing bits.
    uint32_t bits = binary_vector ^ binary_matrix[row];
    bits = bits - ((bits >> 1) & 0x55555555);
    bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);
    bits = (bits + (bits >> 4)) & 0x0F0F0F0F;
    bit_counts[row] = (int32_t) ((bits * 0x01010101) >> 24);
  }
}
// Derives a binary spectrum from |spectrum|: bit (k - kBandFirst) of the
// result is set iff spectrum[k] exceeds threshold_spectrum[k], for the bands
// kBandFirst through kBandLast.
//
// Inputs:
//      - spectrum           : spectrum to binarize
//      - threshold_spectrum : per-band thresholds
// Return:
//      - out                : binary spectrum
//
static uint32_t BinarySpectrum(int32_t* spectrum, int32_t* threshold_spectrum) {
  uint32_t binary = 0;
  int band;

  for (band = kBandFirst; band <= kBandLast; band++) {
    if (spectrum[band] > threshold_spectrum[band]) {
      binary = SetBit(binary, band - kBandFirst);
    }
  }
  return binary;
}
// First-order recursive (IIR) mean: adds (new_value - mean) / 2^factor to
// the running mean, rounding the step toward zero.
//
// Inputs:
//      - new_value  : new sample to fold into the mean
//      - factor     : smoothing exponent; larger => slower adaptation
//
// Input/Output:
//      - mean_value : running mean, updated in place
//
static void MeanEstimator(const int32_t new_value,
                          int factor,
                          int32_t* mean_value) {
  int32_t delta = new_value - *mean_value;

  // Shift the magnitude rather than the signed value: right-shifting a
  // negative integer is implementation-defined in C, and shifting the
  // magnitude also makes the step round toward zero in both directions.
  if (delta < 0) {
    delta = -((-delta) >> factor);
  } else {
    delta >>= factor;
  }
  *mean_value += delta;
}
// Advances the circular far-end buffer one slot and stores |far_spectrum|
// together with its Q-domain |far_q| in the new slot, so that
// WebRtc_AlignedFarend() can later retrieve both. Only called when
// alignment is enabled.
//
// Inputs:
//      - self          : Pointer to the delay estimation instance
//      - far_spectrum  : Pointer to the far end spectrum
//      - far_q         : Q-domain of far end spectrum
//
static void UpdateFarHistory(DelayEstimator_t* self,
                             uint16_t* far_spectrum,
                             int far_q) {
  int pos = self->far_history_pos + 1;

  // Wrap around at the end of the circular buffer.
  if (pos >= self->history_size) {
    pos = 0;
  }
  self->far_history_pos = pos;
  self->far_q_domains[pos] = far_q;
  memcpy(&self->far_history[pos * self->spectrum_size],
         far_spectrum,
         sizeof(uint16_t) * self->spectrum_size);
}
// Releases all memory owned by a delay estimator instance, including
// partially constructed instances coming from a failed
// WebRtc_CreateDelayEstimator().
//
// Input:
//      - handle : Pointer to the delay estimation instance, or NULL.
//
// Return value: 0 on success, -1 if |handle| is NULL.
int WebRtc_FreeDelayEstimator(void* handle) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;

  if (self == NULL) {
    return -1;
  }
  // free(NULL) is a well-defined no-op, so neither per-pointer NULL checks
  // nor the subsequent NULL assignments are needed: the whole struct is
  // released below.
  free(self->mean_far_spectrum);
  free(self->mean_near_spectrum);
  free(self->mean_bit_counts);
  free(self->bit_counts);
  free(self->far_spectrum_32);
  free(self->near_spectrum_32);
  free(self->binary_far_history);
  free(self->far_history);
  free(self->far_q_domains);
  free(self->delay_histogram);
  free(self);

  return 0;
}
// Allocates a delay estimator instance and all of its buffers. The instance
// must be initialized with WebRtc_InitDelayEstimator() before use.
//
// Inputs:
//      - spectrum_size    : Number of bands in the far/near spectra. Must
//                           exceed kBandLast, since BinarySpectrum() reads
//                           bands kBandFirst..kBandLast inclusive.
//      - history_size     : Number of far-end history entries.
//      - enable_alignment : 1 to additionally keep a far-end spectrum
//                           history for WebRtc_AlignedFarend(); 0 to
//                           produce delay values only.
//
// Output:
//      - handle           : Created instance on success; NULL on failure.
//
// Return value: 0 on success, -1 on failure.
int WebRtc_CreateDelayEstimator(void** handle,
                                int spectrum_size,
                                int history_size,
                                int enable_alignment) {
  DelayEstimator_t* self = NULL;

  // Check if the sub band used in the delay estimation is small enough to
  // fit the binary spectra in a uint32.
  assert(kBandLast - kBandFirst < 32);

  // BinarySpectrum() accesses index kBandLast, so the spectrum needs at
  // least kBandLast + 1 entries. (The previous check, spectrum_size <
  // kBandLast, was off by one.)
  if (spectrum_size <= kBandLast) {
    return -1;
  }
  if (history_size < 0) {
    return -1;
  }
  if ((enable_alignment != 0) && (enable_alignment != 1)) {
    return -1;
  }

  self = malloc(sizeof(DelayEstimator_t));
  *handle = self;
  if (self == NULL) {
    return -1;
  }
  // Null all buffer pointers up front so that any failure below can be
  // cleaned up uniformly by WebRtc_FreeDelayEstimator().
  self->mean_far_spectrum = NULL;
  self->mean_near_spectrum = NULL;
  self->mean_bit_counts = NULL;
  self->bit_counts = NULL;
  self->far_spectrum_32 = NULL;
  self->near_spectrum_32 = NULL;
  self->binary_far_history = NULL;
  self->far_history = NULL;
  self->far_q_domains = NULL;
  self->delay_histogram = NULL;

  // Allocate memory for spectrum buffers.
  self->mean_far_spectrum = malloc(spectrum_size * sizeof(int32_t));
  if (self->mean_far_spectrum == NULL) {
    goto fail;
  }
  self->mean_near_spectrum = malloc(spectrum_size * sizeof(int32_t));
  if (self->mean_near_spectrum == NULL) {
    goto fail;
  }
  self->mean_bit_counts = malloc(history_size * sizeof(int32_t));
  if (self->mean_bit_counts == NULL) {
    goto fail;
  }
  self->bit_counts = malloc(history_size * sizeof(int32_t));
  if (self->bit_counts == NULL) {
    goto fail;
  }
  self->far_spectrum_32 = malloc(spectrum_size * sizeof(int32_t));
  if (self->far_spectrum_32 == NULL) {
    goto fail;
  }
  self->near_spectrum_32 = malloc(spectrum_size * sizeof(int32_t));
  if (self->near_spectrum_32 == NULL) {
    goto fail;
  }
  // Allocate memory for history buffers.
  self->binary_far_history = malloc(history_size * sizeof(uint32_t));
  if (self->binary_far_history == NULL) {
    goto fail;
  }
  if (enable_alignment) {
    self->far_history = malloc(spectrum_size * history_size * sizeof(uint16_t));
    if (self->far_history == NULL) {
      goto fail;
    }
    self->far_q_domains = malloc(history_size * sizeof(int));
    if (self->far_q_domains == NULL) {
      goto fail;
    }
  }
  self->delay_histogram = malloc(history_size * sizeof(int));
  if (self->delay_histogram == NULL) {
    goto fail;
  }

  self->spectrum_size = spectrum_size;
  self->history_size = history_size;
  self->alignment_enabled = enable_alignment;

  return 0;

fail:
  WebRtc_FreeDelayEstimator(self);
  // Don't leave the caller holding a pointer to freed memory; a NULL handle
  // is also safe to pass to WebRtc_FreeDelayEstimator() later.
  *handle = NULL;
  return -1;
}
// Resets a created delay estimator instance to its initial state: all
// averages, histories, the histogram, the VAD counter and the delay memory
// are cleared.
//
// Input:
//      - handle : Pointer to the delay estimation instance
//
// Return value: 0 on success, -1 if |handle| is NULL.
int WebRtc_InitDelayEstimator(void* handle) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;

  if (self == NULL) {
    return -1;
  }
  // Clear all spectrum-sized buffers.
  memset(self->mean_far_spectrum, 0, sizeof(int32_t) * self->spectrum_size);
  memset(self->mean_near_spectrum, 0, sizeof(int32_t) * self->spectrum_size);
  memset(self->far_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size);
  memset(self->near_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size);
  // Clear all history-sized buffers.
  memset(self->mean_bit_counts, 0, sizeof(int32_t) * self->history_size);
  memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size);
  memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size);
  memset(self->delay_histogram, 0, sizeof(int) * self->history_size);
  if (self->alignment_enabled) {
    memset(self->far_history,
           0,
           sizeof(uint16_t) * self->spectrum_size * self->history_size);
    memset(self->far_q_domains, 0, sizeof(int) * self->history_size);
    // Start one slot past the end so the first UpdateFarHistory() call
    // wraps around to slot 0.
    self->far_history_pos = self->history_size;
  }
  // Reset VAD counter and delay memory.
  self->vad_counter = 0;
  self->last_delay = 0;

  return 0;
}
// Estimates and returns the delay (in blocks) between the far end and the
// near end. Each call:
//   1) Updates running means of both spectra (used as per-band thresholds).
//   2) Forms binary spectra: one bit per band, set when the band exceeds
//      its running mean.
//   3) Compares the near-end binary spectrum against the far-end binary
//      history (count of differing bits per lag) and smooths the counts.
//   4) While the far end is active, accumulates a histogram of best-match
//      lags and reports the histogram peak as the delay.
//
// Inputs:
//      - handle        : Pointer to the delay estimation instance
//      - far_spectrum  : Far-end magnitude spectrum
//      - near_spectrum : Near-end magnitude spectrum of the current block
//      - spectrum_size : Size of the spectra; must match creation time
//      - far_q         : Q-domain of the far-end data (must be <= 15)
//      - vad_value     : VAD decision for the current block (1 = active)
//
// Return value: estimated delay (>= 0), or -1 on error.
int WebRtc_DelayEstimatorProcess(void* handle,
                                 uint16_t* far_spectrum,
                                 uint16_t* near_spectrum,
                                 int spectrum_size,
                                 int far_q,
                                 int vad_value) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;
  // Number of consecutive active-VAD blocks required before the histogram
  // starts accumulating.
  const int kVadCountThreshold = 25;
  // Upper cap on a histogram bin, bounding how long a stale delay estimate
  // can dominate.
  const int kMaxHistogram = 600;
  int histogram_bin = 0;
  int i = 0;
  int max_histogram_level = 0;
  int min_position = -1;
  uint32_t binary_far_spectrum = 0;
  uint32_t binary_near_spectrum = 0;
  int32_t bit_counts_tmp = 0;
  if (self == NULL) {
    return -1;
  }
  if (spectrum_size != self->spectrum_size) {
    // Data sizes don't match.
    return -1;
  }
  if (far_q > 15) {
    // If |far_q| is larger than 15 we cannot guarantee no wrap around.
    return -1;
  }
  if (self->alignment_enabled) {
    // Store the far-end spectrum and its Q-domain so WebRtc_AlignedFarend()
    // can later return the entry matching the estimated delay.
    UpdateFarHistory(self, far_spectrum, far_q);
  }
  // Update the running means of the far and near end spectra; these act as
  // the per-band thresholds when forming the binary spectra below.
  for (i = 0; i < self->spectrum_size; i++) {
    self->far_spectrum_32[i] = (int32_t) far_spectrum[i];
    MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i]));
    self->near_spectrum_32[i] = (int32_t) near_spectrum[i];
    MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i]));
  }
  // Shift the binary far-end history one step; index 0 holds the most
  // recent entry.
  memmove(&(self->binary_far_history[1]), &(self->binary_far_history[0]),
          (self->history_size - 1) * sizeof(uint32_t));
  // Get binary spectra.
  binary_far_spectrum = BinarySpectrum(self->far_spectrum_32,
                                       self->mean_far_spectrum);
  binary_near_spectrum = BinarySpectrum(self->near_spectrum_32,
                                        self->mean_near_spectrum);
  // Insert new binary spectrum.
  self->binary_far_history[0] = binary_far_spectrum;
  // Count differing bits between the near-end spectrum and each delayed
  // far-end spectrum; a small count means a good match at that lag.
  BitCountComparison(binary_near_spectrum,
                     self->binary_far_history,
                     self->history_size,
                     self->bit_counts);
  // Smooth bit count curve.
  for (i = 0; i < self->history_size; i++) {
    // |bit_counts| is constrained to [0, 32], meaning we can smooth with a
    // factor up to 2^26. We use Q9.
    bit_counts_tmp = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9);  // Q9
    MeanEstimator(bit_counts_tmp, 9, &(self->mean_bit_counts[i]));
  }
  // The lag with the smallest smoothed bit count is the current best match.
  min_position = (int) WebRtcSpl_MinIndexW32(self->mean_bit_counts,
                                             (int16_t) self->history_size);
  // If the far end has been active sufficiently long, begin accumulating a
  // histogram of the minimum positions. Search for the maximum bin to
  // determine the delay.
  if (vad_value == 1) {
    if (self->vad_counter >= kVadCountThreshold) {
      // Increment the histogram at the current minimum position. A hit adds
      // 3 while every non-empty bin decays by 1 below, so frequently chosen
      // lags out-grow stale ones.
      if (self->delay_histogram[min_position] < kMaxHistogram) {
        self->delay_histogram[min_position] += 3;
      }
      self->last_delay = 0;
      for (i = 0; i < self->history_size; i++) {
        histogram_bin = self->delay_histogram[i];
        // Decrement the histogram bin.
        if (histogram_bin > 0) {
          histogram_bin--;
          self->delay_histogram[i] = histogram_bin;
          // Select the histogram index corresponding to the maximum bin as
          // the delay.
          if (histogram_bin > max_histogram_level) {
            max_histogram_level = histogram_bin;
            self->last_delay = i;
          }
        }
      }
    } else {
      self->vad_counter++;
    }
  } else {
    // Far end inactive: restart the activity counter. |last_delay| keeps
    // its previous value.
    self->vad_counter = 0;
  }
  return self->last_delay;
}
// Returns the stored far-end spectrum aligned with the current near end,
// i.e. the history entry |last_delay| slots behind the write position, and
// its Q-domain through |far_q|. Returns NULL on error or when alignment is
// disabled.
const uint16_t* WebRtc_AlignedFarend(void* handle,
                                     int far_spectrum_size,
                                     int* far_q) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;
  int position;

  if (self == NULL) {
    return NULL;
  }
  if (far_spectrum_size != self->spectrum_size ||
      self->alignment_enabled == 0) {
    return NULL;
  }
  // Step back |last_delay| slots in the circular buffer.
  position = self->far_history_pos - self->last_delay;
  if (position < 0) {
    position += self->history_size;
  }
  *far_q = self->far_q_domains[position];
  return self->far_history + position * far_spectrum_size;
}
// Returns the delay estimated by the last WebRtc_DelayEstimatorProcess()
// call, or -1 if |handle| is NULL.
int WebRtc_last_delay(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->last_delay;
}
// Returns the far-end history size of the estimator, or -1 if |handle| is
// NULL.
int WebRtc_history_size(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->history_size;
}
// Returns the spectrum size the estimator was created with, or -1 if
// |handle| is NULL.
int WebRtc_spectrum_size(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->spectrum_size;
}
// Returns 1 if far-end alignment is enabled, 0 if disabled, or -1 if
// |handle| is NULL.
int WebRtc_is_alignment_enabled(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->alignment_enabled;
}

View File

@ -0,0 +1,154 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
#include "typedefs.h"
// Releases the memory allocated by WebRtc_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtc_FreeDelayEstimator(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtc_InitDelayEstimator(...)
// function.
//
// Inputs:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and
// near end. Used to allocate memory for spectrum
// specific buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
// - enable_alignment : With this mode set to 1, a far end history is
// created, so that the user can retrieve aligned
// far end spectra using
// WebRtc_AlignedFarend(...). Otherwise, only delay
// values are calculated.
//
// Output:
// - handle : Created instance
//
int WebRtc_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size,
int enable_alignment);
// Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtc_InitDelayEstimator(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - far_q : The Q-domain of the far end data
// - vad_value : The VAD decision of the current block
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtc_DelayEstimatorProcess(void* handle,
uint16_t* far_spectrum,
uint16_t* near_spectrum,
int spectrum_size,
int far_q,
int vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcess(...) should have been
// called before WebRtc_AlignedFarend(...). Otherwise, you get the pointer to
// the previous frame. The memory is only valid until the next call of
// WebRtc_DelayEstimatorProcess(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum_size : Size of far_spectrum allocated by the caller
//
// Output:
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const uint16_t* WebRtc_AlignedFarend(void* handle,
int far_spectrum_size,
int* far_q);
// Returns the last calculated delay updated by the function
// WebRtc_DelayEstimatorProcess(...)
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtc_last_delay(void* handle);
// Returns the history size used in the far end buffers to calculate the delay
// over.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - history_size : > 0 - Far end history size
// -1 - Error
//
int WebRtc_history_size(void* handle);
// Returns the fixed spectrum size used in the algorithm.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - spectrum_size : > 0 - Spectrum size
// -1 - Error
//
int WebRtc_spectrum_size(void* handle);
// Returns 1 if the far end alignment is enabled and 0 otherwise.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - alignment_enabled : 1 - Enabled
// 0 - Disabled
// -1 - Error
//
int WebRtc_is_alignment_enabled(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_

View File

@ -0,0 +1,288 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "delay_estimator_float.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "delay_estimator.h"
#include "signal_processing_library.h"
// Internal state of the floating-point wrapper around the fixed-point delay
// estimator.
typedef struct {
  // Fixed point spectra: scaled uint16_t copies of the current far/near
  // float spectra, fed to the fixed-point core estimator.
  uint16_t* far_spectrum_u16;
  uint16_t* near_spectrum_u16;
  // Far end history variables (circular float spectrum buffer, only
  // allocated when alignment is enabled).
  float* far_history;
  int far_history_pos;  // Current write slot in |far_history|.
  // Fixed point delay estimator performing the actual estimation.
  void* fixed_handle;
} DelayEstimatorFloat_t;
// Advances the circular far-end float buffer one slot and copies
// |far_spectrum| into the new slot. Only used when alignment is enabled.
//
// Inputs:
//      - self          : Pointer to the delay estimation instance
//      - far_spectrum  : Pointer to the far end spectrum
//
static void UpdateFarHistory(DelayEstimatorFloat_t* self, float* far_spectrum) {
  const int spectrum_size = WebRtc_spectrum_size(self->fixed_handle);
  int pos = self->far_history_pos + 1;

  // Wrap around at the end of the circular buffer.
  if (pos >= WebRtc_history_size(self->fixed_handle)) {
    pos = 0;
  }
  self->far_history_pos = pos;
  memcpy(&self->far_history[pos * spectrum_size],
         far_spectrum,
         sizeof(float) * spectrum_size);
}
// Releases all memory owned by a float delay estimator instance, including
// the underlying fixed-point estimator and partially constructed instances
// from a failed WebRtc_CreateDelayEstimatorFloat().
//
// Input:
//      - handle : Pointer to the delay estimation instance, or NULL.
//
// Return value: 0 on success, -1 if |handle| is NULL.
int WebRtc_FreeDelayEstimatorFloat(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;

  if (self == NULL) {
    return -1;
  }
  // free(NULL) is a well-defined no-op, so no per-pointer NULL guards are
  // needed. WebRtc_FreeDelayEstimator() likewise tolerates a NULL handle.
  free(self->far_history);
  free(self->far_spectrum_u16);
  free(self->near_spectrum_u16);
  WebRtc_FreeDelayEstimator(self->fixed_handle);
  free(self);

  return 0;
}
// Allocates a float delay estimator wrapper together with the underlying
// fixed-point estimator. The instance must be initialized with
// WebRtc_InitDelayEstimatorFloat() before use.
//
// Inputs: as for WebRtc_CreateDelayEstimator().
//
// Output:
//      - handle : Created instance on success; NULL on failure.
//
// Return value: 0 on success, -1 on failure.
int WebRtc_CreateDelayEstimatorFloat(void** handle,
                                     int spectrum_size,
                                     int history_size,
                                     int enable_alignment) {
  DelayEstimatorFloat_t* self = NULL;

  if ((enable_alignment != 0) && (enable_alignment != 1)) {
    return -1;
  }

  self = malloc(sizeof(DelayEstimatorFloat_t));
  *handle = self;
  if (self == NULL) {
    return -1;
  }
  // Null all buffer pointers up front so that any failure below can be
  // cleaned up uniformly by WebRtc_FreeDelayEstimatorFloat().
  self->far_history = NULL;
  self->far_spectrum_u16 = NULL;
  self->near_spectrum_u16 = NULL;

  // Create the fixed point core delay estimator.
  if (WebRtc_CreateDelayEstimator(&self->fixed_handle,
                                  spectrum_size,
                                  history_size,
                                  enable_alignment) != 0) {
    // A failed create may leave |fixed_handle| pointing at already-freed
    // memory; clear it so the cleanup below does not double-free.
    self->fixed_handle = NULL;
    goto fail;
  }
  // Allocate memory for the far end history buffer.
  if (enable_alignment) {
    self->far_history = malloc(spectrum_size * history_size * sizeof(float));
    if (self->far_history == NULL) {
      goto fail;
    }
  }
  // Allocate memory for the fixed point spectra.
  self->far_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t));
  if (self->far_spectrum_u16 == NULL) {
    goto fail;
  }
  self->near_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t));
  if (self->near_spectrum_u16 == NULL) {
    goto fail;
  }

  return 0;

fail:
  WebRtc_FreeDelayEstimatorFloat(self);
  // Don't leave the caller holding a pointer to freed memory.
  *handle = NULL;
  return -1;
}
// Resets a float delay estimator instance (and its fixed-point core) to the
// initial state.
//
// Input:
//      - handle : Pointer to the delay estimation instance
//
// Return value: 0 on success, -1 on error.
int WebRtc_InitDelayEstimatorFloat(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  int history_size;
  int spectrum_size;

  if (self == NULL) {
    return -1;
  }
  if (WebRtc_InitDelayEstimator(self->fixed_handle) != 0) {
    return -1;
  }
  history_size = WebRtc_history_size(self->fixed_handle);
  spectrum_size = WebRtc_spectrum_size(self->fixed_handle);
  if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) {
    // Clear the far end history.
    memset(self->far_history,
           0,
           sizeof(float) * spectrum_size * history_size);
    // Start one slot past the end so the first UpdateFarHistory() call
    // wraps around to slot 0.
    self->far_history_pos = history_size;
  }
  // Clear the fixed point spectra.
  memset(self->far_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size);
  memset(self->near_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size);

  return 0;
}
// Scales the float magnitude |spectrum| into |out| as uint16_t so that the
// largest value fits in a signed 16-bit range, and returns the power-of-two
// scaling exponent (the Q-domain). The FFT size is factored out because the
// float and fixed-point FFTs scale differently.
// TODO(bjornv): I've taken the size of FFT into account, since there is a
// different scaling in float vs fixed point FFTs. I'm not completely sure
// this is necessary.
static int ScaleSpectrumToUint16(const float* spectrum,
                                 int spectrum_size,
                                 float fft_size,
                                 uint16_t* out) {
  const float kLogOf2Inverse = 1.4426950f;  // 1 / ln(2); log() * this = log2().
  float max_value = 0.0f;
  float scaling = 0.0f;
  int scaling_log = 0;
  int i = 0;

  // 1) Find the largest value.
  for (i = 0; i < spectrum_size; ++i) {
    if (spectrum[i] > max_value) {
      max_value = spectrum[i];
    }
  }
  // 2) Find the largest power-of-two scaling such that the scaled maximum
  // still fits in a Word16. The "+ 1" keeps the log() argument >= 1, so
  // scaling_log <= 14 (and exactly 14 for an all-zero spectrum).
  scaling_log = 14 - (int) (log(max_value / fft_size + 1) * kLogOf2Inverse);
  scaling = (float) (1 << scaling_log) / fft_size;
  for (i = 0; i < spectrum_size; ++i) {
    out[i] = (uint16_t) (spectrum[i] * scaling);
  }
  return scaling_log;
}

// Converts the float spectra to fixed point and runs the fixed-point delay
// estimator on them. Returns the estimated delay (>= 0) or -1 on error.
//
// Inputs:
//      - handle        : Pointer to the delay estimation instance
//      - far_spectrum  : Far-end magnitude spectrum
//      - near_spectrum : Near-end magnitude spectrum of the current block
//      - spectrum_size : Size of the spectra; must match creation time
//      - vad_value     : VAD decision for the current block
//
int WebRtc_DelayEstimatorProcessFloat(void* handle,
                                      float* far_spectrum,
                                      float* near_spectrum,
                                      int spectrum_size,
                                      int vad_value) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  const float kFftSize = (float) (2 * (spectrum_size - 1));
  int far_q = 0;

  if (self == NULL) {
    return -1;
  }
  if (far_spectrum == NULL) {
    // Empty far end spectrum.
    return -1;
  }
  if (near_spectrum == NULL) {
    // Empty near end spectrum.
    return -1;
  }
  if (spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) {
    // Data sizes don't match.
    return -1;
  }

  // The near-end Q-domain is not needed downstream, only the far-end one.
  (void) ScaleSpectrumToUint16(near_spectrum, spectrum_size, kFftSize,
                               self->near_spectrum_u16);
  far_q = ScaleSpectrumToUint16(far_spectrum, spectrum_size, kFftSize,
                                self->far_spectrum_u16);
  assert(far_q < 16);  // Catch too large scaling, which should never be able
                       // to occur.

  if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) {
    // Update far end history.
    UpdateFarHistory(self, far_spectrum);
  }
  return WebRtc_DelayEstimatorProcess(self->fixed_handle,
                                      self->far_spectrum_u16,
                                      self->near_spectrum_u16,
                                      spectrum_size,
                                      far_q,
                                      vad_value);
}
// Returns the stored far-end float spectrum aligned with the current near
// end, i.e. the history entry |last_delay| slots behind the write position.
// Returns NULL on error or when alignment is disabled.
const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  int position;

  if (self == NULL) {
    return NULL;
  }
  if (far_spectrum_size != WebRtc_spectrum_size(self->fixed_handle) ||
      WebRtc_is_alignment_enabled(self->fixed_handle) != 1) {
    return NULL;
  }
  // Step back |last_delay| slots in the circular buffer.
  position = self->far_history_pos - WebRtc_last_delay(self->fixed_handle);
  if (position < 0) {
    position += WebRtc_history_size(self->fixed_handle);
  }
  return self->far_history + position * far_spectrum_size;
}
// Returns the delay estimated by the last
// WebRtc_DelayEstimatorProcessFloat() call, or -1 if |handle| is NULL.
int WebRtc_last_delay_float(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  return (self == NULL) ? -1 : WebRtc_last_delay(self->fixed_handle);
}
// Returns 1 if far-end alignment is enabled, 0 if disabled, or -1 if
// |handle| is NULL.
int WebRtc_is_alignment_enabled_float(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  return (self == NULL) ? -1 : WebRtc_is_alignment_enabled(self->fixed_handle);
}

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_
// Releases the memory allocated by WebRtc_CreateDelayEstimatorFloat(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtc_FreeDelayEstimatorFloat(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtc_InitDelayEstimatorFloat(...)
// function.
//
// Inputs:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and
// near end. Used to allocate memory for spectrum
// specific buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
// - enable_alignment : With this mode set to 1, a far end history is
// created, so that the user can retrieve aligned
// far end spectra using
// WebRtc_AlignedFarendFloat(...). Otherwise, only
// delay values are calculated.
//
// Output:
// - handle : Created instance
//
int WebRtc_CreateDelayEstimatorFloat(void** handle,
int spectrum_size,
int history_size,
int enable_alignment);
// Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimatorFloat(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtc_InitDelayEstimatorFloat(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - vad_value : The VAD decision of the current block
// (Unlike the fixed-point API, there is no far_q input: the Q-domain
// is derived internally when the float data is converted to fixed point.)
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtc_DelayEstimatorProcessFloat(void* handle,
float* far_spectrum,
float* near_spectrum,
int spectrum_size,
int vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcessFloat(...) should
// have been called before WebRtc_AlignedFarendFloat(...). Otherwise, you get
// the pointer to the previous frame. The memory is only valid until the
// next call of WebRtc_DelayEstimatorProcessFloat(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum_size : Size of far_spectrum allocated by the caller
//
// Output:
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size);
// Returns the last calculated delay updated by the function
// WebRtc_DelayEstimatorProcessFloat(...)
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtc_last_delay_float(void* handle);
// Returns 1 if the far end alignment is enabled and 0 otherwise.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - alignment_enabled : 1 - Enabled
// 0 - Disabled
// -1 - Error
//
int WebRtc_is_alignment_enabled_float(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_

View File

@ -11,16 +11,23 @@
{
'target_name': 'apm_util',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
'direct_dependent_settings': {
'include_dirs': [
'.',
],
},
'sources': [
'ring_buffer.c',
'ring_buffer.h',
'delay_estimator_float.c',
'delay_estimator_float.h',
'delay_estimator.c',
'delay_estimator.h',
'fft4g.c',
'fft4g.h',
'ring_buffer.c',
'ring_buffer.h',
],
},
],

View File

@ -42,9 +42,12 @@ struct RTPVideoHeaderH263
bool bits; // H.263 mode B, Xor the lasy byte of previus packet with the
// first byte of this packet
};
enum {kNoPictureId = -1};
enum {kNoTl0PicIdx = -1};
enum {kNoTemporalIdx = -1};
enum {kNoSimulcastIdx = 0};
struct RTPVideoHeaderVP8
{
void InitRTPVideoHeaderVP8()
@ -89,6 +92,8 @@ struct RTPVideoHeader
WebRtc_UWord16 height;
bool isFirstPacket; // first packet in frame
WebRtc_UWord8 simulcastIdx; // Index if the simulcast encoder creating
// this frame, 0 if not using simulcast.
RTPVideoCodecTypes codec;
RTPVideoTypeHeader codecHeader;
};

View File

@ -33,6 +33,7 @@
'../interface/cpu_features_wrapper.h',
'../interface/critical_section_wrapper.h',
'../interface/data_log.h',
'../interface/data_log_c.h',
'../interface/data_log_impl.h',
'../interface/event_wrapper.h',
'../interface/file_wrapper.h',
@ -63,6 +64,7 @@
'critical_section.cc',
'critical_section_posix.h',
'critical_section_windows.h',
'data_log_c.cc',
'event.cc',
'event_posix.h',
'event_windows.h',

View File

@ -14,12 +14,14 @@
#define WEBRTC_TYPEDEFS_H_
// Reserved words definitions
// TODO(andrew): Look at removing these.
#define WEBRTC_EXTERN extern
#define G_CONST const
#define WEBRTC_INLINE extern __inline
// Define WebRTC preprocessor identifiers based on the current build platform.
// TODO(ajm): Clean these up. We can probably remove everything in this block.
// TODO(andrew): Clean these up. We can probably remove everything in this
// block.
// - TARGET_MAC_INTEL and TARGET_MAC aren't used anywhere.
// - In the few places where TARGET_PC is used, it should be replaced by
// something more specific.
@ -32,7 +34,7 @@
#endif
#elif defined(__APPLE__)
// Mac OS X
#if defined(__LITTLE_ENDIAN__ ) //TODO: is this used?
#if defined(__LITTLE_ENDIAN__ )
#if !defined(WEBRTC_TARGET_MAC_INTEL)
#define WEBRTC_TARGET_MAC_INTEL
#endif
@ -53,7 +55,7 @@
// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
// http://www.agner.org/optimize/calling_conventions.pdf
// or with gcc, run: "echo | gcc -E -dM -"
// TODO(ajm): replace WEBRTC_LITTLE_ENDIAN with WEBRTC_ARCH_LITTLE_ENDIAN?
// TODO(andrew): replace WEBRTC_LITTLE_ENDIAN with WEBRTC_ARCH_LITTLE_ENDIAN?
#if defined(_M_X64) || defined(__x86_64__)
#define WEBRTC_ARCH_X86_FAMILY
#define WEBRTC_ARCH_X86_64
@ -65,8 +67,11 @@
#define WEBRTC_ARCH_32_BITS
#define WEBRTC_ARCH_LITTLE_ENDIAN
#elif defined(__ARMEL__)
// TODO(ajm): Chromium uses the two commented defines. Should we switch?
#define WEBRTC_ARCH_ARM
// TODO(andrew): We'd prefer to control platform defines here, but this is
// currently provided by the Android makefiles. Commented to avoid duplicate
// definition warnings.
//#define WEBRTC_ARCH_ARM
// TODO(andrew): Chromium uses the following two defines. Should we switch?
//#define WEBRTC_ARCH_ARM_FAMILY
//#define WEBRTC_ARCH_ARMEL
#define WEBRTC_ARCH_32_BITS
@ -75,10 +80,7 @@
#error Please add support for your architecture in typedefs.h
#endif
// TODO(ajm): SSE2 is disabled on Windows for the moment, because AEC
// optimization is broken. Enable it as soon as AEC is fixed.
//#if defined(__SSE2__) || defined(_MSC_VER)
#if defined(__SSE2__)
#if defined(__SSE2__) || defined(_MSC_VER)
#define WEBRTC_USE_SSE2
#endif