Update code to upstream revision r766

Removes matlab tests, adds delay estimation logging, and some other
minor fixes/improvements.
This commit is contained in:
Arun Raghavan 2011-10-20 13:10:08 +05:30
parent 139f0b6dc0
commit 7e71fffb59
79 changed files with 6238 additions and 8599 deletions

View File

@ -1,5 +1,5 @@
# Revision changelog (version - date, svn rev. from upstream that was merged)
# 0.1 - 15 Sep 2011, r597
# 0.1 - 19 Oct 2011, r766
AC_INIT([webrtc-audio-processing], [0.1])
AM_INIT_AUTOMAKE([tar-ustar])

View File

@ -1,3 +0,0 @@
bjornv@webrtc.org
tina.legrand@webrtc.org
jan.skoglund@webrtc.org

View File

@ -1659,6 +1659,30 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band,
// - out_data : Super-wideband speech signal, 0-16 kHz
//
// WebRtc_Word16 WebRtcSpl_SatW32ToW16(...)
//
// This function saturates a 32-bit word into a 16-bit word.
//
// Input:
// - value32 : The value of a 32-bit word.
//
// Output:
// - out16 : the saturated 16-bit word.
//
// int32_t WebRtc_MulAccumW16(...)
//
// This function multiplies a 16-bit word by a 16-bit word, and accumulates
// the result into a 32-bit integer.
//
// Input:
// - a : The value of the first 16-bit word.
// - b : The value of the second 16-bit word.
// - c : The value of a 32-bit integer.
//
// Return Value: The value of a * b + c.
//
// WebRtc_Word16 WebRtcSpl_get_version(...)
//
// This function gives the version string of the Signal Processing Library.

View File

@ -19,16 +19,20 @@
#include "spl_inl_armv7.h"
#else
// Saturates a 32-bit value into the 16-bit range [-32768, 32767].
//
// Input:
//  - value32 : 32-bit value to saturate.
// Return: value32 clamped to WebRtc_Word16 range.
static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) {
  if (value32 > 32767) {
    return 32767;
  }
  if (value32 < -32768) {
    return -32768;
  }
  return (WebRtc_Word16) value32;
}
// Saturated addition of two 16-bit values: widens both operands to 32 bits,
// adds, then clamps the sum back to the 16-bit range via
// WebRtcSpl_SatW32ToW16.
//
// Fix: the block contained two complete implementations (the old explicit
// clamping body followed by the new one-line version), leaving the second
// return statement unreachable. Only the single-expression version is kept;
// behavior is identical.
static __inline WebRtc_Word16 WebRtcSpl_AddSatW16(WebRtc_Word16 a,
                                                  WebRtc_Word16 b) {
  return WebRtcSpl_SatW32ToW16((WebRtc_Word32) a + (WebRtc_Word32) b);
}
static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1,
@ -54,24 +58,7 @@ static __inline WebRtc_Word32 WebRtcSpl_AddSatW32(WebRtc_Word32 l_var1,
// Saturated subtraction of two 16-bit values: computes var1 - var2 in 32-bit
// arithmetic, then clamps the difference back to the 16-bit range via
// WebRtcSpl_SatW32ToW16.
//
// Fix: the block contained two complete implementations (the old explicit
// overflow/underflow checks followed by the new one-line version), leaving
// the second return statement unreachable. Only the single-expression
// version is kept; behavior is identical.
static __inline WebRtc_Word16 WebRtcSpl_SubSatW16(WebRtc_Word16 var1,
                                                  WebRtc_Word16 var2) {
  return WebRtcSpl_SatW32ToW16((WebRtc_Word32) var1 - (WebRtc_Word32) var2);
}
static __inline WebRtc_Word32 WebRtcSpl_SubSatW32(WebRtc_Word32 l_var1,
@ -161,6 +148,12 @@ static __inline int WebRtcSpl_NormW16(WebRtc_Word16 a) {
return zeros;
}
// Multiply-accumulate (portable C fallback): returns a * b + c.
//
// Input:
//  - a : first 16-bit factor.
//  - b : second 16-bit factor.
//  - c : 32-bit accumulator value.
// Return: a * b + c. The 16x16 product itself cannot overflow the 32-bit
// intermediate; overflow is only possible when adding c.
static __inline int32_t WebRtc_MulAccumW16(int16_t a,
                                           int16_t b,
                                           int32_t c) {
  const int32_t product = (int32_t) a * (int32_t) b;
  return product + c;
}
#endif // WEBRTC_ARCH_ARM_V7A
#endif // WEBRTC_SPL_SPL_INL_H_

View File

@ -45,6 +45,14 @@ static __inline WebRtc_Word32 WEBRTC_SPL_MUL_16_16(WebRtc_Word16 a,
return tmp;
}
// Multiply-accumulate, ARMv7 version: returns a * b + c.
// Uses the SMLABB instruction, which multiplies the bottom 16-bit halves of
// its first two register operands and adds the third operand.
static __inline int32_t WebRtc_MulAccumW16(int16_t a,
int16_t b,
int32_t c) {
int32_t tmp = 0;
// tmp = (bottom16 of a) * (bottom16 of b) + c.
__asm__("smlabb %0, %1, %2, %3":"=r"(tmp):"r"(a), "r"(b), "r"(c));
return tmp;
}
static __inline WebRtc_Word16 WebRtcSpl_AddSatW16(WebRtc_Word16 a,
WebRtc_Word16 b) {
WebRtc_Word32 s_sum;
@ -119,4 +127,11 @@ static __inline int WebRtcSpl_NormW16(WebRtc_Word16 a) {
return tmp - 17;
}
// Saturates a 32-bit value into the 16-bit range, ARMv7 version.
// Uses SSAT to signed-saturate value32 to 16 bits in a single instruction.
// NOTE(review): the "%r0"/"%r1" operand spelling is unusual — plain "%0"/"%1"
// is the conventional GCC extended-asm form; confirm this assembles as
// intended on the target toolchain.
static __inline WebRtc_Word16 WebRtcSpl_SatW32ToW16(WebRtc_Word32 value32) {
WebRtc_Word16 out16;
__asm__("ssat %r0, #16, %r1" : "=r"(out16) : "r"(value32));
return out16;
}
#endif // WEBRTC_SPL_SPL_INL_ARMV7_H_

View File

@ -52,7 +52,7 @@ int WebRtcSpl_DownsampleFast(WebRtc_Word16 *in_ptr, WebRtc_Word16 in_length,
// If output is higher than 32768, saturate it. Same with negative side
*downsampled_ptr++ = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, o, -32768);
*downsampled_ptr++ = WebRtcSpl_SatW32ToW16(o);
}
return 0;

View File

@ -17,154 +17,165 @@
#include "signal_processing_library.h"
#ifdef WEBRTC_ARCH_ARM_V7A
// allpass filter coefficients.
static const WebRtc_UWord32 kResampleAllpass1[3] = {3284, 24441, 49528 << 15};
static const WebRtc_UWord32 kResampleAllpass2[3] =
{12199, 37471 << 15, 60255 << 15};
// Multiply two 32-bit values and accumulate to another input value.
// Return: state + ((diff * tbl_value) >> 16)
// ARMv7 multiply-accumulate used by the resampler allpass sections.
// Per the description above: returns state + ((diff * tbl_value) >> 16).
// SMLAWB computes acc + ((Rn * bottom16(Rm)) >> 16), so tbl_value is assumed
// to fit in the bottom 16 bits of its register (true for the Q15-style
// coefficients in kResampleAllpass*).
static __inline WebRtc_Word32 MUL_ACCUM_1(WebRtc_Word32 tbl_value,
WebRtc_Word32 diff,
WebRtc_Word32 state) {
WebRtc_Word32 result;
// result = state + ((diff * bottom16(tbl_value)) >> 16).
__asm__("smlawb %r0, %r1, %r2, %r3": "=r"(result): "r"(diff),
"r"(tbl_value), "r"(state));
return result;
}
// Multiply two 32-bit values and accumulate to another input value.
// Return: state + (((diff << 1) * tbl_value) >> 32)
//
// The reason to introduce this function is that, in case we can't use smlawb
// instruction (in MUL_ACCUM_1) due to input value range, we can still use
// smmla to save some cycles.
// ARMv7 multiply-accumulate variant for coefficients too large for SMLAWB.
// Per the description above: returns state + (((diff << 1) * tbl_value) >> 32).
// SMMLA computes acc + ((Rn * Rm) >> 32) on the full 64-bit product; the
// pre-shift of diff by 1 compensates so the net scaling matches MUL_ACCUM_1
// (note the corresponding kResampleAllpass* entries are pre-shifted << 15).
static __inline WebRtc_Word32 MUL_ACCUM_2(WebRtc_Word32 tbl_value,
WebRtc_Word32 diff,
WebRtc_Word32 state) {
WebRtc_Word32 result;
// result = state + (top 32 bits of the 64-bit product (diff << 1) * tbl_value).
__asm__("smmla %r0, %r1, %r2, %r3": "=r"(result): "r"(diff << 1),
"r"(tbl_value), "r"(state));
return result;
}
#else
// allpass filter coefficients.
static const WebRtc_UWord16 kResampleAllpass1[3] = {3284, 24441, 49528};
static const WebRtc_UWord16 kResampleAllpass2[3] = {12199, 37471, 60255};
// Multiply a 32-bit value with a 16-bit value and accumulate to another input:
#define MUL_ACCUM_1(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#define MUL_ACCUM_2(a, b, c) WEBRTC_SPL_SCALEDIFF32(a, b, c)
#endif // WEBRTC_ARCH_ARM_V7A
// decimator
void WebRtcSpl_DownsampleBy2(const WebRtc_Word16* in, const WebRtc_Word16 len,
WebRtc_Word16* out, WebRtc_Word32* filtState)
{
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
WebRtc_Word16* out, WebRtc_Word32* filtState) {
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
for (i = (len >> 1); i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
for (i = (len >> 1); i > 0; i--) {
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state2);
state2 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// upper allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state6);
state6 = tmp2;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// add two allpass outputs, divide by two and round
out32 = (state3 + state7 + 1024) >> 11;
// limit amplitude to prevent wrap-around, and write to output array
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1" : "=r"(*out) : "r"(out32));
out++;
#else
if (out32 > 32767)
*out++ = 32767;
else if (out32 < -32768)
*out++ = -32768;
else
*out++ = (WebRtc_Word16)out32;
#endif
}
// limit amplitude to prevent wrap-around, and write to output array
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
}
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len, WebRtc_Word16* out,
WebRtc_Word32* filtState)
{
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
void WebRtcSpl_UpsampleBy2(const WebRtc_Word16* in, WebRtc_Word16 len,
WebRtc_Word16* out, WebRtc_Word32* filtState) {
WebRtc_Word32 tmp1, tmp2, diff, in32, out32;
WebRtc_Word16 i;
for (i = len; i > 0; i--)
{
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass1[2], diff, state2);
state2 = tmp2;
register WebRtc_Word32 state0 = filtState[0];
register WebRtc_Word32 state1 = filtState[1];
register WebRtc_Word32 state2 = filtState[2];
register WebRtc_Word32 state3 = filtState[3];
register WebRtc_Word32 state4 = filtState[4];
register WebRtc_Word32 state5 = filtState[5];
register WebRtc_Word32 state6 = filtState[6];
register WebRtc_Word32 state7 = filtState[7];
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state3 + 512) >> 10;
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1":"=r"(*out): "r"(out32));
out++;
#else
if (out32 > 32767)
*out++ = 32767;
else if (out32 < -32768)
*out++ = -32768;
else
*out++ = (WebRtc_Word16)out32;
#endif
for (i = len; i > 0; i--) {
// lower allpass filter
in32 = (WebRtc_Word32)(*in++) << 10;
diff = in32 - state1;
tmp1 = MUL_ACCUM_1(kResampleAllpass1[0], diff, state0);
state0 = in32;
diff = tmp1 - state2;
tmp2 = MUL_ACCUM_1(kResampleAllpass1[1], diff, state1);
state1 = tmp1;
diff = tmp2 - state3;
state3 = MUL_ACCUM_2(kResampleAllpass1[2], diff, state2);
state2 = tmp2;
// upper allpass filter
diff = in32 - state5;
tmp1 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = WEBRTC_SPL_SCALEDIFF32(kResampleAllpass2[2], diff, state6);
state6 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state3 + 512) >> 10;
*out++ = WebRtcSpl_SatW32ToW16(out32);
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state7 + 512) >> 10;
#ifdef WEBRTC_ARCH_ARM_V7A
__asm__("ssat %r0, #16, %r1":"=r"(*out): "r"(out32));
out++;
#else
if (out32 > 32767)
*out++ = 32767;
else if (out32 < -32768)
*out++ = -32768;
else
*out++ = (WebRtc_Word16)out32;
#endif
}
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
// upper allpass filter
diff = in32 - state5;
tmp1 = MUL_ACCUM_1(kResampleAllpass2[0], diff, state4);
state4 = in32;
diff = tmp1 - state6;
tmp2 = MUL_ACCUM_2(kResampleAllpass2[1], diff, state5);
state5 = tmp1;
diff = tmp2 - state7;
state7 = MUL_ACCUM_2(kResampleAllpass2[2], diff, state6);
state6 = tmp2;
// round; limit amplitude to prevent wrap-around; write to output array
out32 = (state7 + 512) >> 10;
*out++ = WebRtcSpl_SatW32ToW16(out32);
}
filtState[0] = state0;
filtState[1] = state1;
filtState[2] = state2;
filtState[3] = state3;
filtState[4] = state4;
filtState[5] = state5;
filtState[6] = state6;
filtState[7] = state7;
}

View File

@ -147,13 +147,11 @@ void WebRtcSpl_AnalysisQMF(const WebRtc_Word16* in_data, WebRtc_Word16* low_band
{
tmp = filter1[i] + filter2[i] + 1024;
tmp = WEBRTC_SPL_RSHIFT_W32(tmp, 11);
low_band[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp, WEBRTC_SPL_WORD16_MIN);
low_band[i] = WebRtcSpl_SatW32ToW16(tmp);
tmp = filter1[i] - filter2[i] + 1024;
tmp = WEBRTC_SPL_RSHIFT_W32(tmp, 11);
high_band[i] = (WebRtc_Word16)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX,
tmp, WEBRTC_SPL_WORD16_MIN);
high_band[i] = WebRtcSpl_SatW32ToW16(tmp);
}
}
@ -191,10 +189,10 @@ void WebRtcSpl_SynthesisQMF(const WebRtc_Word16* low_band, const WebRtc_Word16*
for (i = 0, k = 0; i < kBandFrameLength; i++)
{
tmp = WEBRTC_SPL_RSHIFT_W32(filter2[i] + 512, 10);
out_data[k++] = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmp, -32768);
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
tmp = WEBRTC_SPL_RSHIFT_W32(filter1[i] + 512, 10);
out_data[k++] = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmp, -32768);
out_data[k++] = WebRtcSpl_SatW32ToW16(tmp);
}
}

View File

@ -125,7 +125,7 @@ void WebRtcSpl_ScaleVectorWithSat(G_CONST WebRtc_Word16 *in_vector, WebRtc_Word1
for (i = 0; i < in_vector_length; i++)
{
tmpW32 = WEBRTC_SPL_MUL_16_16_RSFT(*inptr++, gain, right_shifts);
( *outptr++) = (WebRtc_Word16)WEBRTC_SPL_SAT(32767, tmpW32, -32768);
(*outptr++) = WebRtcSpl_SatW32ToW16(tmpW32);
}
}

View File

@ -2,11 +2,9 @@ noinst_LTLIBRARIES = libvad.la
libvad_la_SOURCES = main/interface/webrtc_vad.h \
main/source/webrtc_vad.c \
main/source/vad_const.c \
main/source/vad_const.h \
main/source/vad_defines.h \
main/source/vad_core.c \
main/source/vad_core.h \
main/source/vad_defines.h \
main/source/vad_filterbank.c \
main/source/vad_filterbank.h \
main/source/vad_gmm.c \

View File

@ -1,2 +0,0 @@
bjornv@webrtc.org
jan.skoglund@webrtc.org

View File

@ -25,11 +25,9 @@
'sources': [
'../interface/webrtc_vad.h',
'webrtc_vad.c',
'vad_const.c',
'vad_const.h',
'vad_defines.h',
'vad_core.c',
'vad_core.h',
'vad_defines.h',
'vad_filterbank.c',
'vad_filterbank.h',
'vad_gmm.c',

View File

@ -1,80 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This file includes the constant values used internally in VAD.
*/
#include "vad_const.h"
// Spectrum Weighting
const WebRtc_Word16 kSpectrumWeight[6] = {6, 8, 10, 12, 14, 16};
const WebRtc_Word16 kCompVar = 22005;
// Constant 160*log10(2) in Q9
const WebRtc_Word16 kLogConst = 24660;
// Constant log2(exp(1)) in Q12
const WebRtc_Word16 kLog10Const = 5909;
// Q15
const WebRtc_Word16 kNoiseUpdateConst = 655;
const WebRtc_Word16 kSpeechUpdateConst = 6554;
// Q8
const WebRtc_Word16 kBackEta = 154;
// Coefficients used by WebRtcVad_HpOutput, Q14
const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631};
const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620};
// Allpass filter coefficients, upper and lower, in Q15
// Upper: 0.64, Lower: 0.17
const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571};
const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
// Minimum difference between the two models, Q5
const WebRtc_Word16 kMinimumDifference[6] = {544, 544, 576, 576, 576, 576};
// Upper limit of mean value for speech model, Q7
const WebRtc_Word16 kMaximumSpeech[6] = {11392, 11392, 11520, 11520, 11520, 11520};
// Minimum value for mean value
const WebRtc_Word16 kMinimumMean[2] = {640, 768};
// Upper limit of mean value for noise model, Q7
const WebRtc_Word16 kMaximumNoise[6] = {9216, 9088, 8960, 8832, 8704, 8576};
// Adjustment for division with two in WebRtcVad_SplitFilter
const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
// Start values for the Gaussian models, Q7
// Weights for the two Gaussians for the six channels (noise)
const WebRtc_Word16 kNoiseDataWeights[12] = {34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103};
// Weights for the two Gaussians for the six channels (speech)
const WebRtc_Word16 kSpeechDataWeights[12] = {48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81};
// Means for the two Gaussians for the six channels (noise)
const WebRtc_Word16 kNoiseDataMeans[12] = {6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863,
7820, 7266, 5020, 4362};
// Means for the two Gaussians for the six channels (speech)
const WebRtc_Word16 kSpeechDataMeans[12] = {8306, 10085, 10078, 11823, 11843, 6309, 9473,
9571, 10879, 7581, 8180, 7483};
// Stds for the two Gaussians for the six channels (noise)
const WebRtc_Word16 kNoiseDataStds[12] = {378, 1064, 493, 582, 688, 593, 474, 697, 475, 688,
421, 455};
// Stds for the two Gaussians for the six channels (speech)
const WebRtc_Word16 kSpeechDataStds[12] = {555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540,
1079, 850};

View File

@ -1,59 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
/*
* This header file includes the declarations of the internally used constants.
*/
#ifndef WEBRTC_VAD_CONST_H_
#define WEBRTC_VAD_CONST_H_
#include "typedefs.h"
// TODO(ajm): give these internal-linkage by moving to the appropriate file
// where possible, and otherwise tag with WebRtcVad_.
// Spectrum Weighting
extern const WebRtc_Word16 kSpectrumWeight[];
extern const WebRtc_Word16 kCompVar;
// Logarithm constant
extern const WebRtc_Word16 kLogConst;
extern const WebRtc_Word16 kLog10Const;
// Q15
extern const WebRtc_Word16 kNoiseUpdateConst;
extern const WebRtc_Word16 kSpeechUpdateConst;
// Q8
extern const WebRtc_Word16 kBackEta;
// Coefficients used by WebRtcVad_HpOutput, Q14
extern const WebRtc_Word16 kHpZeroCoefs[];
extern const WebRtc_Word16 kHpPoleCoefs[];
// Allpass filter coefficients, upper and lower, in Q15 resp. Q13
extern const WebRtc_Word16 kAllPassCoefsQ15[];
extern const WebRtc_Word16 kAllPassCoefsQ13[];
// Minimum difference between the two models, Q5
extern const WebRtc_Word16 kMinimumDifference[];
// Maximum value when updating the speech model, Q7
extern const WebRtc_Word16 kMaximumSpeech[];
// Minimum value for mean value
extern const WebRtc_Word16 kMinimumMean[];
// Upper limit of mean value for noise model, Q7
extern const WebRtc_Word16 kMaximumNoise[];
// Adjustment for division with two in WebRtcVad_SplitFilter
extern const WebRtc_Word16 kOffsetVector[];
// Start values for the Gaussian models, Q7
extern const WebRtc_Word16 kNoiseDataWeights[];
extern const WebRtc_Word16 kSpeechDataWeights[];
extern const WebRtc_Word16 kNoiseDataMeans[];
extern const WebRtc_Word16 kSpeechDataMeans[];
extern const WebRtc_Word16 kNoiseDataStds[];
extern const WebRtc_Word16 kSpeechDataStds[];
#endif // WEBRTC_VAD_CONST_H_

View File

@ -15,12 +15,50 @@
*/
#include "vad_core.h"
#include "vad_const.h"
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
#include "vad_filterbank.h"
#include "vad_gmm.h"
#include "vad_sp.h"
#include "signal_processing_library.h"
// Spectrum Weighting
static const WebRtc_Word16 kSpectrumWeight[6] = { 6, 8, 10, 12, 14, 16 };
static const WebRtc_Word16 kNoiseUpdateConst = 655; // Q15
static const WebRtc_Word16 kSpeechUpdateConst = 6554; // Q15
static const WebRtc_Word16 kBackEta = 154; // Q8
// Minimum difference between the two models, Q5
static const WebRtc_Word16 kMinimumDifference[6] = {
544, 544, 576, 576, 576, 576 };
// Upper limit of mean value for speech model, Q7
static const WebRtc_Word16 kMaximumSpeech[6] = {
11392, 11392, 11520, 11520, 11520, 11520 };
// Minimum value for mean value
static const WebRtc_Word16 kMinimumMean[2] = { 640, 768 };
// Upper limit of mean value for noise model, Q7
static const WebRtc_Word16 kMaximumNoise[6] = {
9216, 9088, 8960, 8832, 8704, 8576 };
// Start values for the Gaussian models, Q7
// Weights for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataWeights[12] = {
34, 62, 72, 66, 53, 25, 94, 66, 56, 62, 75, 103 };
// Weights for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataWeights[12] = {
48, 82, 45, 87, 50, 47, 80, 46, 83, 41, 78, 81 };
// Means for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataMeans[12] = {
6738, 4892, 7065, 6715, 6771, 3369, 7646, 3863, 7820, 7266, 5020, 4362 };
// Means for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataMeans[12] = {
8306, 10085, 10078, 11823, 11843, 6309, 9473, 9571, 10879, 7581, 8180, 7483
};
// Stds for the two Gaussians for the six channels (noise)
static const WebRtc_Word16 kNoiseDataStds[12] = {
378, 1064, 493, 582, 688, 593, 474, 697, 475, 688, 421, 455 };
// Stds for the two Gaussians for the six channels (speech)
static const WebRtc_Word16 kSpeechDataStds[12] = {
555, 505, 567, 524, 585, 1231, 509, 828, 492, 1540, 1079, 850 };
static const int kInitCheck = 42;

View File

@ -15,9 +15,21 @@
*/
#include "vad_filterbank.h"
#include "vad_defines.h"
#include "vad_const.h"
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
// Constant 160*log10(2) in Q9
static const WebRtc_Word16 kLogConst = 24660;
// Coefficients used by WebRtcVad_HpOutput, Q14
static const WebRtc_Word16 kHpZeroCoefs[3] = {6631, -13262, 6631};
static const WebRtc_Word16 kHpPoleCoefs[3] = {16384, -7756, 5620};
// Allpass filter coefficients, upper and lower, in Q15
// Upper: 0.64, Lower: 0.17
static const WebRtc_Word16 kAllPassCoefsQ15[2] = {20972, 5571};
// Adjustment for division with two in WebRtcVad_SplitFilter
static const WebRtc_Word16 kOffsetVector[6] = {368, 368, 272, 176, 176, 176};
void WebRtcVad_HpOutput(WebRtc_Word16 *in_vector,
WebRtc_Word16 in_vector_length,

View File

@ -15,8 +15,13 @@
*/
#include "vad_gmm.h"
#include "signal_processing_library.h"
#include "vad_const.h"
#include "typedefs.h"
static const WebRtc_Word32 kCompVar = 22005;
// Constant log2(exp(1)) in Q12
static const WebRtc_Word16 kLog10Const = 5909;
WebRtc_Word32 WebRtcVad_GaussianProbability(WebRtc_Word16 in_sample,
WebRtc_Word16 mean,

View File

@ -10,15 +10,20 @@
/*
* This file includes the implementation of the VAD internal calls for Downsampling and
* FindMinimum.
* This file includes the implementation of the VAD internal calls for
* Downsampling and FindMinimum.
* For function call descriptions; See vad_sp.h.
*/
#include "vad_sp.h"
#include "vad_defines.h"
#include "vad_const.h"
#include "signal_processing_library.h"
#include "typedefs.h"
#include "vad_defines.h"
// Allpass filter coefficients, upper and lower, in Q13
// Upper: 0.64, Lower: 0.17
static const WebRtc_Word16 kAllPassCoefsQ13[2] = {5243, 1392}; // Q13
// Downsampling filter based on the splitting filter and the allpass functions
// in vad_filterbank.c

View File

@ -485,6 +485,7 @@ enum RawVideoType
// Video codec
enum { kConfigParameterSize = 128};
enum { kPayloadNameSize = 32};
enum { kMaxSimulcastStreams = 4};
// H.263 specific
struct VideoCodecH263
@ -530,9 +531,10 @@ struct VideoCodecH264
// VP8 specific
struct VideoCodecVP8
{
bool pictureLossIndicationOn;
bool feedbackModeOn;
VideoCodecComplexity complexity;
bool pictureLossIndicationOn;
bool feedbackModeOn;
VideoCodecComplexity complexity;
unsigned char numberOfTemporalLayers;
};
// MPEG-4 specific
@ -570,6 +572,19 @@ union VideoCodecUnion
VideoCodecGeneric Generic;
};
/*
* Simulcast is when the same stream is encoded multiple times with different
* settings such as resolution.
*/
struct SimulcastStream
{
unsigned short width;                  // Frame width of this simulcast layer.
unsigned short height;                 // Frame height of this simulcast layer.
unsigned char numberOfTemporalLayers;  // Temporal layers within this stream.
unsigned int maxBitrate;               // Maximum bitrate cap — units not shown
                                       // here; presumably kbit/s, TODO confirm.
unsigned int qpMax;                    // Highest allowed QP, i.e. the minimum
                                       // quality floor for this stream.
};
// Common video codec properties
struct VideoCodec
{
@ -588,8 +603,8 @@ struct VideoCodec
VideoCodecUnion codecSpecific;
unsigned int qpMax;
unsigned char numberOfSimulcastStreams;
SimulcastStream simulcastStream[kMaxSimulcastStreams];
};
} // namespace webrtc
#endif // WEBRTC_COMMON_TYPES_H

View File

@ -38,6 +38,7 @@ typedef struct {
WebRtc_Word16 nlpMode; // default kAecNlpModerate
WebRtc_Word16 skewMode; // default kAecFalse
WebRtc_Word16 metricsMode; // default kAecFalse
int delay_logging; // default kAecFalse
//float realSkew;
} AecConfig;
@ -66,7 +67,7 @@ extern "C" {
* Inputs Description
* -------------------------------------------------------------------
* void **aecInst Pointer to the AEC instance to be created
* and initilized
* and initialized
*
* Outputs Description
* -------------------------------------------------------------------
@ -225,6 +226,23 @@ WebRtc_Word32 WebRtcAec_get_echo_status(void *aecInst, WebRtc_Word16 *status);
*/
WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics);
/*
* Gets the current delay metrics for the session.
*
* Inputs Description
* -------------------------------------------------------------------
* void* handle Pointer to the AEC instance
*
* Outputs Description
* -------------------------------------------------------------------
* int* median Delay median value.
* int* std Delay standard deviation.
*
* int return 0: OK
* -1: error
*/
int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std);
/*
* Gets the last error code.
*

View File

@ -1,953 +0,0 @@
% Partitioned block frequency domain adaptive filtering NLMS and
% standard time-domain sample-based NLMS
%fid=fopen('aecFar-samsung.pcm', 'rb'); % Load far end
fid=fopen('aecFar.pcm', 'rb'); % Load far end
%fid=fopen(farFile, 'rb'); % Load far end
rrin=fread(fid,inf,'int16');
fclose(fid);
%rrin=loadsl('data/far_me2.pcm'); % Load far end
%fid=fopen('aecNear-samsung.pcm', 'rb'); % Load near end
fid=fopen('aecNear.pcm', 'rb'); % Load near end
%fid=fopen(nearFile, 'rb'); % Load near end
ssin=fread(fid,inf,'int16');
%ssin = [zeros(1024,1) ; ssin(1:end-1024)];
fclose(fid);
rand('state',13);
fs=16000;
mult=fs/8000;
%rrin=rrin(fs*0+1:round(fs*120));
%ssin=ssin(fs*0+1:round(fs*120));
if fs == 8000
cohRange = 2:3;
elseif fs==16000
cohRange = 2;
end
% Flags
NLPon=1; % NLP
CNon=1; % Comfort noise
PLTon=1; % Plotting
M = 16; % Number of partitions
N = 64; % Partition length
L = M*N; % Filter length
if fs == 8000
mufb = 0.6;
else
mufb = 0.5;
end
%mufb=1;
VADtd=48;
alp = 0.1; % Power estimation factor alc = 0.1; % Coherence estimation factor
beta = 0.9; % Plotting factor
%% Changed a little %%
step = 0.3;%0.1875; % Downward step size
%%
if fs == 8000
threshold=2e-6; % DTrob threshold
else
%threshold=0.7e-6;
threshold=1.5e-6; end
if fs == 8000
echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N);
%echoBandRange = ceil(1500*2/fs*N):floor(2500*2/fs*N);
else
echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N);
%echoBandRange = ceil(300*2/fs*N):floor(1800*2/fs*N);
end
%echoBandRange = ceil(1600*2/fs*N):floor(1900*2/fs*N);
%echoBandRange = ceil(2000*2/fs*N):floor(4000*2/fs*N);
suppState = 1;
transCtr = 0;
Nt=1;
vt=1;
ramp = 1.0003; % Upward ramp
rampd = 0.999; % Downward ramp
cvt = 20; % Subband VAD threshold;
nnthres = 20; % Noise threshold
shh=logspace(-1.3,-2.2,N+1)';
sh=[shh;flipud(shh(2:end-1))]; % Suppression profile
len=length(ssin);
w=zeros(L,1); % Sample-based TD NLMS
WFb=zeros(N+1,M); % Block-based FD NLMS
WFbOld=zeros(N+1,M); % Block-based FD NLMS
YFb=zeros(N+1,M);
erfb=zeros(len,1);
erfb3=zeros(len,1);
ercn=zeros(len,1);
zm=zeros(N,1);
XFm=zeros(N+1,M);
YFm=zeros(N+1,M);
pn0=10*ones(N+1,1);
pn=zeros(N+1,1);
NN=len;
Nb=floor(NN/N)-M;
erifb=zeros(Nb+1,1)+0.1;
erifb3=zeros(Nb+1,1)+0.1;
ericn=zeros(Nb+1,1)+0.1;
dri=zeros(Nb+1,1)+0.1;
start=1;
xo=zeros(N,1);
do=xo;
eo=xo;
echoBands=zeros(Nb+1,1);
cohxdAvg=zeros(Nb+1,1);
cohxdSlow=zeros(Nb+1,N+1);
cohedSlow=zeros(Nb+1,N+1);
%overdriveM=zeros(Nb+1,N+1);
cohxdFastAvg=zeros(Nb+1,1);
cohxdAvgBad=zeros(Nb+1,1);
cohedAvg=zeros(Nb+1,1);
cohedFastAvg=zeros(Nb+1,1);
hnledAvg=zeros(Nb+1,1);
hnlxdAvg=zeros(Nb+1,1);
ovrdV=zeros(Nb+1,1);
dIdxV=zeros(Nb+1,1);
SLxV=zeros(Nb+1,1);
hnlSortQV=zeros(Nb+1,1);
hnlPrefAvgV=zeros(Nb+1,1);
mutInfAvg=zeros(Nb+1,1);
%overdrive=zeros(Nb+1,1);
hnled = zeros(N+1, 1);
weight=zeros(N+1,1);
hnlMax = zeros(N+1, 1);
hnl = zeros(N+1, 1);
overdrive = ones(1, N+1);
xfwm=zeros(N+1,M);
dfm=zeros(N+1,M);
WFbD=ones(N+1,1);
fbSupp = 0;
hnlLocalMin = 1;
cohxdLocalMin = 1;
hnlLocalMinV=zeros(Nb+1,1);
cohxdLocalMinV=zeros(Nb+1,1);
hnlMinV=zeros(Nb+1,1);
dkEnV=zeros(Nb+1,1);
ekEnV=zeros(Nb+1,1);
ovrd = 2;
ovrdPos = floor((N+1)/4);
ovrdSm = 2;
hnlMin = 1;
minCtr = 0;
SeMin = 0;
SdMin = 0;
SeLocalAvg = 0;
SeMinSm = 0;
divergeFact = 1;
dIdx = 1;
hnlMinCtr = 0;
hnlNewMin = 0;
divergeState = 0;
Sy=ones(N+1,1);
Sym=1e7*ones(N+1,1);
wins=[0;sqrt(hanning(2*N-1))];
ubufn=zeros(2*N,1);
ebuf=zeros(2*N,1);
ebuf2=zeros(2*N,1);
ebuf4=zeros(2*N,1);
mbuf=zeros(2*N,1);
cohedFast = zeros(N+1,1);
cohxdFast = zeros(N+1,1);
cohxd = zeros(N+1,1);
Se = zeros(N+1,1);
Sd = zeros(N+1,1);
Sx = zeros(N+1,1);
SxBad = zeros(N+1,1);
Sed = zeros(N+1,1);
Sxd = zeros(N+1,1);
SxdBad = zeros(N+1,1);
hnledp=[];
cohxdMax = 0;
%hh=waitbar(0,'Please wait...');
progressbar(0);
%spaces = ' ';
%spaces = repmat(spaces, 50, 1);
%spaces = ['[' ; spaces ; ']'];
%fprintf(1, spaces);
%fprintf(1, '\n');
for kk=1:Nb
pos = N * (kk-1) + start;
% FD block method
% ---------------------- Organize data
xk = rrin(pos:pos+N-1);
dk = ssin(pos:pos+N-1);
xx = [xo;xk];
xo = xk;
tmp = fft(xx);
XX = tmp(1:N+1);
dd = [do;dk]; % Overlap
do = dk;
tmp = fft(dd); % Frequency domain
DD = tmp(1:N+1);
% ------------------------ Power estimation
pn0 = (1 - alp) * pn0 + alp * real(XX.* conj(XX));
pn = pn0;
%pn = (1 - alp) * pn + alp * M * pn0;
if (CNon)
Yp = real(conj(DD).*DD); % Instantaneous power
Sy = (1 - alp) * Sy + alp * Yp; % Averaged power
mm = min(Sy,Sym);
diff = Sym - mm;
if (kk>50)
Sym = (mm + step*diff) * ramp; % Estimated background noise power
end
end
% ---------------------- Filtering
XFm(:,1) = XX;
for mm=0:(M-1)
m=mm+1;
YFb(:,m) = XFm(:,m) .* WFb(:,m);
end
yfk = sum(YFb,2);
tmp = [yfk ; flipud(conj(yfk(2:N)))];
ykt = real(ifft(tmp));
ykfb = ykt(end-N+1:end);
% ---------------------- Error estimation
ekfb = dk - ykfb;
%if sum(abs(ekfb)) < sum(abs(dk))
%ekfb = dk - ykfb;
% erfb(pos:pos+N-1) = ekfb;
%else
%ekfb = dk;
% erfb(pos:pos+N-1) = dk;
%end
%(kk-1)*(N*2)+1
erfb(pos:pos+N-1) = ekfb;
tmp = fft([zm;ekfb]); % FD version for cancelling part (overlap-save)
Ek = tmp(1:N+1);
% ------------------------ Adaptation
Ek2 = Ek ./(M*pn + 0.001); % Normalized error
%Ek2 = Ek ./(pn + 0.001); % Normalized error
%Ek2 = Ek ./(100*pn + 0.001); % Normalized error
absEf = max(abs(Ek2), threshold);
absEf = ones(N+1,1)*threshold./absEf;
Ek2 = Ek2.*absEf;
mEk = mufb.*Ek2;
PP = conj(XFm).*(ones(M,1) * mEk')';
tmp = [PP ; flipud(conj(PP(2:N,:)))];
IFPP = real(ifft(tmp));
PH = IFPP(1:N,:);
tmp = fft([PH;zeros(N,M)]);
FPH = tmp(1:N+1,:);
WFb = WFb + FPH;
if mod(kk, 10*mult) == 0
WFbEn = sum(real(WFb.*conj(WFb)));
%WFbEn = sum(abs(WFb));
[tmp, dIdx] = max(WFbEn);
WFbD = sum(abs(WFb(:, dIdx)),2);
%WFbD = WFbD / (mean(WFbD) + 1e-10);
WFbD = min(max(WFbD, 0.5), 4);
end
dIdxV(kk) = dIdx;
% NLP
if (NLPon)
ee = [eo;ekfb];
eo = ekfb;
window = wins;
if fs == 8000
%gamma = 0.88;
gamma = 0.9;
else
%gamma = 0.92;
gamma = 0.93;
end
%gamma = 0.9;
tmp = fft(xx.*window);
xf = tmp(1:N+1);
tmp = fft(dd.*window);
df = tmp(1:N+1);
tmp = fft(ee.*window);
ef = tmp(1:N+1);
xfwm(:,1) = xf;
xf = xfwm(:,dIdx);
%fprintf(1,'%d: %f\n', kk, xf(4));
dfm(:,1) = df;
SxOld = Sx;
Se = gamma*Se + (1-gamma)*real(ef.*conj(ef));
Sd = gamma*Sd + (1-gamma)*real(df.*conj(df));
Sx = gamma*Sx + (1 - gamma)*real(xf.*conj(xf));
%xRatio = real(xfwm(:,1).*conj(xfwm(:,1))) ./ ...
% (real(xfwm(:,2).*conj(xfwm(:,2))) + 1e-10);
%xRatio = Sx ./ (SxOld + 1e-10);
%SLx = log(1/(N+1)*sum(xRatio)) - 1/(N+1)*sum(log(xRatio));
%SLxV(kk) = SLx;
%freqSm = 0.9;
%Sx = filter(freqSm, [1 -(1-freqSm)], Sx);
%Sx(end:1) = filter(freqSm, [1 -(1-freqSm)], Sx(end:1));
%Se = filter(freqSm, [1 -(1-freqSm)], Se);
%Se(end:1) = filter(freqSm, [1 -(1-freqSm)], Se(end:1));
%Sd = filter(freqSm, [1 -(1-freqSm)], Sd);
%Sd(end:1) = filter(freqSm, [1 -(1-freqSm)], Sd(end:1));
%SeFast = ef.*conj(ef);
%SdFast = df.*conj(df);
%SxFast = xf.*conj(xf);
%cohedFast = 0.9*cohedFast + 0.1*SeFast ./ (SdFast + 1e-10);
%cohedFast(find(cohedFast > 1)) = 1;
%cohedFast(find(cohedFast > 1)) = 1 ./ cohedFast(find(cohedFast>1));
%cohedFastAvg(kk) = mean(cohedFast(echoBandRange));
%cohedFastAvg(kk) = min(cohedFast);
%cohxdFast = 0.8*cohxdFast + 0.2*log(SdFast ./ (SxFast + 1e-10));
%cohxdFastAvg(kk) = mean(cohxdFast(echoBandRange));
% coherence
Sxd = gamma*Sxd + (1 - gamma)*xf.*conj(df);
Sed = gamma*Sed + (1-gamma)*ef.*conj(df);
%Sxd = filter(freqSm, [1 -(1-freqSm)], Sxd);
%Sxd(end:1) = filter(freqSm, [1 -(1-freqSm)], Sxd(end:1));
%Sed = filter(freqSm, [1 -(1-freqSm)], Sed);
%Sed(end:1) = filter(freqSm, [1 -(1-freqSm)], Sed(end:1));
cohed = real(Sed.*conj(Sed))./(Se.*Sd + 1e-10);
%cohedAvg(kk) = mean(cohed(echoBandRange));
%cohedAvg(kk) = cohed(6);
%cohedAvg(kk) = min(cohed);
cohxd = real(Sxd.*conj(Sxd))./(Sx.*Sd + 1e-10);
%freqSm = 0.5;
%cohxd(3:end) = filter(freqSm, [1 -(1-freqSm)], cohxd(3:end));
%cohxd(end:3) = filter(freqSm, [1 -(1-freqSm)], cohxd(end:3));
%cohxdAvg(kk) = mean(cohxd(echoBandRange));
%cohxdAvg(kk) = (cohxd(32));
%cohxdAvg(kk) = max(cohxd);
%xf = xfm(:,dIdx);
%SxBad = gamma*SxBad + (1 - gamma)*real(xf.*conj(xf));
%SxdBad = gamma*SxdBad + (1 - gamma)*xf.*conj(df);
%cohxdBad = real(SxdBad.*conj(SxdBad))./(SxBad.*Sd + 0.01);
%cohxdAvgBad(kk) = mean(cohxdBad);
%for j=1:N+1
% mutInf(j) = 0.9*mutInf(j) + 0.1*information(abs(xfm(j,:)), abs(dfm(j,:)));
%end
%mutInfAvg(kk) = mean(mutInf);
%hnled = cohedFast;
%xIdx = find(cohxd > 1 - cohed);
%hnled(xIdx) = 1 - cohxd(xIdx);
%hnled = 1 - max(cohxd, 1-cohedFast);
hnled = min(1 - cohxd, cohed);
%hnled = 1 - cohxd;
%hnled = max(1 - (cohxd + (1-cohedFast)), 0);
%hnled = 1 - max(cohxd, 1-cohed);
if kk > 1
cohxdSlow(kk,:) = 0.99*cohxdSlow(kk-1,:) + 0.01*cohxd';
cohedSlow(kk,:) = 0.99*cohedSlow(kk-1,:) + 0.01*(1-cohed)';
end
if 0
%if kk > 50
%idx = find(hnled > 0.3);
hnlMax = hnlMax*0.9999;
%hnlMax(idx) = max(hnlMax(idx), hnled(idx));
hnlMax = max(hnlMax, hnled);
%overdrive(idx) = max(log(hnlMax(idx))/log(0.99), 1);
avgHnl = mean(hnlMax(echoBandRange));
if avgHnl > 0.3
overdrive = max(log(avgHnl)/log(0.99), 1);
end
weight(4:end) = max(hnlMax) - hnlMax(4:end);
end
%[hg, gidx] = max(hnled);
%fnrg = Sx(gidx) / (Sd(gidx) + 1e-10);
%[tmp, bidx] = find((Sx / Sd + 1e-10) > fnrg);
%hnled(bidx) = hg;
%cohed1 = mean(cohed(cohRange)); % range depends on bandwidth
%cohed1 = cohed1^2;
%echoBands(kk) = length(find(cohed(echoBandRange) < 0.25))/length(echoBandRange);
%if (fbSupp == 0)
% if (echoBands(kk) > 0.8)
% fbSupp = 1;
% end
%else
% if (echoBands(kk) < 0.6)
% fbSupp = 0;
% end
%end
%overdrive(kk) = 7.5*echoBands(kk) + 0.5;
% Factor by which to weight other bands
%if (cohed1 < 0.1)
% w = 0.8 - cohed1*10*0.4;
%else
% w = 0.4;
%end
% Weight coherence subbands
%hnled = w*cohed1 + (1 - w)*cohed;
%hnled = (hnled).^2;
%cohed(floor(N/2):end) = cohed(floor(N/2):end).^2;
%if fbSupp == 1
% cohed = zeros(size(cohed));
%end
%cohed = cohed.^overdrive(kk);
%hnled = gamma*hnled + (1 - gamma)*cohed;
% Additional hf suppression
%hnledp = [hnledp ; mean(hnled)];
%hnled(floor(N/2):end) = hnled(floor(N/2):end).^2;
%ef = ef.*((weight*(min(1 - hnled)).^2 + (1 - weight).*(1 - hnled)).^2);
cohedMean = mean(cohed(echoBandRange));
%aggrFact = 4*(1-mean(hnled(echoBandRange))) + 1;
%[hnlSort, hnlSortIdx] = sort(hnled(echoBandRange));
[hnlSort, hnlSortIdx] = sort(1-cohxd(echoBandRange));
[xSort, xSortIdx] = sort(Sx);
%aggrFact = (1-mean(hnled(echoBandRange)));
%hnlSortQ = hnlSort(qIdx);
hnlSortQ = mean(1 - cohxd(echoBandRange));
%hnlSortQ = mean(1 - cohxd);
[hnlSort2, hnlSortIdx2] = sort(hnled(echoBandRange));
%[hnlSort2, hnlSortIdx2] = sort(hnled);
hnlQuant = 0.75;
hnlQuantLow = 0.5;
qIdx = floor(hnlQuant*length(hnlSort2));
qIdxLow = floor(hnlQuantLow*length(hnlSort2));
hnlPrefAvg = hnlSort2(qIdx);
hnlPrefAvgLow = hnlSort2(qIdxLow);
%hnlPrefAvgLow = mean(hnled);
%hnlPrefAvg = max(hnlSort2);
%hnlPrefAvgLow = min(hnlSort2);
%hnlPref = hnled(echoBandRange);
%hnlPrefAvg = mean(hnlPref(xSortIdx((0.5*length(xSortIdx)):end)));
%hnlPrefAvg = min(hnlPrefAvg, hnlSortQ);
%hnlSortQIdx = hnlSortIdx(qIdx);
%SeQ = Se(qIdx + echoBandRange(1) - 1);
%SdQ = Sd(qIdx + echoBandRange(1) - 1);
%SeQ = Se(qIdxLow + echoBandRange(1) - 1);
%SdQ = Sd(qIdxLow + echoBandRange(1) - 1);
%propLow = length(find(hnlSort < 0.1))/length(hnlSort);
%aggrFact = min((1 - hnlSortQ)/2, 0.5);
%aggrTerm = 1/aggrFact;
%hnlg = mean(hnled(echoBandRange));
%hnlg = hnlSortQ;
%if suppState == 0
% if hnlg < 0.05
% suppState = 2;
% transCtr = 0;
% elseif hnlg < 0.75
% suppState = 1;
% transCtr = 0;
% end
%elseif suppState == 1
% if hnlg > 0.8
% suppState = 0;
% transCtr = 0;
% elseif hnlg < 0.05
% suppState = 2;
% transCtr = 0;
% end
%else
% if hnlg > 0.8
% suppState = 0;
% transCtr = 0;
% elseif hnlg > 0.25
% suppState = 1;
% transCtr = 0;
% end
%end
%if kk > 50
if cohedMean > 0.98 & hnlSortQ > 0.9
%if suppState == 1
% hnled = 0.5*hnled + 0.5*cohed;
% %hnlSortQ = 0.5*hnlSortQ + 0.5*cohedMean;
% hnlPrefAvg = 0.5*hnlPrefAvg + 0.5*cohedMean;
%else
% hnled = cohed;
% %hnlSortQ = cohedMean;
% hnlPrefAvg = cohedMean;
%end
suppState = 0;
elseif cohedMean < 0.95 | hnlSortQ < 0.8
%if suppState == 0
% hnled = 0.5*hnled + 0.5*cohed;
% %hnlSortQ = 0.5*hnlSortQ + 0.5*cohedMean;
% hnlPrefAvg = 0.5*hnlPrefAvg + 0.5*cohedMean;
%end
suppState = 1;
end
if hnlSortQ < cohxdLocalMin & hnlSortQ < 0.75
cohxdLocalMin = hnlSortQ;
end
if cohxdLocalMin == 1
ovrd = 3;
hnled = 1-cohxd;
hnlPrefAvg = hnlSortQ;
hnlPrefAvgLow = hnlSortQ;
end
if suppState == 0
hnled = cohed;
hnlPrefAvg = cohedMean;
hnlPrefAvgLow = cohedMean;
end
%if hnlPrefAvg < hnlLocalMin & hnlPrefAvg < 0.6
if hnlPrefAvgLow < hnlLocalMin & hnlPrefAvgLow < 0.6
%hnlLocalMin = hnlPrefAvg;
%hnlMin = hnlPrefAvg;
hnlLocalMin = hnlPrefAvgLow;
hnlMin = hnlPrefAvgLow;
hnlNewMin = 1;
hnlMinCtr = 0;
%if hnlMinCtr == 0
% hnlMinCtr = hnlMinCtr + 1;
%else
% hnlMinCtr = 0;
% hnlMin = hnlLocalMin;
%SeLocalMin = SeQ;
%SdLocalMin = SdQ;
%SeLocalAvg = 0;
%minCtr = 0;
% ovrd = max(log(0.0001)/log(hnlMin), 2);
%divergeFact = hnlLocalMin;
end
if hnlNewMin == 1
hnlMinCtr = hnlMinCtr + 1;
end
if hnlMinCtr == 2
hnlNewMin = 0;
hnlMinCtr = 0;
%ovrd = max(log(0.0001)/log(hnlMin), 2);
ovrd = max(log(0.00001)/(log(hnlMin + 1e-10) + 1e-10), 3);
%ovrd = max(log(0.00000001)/(log(hnlMin + 1e-10) + 1e-10), 5);
%ovrd = max(log(0.0001)/log(hnlPrefAvg), 2);
%ovrd = max(log(0.001)/log(hnlMin), 2);
end
hnlLocalMin = min(hnlLocalMin + 0.0008/mult, 1);
cohxdLocalMin = min(cohxdLocalMin + 0.0004/mult, 1);
%divergeFact = hnlSortQ;
%if minCtr > 0 & hnlLocalMin < 1
% hnlMin = hnlLocalMin;
% %SeMin = 0.9*SeMin + 0.1*sqrt(SeLocalMin);
% SdMin = sqrt(SdLocalMin);
% %SeMin = sqrt(SeLocalMin)*hnlSortQ;
% SeMin = sqrt(SeLocalMin);
% %ovrd = log(100/SeMin)/log(hnlSortQ);
% %ovrd = log(100/SeMin)/log(hnlSortQ);
% ovrd = log(0.01)/log(hnlMin);
% ovrd = max(ovrd, 2);
% ovrdPos = hnlSortQIdx;
% %ovrd = max(ovrd, 1);
% %SeMin = sqrt(SeLocalAvg/5);
% minCtr = 0;
%else
% %SeLocalMin = 0.9*SeLocalMin +0.1*SeQ;
% SeLocalAvg = SeLocalAvg + SeQ;
% minCtr = minCtr + 1;
%end
if ovrd < ovrdSm
ovrdSm = 0.99*ovrdSm + 0.01*ovrd;
else
ovrdSm = 0.9*ovrdSm + 0.1*ovrd;
end
%end
%ekEn = sum(real(ekfb.^2));
%dkEn = sum(real(dk.^2));
ekEn = sum(Se);
dkEn = sum(Sd);
if divergeState == 0
if ekEn > dkEn
ef = df;
divergeState = 1;
%hnlPrefAvg = hnlSortQ;
%hnled = (1 - cohxd);
end
else
%if ekEn*1.1 < dkEn
%if ekEn*1.26 < dkEn
if ekEn*1.05 < dkEn
divergeState = 0;
else
ef = df;
end
end
if ekEn > dkEn*19.95
WFb=zeros(N+1,M); % Block-based FD NLMS
end
ekEnV(kk) = ekEn;
dkEnV(kk) = dkEn;
hnlLocalMinV(kk) = hnlLocalMin;
cohxdLocalMinV(kk) = cohxdLocalMin;
hnlMinV(kk) = hnlMin;
%cohxdMaxLocal = max(cohxdSlow(kk,:));
%if kk > 50
%cohxdMaxLocal = 1-hnlSortQ;
%if cohxdMaxLocal > 0.5
% %if cohxdMaxLocal > cohxdMax
% odScale = max(log(cohxdMaxLocal)/log(0.95), 1);
% %overdrive(7:end) = max(log(cohxdSlow(kk,7:end))/log(0.9), 1);
% cohxdMax = cohxdMaxLocal;
% end
%end
%end
%cohxdMax = cohxdMax*0.999;
%overdriveM(kk,:) = max(overdrive, 1);
%aggrFact = 0.25;
aggrFact = 0.3;
%aggrFact = 0.5*propLow;
%if fs == 8000
% wCurve = [0 ; 0 ; aggrFact*sqrt(linspace(0,1,N-1))' + 0.1];
%else
% wCurve = [0; 0; 0; aggrFact*sqrt(linspace(0,1,N-2))' + 0.1];
%end
wCurve = [0; aggrFact*sqrt(linspace(0,1,N))' + 0.1];
% For sync with C
%if fs == 8000
% wCurve = wCurve(2:end);
%else
% wCurve = wCurve(1:end-1);
%end
%weight = aggrFact*(sqrt(linspace(0,1,N+1)'));
%weight = aggrFact*wCurve;
weight = wCurve;
%weight = aggrFact*ones(N+1,1);
%weight = zeros(N+1,1);
%hnled = weight.*min(hnled) + (1 - weight).*hnled;
%hnled = weight.*min(mean(hnled(echoBandRange)), hnled) + (1 - weight).*hnled;
%hnled = weight.*min(hnlSortQ, hnled) + (1 - weight).*hnled;
%hnlSortQV(kk) = mean(hnled);
%hnlPrefAvgV(kk) = mean(hnled(echoBandRange));
hnled = weight.*min(hnlPrefAvg, hnled) + (1 - weight).*hnled;
%od = aggrFact*(sqrt(linspace(0,1,N+1)') + aggrTerm);
%od = 4*(sqrt(linspace(0,1,N+1)') + 1/4);
%ovrdFact = (ovrdSm - 1) / sqrt(ovrdPos/(N+1));
%ovrdFact = ovrdSm / sqrt(echoBandRange(floor(length(echoBandRange)/2))/(N+1));
%od = ovrdFact*sqrt(linspace(0,1,N+1))' + 1;
%od = ovrdSm*ones(N+1,1).*abs(WFb(:,dIdx))/(max(abs(WFb(:,dIdx)))+1e-10);
%od = ovrdSm*ones(N+1,1);
%od = ovrdSm*WFbD.*(sqrt(linspace(0,1,N+1))' + 1);
od = ovrdSm*(sqrt(linspace(0,1,N+1))' + 1);
%od = 4*(sqrt(linspace(0,1,N+1))' + 1);
%od = 2*ones(N+1,1);
%od = 2*ones(N+1,1);
%sshift = ((1-hnled)*2-1).^3+1;
sshift = ones(N+1,1);
hnled = hnled.^(od.*sshift);
%if hnlg > 0.75
%if (suppState ~= 0)
% transCtr = 0;
%end
% suppState = 0;
%elseif hnlg < 0.6 & hnlg > 0.2
% suppState = 1;
%elseif hnlg < 0.1
%hnled = zeros(N+1, 1);
%if (suppState ~= 2)
% transCtr = 0;
%end
% suppState = 2;
%else
% if (suppState ~= 2)
% transCtr = 0;
% end
% suppState = 2;
%end
%if suppState == 0
% hnled = ones(N+1, 1);
%elseif suppState == 2
% hnled = zeros(N+1, 1);
%end
%hnled(find(hnled < 0.1)) = 0;
%hnled = hnled.^2;
%if transCtr < 5
%hnl = 0.75*hnl + 0.25*hnled;
% transCtr = transCtr + 1;
%else
hnl = hnled;
%end
%hnled(find(hnled < 0.05)) = 0;
ef = ef.*(hnl);
%ef = ef.*(min(1 - cohxd, cohed).^2);
%ef = ef.*((1-cohxd).^2);
ovrdV(kk) = ovrdSm;
%ovrdV(kk) = dIdx;
%ovrdV(kk) = divergeFact;
%hnledAvg(kk) = 1-mean(1-cohedFast(echoBandRange));
hnledAvg(kk) = 1-mean(1-cohed(echoBandRange));
hnlxdAvg(kk) = 1-mean(cohxd(echoBandRange));
%hnlxdAvg(kk) = cohxd(5);
%hnlSortQV(kk) = mean(hnled);
hnlSortQV(kk) = hnlPrefAvgLow;
hnlPrefAvgV(kk) = hnlPrefAvg;
%hnlAvg(kk) = propLow;
%ef(N/2:end) = 0;
%ner = (sum(Sd) ./ (sum(Se.*(hnl.^2)) + 1e-10));
% Comfort noise
if (CNon)
snn=sqrt(Sym);
snn(1)=0; % Reject LF noise
Un=snn.*exp(j*2*pi.*[0;rand(N-1,1);0]);
% Weight comfort noise by suppression
Un = sqrt(1-hnled.^2).*Un;
Fmix = ef + Un;
else
Fmix = ef;
end
% Overlap and add in time domain for smoothness
tmp = [Fmix ; flipud(conj(Fmix(2:N)))];
mixw = wins.*real(ifft(tmp));
mola = mbuf(end-N+1:end) + mixw(1:N);
mbuf = mixw;
ercn(pos:pos+N-1) = mola;
end % NLPon
% Filter update
%Ek2 = Ek ./(12*pn + 0.001); % Normalized error
%Ek2 = Ek2 * divergeFact;
%Ek2 = Ek ./(pn + 0.001); % Normalized error
%Ek2 = Ek ./(100*pn + 0.001); % Normalized error
%divergeIdx = find(abs(Ek) > abs(DD));
%divergeIdx = find(Se > Sd);
%threshMod = threshold*ones(N+1,1);
%if length(divergeIdx) > 0
%if sum(abs(Ek)) > sum(abs(DD))
%WFb(divergeIdx,:) = WFb(divergeIdx,:) .* repmat(sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10))),1,M);
%Ek2(divergeIdx) = Ek2(divergeIdx) .* sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10));
%Ek2(divergeIdx) = Ek2(divergeIdx) .* abs(DD(divergeIdx))./(abs(Ek(divergeIdx))+1e-10);
%WFb(divergeIdx,:) = WFbOld(divergeIdx,:);
%WFb = WFbOld;
%threshMod(divergeIdx) = threshMod(divergeIdx) .* abs(DD(divergeIdx))./(abs(Ek(divergeIdx))+1e-10);
% threshMod(divergeIdx) = threshMod(divergeIdx) .* sqrt(Sd(divergeIdx)./(Se(divergeIdx)+1e-10));
%end
%absEf = max(abs(Ek2), threshold);
%absEf = ones(N+1,1)*threshold./absEf;
%absEf = max(abs(Ek2), threshMod);
%absEf = threshMod./absEf;
%Ek2 = Ek2.*absEf;
%if sum(Se) <= sum(Sd)
% mEk = mufb.*Ek2;
% PP = conj(XFm).*(ones(M,1) * mEk')';
% tmp = [PP ; flipud(conj(PP(2:N,:)))];
% IFPP = real(ifft(tmp));
% PH = IFPP(1:N,:);
% tmp = fft([PH;zeros(N,M)]);
% FPH = tmp(1:N+1,:);
% %WFbOld = WFb;
% WFb = WFb + FPH;
%else
% WF = WFbOld;
%end
% Shift old FFTs
%for m=M:-1:2
% XFm(:,m) = XFm(:,m-1);
% YFm(:,m) = YFm(:,m-1);
%end
XFm(:,2:end) = XFm(:,1:end-1);
YFm(:,2:end) = YFm(:,1:end-1);
xfwm(:,2:end) = xfwm(:,1:end-1);
dfm(:,2:end) = dfm(:,1:end-1);
%if mod(kk, floor(Nb/50)) == 0
% fprintf(1, '.');
%end
if mod(kk, floor(Nb/100)) == 0
%if mod(kk, floor(Nb/500)) == 0
progressbar(kk/Nb);
%figure(5)
%plot(abs(WFb));
%legend('1','2','3','4','5','6','7','8','9','10','11','12');
%title(kk*N/fs);
%figure(6)
%plot(WFbD);
%figure(6)
%plot(threshMod)
%if length(divergeIdx) > 0
% plot(abs(DD))
% hold on
% plot(abs(Ek), 'r')
% hold off
%plot(min(sqrt(Sd./(Se+1e-10)),1))
%axis([0 N 0 1]);
%end
%figure(6)
%plot(cohedFast);
%axis([1 N+1 0 1]);
%plot(WFbEn);
%figure(7)
%plot(weight);
%plot([cohxd 1-cohed]);
%plot([cohxd 1-cohed 1-cohedFast hnled]);
%plot([cohxd cohxdFast/max(cohxdFast)]);
%legend('cohxd', '1-cohed', '1-cohedFast');
%axis([1 65 0 1]);
%pause(0.5);
%overdrive
end
end
progressbar(1);
%figure(2);
%plot([feat(:,1) feat(:,2)+1 feat(:,3)+2 mfeat+3]);
%plot([feat(:,1) mfeat+1]);
%figure(3);
%plot(10*log10([dri erifb erifb3 ericn]));
%legend('Near-end','Error','Post NLP','Final',4);
% Compensate for delay
%ercn=[ercn(N+1:end);zeros(N,1)];
%ercn_=[ercn_(N+1:end);zeros(N,1)];
%figure(11);
%plot(cohxdSlow);
%figure(12);
%surf(cohxdSlow);
%shading interp;
%figure(13);
%plot(overdriveM);
%figure(14);
%surf(overdriveM);
%shading interp;
figure(10);
t = (0:Nb)*N/fs;
rrinSubSamp = rrin(N*(1:(Nb+1)));
plot(t, rrinSubSamp/max(abs(rrinSubSamp)),'b');
hold on
plot(t, hnledAvg, 'r');
plot(t, hnlxdAvg, 'g');
plot(t, hnlSortQV, 'y');
plot(t, hnlLocalMinV, 'k');
plot(t, cohxdLocalMinV, 'c');
plot(t, hnlPrefAvgV, 'm');
%plot(t, cohxdAvg, 'r');
%plot(cohxdFastAvg, 'r');
%plot(cohxdAvgBad, 'k');
%plot(t, cohedAvg, 'k');
%plot(t, 1-cohedFastAvg, 'k');
%plot(ssin(N*(1:floor(length(ssin)/N)))/max(abs(ssin)));
%plot(echoBands,'r');
%plot(overdrive, 'g');
%plot(erfb(N*(1:floor(length(erfb)/N)))/max(abs(erfb)));
hold off
tightx;
figure(11)
plot(t, ovrdV);
tightx;
%plot(mfeat,'r');
%plot(1-cohxyp_,'r');
%plot(Hnlxydp,'y');
%plot(hnledp,'k');
%plot(Hnlxydp, 'c');
%plot(ccohpd_,'k');
%plot(supplot_, 'g');
%plot(ones(length(mfeat),1)*rr1_, 'k');
%plot(ones(length(mfeat),1)*rr2_, 'k');
%plot(N*(1:length(feat)), feat);
%plot(Sep_,'r');
%axis([1 floor(length(erfb)/N) -1 1])
%hold off
%plot(10*log10([Se_, Sx_, Seu_, real(sf_.*conj(sf_))]));
%legend('Se','Sx','Seu','S');
%figure(5)
%plot([ercn ercn_]);
figure(12)
plot(t, dIdxV);
%plot(t, SLxV);
tightx;
%figure(13)
%plot(t, [ekEnV dkEnV]);
%plot(t, dkEnV./(ekEnV+1e-10));
%tightx;
%close(hh);
%spclab(fs,ssin,erfb,ercn,'outxd.pcm');
%spclab(fs,rrin,ssin,erfb,1.78*ercn,'vqeOut-1.pcm');
%spclab(fs,erfb,'aecOutLp.pcm');
%spclab(fs,rrin,ssin,erfb,1.78*ercn,'aecOut25.pcm','vqeOut-1.pcm');
%spclab(fs,rrin,ssin,erfb,ercn,'aecOut-mba.pcm');
%spclab(fs,rrin,ssin,erfb,ercn,'aecOut.pcm');
%spclab(fs, ssin, erfb, ercn, 'out0.pcm');

View File

@ -12,12 +12,14 @@
* The core AEC algorithm, which is presented with time-aligned signals.
*/
#include "aec_core.h"
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "aec_core.h"
#include "aec_rdft.h"
#include "delay_estimator_float.h"
#include "ring_buffer.h"
#include "system_wrappers/interface/cpu_features_wrapper.h"
@ -34,26 +36,9 @@ static const float cnScaleHband = (float)0.4; // scale for comfort noise in H ba
// Initial bin for averaging nlp gain in low band
static const int freqAvgIc = PART_LEN / 2;
/* Matlab code to produce table:
win = sqrt(hanning(63)); win = [0 ; win(1:32)];
fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
*/
/*
static const float sqrtHanning[33] = {
0.00000000000000, 0.04906767432742, 0.09801714032956,
0.14673047445536, 0.19509032201613, 0.24298017990326,
0.29028467725446, 0.33688985339222, 0.38268343236509,
0.42755509343028, 0.47139673682600, 0.51410274419322,
0.55557023301960, 0.59569930449243, 0.63439328416365,
0.67155895484702, 0.70710678118655, 0.74095112535496,
0.77301045336274, 0.80320753148064, 0.83146961230255,
0.85772861000027, 0.88192126434835, 0.90398929312344,
0.92387953251129, 0.94154406518302, 0.95694033573221,
0.97003125319454, 0.98078528040323, 0.98917650996478,
0.99518472667220, 0.99879545620517, 1.00000000000000
};
*/
// Matlab code to produce table:
// win = sqrt(hanning(63)); win = [0 ; win(1:32)];
// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
static const float sqrtHanning[65] = {
0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
@ -79,10 +64,9 @@ static const float sqrtHanning[65] = {
0.99969881869620f, 1.00000000000000f
};
/* Matlab code to produce table:
weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
*/
// Matlab code to produce table:
// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
const float WebRtcAec_weightCurve[65] = {
0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
@ -97,10 +81,9 @@ const float WebRtcAec_weightCurve[65] = {
0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
};
/* Matlab code to produce table:
overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
*/
// Matlab code to produce table:
// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
const float WebRtcAec_overDriveCurve[65] = {
1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
@ -193,6 +176,15 @@ int WebRtcAec_CreateAec(aec_t **aecInst)
return -1;
}
if (WebRtc_CreateDelayEstimatorFloat(&aec->delay_estimator,
PART_LEN1,
kMaxDelay,
0) == -1) {
WebRtcAec_FreeAec(aec);
aec = NULL;
return -1;
}
return 0;
}
@ -209,6 +201,8 @@ int WebRtcAec_FreeAec(aec_t *aec)
WebRtcApm_FreeBuffer(aec->nearFrBufH);
WebRtcApm_FreeBuffer(aec->outFrBufH);
WebRtc_FreeDelayEstimatorFloat(aec->delay_estimator);
free(aec);
return 0;
}
@ -255,6 +249,32 @@ static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
}
}
// Time-unconstrained filter adaptation.
// TODO(andrew): consider for a low-complexity mode.
//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
// float ef[2][PART_LEN1]) {
// int i, j;
// for (i = 0; i < NR_PART; i++) {
// int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
// int pos;
// // Check for wrap
// if (i + aec->xfBufBlockPos >= NR_PART) {
// xPos -= NR_PART * PART_LEN1;
// }
//
// pos = i * PART_LEN1;
//
// for (j = 0; j < PART_LEN1; j++) {
// aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
// -aec->xfBuf[xPos + j][1],
// ef[j][0], ef[j][1]);
// aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
// -aec->xfBuf[xPos + j][1],
// ef[j][0], ef[j][1]);
// }
// }
//}
static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
int i, j;
for (i = 0; i < NR_PART; i++) {
@ -267,16 +287,6 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
pos = i * PART_LEN1;
#ifdef UNCONSTR
for (j = 0; j < PART_LEN1; j++) {
aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
}
#else
for (j = 0; j < PART_LEN; j++) {
fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
@ -309,7 +319,6 @@ static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
aec->wfBuf[0][pos + j] += fft[2 * j];
aec->wfBuf[1][pos + j] += fft[2 * j + 1];
}
#endif // UNCONSTR
}
}
@ -375,6 +384,12 @@ int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
return -1;
}
if (WebRtc_InitDelayEstimatorFloat(aec->delay_estimator) != 0) {
return -1;
}
aec->delay_logging_enabled = 0;
memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
// Default target suppression level
aec->targetSupp = -11.5;
aec->minOverDrive = 2.0;
@ -565,6 +580,10 @@ static void ProcessBlock(aec_t *aec, const short *farend,
float fft[PART_LEN2];
float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
complex_t df[PART_LEN1];
float far_spectrum = 0.0f;
float near_spectrum = 0.0f;
float abs_far_spectrum[PART_LEN1];
float abs_near_spectrum[PART_LEN1];
const float gPow[2] = {0.9f, 0.1f};
@ -629,10 +648,15 @@ static void ProcessBlock(aec_t *aec, const short *farend,
// Power smoothing
for (i = 0; i < PART_LEN1; i++) {
aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART *
(xf[0][i] * xf[0][i] + xf[1][i] * xf[1][i]);
aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] *
(df[i][0] * df[i][0] + df[i][1] * df[i][1]);
far_spectrum = xf[0][i] * xf[0][i] + xf[1][i] * xf[1][i];
aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
// Calculate absolute spectra
abs_far_spectrum[i] = sqrtf(far_spectrum);
near_spectrum = df[i][0] * df[i][0] + df[i][1] * df[i][1];
aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
// Calculate absolute spectra
abs_near_spectrum[i] = sqrtf(near_spectrum);
}
// Estimate noise power. Wait until dPow is more stable.
@ -667,6 +691,20 @@ static void ProcessBlock(aec_t *aec, const short *farend,
aec->noisePow = aec->dMinPow;
}
// Block-wise delay estimation used for logging
if (aec->delay_logging_enabled) {
int delay_estimate = 0;
// Estimate the delay
delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
abs_far_spectrum,
abs_near_spectrum,
PART_LEN1,
aec->echoState);
if (delay_estimate >= 0) {
// Update delay estimate buffer
aec->delay_histogram[delay_estimate]++;
}
}
// Update the xfBuf block position.
aec->xfBufBlockPos--;
@ -720,9 +758,7 @@ static void ProcessBlock(aec_t *aec, const short *farend,
// Scale error signal inversely with far power.
WebRtcAec_ScaleErrorSignal(aec, ef);
// Filter adaptation
WebRtcAec_FilterAdaptation(aec, fft, ef);
NonLinearProcessing(aec, output, outputH);
#ifdef AEC_DEBUG

View File

@ -20,7 +20,6 @@
#include "signal_processing_library.h"
#include "typedefs.h"
//#define UNCONSTR // time-unconstrained filter
//#define AEC_DEBUG // for recording files
#define FRAME_LEN 80
@ -34,6 +33,8 @@
#define PREF_BAND_SIZE 24
#define BLOCKL_MAX FRAME_LEN
// Maximum delay in fixed point delay estimator, used for logging
enum {kMaxDelay = 100};
typedef float complex_t[2];
// For performance reasons, some arrays of complex numbers are replaced by twice
@ -142,6 +143,10 @@ typedef struct {
int flag_Hband_cn; //for comfort noise
float cn_scale_Hband; //scale for comfort noise in H band
int delay_histogram[kMaxDelay];
int delay_logging_enabled;
void* delay_estimator;
#ifdef AEC_DEBUG
FILE *farFile;
FILE *nearFile;

View File

@ -138,16 +138,6 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
xPos -= NR_PART * PART_LEN1;
}
#ifdef UNCONSTR
for (j = 0; j < PART_LEN1; j++) {
aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
-aec->xfBuf[xPos + j][1],
ef[j][0], ef[j][1]);
}
#else
// Process the whole array...
for (j = 0; j < PART_LEN; j+= 4) {
// Load xfBuf and ef.
@ -208,7 +198,6 @@ static void FilterAdaptationSSE2(aec_t *aec, float *fft, float ef[2][PART_LEN1])
}
aec->wfBuf[1][pos] = wt1;
}
#endif // UNCONSTR
}
}
@ -246,10 +235,9 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
{0x43BF8000, 0x43BF8000, 0x43BF8000, 0x43BF8000};
static const int shift_exponent_into_top_mantissa = 8;
const __m128 two_n = _mm_and_ps(a, *((__m128 *)float_exponent_mask));
const __m128 n_1 = (__m128)_mm_srli_epi32((__m128i)two_n,
shift_exponent_into_top_mantissa);
const __m128 n_0 = _mm_or_ps(
(__m128)n_1, *((__m128 *)eight_biased_exponent));
const __m128 n_1 = _mm_castsi128_ps(_mm_srli_epi32(_mm_castps_si128(two_n),
shift_exponent_into_top_mantissa));
const __m128 n_0 = _mm_or_ps(n_1, *((__m128 *)eight_biased_exponent));
const __m128 n = _mm_sub_ps(n_0, *((__m128 *)implicit_leading_one));
// Compute y.
@ -328,8 +316,8 @@ static __m128 mm_pow_ps(__m128 a, __m128 b)
static const int float_exponent_shift = 23;
const __m128i two_n_exponent = _mm_add_epi32(
x_minus_half_floor, *((__m128i *)float_exponent_bias));
const __m128 two_n = (__m128)_mm_slli_epi32(
two_n_exponent, float_exponent_shift);
const __m128 two_n = _mm_castsi128_ps(_mm_slli_epi32(
two_n_exponent, float_exponent_shift));
// Compute y.
const __m128 y = _mm_sub_ps(x_max, _mm_cvtepi32_ps(x_minus_half_floor));
// Approximate 2^y ~= C2 * y^2 + C1 * y + C0.

View File

@ -11,6 +11,14 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AEC_MAIN_SOURCE_AEC_RDFT_H_
// These intrinsics were unavailable before VS 2008.
// TODO(andrew): move to a common file.
#if defined(_MSC_VER) && _MSC_VER < 1500
#include <emmintrin.h>
static __inline __m128 _mm_castsi128_ps(__m128i a) { return *(__m128*)&a; }
static __inline __m128i _mm_castps_si128(__m128 a) { return *(__m128i*)&a; }
#endif
#ifdef _MSC_VER /* visual c++ */
# define ALIGN16_BEG __declspec(align(16))
# define ALIGN16_END

View File

@ -42,27 +42,33 @@ static void cft1st_128_SSE2(float *a) {
const __m128 x1v = _mm_sub_ps(a01v, a23v);
const __m128 x2v = _mm_add_ps(a45v, a67v);
const __m128 x3v = _mm_sub_ps(a45v, a67v);
__m128 x0w;
a01v = _mm_add_ps(x0v, x2v);
x0v = _mm_sub_ps(x0v, x2v);
__m128 x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
a45v = _mm_add_ps(a45_0v, a45_1v);
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
x0v = _mm_add_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 a23_0v = _mm_mul_ps(wk1rv, x0v);
const __m128 a23_1v = _mm_mul_ps(wk1iv, x0w);
a23v = _mm_add_ps(a23_0v, a23_1v);
{
const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v);
const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w);
a45v = _mm_add_ps(a45_0v, a45_1v);
}
{
__m128 a23_0v, a23_1v;
const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w);
x0v = _mm_add_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
a23_0v = _mm_mul_ps(wk1rv, x0v);
a23_1v = _mm_mul_ps(wk1iv, x0w);
a23v = _mm_add_ps(a23_0v, a23_1v);
x0v = _mm_sub_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
a67v = _mm_add_ps(a67_0v, a67_1v);
x0v = _mm_sub_ps(x1v, x3s);
x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0 ,1));
}
{
const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v);
const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w);
a67v = _mm_add_ps(a67_0v, a67_1v);
}
a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1 ,0));
a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1 ,0));
@ -78,7 +84,7 @@ static void cft1st_128_SSE2(float *a) {
static void cftmdl_128_SSE2(float *a) {
const int l = 8;
const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign);
int j0, k, k1, k2;
int j0;
__m128 wk1rv = _mm_load_ps(cftmdl_wk1r);
for (j0 = 0; j0 < l; j0 += 2) {
@ -86,9 +92,11 @@ static void cftmdl_128_SSE2(float *a) {
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps((__m128)a_00, (__m128)a_32,
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
_mm_castsi128_ps(a_32),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_08_40 = _mm_shuffle_ps((__m128)a_08, (__m128)a_40,
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
_mm_castsi128_ps(a_40),
_MM_SHUFFLE(1, 0, 1 ,0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
@ -97,30 +105,24 @@ static void cftmdl_128_SSE2(float *a) {
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps((__m128)a_16, (__m128)a_48,
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_24_56 = _mm_shuffle_ps((__m128)a_24, (__m128)a_56,
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
_mm_storel_epi64((__m128i*)&a[j0 + 0], (__m128i)xx0);
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32((__m128i)xx0, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
_mm_storel_epi64((__m128i*)&a[j0 + 16], (__m128i)xx1);
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32((__m128i)xx1, _MM_SHUFFLE(2, 3, 2, 3)));
a[j0 + 48] = -a[j0 + 48];
const __m128 x3i0_3r0_3i1_x3r1 = (__m128)
_mm_shuffle_epi32((__m128i)x3r0_3i0_3r1_x3i1, _MM_SHUFFLE(2, 3, 0, 1));
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(
_mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1),
_MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
_mm_storel_epi64((__m128i*)&a[j0 + 8], (__m128i)x1_x3_add);
_mm_storel_epi64((__m128i*)&a[j0 + 24], (__m128i)x1_x3_sub);
const __m128 yy0 = _mm_shuffle_ps(x1_x3_add, x1_x3_sub,
_MM_SHUFFLE(2, 2, 2 ,2));
@ -129,79 +131,111 @@ static void cftmdl_128_SSE2(float *a) {
const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1);
const __m128 yy3 = _mm_add_ps(yy0, yy2);
const __m128 yy4 = _mm_mul_ps(wk1rv, yy3);
_mm_storel_epi64((__m128i*)&a[j0 + 40], (__m128i)yy4);
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0));
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx0),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1));
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx1),
_MM_SHUFFLE(2, 3, 2, 3)));
a[j0 + 48] = -a[j0 + 48];
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub));
_mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4));
_mm_storel_epi64((__m128i*)&a[j0 + 56],
_mm_shuffle_epi32((__m128i)yy4, _MM_SHUFFLE(2, 3, 2, 3)));
_mm_shuffle_epi32(_mm_castps_si128(yy4),
_MM_SHUFFLE(2, 3, 2, 3)));
}
k1 = 0;
k = 64;
k1 += 2;
k2 = 2 * k1;
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2+0]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2+0]);
wk1rv = _mm_load_ps(&rdft_wk1r[k2+0]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2+0]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2+0]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2+0]);
for (j0 = k; j0 < l + k; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps((__m128)a_00, (__m128)a_32,
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_08_40 = _mm_shuffle_ps((__m128)a_08, (__m128)a_40,
_MM_SHUFFLE(1, 0, 1 ,0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
{
int k = 64;
int k1 = 2;
int k2 = 2 * k1;
const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2+0]);
const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2+0]);
const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2+0]);
const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2+0]);
const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2+0]);
wk1rv = _mm_load_ps(&rdft_wk1r[k2+0]);
for (j0 = k; j0 < l + k; j0 += 2) {
const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]);
const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]);
const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]);
const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]);
const __m128 a_00_32 = _mm_shuffle_ps(_mm_castsi128_ps(a_00),
_mm_castsi128_ps(a_32),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_08_40 = _mm_shuffle_ps(_mm_castsi128_ps(a_08),
_mm_castsi128_ps(a_40),
_MM_SHUFFLE(1, 0, 1 ,0));
__m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40);
const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps((__m128)a_16, (__m128)a_48,
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_24_56 = _mm_shuffle_ps((__m128)a_24, (__m128)a_56,
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]);
const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]);
const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]);
const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]);
const __m128 a_16_48 = _mm_shuffle_ps(_mm_castsi128_ps(a_16),
_mm_castsi128_ps(a_48),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 a_24_56 = _mm_shuffle_ps(_mm_castsi128_ps(a_24),
_mm_castsi128_ps(a_56),
_MM_SHUFFLE(1, 0, 1 ,0));
const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56);
const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56);
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
_mm_storel_epi64((__m128i*)&a[j0 + 0], (__m128i)xx);
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32((__m128i)xx, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx2 = _mm_mul_ps(xx1 , wk2rv);
const __m128 xx3 = _mm_mul_ps(wk2iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx4 = _mm_add_ps(xx2, xx3);
const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1);
const __m128 xx2 = _mm_mul_ps(xx1 , wk2rv);
const __m128 xx3 = _mm_mul_ps(wk2iv,
(__m128)_mm_shuffle_epi32((__m128i)xx1, _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 xx4 = _mm_add_ps(xx2, xx3);
_mm_storel_epi64((__m128i*)&a[j0 + 16], (__m128i)xx4);
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32((__m128i)xx4, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(
_mm_shuffle_epi32(_mm_castps_si128(x3r0_3i0_3r1_x3i1),
_MM_SHUFFLE(2, 3, 0, 1)));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x3i0_3r0_3i1_x3r1 = (__m128)
_mm_shuffle_epi32((__m128i)x3r0_3i0_3r1_x3i1, _MM_SHUFFLE(2, 3, 0, 1));
const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1);
const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped);
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
const __m128 xx11 = _mm_mul_ps(wk1iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx12 = _mm_add_ps(xx10, xx11);
const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv);
const __m128 xx11 = _mm_mul_ps(wk1iv,
(__m128)_mm_shuffle_epi32((__m128i)x1_x3_add, _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 xx12 = _mm_add_ps(xx10, xx11);
_mm_storel_epi64((__m128i*)&a[j0 + 8], (__m128i)xx12);
_mm_storel_epi64((__m128i*)&a[j0 + 40],
_mm_shuffle_epi32((__m128i)xx12, _MM_SHUFFLE(3, 2, 3, 2)));
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
const __m128 xx21 = _mm_mul_ps(wk3iv,
_mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub),
_MM_SHUFFLE(2, 3, 0, 1))));
const __m128 xx22 = _mm_add_ps(xx20, xx21);
const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv);
const __m128 xx21 = _mm_mul_ps(wk3iv,
(__m128)_mm_shuffle_epi32((__m128i)x1_x3_sub, _MM_SHUFFLE(2, 3, 0, 1)));
const __m128 xx22 = _mm_add_ps(xx20, xx21);
_mm_storel_epi64((__m128i*)&a[j0 + 24], (__m128i)xx22);
_mm_storel_epi64((__m128i*)&a[j0 + 56],
_mm_shuffle_epi32((__m128i)xx22, _MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx));
_mm_storel_epi64((__m128i*)&a[j0 + 32],
_mm_shuffle_epi32(_mm_castps_si128(xx),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4));
_mm_storel_epi64((__m128i*)&a[j0 + 48],
_mm_shuffle_epi32(_mm_castps_si128(xx4),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12));
_mm_storel_epi64((__m128i*)&a[j0 + 40],
_mm_shuffle_epi32(_mm_castps_si128(xx12),
_MM_SHUFFLE(3, 2, 3, 2)));
_mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22));
_mm_storel_epi64((__m128i*)&a[j0 + 56],
_mm_shuffle_epi32(_mm_castps_si128(xx22),
_MM_SHUFFLE(3, 2, 3, 2)));
}
}
}

View File

@ -11,16 +11,18 @@
/*
* Contains the API functions for the AEC.
*/
#include "echo_cancellation.h"
#include <math.h>
#ifdef AEC_DEBUG
#include <stdio.h>
#endif
#include <stdlib.h>
#include <string.h>
#include "echo_cancellation.h"
#include "aec_core.h"
#include "ring_buffer.h"
#include "resampler.h"
#ifdef AEC_DEBUG
#include <stdio.h>
#endif
#include "ring_buffer.h"
#define BUF_SIZE_FRAMES 50 // buffer size (frames)
// Maximum length of resampled signal. Must be an integer multiple of frames
@ -215,7 +217,7 @@ WebRtc_Word32 WebRtcAec_Init(void *aecInst, WebRtc_Word32 sampFreq, WebRtc_Word3
return -1;
}
aecpc->initFlag = initCheck; // indicates that initilisation has been done
aecpc->initFlag = initCheck; // indicates that initialization has been done
if (aecpc->sampFreq == 32000) {
aecpc->splitSampFreq = 16000;
@ -254,6 +256,7 @@ WebRtc_Word32 WebRtcAec_Init(void *aecInst, WebRtc_Word32 sampFreq, WebRtc_Word3
aecConfig.nlpMode = kAecNlpModerate;
aecConfig.skewMode = kAecFalse;
aecConfig.metricsMode = kAecFalse;
aecConfig.delay_logging = kAecFalse;
if (WebRtcAec_set_config(aecpc, aecConfig) == -1) {
aecpc->lastError = AEC_UNSPECIFIED_ERROR;
@ -566,6 +569,15 @@ WebRtc_Word32 WebRtcAec_set_config(void *aecInst, AecConfig config)
WebRtcAec_InitMetrics(aecpc->aec);
}
if (config.delay_logging != kAecFalse && config.delay_logging != kAecTrue) {
aecpc->lastError = AEC_BAD_PARAMETER_ERROR;
return -1;
}
aecpc->aec->delay_logging_enabled = config.delay_logging;
if (aecpc->aec->delay_logging_enabled == kAecTrue) {
memset(aecpc->aec->delay_histogram, 0, sizeof(aecpc->aec->delay_histogram));
}
return 0;
}
@ -590,6 +602,7 @@ WebRtc_Word32 WebRtcAec_get_config(void *aecInst, AecConfig *config)
config->nlpMode = aecpc->nlpMode;
config->skewMode = aecpc->skewMode;
config->metricsMode = aecpc->aec->metricsMode;
config->delay_logging = aecpc->aec->delay_logging_enabled;
return 0;
}
@ -717,6 +730,69 @@ WebRtc_Word32 WebRtcAec_GetMetrics(void *aecInst, AecMetrics *metrics)
return 0;
}
// Reports the median and spread of the delays logged by the AEC since the
// previous call, then resets the histogram so a new measurement interval
// starts. The spread is an L1 norm around the median, rounded to the
// nearest integer (a cheap stand-in for a standard deviation).
// Returns 0 on success (with |*median| = |*std| = -1 when no data was
// logged), or -1 on error with the reason stored in |lastError| (except
// when |handle| itself is NULL).
int WebRtcAec_GetDelayMetrics(void* handle, int* median, int* std) {
  aecpc_t* self = handle;
  int k = 0;
  int count_down = 0;     // Median count-down over the histogram bins.
  int total = 0;          // Number of delay values since the last update.
  int median_value = 0;
  float l1_norm = 0;

  if (self == NULL) {
    return -1;
  }
  // Both output pointers must be valid.
  if (median == NULL || std == NULL) {
    self->lastError = AEC_NULL_POINTER_ERROR;
    return -1;
  }
  if (self->initFlag != initCheck) {
    self->lastError = AEC_UNINITIALIZED_ERROR;
    return -1;
  }
  if (self->aec->delay_logging_enabled == 0) {
    // Delay logging is disabled; there is nothing to report.
    self->lastError = AEC_UNSUPPORTED_FUNCTION_ERROR;
    return -1;
  }

  // Total number of delay values collected since the last update.
  for (k = 0; k < kMaxDelay; k++) {
    total += self->aec->delay_histogram[k];
  }
  if (total == 0) {
    // No new delay data; flag this with -1 in both outputs.
    *median = -1;
    *std = -1;
    return 0;
  }

  // Median: walk the histogram until half of the samples are consumed.
  count_down = total >> 1;
  for (k = 0; k < kMaxDelay; k++) {
    count_down -= self->aec->delay_histogram[k];
    if (count_down < 0) {
      median_value = k;
      break;
    }
  }
  *median = median_value;

  // L1 norm with the median as the central moment.
  for (k = 0; k < kMaxDelay; k++) {
    l1_norm += (float) (fabs(k - median_value) * self->aec->delay_histogram[k]);
  }
  *std = (int) (l1_norm / (float) total + 0.5f);

  // Reset the histogram for the next measurement interval.
  memset(self->aec->delay_histogram, 0, sizeof(self->aec->delay_histogram));

  return 0;
}
WebRtc_Word32 WebRtcAec_get_version(WebRtc_Word8 *versionStr, WebRtc_Word16 len)
{
const char version[] = "AEC 2.5.0";

View File

@ -3,9 +3,7 @@ noinst_LTLIBRARIES = libaecm.la
libaecm_la_SOURCES = main/interface/echo_control_mobile.h \
main/source/echo_control_mobile.c \
main/source/aecm_core.c \
main/source/aecm_core.h \
main/source/aecm_delay_estimator.c \
main/source/aecm_delay_estimator.h
main/source/aecm_core.h
libaecm_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface \
-I$(top_srcdir)/src/modules/audio_processing/utility

View File

@ -1,447 +0,0 @@
function [emicrophone,aaa]=compsup(microphone,TheFarEnd,avtime,samplingfreq);
% microphone = microphone signal
% aaa = nonlinearity input variable
% TheFarEnd = far end signal
% avtime = interval to compute suppression from (seconds)
% samplingfreq = sampling frequency
%
% Offline overlap-add frequency-domain echo suppressor:
%   1) binary-spectrum delay estimation between far end and microphone,
%   2) covariance-based estimation of an echo gain matrix G,
%   3) Wiener-style suppression filter with a nonlinear output gain.
% NOTE(review): relies on external helpers designwindow, getBspectrum,
% hisser2 and waitbar_j -- confirm they are on the MATLAB path.
%if(nargin==6)
% fprintf(1,'suppress has received a delay sequence\n');
%end
% Fixed 5th-order IIR smoothing filters used by the delay estimator.
Ap500=[ 1.00, -4.95, 9.801, -9.70299, 4.80298005, -0.9509900499];
Bp500=[ 0.662743088639636, -2.5841655608125, 3.77668102146288, -2.45182477425154, 0.596566274575251, 0.0];
Ap200=[ 1.00, -4.875, 9.50625, -9.26859375, 4.518439453125, -0.881095693359375];
Bp200=[ 0.862545460994275, -3.2832804496114, 4.67892032308828, -2.95798023879133, 0.699796870041299, 0.0];
maxDelay=0.4; %[s]
histLen=1; %[s]
% CONSTANTS THAT YOU CAN EXPERIMENT WITH
A_GAIN=10.0; % for the suppress case
oversampling = 2; % must be power of 2; minimum is 2; 4 works
% fine for support=64, but for support=128,
% 8 gives better results.
support=64; %512 % fft support (frequency resolution; at low
% settings you can hear more distortion
% (e.g. pitch that is left-over from far-end))
% 128 works well, 64 is ok)
lowlevel = mean(abs(microphone))*0.0001;
G_ol = 0; % Use overlapping sets of estimates
% ECHO SUPPRESSION SPECIFIC PARAMETERS
suppress_overdrive=1.0; % overdrive factor for suppression 1.4 is good
gamma_echo=1.0; % same as suppress_overdrive but at different place
de_echo_bound=0.0;
mLim=10; % rank of matrix G
%limBW = 1; % use bandwidth-limited response for G
if mLim > (support/2+1)
error('mLim in suppress.m too large\n');
end
dynrange=1.0000e-004;
% other, constants
hsupport = support/2;
hsupport1 = hsupport+1;
factor = 2 / oversampling;
updatel = support/oversampling;
win=sqrt(designwindow(0,support));
% NOTE(review): missing semicolon echoes estLen to the console --
% presumably intentional progress output.
estLen = round(avtime * samplingfreq/updatel)
runningfmean =0.0;
mLim = floor(hsupport1/2);
V = sqrt(2/hsupport1)*cos(pi/hsupport1*(repmat((0:hsupport1-1) + 0.5, mLim, 1).* ...
repmat((0:mLim-1)' + 0.5, 1, hsupport1)));
fprintf(1,'updatel is %5.3f s\n', updatel/samplingfreq);
bandfirst=8; bandlast=25;
dosmooth=0; % to get rid of wavy bin counts (can be worse or better)
% compute some constants
blockLen = support/oversampling;
maxDelayb = floor(samplingfreq*maxDelay/updatel); % in blocks
histLenb = floor(samplingfreq*histLen/updatel); % in blocks
x0=TheFarEnd;
y0=microphone;
%input
tlength=min([length(microphone),length(TheFarEnd)]);
updateno=floor(tlength/updatel);
tlength=updatel*updateno;
updateno = updateno - oversampling + 1;
TheFarEnd =TheFarEnd(1:tlength);
microphone =microphone(1:tlength);
TheFarEnd =[zeros(hsupport,1);TheFarEnd(1:tlength)];
microphone =[zeros(hsupport,1);microphone(1:tlength)];
% signal length
n = min([floor(length(x0)/support)*support,floor(length(y0)/support)*support]);
nb = n/blockLen - oversampling + 1; % in blocks
% initialize space
win = sqrt([0 ; hanning(support-1)]);
sxAll2 = zeros(hsupport1,nb);
syAll2 = zeros(hsupport1,nb);
z500=zeros(5,maxDelayb+1);
z200=zeros(5,hsupport1);
bxspectrum=uint32(zeros(nb,1));
bxhist=uint32(zeros(maxDelayb+1,1));
byspectrum=uint32(zeros(nb,1));
bcount=zeros(1+maxDelayb,nb);
fcount=zeros(1+maxDelayb,nb);
fout=zeros(1+maxDelayb,nb);
delay=zeros(nb,1);
tdelay=zeros(nb,1);
nlgains=zeros(nb,1);
% create space (mainly for debugging)
emicrophone=zeros(tlength,1);
femicrophone=complex(zeros(hsupport1,updateno));
thefilter=zeros(hsupport1,updateno);
thelimiter=ones(hsupport1,updateno);
fTheFarEnd=complex(zeros(hsupport1,updateno));
afTheFarEnd=zeros(hsupport1,updateno);
fmicrophone=complex(zeros(hsupport1,updateno));
afmicrophone=zeros(hsupport1,updateno);
G = zeros(hsupport1, hsupport1);
zerovec = zeros(hsupport1,1);
zeromat = zeros(hsupport1);
% Reset sums
mmxs_a = zerovec;
mmys_a = zerovec;
s2xs_a = zerovec;
s2ys_a = zerovec;
Rxxs_a = zeromat;
Ryxs_a = zeromat;
count_a = 1;
mmxs_b = zerovec;
mmys_b = zerovec;
s2xs_b = zerovec;
s2ys_b = zerovec;
Rxxs_b = zeromat;
Ryxs_b = zeromat;
count_b = 1;
nog=0;
aaa=zeros(size(TheFarEnd));
% loop over signal blocks
fprintf(1,'.. Suppression; averaging G over %5.1f seconds; file length %5.1f seconds ..\n',avtime, length(microphone)/samplingfreq);
fprintf(1,'.. SUPPRESSING ONLY AFTER %5.1f SECONDS! ..\n',avtime);
fprintf(1,'.. 20 seconds is good ..\n');
hh = waitbar_j(0,'Please wait...');
for i=1:updateno
sb = (i-1)*updatel + 1;
se=sb+support-1;
% analysis FFTs
temp=fft(win .* TheFarEnd(sb:se));
fTheFarEnd(:,i)=temp(1:hsupport1);
xf=fTheFarEnd(:,i);
afTheFarEnd(:,i)= abs(fTheFarEnd(:,i));
% NOTE(review): the next assignment is immediately overwritten by the
% fft() line below -- this line appears to be redundant.
temp=win .* microphone(sb:se);
temp=fft(win .* microphone(sb:se));
fmicrophone(:,i)=temp(1:hsupport1);
yf=fmicrophone(:,i);
afmicrophone(:,i)= abs(fmicrophone(:,i));
ener_orig = afmicrophone(:,i)'*afmicrophone(:,i);
if( ener_orig == 0)
afmicrophone(:,i)=lowlevel*ones(size(afmicrophone(:,i)));
end
% use log domain (showed improved performance)
xxf= sqrt(real(xf.*conj(xf))+1e-20);
yyf= sqrt(real(yf.*conj(yf))+1e-20);
sxAll2(:,i) = 20*log10(xxf);
syAll2(:,i) = 20*log10(yyf);
% Binary-spectrum delay estimation (see getBspectrum/hisser2).
mD=min(i-1,maxDelayb);
xthreshold = sum(sxAll2(:,i-mD:i),2)/(maxDelayb+1);
[yout, z200] = filter(Bp200,Ap200,syAll2(:,i),z200,2);
yout=yout/(maxDelayb+1);
ythreshold = mean(syAll2(:,i-mD:i),2);
bxspectrum(i)=getBspectrum(sxAll2(:,i),xthreshold,bandfirst,bandlast);
byspectrum(i)=getBspectrum(syAll2(:,i),yout,bandfirst,bandlast);
bxhist(end-mD:end)=bxspectrum(i-mD:i);
bcount(:,i)=hisser2( ...
byspectrum(i),flipud(bxhist),bandfirst,bandlast);
[fout(:,i), z500] = filter(Bp500,Ap500,bcount(:,i),z500,2);
fcount(:,i)=sum(bcount(:,max(1,i-histLenb+1):i),2); % using the history range
fout(:,i)=round(fout(:,i));
[value,delay(i)]=min(fout(:,i),[],1);
tdelay(i)=(delay(i)-1)*support/(samplingfreq*oversampling);
% compensate
idel = max(i - delay(i) + 1,1);
% echo suppression
noisyspec = afmicrophone(:,i);
% Estimate G using covariance matrices
% Cumulative estimates
xx = afTheFarEnd(:,idel);
yy = afmicrophone(:,i);
% Means
mmxs_a = mmxs_a + xx;
mmys_a = mmys_a + yy;
if (G_ol)
mmxs_b = mmxs_b + xx;
mmys_b = mmys_b + yy;
mmy = mean([mmys_a/count_a mmys_b/count_b],2);
mmx = mean([mmxs_a/count_a mmxs_b/count_b],2);
else
mmx = mmxs_a/count_a;
mmy = mmys_a/count_a;
end
count_a = count_a + 1;
count_b = count_b + 1;
% Mean removal
xxm = xx - mmx;
yym = yy - mmy;
% Variances
s2xs_a = s2xs_a + xxm .* xxm;
s2ys_a = s2ys_a + yym .* yym;
s2xs_b = s2xs_b + xxm .* xxm;
s2ys_b = s2ys_b + yym .* yym;
% Correlation matrices
Rxxs_a = Rxxs_a + xxm * xxm';
Ryxs_a = Ryxs_a + yym * xxm';
Rxxs_b = Rxxs_b + xxm * xxm';
Ryxs_b = Ryxs_b + yym * xxm';
% Gain matrix A
if mod(i, estLen) == 0
% Cumulative based estimates
Rxxf = Rxxs_a / (estLen - 1);
Ryxf = Ryxs_a / (estLen - 1);
% Variance normalization
s2x2 = s2xs_a / (estLen - 1);
s2x2 = sqrt(s2x2);
% Sx = diag(max(s2x2,dynrange*max(s2x2)));
Sx = diag(s2x2);
if (sum(s2x2) > 0)
iSx = inv(Sx);
else
iSx= Sx + 0.01;
end
s2y2 = s2ys_a / (estLen - 1);
s2y2 = sqrt(s2y2);
% Sy = diag(max(s2y2,dynrange*max(s2y2)));
Sy = diag(s2y2);
iSy = inv(Sy);
rx = iSx * Rxxf * iSx;
ryx = iSy * Ryxf * iSx;
dbd= 7; % Use less than the full matrix
% k x m
% Bandlimited structure on G
LSEon = 0; % Default is using MMSE
if (LSEon)
ryx = ryx*rx;
rx = rx*rx;
end
p = dbd-1;
gaj = min(min(hsupport1,2*p+1),min([p+(1:hsupport1); hsupport1+p+1-(1:hsupport1)]));
cgaj = [0 cumsum(gaj)];
G3 = zeros(hsupport1);
for kk=1:hsupport1
ki = max(0,kk-p-1);
if (sum(sum(rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk))))>0)
G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk))/rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk));
else
G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk));
end
end
% End Bandlimited structure
G = G3;
G(abs(G)<0.01)=0;
G = suppress_overdrive * Sy * G * iSx;
if 1
figure(32); mi=2;
surf(max(min(G,mi),-mi)); view(2)
title('Unscaled Masked Limited-bandwidth G');
end
pause(0.05);
% Reset sums
mmxs_a = zerovec;
mmys_a = zerovec;
s2xs_a = zerovec;
s2ys_a = zerovec;
Rxxs_a = zeromat;
Ryxs_a = zeromat;
count_a = 1;
end
if (G_ol)
% Gain matrix B
if ((mod((i-estLen/2), estLen) == 0) & i>estLen)
% Cumulative based estimates
Rxxf = Rxxs_b / (estLen - 1);
Ryxf = Ryxs_b / (estLen - 1);
% Variance normalization
s2x2 = s2xs_b / (estLen - 1);
s2x2 = sqrt(s2x2);
Sx = diag(max(s2x2,dynrange*max(s2x2)));
iSx = inv(Sx);
s2y2 = s2ys_b / (estLen - 1);
s2y2 = sqrt(s2y2);
Sy = diag(max(s2y2,dynrange*max(s2y2)));
iSy = inv(Sy);
rx = iSx * Rxxf * iSx;
ryx = iSy * Ryxf * iSx;
% Bandlimited structure on G
LSEon = 0; % Default is using MMSE
if (LSEon)
ryx = ryx*rx;
rx = rx*rx;
end
p = dbd-1;
gaj = min(min(hsupport1,2*p+1),min([p+(1:hsupport1); hsupport1+p+1-(1:hsupport1)]));
cgaj = [0 cumsum(gaj)];
G3 = zeros(hsupport1);
for kk=1:hsupport1
ki = max(0,kk-p-1);
G3(kk,ki+1:ki+gaj(kk)) = ryx(kk,ki+1:ki+gaj(kk))/rx(ki+1:ki+gaj(kk),ki+1:ki+gaj(kk));
end
% End Bandlimited structure
G = G3;
G(abs(G)<0.01)=0;
G = suppress_overdrive * Sy * G * iSx;
if 1
figure(32); mi=2;
surf(max(min(G,mi),-mi)); view(2)
title('Unscaled Masked Limited-bandwidth G');
end
pause(0.05);
% Reset sums
mmxs_b = zerovec;
mmys_b = zerovec;
s2xs_b = zerovec;
s2ys_b = zerovec;
Rxxs_b = zeromat;
Ryxs_b = zeromat;
count_b = 1;
end
end
% Echo magnitude estimate from delayed far end through gain matrix G.
FECestimate2 = G*afTheFarEnd(:,idel);
% compute Wiener filter and suppressor function
thefilter(:,i) = (noisyspec - gamma_echo*FECestimate2) ./ noisyspec;
ix0 = find(thefilter(:,i)<de_echo_bound); % bounding trick 1
thefilter(ix0,i) = de_echo_bound; % bounding trick 2
ix0 = find(thefilter(:,i)>1); % bounding in reasonable range
thefilter(ix0,i) = 1;
% NONLINEARITY
nl_alpha=0.8; % memory; seems not very critical
nlSeverity=0.3; % nonlinearity severity: 0 does nothing; 1 suppresses all
thefmean=mean(thefilter(8:16,i));
if (thefmean<1)
disp('');
end
runningfmean = nl_alpha*runningfmean + (1-nl_alpha)*thefmean;
aaa(sb+20+1:sb+20+updatel)=10000*runningfmean* ones(updatel,1); % debug
slope0=1.0/(1.0-nlSeverity); %
thegain = max(0.0,min(1.0,slope0*(runningfmean-nlSeverity)));
% END NONLINEARITY
thefilter(:,i) = thegain*thefilter(:,i);
% Wiener filtering
femicrophone(:,i) = fmicrophone(:,i) .* thefilter(:,i);
thelimiter(:,i) = (noisyspec - A_GAIN*FECestimate2) ./ noisyspec;
index = find(thelimiter(:,i)>1.0);
thelimiter(index,i) = 1.0;
index = find(thelimiter(:,i)<0.0);
thelimiter(index,i) = 0.0;
if (rem(i,floor(updateno/20))==0)
fprintf(1,'.');
end
if mod(i,50)==0
waitbar_j(i/updateno,hh);
end
% reconstruction; first make spectrum odd
temp=[femicrophone(:,i);flipud(conj(femicrophone(2:hsupport,i)))];
emicrophone(sb:se) = emicrophone(sb:se) + factor * win .* real(ifft(temp));
end
fprintf(1,'\n');
close(hh);

View File

@ -1,22 +0,0 @@
function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast)
% Build a binary spectrum: one bit per band, set whenever the power
% spectrum exceeds the threshold (pivot) spectrum in that band.
%   bspectrum = binary spectrum (uint32 bit mask)
%   ps        = current power spectrum (float vector)
%   threshold = threshold spectrum (float vector, same length as ps)
%   bandfirst = first band considered
%   bandlast  = last band considered (at most 32; one bit per band)
% Sanity-check dimensions before touching any bits.
if( length(ps)<bandlast | bandlast>32 | length(ps)~=length(threshold))
error('BinDelayEst:spectrum:invalid','Dimensionality error');
end
% Set bit 'band' for every band whose power exceeds the pivot.
bspectrum=uint32(0);
above = ps - threshold;
for band=bandfirst:bandlast
if( above(band)>0 )
bspectrum = bitset(bspectrum,band);
end
end

View File

@ -1,21 +0,0 @@
function bcount=hisser2(bs,bsr,bandfirst,bandlast)
% Bit-difference histogram over candidate delays for binary spectra.
%   bcount    = per-delay count of mismatching band bits
%   bs        = current binary spectrum (one uint32)
%   bsr       = reference binary spectra (one uint32 per delay)
%   bandfirst = first band considered
%   bandlast  = last band considered
% All delays are weighted equally: a good delay candidate yields a low
% count, i.e. few bands where near end and delayed far end disagree.
numDelays = length(bsr);
bcount = zeros(numDelays,1);
for delay=1:numDelays
% XOR exposes the bands in which the two binary spectra differ.
mismatch = bitxor(bs,bsr(delay));
bcount(delay) = sum(bitget(mismatch,bandfirst:bandlast));
end

View File

@ -1,19 +0,0 @@
% Test driver: runs the compsup() echo suppressor on recorded far-end and
% near-end PCM captures and visualizes the result.
% NOTE(review): aecfar.pcm / aecnear.pcm are read as 16-bit samples from
% the working directory; spclab is an external viewer -- confirm both are
% available before running.
fid=fopen('aecfar.pcm'); far=fread(fid,'short'); fclose(fid);
fid=fopen('aecnear.pcm'); mic=fread(fid,'short'); fclose(fid);
%fid=fopen('QA1far.pcm'); far=fread(fid,'short'); fclose(fid);
%fid=fopen('QA1near.pcm'); mic=fread(fid,'short'); fclose(fid);
% Analyze the first 30 seconds (index arithmetic assumes 8 kHz material).
start=0 * 8000+1;
stop= 30 * 8000;
microphone=mic(start:stop);
TheFarEnd=far(start:stop);
avtime=1;
% 16000 to make it compatible with the C-version
[emicrophone,tdel]=compsup(microphone,TheFarEnd,avtime,16000);
spclab(8000,TheFarEnd,microphone,emicrophone);

View File

@ -1,269 +0,0 @@
function [femicrophone, aecmStructNew, enerNear, enerFar] = AECMobile(fmicrophone, afTheFarEnd, setupStruct, aecmStruct)
global NEARENDFFT;
global F;
aecmStructNew = aecmStruct;
% Magnitude spectrum of near end signal
afmicrophone = abs(fmicrophone);
%afmicrophone = NEARENDFFT(setupStruct.currentBlock,:)'/2^F(setupStruct.currentBlock,end);
% Near end energy level
ener_orig = afmicrophone'*afmicrophone;
if( ener_orig == 0)
lowlevel = 0.01;
afmicrophone = lowlevel*ones(size(afmicrophone));
end
%adiff = max(abs(afmicrophone - afTheFarEnd));
%if (adiff > 0)
% disp([setupStruct.currentBlock adiff])
%end
% Store the near end energy
%aecmStructNew.enerNear(setupStruct.currentBlock) = log(afmicrophone'*afmicrophone);
aecmStructNew.enerNear(setupStruct.currentBlock) = log(sum(afmicrophone));
% Store the far end energy
%aecmStructNew.enerFar(setupStruct.currentBlock) = log(afTheFarEnd'*afTheFarEnd);
aecmStructNew.enerFar(setupStruct.currentBlock) = log(sum(afTheFarEnd));
% Update subbands (We currently use all frequency bins, hence .useSubBand is turned off)
if aecmStructNew.useSubBand
internalIndex = 1;
for kk=1:setupStruct.subBandLength+1
ySubBand(kk) = mean(afmicrophone(internalIndex:internalIndex+setupStruct.numInBand(kk)-1).^aecmStructNew.bandFactor);
xSubBand(kk) = mean(afTheFarEnd(internalIndex:internalIndex+setupStruct.numInBand(kk)-1).^aecmStructNew.bandFactor);
internalIndex = internalIndex + setupStruct.numInBand(kk);
end
else
ySubBand = afmicrophone.^aecmStructNew.bandFactor;
xSubBand = afTheFarEnd.^aecmStructNew.bandFactor;
end
% Estimated echo energy
if (aecmStructNew.bandFactor == 1)
%aecmStructNew.enerEcho(setupStruct.currentBlock) = log((aecmStructNew.H.*xSubBand)'*(aecmStructNew.H.*xSubBand));
%aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log((aecmStructNew.HStored.*xSubBand)'*(aecmStructNew.HStored.*xSubBand));
aecmStructNew.enerEcho(setupStruct.currentBlock) = log(sum(aecmStructNew.H.*xSubBand));
aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log(sum(aecmStructNew.HStored.*xSubBand));
elseif (aecmStructNew.bandFactor == 2)
aecmStructNew.enerEcho(setupStruct.currentBlock) = log(aecmStructNew.H'*xSubBand);
aecmStructNew.enerEchoStored(setupStruct.currentBlock) = log(aecmStructNew.HStored'*xSubBand);
end
% Last 100 blocks of data, used for plotting
n100 = max(1,setupStruct.currentBlock-99):setupStruct.currentBlock;
enerError = aecmStructNew.enerNear(n100)-aecmStructNew.enerEcho(n100);
enerErrorStored = aecmStructNew.enerNear(n100)-aecmStructNew.enerEchoStored(n100);
% Store the far end sub band. This is needed if we use LSE instead of NLMS
aecmStructNew.X = [xSubBand aecmStructNew.X(:,1:end-1)];
% Update energy levels, which control the VAD
if ((aecmStructNew.enerFar(setupStruct.currentBlock) < aecmStructNew.energyMin) & (aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStruct.FAR_ENERGY_MIN))
aecmStructNew.energyMin = aecmStructNew.enerFar(setupStruct.currentBlock);
%aecmStructNew.energyMin = max(aecmStructNew.energyMin,12);
aecmStructNew.energyMin = max(aecmStructNew.energyMin,aecmStruct.FAR_ENERGY_MIN);
aecmStructNew.energyLevel = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThres+aecmStructNew.energyMin;
aecmStructNew.energyLevelMSE = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThresMSE+aecmStructNew.energyMin;
end
if (aecmStructNew.enerFar(setupStruct.currentBlock) > aecmStructNew.energyMax)
aecmStructNew.energyMax = aecmStructNew.enerFar(setupStruct.currentBlock);
aecmStructNew.energyLevel = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThres+aecmStructNew.energyMin;
aecmStructNew.energyLevelMSE = (aecmStructNew.energyMax-aecmStructNew.energyMin)*aecmStructNew.energyThresMSE+aecmStructNew.energyMin;
end
% Calculate current energy error in near end (estimated echo vs. near end)
dE = aecmStructNew.enerNear(setupStruct.currentBlock)-aecmStructNew.enerEcho(setupStruct.currentBlock);
%%%%%%%%
% Calculate step size used in LMS algorithm, based on current far end energy and near end energy error (dE)
%%%%%%%%
if setupStruct.stepSize_flag
[mu, aecmStructNew] = calcStepSize(aecmStructNew.enerFar(setupStruct.currentBlock), dE, aecmStructNew, setupStruct.currentBlock, 1);
else
mu = 0.25;
end
aecmStructNew.muLog(setupStruct.currentBlock) = mu; % Store the step size
% Estimate Echo Spectral Shape
[U, aecmStructNew.H] = fallerEstimator(ySubBand,aecmStructNew.X,aecmStructNew.H,mu);
%%%%%
% Determine if we should store or restore the channel
%%%%%
if ((setupStruct.currentBlock <= aecmStructNew.convLength) | (~setupStruct.channelUpdate_flag))
aecmStructNew.HStored = aecmStructNew.H; % Store what you have after startup
elseif ((setupStruct.currentBlock > aecmStructNew.convLength) & (setupStruct.channelUpdate_flag))
if ((aecmStructNew.enerFar(setupStruct.currentBlock) < aecmStructNew.energyLevelMSE) & (aecmStructNew.enerFar(setupStruct.currentBlock-1) >= aecmStructNew.energyLevelMSE))
xxx = aecmStructNew.countMseH;
if (xxx > 20)
mseStored = mean(abs(aecmStructNew.enerEchoStored(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)-aecmStructNew.enerNear(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)));
mseLatest = mean(abs(aecmStructNew.enerEcho(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)-aecmStructNew.enerNear(setupStruct.currentBlock-xxx:setupStruct.currentBlock-1)));
%fprintf('Stored: %4f Latest: %4f\n', mseStored, mseLatest) % Uncomment if you want to display the MSE values
if ((mseStored < 0.8*mseLatest) & (aecmStructNew.mseHStoredOld < 0.8*aecmStructNew.mseHLatestOld))
aecmStructNew.H = aecmStructNew.HStored;
fprintf('Restored H at block %d\n',setupStruct.currentBlock)
elseif (((0.8*mseStored > mseLatest) & (mseLatest < aecmStructNew.mseHThreshold) & (aecmStructNew.mseHLatestOld < aecmStructNew.mseHThreshold)) | (mseStored == Inf))
aecmStructNew.HStored = aecmStructNew.H;
fprintf('Stored new H at block %d\n',setupStruct.currentBlock)
end
aecmStructNew.mseHStoredOld = mseStored;
aecmStructNew.mseHLatestOld = mseLatest;
end
elseif ((aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStructNew.energyLevelMSE) & (aecmStructNew.enerFar(setupStruct.currentBlock-1) < aecmStructNew.energyLevelMSE))
aecmStructNew.countMseH = 1;
elseif (aecmStructNew.enerFar(setupStruct.currentBlock) >= aecmStructNew.energyLevelMSE)
aecmStructNew.countMseH = aecmStructNew.countMseH + 1;
end
end
%%%%%
% Check delay (calculate the delay offset (if we can))
% The algorithm is not tuned and should be used with care. It runs separately from Bastiaan's algorithm.
%%%%%
yyy = 31; % Correlation buffer length (currently unfortunately hard coded)
dxxx = 25; % Maximum offset (currently unfortunately hard coded)
if (setupStruct.currentBlock > aecmStructNew.convLength)
if (aecmStructNew.enerFar(setupStruct.currentBlock-(yyy+2*dxxx-1):setupStruct.currentBlock) > aecmStructNew.energyLevelMSE)
for xxx = -dxxx:dxxx
aecmStructNew.delayLatestS(xxx+dxxx+1) = sum(sign(aecmStructNew.enerEcho(setupStruct.currentBlock-(yyy+dxxx-xxx)+1:setupStruct.currentBlock+xxx-dxxx)-mean(aecmStructNew.enerEcho(setupStruct.currentBlock-(yyy++dxxx-xxx)+1:setupStruct.currentBlock+xxx-dxxx))).*sign(aecmStructNew.enerNear(setupStruct.currentBlock-yyy-dxxx+1:setupStruct.currentBlock-dxxx)-mean(aecmStructNew.enerNear(setupStruct.currentBlock-yyy-dxxx+1:setupStruct.currentBlock-dxxx))));
end
aecmStructNew.newDelayCurve = 1;
end
end
if ((setupStruct.currentBlock > 2*aecmStructNew.convLength) & ~rem(setupStruct.currentBlock,yyy*2) & aecmStructNew.newDelayCurve)
[maxV,maxP] = max(aecmStructNew.delayLatestS);
if ((maxP > 2) & (maxP < 2*dxxx))
maxVLeft = aecmStructNew.delayLatestS(max(1,maxP-4));
maxVRight = aecmStructNew.delayLatestS(min(2*dxxx+1,maxP+4));
%fprintf('Max %d, Left %d, Right %d\n',maxV,maxVLeft,maxVRight) % Uncomment if you want to see max value
if ((maxV > 24) & (maxVLeft < maxV - 10) & (maxVRight < maxV - 10))
aecmStructNew.feedbackDelay = maxP-dxxx-1;
aecmStructNew.newDelayCurve = 0;
aecmStructNew.feedbackDelayUpdate = 1;
fprintf('Feedback Update at block %d\n',setupStruct.currentBlock)
end
end
end
% End of "Check delay"
%%%%%%%%
%%%%%
% Calculate suppression gain, based on far end energy and near end energy error (dE)
if (setupStruct.supGain_flag)
[gamma_echo, aecmStructNew.cntIn, aecmStructNew.cntOut] = calcFilterGain(aecmStructNew.enerFar(setupStruct.currentBlock), dE, aecmStructNew, setupStruct.currentBlock, aecmStructNew.convLength, aecmStructNew.cntIn, aecmStructNew.cntOut);
else
gamma_echo = 1;
end
aecmStructNew.gammaLog(setupStruct.currentBlock) = gamma_echo; % Store the gain
gamma_use = gamma_echo;
% Use the stored channel
U = aecmStructNew.HStored.*xSubBand;
% compute Wiener filter and suppressor function
Iy = find(ySubBand);
subBandFilter = zeros(size(ySubBand));
if (aecmStructNew.bandFactor == 2)
subBandFilter(Iy) = (1 - gamma_use*sqrt(U(Iy)./ySubBand(Iy))); % For Faller
else
subBandFilter(Iy) = (1 - gamma_use*(U(Iy)./ySubBand(Iy))); % For COV
end
ix0 = find(subBandFilter < 0); % bounding trick 1
subBandFilter(ix0) = 0;
ix0 = find(subBandFilter > 1); % bounding trick 1
subBandFilter(ix0) = 1;
% Interpolate back to normal frequency bins if we use sub bands
if aecmStructNew.useSubBand
thefilter = interp1(setupStruct.centerFreq,subBandFilter,linspace(0,setupStruct.samplingfreq/2,setupStruct.hsupport1)','nearest');
testfilter = interp1(setupStruct.centerFreq,subBandFilter,linspace(0,setupStruct.samplingfreq/2,1000),'nearest');
thefilter(end) = subBandFilter(end);
internalIndex = 1;
for kk=1:setupStruct.subBandLength+1
internalIndex:internalIndex+setupStruct.numInBand(kk)-1;
thefilter(internalIndex:internalIndex+setupStruct.numInBand(kk)-1) = subBandFilter(kk);
internalIndex = internalIndex + setupStruct.numInBand(kk);
end
else
thefilter = subBandFilter;
testfilter = subBandFilter;
end
% Bound the filter
ix0 = find(thefilter < setupStruct.de_echo_bound); % bounding trick 1
thefilter(ix0) = setupStruct.de_echo_bound; % bounding trick 2
ix0 = find(thefilter > 1); % bounding in reasonable range
thefilter(ix0) = 1;
%%%%
% NLP
%%%%
thefmean = mean(thefilter(8:16));
if (thefmean < 1)
disp('');
end
aecmStructNew.runningfmean = setupStruct.nl_alpha*aecmStructNew.runningfmean + (1-setupStruct.nl_alpha)*thefmean;
slope0 = 1.0/(1.0 - setupStruct.nlSeverity); %
thegain = max(0.0, min(1.0, slope0*(aecmStructNew.runningfmean - setupStruct.nlSeverity)));
if ~setupStruct.nlp_flag
thegain = 1;
end
% END NONLINEARITY
thefilter = thegain*thefilter;
%%%%
% The suppression
%%%%
femicrophone = fmicrophone .* thefilter;
% Store the output energy (used for plotting)
%aecmStructNew.enerOut(setupStruct.currentBlock) = log(abs(femicrophone)'*abs(femicrophone));
aecmStructNew.enerOut(setupStruct.currentBlock) = log(sum(abs(femicrophone)));
if aecmStructNew.plotIt
figure(13)
subplot(311)
%plot(n100,enerFar(n100),'b-',n100,enerNear(n100),'k--',n100,enerEcho(n100),'r-',[n100(1) n100(end)],[1 1]*vadThNew,'b:',[n100(1) n100(end)],[1 1]*((energyMax-energyMin)/4+energyMin),'r-.',[n100(1) n100(end)],[1 1]*vadNearThNew,'g:',[n100(1) n100(end)],[1 1]*energyMax,'r-.',[n100(1) n100(end)],[1 1]*energyMin,'r-.','LineWidth',2)
plot(n100,aecmStructNew.enerFar(n100),'b-',n100,aecmStructNew.enerNear(n100),'k--',n100,aecmStructNew.enerOut(n100),'r-.',n100,aecmStructNew.enerEcho(n100),'r-',n100,aecmStructNew.enerEchoStored(n100),'c-',[n100(1) n100(end)],[1 1]*((aecmStructNew.energyMax-aecmStructNew.energyMin)/4+aecmStructNew.energyMin),'g-.',[n100(1) n100(end)],[1 1]*aecmStructNew.energyMax,'g-.',[n100(1) n100(end)],[1 1]*aecmStructNew.energyMin,'g-.','LineWidth',2)
%title(['Frame ',int2str(i),' av ',int2str(setupStruct.updateno),' State = ',int2str(speechState),' \mu = ',num2str(mu)])
title(['\gamma = ',num2str(gamma_echo),' \mu = ',num2str(mu)])
subplot(312)
%plot(n100,enerError,'b-',[n100(1) n100(end)],[1 1]*vadNearTh,'r:',[n100(1) n100(end)],[-1.5 -1.5]*vadNearTh,'r:','LineWidth',2)
%plot(n100,enerError,'b-',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2)
plot(n100,enerError,'b-',n100,enerErrorStored,'c-',[n100(1) n100(end)],[1 1]*aecmStructNew.varMean,'k--',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2)
% Plot mu
%plot(n100,log2(aecmStructNew.muLog(n100)),'b-','LineWidth',2)
%plot(n100,log2(aecmStructNew.HGain(n100)),'b-',[n100(1) n100(end)],[1 1]*log2(sum(aecmStructNew.HStored)),'r:','LineWidth',2)
title(['Block ',int2str(setupStruct.currentBlock),' av ',int2str(setupStruct.updateno)])
subplot(313)
%plot(n100,enerVar(n100),'b-',[n100(1) n100(end)],[1 1],'r:',[n100(1) n100(end)],[-2 -2],'r:','LineWidth',2)
%plot(n100,enerVar(n100),'b-','LineWidth',2)
% Plot correlation curve
%plot(-25:25,aecmStructNew.delayStored/max(aecmStructNew.delayStored),'c-',-25:25,aecmStructNew.delayLatest/max(aecmStructNew.delayLatest),'r-',-25:25,(max(aecmStructNew.delayStoredS)-aecmStructNew.delayStoredS)/(max(aecmStructNew.delayStoredS)-min(aecmStructNew.delayStoredS)),'c:',-25:25,(max(aecmStructNew.delayLatestS)-aecmStructNew.delayLatestS)/(max(aecmStructNew.delayLatestS)-min(aecmStructNew.delayLatestS)),'r:','LineWidth',2)
%plot(-25:25,aecmStructNew.delayStored,'c-',-25:25,aecmStructNew.delayLatest,'r-',-25:25,(max(aecmStructNew.delayStoredS)-aecmStructNew.delayStoredS)/(max(aecmStructNew.delayStoredS)-min(aecmStructNew.delayStoredS)),'c:',-25:25,(max(aecmStructNew.delayLatestS)-aecmStructNew.delayLatestS)/(max(aecmStructNew.delayLatestS)-min(aecmStructNew.delayLatestS)),'r:','LineWidth',2)
%plot(-25:25,aecmStructNew.delayLatest,'r-',-25:25,(50-aecmStructNew.delayLatestS)/100,'r:','LineWidth',2)
plot(-25:25,aecmStructNew.delayLatestS,'r:','LineWidth',2)
%plot(-25:25,aecmStructNew.delayStored,'c-',-25:25,aecmStructNew.delayLatest,'r-','LineWidth',2)
plot(0:32,aecmStruct.HStored,'bo-','LineWidth',2)
%title(['\gamma | In = ',int2str(aecmStructNew.muStruct.countInInterval),' | Out High = ',int2str(aecmStructNew.muStruct.countOutHighInterval),' | Out Low = ',int2str(aecmStructNew.muStruct.countOutLowInterval)])
pause(1)
%if ((setupStruct.currentBlock == 860) | (setupStruct.currentBlock == 420) | (setupStruct.currentBlock == 960))
if 0%(setupStruct.currentBlock == 960)
figure(60)
plot(n100,aecmStructNew.enerNear(n100),'k--',n100,aecmStructNew.enerEcho(n100),'k:','LineWidth',2)
legend('Near End','Estimated Echo')
title('Signal Energy witH offset compensation')
figure(61)
subplot(211)
stem(sign(aecmStructNew.enerNear(n100)-mean(aecmStructNew.enerNear(n100))))
title('Near End Energy Pattern (around mean value)')
subplot(212)
stem(sign(aecmStructNew.enerEcho(n100)-mean(aecmStructNew.enerEcho(n100))))
title('Estimated Echo Energy Pattern (around mean value)')
pause
end
drawnow%,pause
elseif ~rem(setupStruct.currentBlock,100)
fprintf('Block %d of %d\n',setupStruct.currentBlock,setupStruct.updateno)
end

View File

@ -1,98 +0,0 @@
function [delayStructNew] = align(xf, yf, delayStruct, i, trueDelay);
% ALIGN  Estimate the echo-path delay (in blocks) between far end and near end.
%
% Converts the current far-end (xf) and near-end (yf) frequency-domain frames
% to log magnitude, thresholds them into binary spectra, and matches the
% near-end binary spectrum against a history of far-end binary spectra
% (hisser2 bit-error counts). The delay with the lowest smoothed bit-error
% count becomes the estimate.
%
% Inputs:
%   xf          - far-end frequency-domain frame (complex column vector)
%   yf          - near-end frequency-domain frame (complex column vector)
%   delayStruct - state struct: spectral history, filter states, delay track
%   i           - current block index
%   trueDelay   - known delay; only used by the disabled plotting code below
%
% Output:
%   delayStructNew - updated state; delayStructNew.delay(i) is the estimate
%%%%%%%
% Bastiaan's algorithm copied
%%%%%%%
% Fixed IIR coefficient sets used for temporal smoothing. Ap500/Bp500 smooth
% the bit-count curve; Ap200/Bp200 smooth the near-end threshold (new method).
Ap500 = [1.00, -4.95, 9.801, -9.70299, 4.80298005, -0.9509900499];
Bp500 = [0.662743088639636, -2.5841655608125, 3.77668102146288, -2.45182477425154, 0.596566274575251, 0.0];
Ap200 = [1.00, -4.875, 9.50625, -9.26859375, 4.518439453125, -0.881095693359375];
Bp200 = [0.862545460994275, -3.2832804496114, 4.67892032308828, -2.95798023879133, 0.699796870041299, 0.0];
oldMethod = 1; % Turn on or off the old method. The new one is Bastiaan's August 2008 updates
THReSHoLD = 2.0; % ADJUSTABLE threshold factor; 4.0 seems good
%%%%%%%%%%%%%%%%%%%
% use log domain (showed improved performance)
% 1e-20 keeps log10 finite for zero-magnitude bins.
xxf = sqrt(real(xf.*conj(xf))+1e-20);
yyf = sqrt(real(yf.*conj(yf))+1e-20);
delayStruct.sxAll2(:,i) = 20*log10(xxf);
delayStruct.syAll2(:,i) = 20*log10(yyf);
% mD limits the history window while i is still smaller than the max delay.
mD = min(i-1,delayStruct.maxDelayb);
if oldMethod
factor = 1.0;
histLenb = 250;
% Median over the recent history acts as the binarization pivot.
xthreshold = factor*median(delayStruct.sxAll2(:,i-mD:i),2);
ythreshold = factor*median(delayStruct.syAll2(:,i-mD:i),2);
else
xthreshold = sum(delayStruct.sxAll2(:,i-mD:i),2)/(delayStruct.maxDelayb+1);
% IIR-smoothed near-end threshold; filter state z200 persists across calls.
[yout, delayStruct.z200] = filter(Bp200, Ap200, delayStruct.syAll2(:,i), delayStruct.z200, 2);
yout = yout/(delayStruct.maxDelayb+1);
% NOTE(review): the mean-based ythreshold on the next line is immediately
% overwritten by the filtered value, i.e. it is dead code.
ythreshold = mean(delayStruct.syAll2(:,i-mD:i),2);
ythreshold = yout;
end
% Binarize both spectra (one uint32 bit mask per block).
delayStruct.bxspectrum(i) = getBspectrum(delayStruct.sxAll2(:,i), xthreshold, delayStruct.bandfirst, delayStruct.bandlast);
delayStruct.byspectrum(i) = getBspectrum(delayStruct.syAll2(:,i), ythreshold, delayStruct.bandfirst, delayStruct.bandlast);
delayStruct.bxhist(end-mD:end) = delayStruct.bxspectrum(i-mD:i);
% Bit-error count of the near-end spectrum against each candidate delay.
delayStruct.bcount(:,i) = hisser2(delayStruct.byspectrum(i), flipud(delayStruct.bxhist), delayStruct.bandfirst, delayStruct.bandlast);
% Temporal smoothing of the count curve; filter state z500 persists.
[delayStruct.fout(:,i), delayStruct.z500] = filter(Bp500, Ap500, delayStruct.bcount(:,i), delayStruct.z500, 2);
if oldMethod
%delayStruct.new(:,i) = sum(delayStruct.bcount(:,max(1,i-histLenb+1):i),2); % using the history range
% Pad both ends, then 5-tap moving average across the delay axis.
tmpVec = [delayStruct.fout(1,i)*ones(2,1); delayStruct.fout(:,i); delayStruct.fout(end,i)*ones(2,1)]; % using the history range
tmpVec = filter(ones(1,5), 1, tmpVec);
delayStruct.new(:,i) = tmpVec(5:end);
%delayStruct.new(:,i) = delayStruct.fout(:,i); % using the history range
else
[delayStruct.fout(:,i), delayStruct.z500] = filter(Bp500, Ap500, delayStruct.bcount(:,i), delayStruct.z500, 2);
% NEW CODE
delayStruct.new(:,i) = filter([-1,-2,1,4,1,-2,-1], 1, delayStruct.fout(:,i)); %remv smth component
delayStruct.new(1:end-3,i) = delayStruct.new(1+3:end,i);
delayStruct.new(1:6,i) = 0.0;
delayStruct.new(end-6:end,i) = 0.0; % ends are no good
end
% The best delay candidate is the minimum of the (smoothed) count curve.
[valuen, tempdelay] = min(delayStruct.new(:,i)); % find minimum
if oldMethod
% Accept the new minimum only if it is "significant": the set of
% near-minimum candidates must not be one contiguous run, and the jump
% from the previous estimate must be small once one good estimate exists.
threshold = valuen + (max(delayStruct.new(:,i)) - valuen)/4;
thIndex = find(delayStruct.new(:,i) <= threshold);
if (i > 1)
delayDiff = abs(delayStruct.delay(i-1)-tempdelay+1);
if (delayStruct.oneGoodEstimate & (max(diff(thIndex)) > 1) & (delayDiff < 10))
% We consider this minimum to be significant, hence update the delay
delayStruct.delay(i) = tempdelay;
elseif (~delayStruct.oneGoodEstimate & (max(diff(thIndex)) > 1))
delayStruct.delay(i) = tempdelay;
if (i > histLenb)
delayStruct.oneGoodEstimate = 1;
end
else
delayStruct.delay(i) = delayStruct.delay(i-1);
end
else
delayStruct.delay(i) = tempdelay;
end
else
threshold = THReSHoLD*std(delayStruct.new(:,i)); % set updata threshold
if ((-valuen > threshold) | (i < delayStruct.smlength)) % see if you want to update delay
delayStruct.delay(i) = tempdelay;
else
delayStruct.delay(i) = delayStruct.delay(i-1);
end
% END NEW CODE
end
delayStructNew = delayStruct;
% administrative and plotting stuff
if( 0)
figure(10);
plot([1:length(delayStructNew.new(:,i))],delayStructNew.new(:,i),trueDelay*[1 1],[min(delayStructNew.new(:,i)),max(delayStructNew.new(:,i))],'r',[1 length(delayStructNew.new(:,i))],threshold*[1 1],'r:', 'LineWidth',2);
%plot([1:length(delayStructNew.bcount(:,i))],delayStructNew.bcount(:,i),trueDelay*[1 1],[min(delayStructNew.bcount(:,i)),max(delayStructNew.bcount(:,i))],'r','LineWidth',2);
%plot([thedelay,thedelay],[min(fcount(:,i)),max(fcount(:,i))],'r');
%title(sprintf('bin count and known delay at time %5.1f s\n',(i-1)*(support/(fs*oversampling))));
title(delayStructNew.oneGoodEstimate)
xlabel('delay in frames');
%hold off;
drawnow
end

View File

@ -1,88 +0,0 @@
function [gam, cntIn2, cntOut2] = calcFilterGain(energy, dE, aecmStruct, t, T, cntIn, cntOut)
% calcFilterGain  Suppression gain from far-end energy and energy error dE.
%
% Inputs:
%   energy     - current far-end (log) energy
%   dE         - near-end minus echo energy error for the current block
%   aecmStruct - AECM state (energyMin/Max/Level thresholds are read)
%   t, T       - current block index and convergence length
%   cntIn/Out  - hysteresis counters for dE inside/outside the tolerance band
% Outputs:
%   gam            - suppression gain
%   cntIn2/cntOut2 - updated counters
%
% NOTE(review): this function contains several experimental gain formulas
% that are computed and then discarded — the unconditional "gam = 1;" below
% resets everything computed before it. Only the final
% "if (energy < aecmStruct.energyLevel) ... else ..." branch determines the
% returned gain (the earlier code still matters for the isempty debug dump).
defaultLevel = 1.2;
cntIn2 = cntIn;
cntOut2 = cntOut;
if (t < T)
% During initial convergence: no suppression shaping.
gam = 1;
else
dE1 = -5;
dE2 = 1;
gamMid = 0.2;
% --- Experimental formula #1 (overwritten below) ---
gam = max(0,min((energy - aecmStruct.energyMin)/(aecmStruct.energyLevel - aecmStruct.energyMin), 1-(1-gamMid)*(aecmStruct.energyMax-energy)/(aecmStruct.energyMax-aecmStruct.energyLevel)));
dEOffset = -0.5;
dEWidth = 1.5;
%gam2 = max(1,2-((dE-dEOffset)/(dE2-dEOffset)).^2);
gam2 = 1+(abs(dE-dEOffset)<(dE2-dEOffset));
gam = gam*gam2;
if (energy < aecmStruct.energyLevel)
gam = 0;
else
gam = defaultLevel;
end
% Zero-crossing count of the energy error over the last 64 blocks,
% used here experimentally and echoed by the isempty debug dump below.
dEVec = aecmStruct.enerNear(t-63:t)-aecmStruct.enerEcho(t-63:t);
%dEVec = aecmStruct.enerNear(t-20:t)-aecmStruct.enerEcho(t-20:t);
numCross = 0;
currentState = 0;
for ii=1:64
if (currentState == 0)
% NOTE(review): the lower bound is the literal -2, not dE1 = -5;
% dE1 is defined above but never used — presumably intentional tuning.
currentState = (dEVec(ii) > dE2) - (dEVec(ii) < -2);
elseif ((currentState == 1) & (dEVec(ii) < -2))
numCross = numCross + 1;
currentState = -1;
elseif ((currentState == -1) & (dEVec(ii) > dE2))
numCross = numCross + 1;
currentState = 1;
end
end
gam = max(0, gam - numCross/25);
% --- Reset: everything above this line is discarded ---
gam = 1;
% --- Active formula: energy ramp (gam2) times dE proximity factor (gam3),
% blended in over timeFactorLength blocks of in-band dE. ---
ener_A = 1;
ener_B = 0.8;
ener_C = aecmStruct.energyLevel + (aecmStruct.energyMax-aecmStruct.energyLevel)/5;
dE_A = 4;%2;
dE_B = 3.6;%1.8;
dE_C = 0.9*dEWidth;
dE_D = 1;
timeFactorLength = 10;
ddE = abs(dE-dEOffset);
if (energy < aecmStruct.energyLevel)
% Far end too quiet: no echo expected, no suppression gain.
gam = 0;
else
gam = 1;
gam2 = max(0, min(ener_B*(energy-aecmStruct.energyLevel)/(ener_C-aecmStruct.energyLevel), ener_B+(ener_A-ener_B)*(energy-ener_C)/(aecmStruct.energyMax-ener_C)));
if (ddE < dEWidth)
% Update counters
cntIn2 = cntIn2 + 1;
if (cntIn2 > 2)
cntOut2 = 0;
end
% NOTE(review): gam3 is recomputed to the constant dE_A right after;
% the piecewise formula on the previous line is dead.
gam3 = max(dE_D, min(dE_A-(dE_A-dE_B)*(ddE/dE_C), dE_D+(dE_B-dE_D)*(dEWidth-ddE)/(dEWidth-dE_C)));
gam3 = dE_A;
else
% Update counters
cntOut2 = cntOut2 + 1;
if (cntOut2 > 2)
cntIn2 = 0;
end
%gam2 = 1;
gam3 = dE_D;
end
% Fade from gam = 1 toward gam2*gam3 as in-band blocks accumulate.
timeFactor = min(1, cntIn2/timeFactorLength);
gam = gam*(1-timeFactor) + timeFactor*gam2*gam3;
end
%gam = gam/floor(numCross/2+1);
end
% Debug dump if gam somehow ended up empty (should not happen).
if isempty(gam)
numCross
timeFactor
cntIn2
cntOut2
gam2
gam3
end

View File

@ -1,105 +0,0 @@
function [mu, aecmStructNew] = calcStepSize(energy, dE, aecmStruct, t, logscale)
% calcStepSize  Adaptive NLMS step size for the AECM channel update.
%
% The base step grows with far-end energy (between energyMin and energyMax)
% and with time (up to convLength T), optionally on a log2 scale. It is then
% scaled down by muDT (large energy error => probable double talk) and by
% muVar (2^-numCross, where numCross counts sign alternations of the energy
% error over the last 64 blocks).
%
% Inputs:
%   energy     - current far-end (log) energy
%   dE         - near-end minus echo energy error
%   aecmStruct - AECM state (MU_MIN/MAX, energy stats, muStruct counters)
%   t          - current block index (default 1)
%   logscale   - nonzero => interpolate the exponent and use mu = 2^mu
% Outputs:
%   mu            - step size, capped at 0.25
%   aecmStructNew - state with updated counters and varMean
if (nargin < 4)
t = 1;
logscale = 1;
elseif (nargin == 4)
logscale = 1;
end
T = aecmStruct.convLength;
if logscale
% Interpolate the exponent between MU_MIN and a time-ramped maximum.
currentMuMax = aecmStruct.MU_MIN + (aecmStruct.MU_MAX-aecmStruct.MU_MIN)*min(t,T)/T;
if (aecmStruct.energyMin >= aecmStruct.energyMax)
mu = aecmStruct.MU_MIN;
else
mu = (energy - aecmStruct.energyMin)/(aecmStruct.energyMax - aecmStruct.energyMin)*(currentMuMax-aecmStruct.MU_MIN) + aecmStruct.MU_MIN;
end
mu = 2^mu;
if (energy < aecmStruct.energyLevel)
% Far end too weak to adapt on.
mu = 0;
end
else
% Linear-scale variant.
muMin = 0;
muMax = 0.5;
currentMuMax = muMin + (muMax-muMin)*min(t,T)/T;
if (aecmStruct.energyMin >= aecmStruct.energyMax)
mu = muMin;
else
mu = (energy - aecmStruct.energyMin)/(aecmStruct.energyMax - aecmStruct.energyMin)*(currentMuMax-muMin) + muMin;
end
end
dE2 = 1;
dEOffset = -0.5;
offBoost = 5;
% Track how long dE stays inside / below / above the tolerance band.
if (mu > 0)
if (abs(dE-aecmStruct.ENERGY_DEV_OFFSET) > aecmStruct.ENERGY_DEV_TOL)
aecmStruct.muStruct.countInInterval = 0;
else
aecmStruct.muStruct.countInInterval = aecmStruct.muStruct.countInInterval + 1;
end
if (dE < aecmStruct.ENERGY_DEV_OFFSET - aecmStruct.ENERGY_DEV_TOL)
aecmStruct.muStruct.countOutLowInterval = aecmStruct.muStruct.countOutLowInterval + 1;
else
aecmStruct.muStruct.countOutLowInterval = 0;
end
if (dE > aecmStruct.ENERGY_DEV_OFFSET + aecmStruct.ENERGY_DEV_TOL)
aecmStruct.muStruct.countOutHighInterval = aecmStruct.muStruct.countOutHighInterval + 1;
else
aecmStruct.muStruct.countOutHighInterval = 0;
end
end
% NOTE(review): muVar here and muOff are computed but overwritten/unused —
% muVar is reset to 1 two lines down and muOff only appears in the
% commented-out product at the end. Experimental leftovers.
muVar = 2^min(-3,5/50*aecmStruct.muStruct.countInInterval-3);
muOff = 2^max(offBoost,min(0,offBoost*(aecmStruct.muStruct.countOutLowInterval-aecmStruct.muStruct.minOutLowInterval)/(aecmStruct.muStruct.maxOutLowInterval-aecmStruct.muStruct.minOutLowInterval)));
muLow = 1/64;
muVar = 1;
if (t < 2*T)
% Early phase: no double-talk attenuation.
muDT = 1;
muVar = 1;
mdEVec = 0;
numCross = 0;
else
% Shrink the step when dE deviates from the expected offset.
muDT = min(1,max(muLow,1-(1-muLow)*(dE-aecmStruct.ENERGY_DEV_OFFSET)/aecmStruct.ENERGY_DEV_TOL));
dEVec = aecmStruct.enerNear(t-63:t)-aecmStruct.enerEcho(t-63:t);
%dEVec = aecmStruct.enerNear(t-20:t)-aecmStruct.enerEcho(t-20:t);
% Count sign alternations (crossings) of the energy error.
numCross = 0;
currentState = 0;
for ii=1:64
if (currentState == 0)
currentState = (dEVec(ii) > dE2) - (dEVec(ii) < -2);
elseif ((currentState == 1) & (dEVec(ii) < -2))
numCross = numCross + 1;
currentState = -1;
elseif ((currentState == -1) & (dEVec(ii) > dE2))
numCross = numCross + 1;
currentState = 1;
end
end
%logicDEVec = (dEVec > dE2) - (dEVec < -2);
%numCross = sum(abs(diff(logicDEVec)));
%mdEVec = mean(abs(dEVec-dEOffset));
%mdEVec = mean(abs(dEVec-mean(dEVec)));
%mdEVec = max(dEVec)-min(dEVec);
%if (mdEVec > 4)%1.5)
% muVar = 0;
%end
% NOTE(review): the first muVar is immediately overwritten; the effective
% attenuation is 2^(-numCross).
muVar = 2^(-floor(numCross/2));
muVar = 2^(-numCross);
end
%muVar = 1;
% if (eStd > (dE2-dEOffset))
% muVar = 1/8;
% else
% muVar = 1;
% end
%mu = mu*muDT*muVar*muOff;
mu = mu*muDT*muVar;
mu = min(mu,0.25);
aecmStructNew = aecmStruct;
%aecmStructNew.varMean = mdEVec;
aecmStructNew.varMean = numCross;

View File

@ -1,42 +0,0 @@
function [U, Hnew] = fallerEstimator(Y, X, H, mu)
% fallerEstimator  One NLMS update of the frequency-domain echo channel.
%
% Near end signal is stacked frame by frame columnwise in matrix Y and far
% end in X; the channel is adapted independently per frequency bin.
%
% Inputs:
%   Y  - near-end spectra, one row per frequency bin (column 1 is used)
%   X  - far-end spectra, numFreqs x Q (Q = time diversity of the channel)
%   H  - current channel estimate, numFreqs x Q
%   mu - NLMS step size
% Outputs:
%   U    - echo estimate per frequency bin, H(kk,:)*x after the update
%   Hnew - updated channel estimate
%
% Fix vs. previous revision: the nargin bookkeeping was broken — nargin
% could never be 5 with four declared parameters, and nargin == 4 assigned
% the channel matrix H to the unused double-talk flag dtd. That code, and
% the unused Emax/dEH/nu leakage experiment, have been removed; the NLMS
% update itself is unchanged.
regParam = 1; % regularization: keeps the normalization finite for quiet bins
[numFreqs, numFrames] = size(Y);
[numFreqs, Q] = size(X);
U = zeros(numFreqs, 1);
for kk = 1:numFreqs
x = X(kk,1:Q)';
y = Y(kk,1);
% Normalized LMS: step proportional to the a priori error y - H*x,
% normalized by the far-end power plus regParam.
Htmp = mu*(y-H(kk,:)*x)/(x'*x+regParam)*x;
H(kk,:) = H(kk,:) + Htmp';
U(kk,1) = H(kk,:)*x;
end
Hnew = H;

View File

@ -1,22 +0,0 @@
function bspectrum=getBspectrum(ps,threshold,bandfirst,bandlast)
% getBspectrum  Binarize a power spectrum against a pivot spectrum.
%
% Bit k of the returned uint32 is set when the power spectrum exceeds the
% threshold spectrum in band k, for k in bandfirst:bandlast.
%   ps        - current power spectrum (float vector)
%   threshold - threshold/pivot spectrum (float vector, same length as ps)
%   bandfirst - first band considered
%   bandlast  - last band considered (at most 32, at most length(ps))
if (length(ps) < bandlast | bandlast > 32 | length(ps) ~= length(threshold))
error('BinDelayEst:spectrum:invalid','Dimensionality error');
end
% Set one bit per band where the spectrum lies strictly above the pivot.
% ("excess" rather than "diff" — avoids shadowing the builtin diff.)
bspectrum = uint32(0);
excess = ps - threshold;
for band = bandfirst:bandlast
if (excess(band) > 0)
bspectrum = bitset(bspectrum, band);
end
end

View File

@ -1,21 +0,0 @@
function bcount=hisser2(bs,bsr,bandfirst,bandlast)
% hisser2  Bit-error histogram between a binary spectrum and references.
%
% For every candidate delay i, counts how many bits (within bands
% bandfirst:bandlast) differ between the current binary spectrum bs and the
% reference binary spectrum bsr(i). A low count indicates a good match, so
% all delays are weighted equally.
%   bs        - current binary spectrum (one uint32)
%   bsr       - reference binary spectra, one uint32 per candidate delay
%   bandfirst - first band considered
%   bandlast  - last band considered
% Returns bcount, a column vector of bit counts (one per delay).
numDelays = length(bsr);
bcount = zeros(numDelays, 1);
for delayIdx = 1:numDelays
% XOR marks the bands where the two binary spectra disagree;
% the delay should have a low count for matching near/far patterns.
mismatch = bitxor(bs, bsr(delayIdx));
bcount(delayIdx) = sum(bitget(mismatch, bandfirst:bandlast));
end

View File

@ -1,283 +0,0 @@
% --- mainProgram: input configuration and signal loading ---
% Loads the far-end (xFar) and near-end (yNear) test signals, either from
% raw 16-bit PCM files (useHTC path) or from per-scenario WAV files whose
% names are built from speakerType/scenario (set by simEnvironment).
useHTC = 1; % Set this if you want to run a single file and set file names below. Otherwise use simEnvironment to run from several scenarios in a row
delayCompensation_flag = 0; % Set this flag to one if you want to turn on the delay compensation/enhancement
global FARENDFFT;
global NEARENDFFT;
global F;
if useHTC
% fid=fopen('./htcTouchHd/nb/aecFar.pcm'); xFar=fread(fid,'short'); fclose(fid);
% fid=fopen('./htcTouchHd/nb/aecNear.pcm'); yNear=fread(fid,'short'); fclose(fid);
% fid=fopen('./samsungBlackjack/nb/aecFar.pcm'); xFar=fread(fid,'short'); fclose(fid);
% fid=fopen('./samsungBlackjack/nb/aecNear.pcm'); yNear=fread(fid,'short'); fclose(fid);
% fid=fopen('aecFarPoor.pcm'); xFar=fread(fid,'short'); fclose(fid);
% fid=fopen('aecNearPoor.pcm'); yNear=fread(fid,'short'); fclose(fid);
% fid=fopen('out_aes.pcm'); outAES=fread(fid,'short'); fclose(fid);
fid=fopen('aecFar4.pcm'); xFar=fread(fid,'short'); fclose(fid);
fid=fopen('aecNear4.pcm'); yNear=fread(fid,'short'); fclose(fid);
% No separate near-end speech reference for the PCM inputs.
yNearSpeech = zeros(size(xFar));
% NOTE(review): fs = 8000 is immediately overridden to 16000 below while
% frameSize stays 64 — presumably intentional tuning, but worth confirming.
fs = 8000;
frameSize = 64;
% frameSize = 128;
fs = 16000;
% frameSize = 256;
%F = load('fftValues.txt');
%FARENDFFT = F(:,1:33);
%NEARENDFFT = F(:,34:66);
else
loadFileFar = [speakerType, '_s_',scenario,'_far_b.wav'];
[xFar,fs,nbits] = wavread(loadFileFar);
xFar = xFar*2^(nbits-1);
loadFileNear = [speakerType, '_s_',scenario,'_near_b.wav'];
[yNear,fs,nbits] = wavread(loadFileNear);
yNear = yNear*2^(nbits-1);
loadFileNearSpeech = [speakerType, '_s_',scenario,'_nearSpeech_b.wav'];
[yNearSpeech,fs,nbits] = wavread(loadFileNearSpeech);
yNearSpeech = yNearSpeech*2^(nbits-1);
frameSize = 256;
end
% --- mainProgram: AECM and delay-estimator configuration ---
% Builds setupStruct (framing, windows, feature flags) and the initial
% delayStruct, then fills in the derived fields via updateSettings.
dtRegions = [];
% General settings for the AECM
setupStruct = struct(...
'stepSize_flag', 1,... % This flag turns on the step size calculation. If turned off, mu = 0.25.
'supGain_flag', 0,... % This flag turns on the suppression gain calculation. If turned off, gam = 1.
'channelUpdate_flag', 0,... % This flag turns on the channel update. If turned off, H is updated for convLength and then kept constant.
'nlp_flag', 0,... % Turn on/off NLP
'withVAD_flag', 0,... % Turn on/off NLP
'useSubBand', 0,... % Set to 1 if to use subBands
'useDelayEstimation', 1,... % Set to 1 if to use delay estimation
'support', frameSize,... % # of samples per frame
'samplingfreq',fs,... % Sampling frequency
'oversampling', 2,... % Overlap between blocks/frames
'updatel', 0,... % # of samples between blocks
'hsupport1', 0,... % # of bins in frequency domain
'factor', 0,... % synthesis window amplification
'tlength', 0,... % # of samples of entire file
'updateno', 0,... % # of updates
'nb', 1,... % # of blocks
'currentBlock', 0,... %
'win', zeros(frameSize,1),...% Window to apply for fft and synthesis
'avtime', 1,... % Time (in sec.) to perform averaging
'estLen', 0,... % Averaging in # of blocks
'A_GAIN', 10.0,... %
'suppress_overdrive', 1.0,... % overdrive factor for suppression 1.4 is good
'gamma_echo', 1.0,... % same as suppress_overdrive but at different place
'de_echo_bound', 0.0,... %
'nl_alpha', 0.4,... % memory; seems not very critical
'nlSeverity', 0.2,... % nonlinearity severity: 0 does nothing; 1 suppresses all
'numInBand', [],... % # of frequency bins in resp. subBand
'centerFreq', [],... % Center frequency of resp. subBand
'dtRegions', dtRegions,... % Regions where we have DT
'subBandLength', frameSize/2);%All bins
%'subBandLength', 11); %Something's wrong when subBandLength even
%'nl_alpha', 0.8,... % memory; seems not very critical
delayStruct = struct(...
'bandfirst', 8,...
'bandlast', 25,...
'smlength', 600,...
'maxDelay', 0.4,...
'oneGoodEstimate', 0,...
'delayAdjust', 0,...
'maxDelayb', 0);
% More parameters in delayStruct are constructed in "updateSettings" below
% Make struct settings
[setupStruct, delayStruct] = updateSettings(yNear, xFar, setupStruct, delayStruct);
% Override the Bark filterbank weights: one bin per "band" (full resolution).
setupStruct.numInBand = ones(setupStruct.hsupport1,1);
Q = 1; % Time diversity in channel
% General settings for the step size calculation
% (hysteresis counters and bounds consumed by calcStepSize)
muStruct = struct(...
'countInInterval', 0,...
'countOutHighInterval', 0,...
'countOutLowInterval', 0,...
'minInInterval', 50,...
'minOutHighInterval', 10,...
'minOutLowInterval', 10,...
'maxOutLowInterval', 50);
% General settings for the AECM
% (channel estimates H/HStored, energy statistics, logs sized to the number
% of processing blocks, and the delay-feedback fields used by AECMobile)
aecmStruct = struct(...
'plotIt', 0,... % Set to 0 to turn off plotting
'useSubBand', 0,...
'bandFactor', 1,...
'H', zeros(setupStruct.subBandLength+1,Q),...
'HStored', zeros(setupStruct.subBandLength+1,Q),...
'X', zeros(setupStruct.subBandLength+1,Q),...
'energyThres', 0.28,...
'energyThresMSE', 0.4,...
'energyMin', inf,...
'energyMax', -inf,...
'energyLevel', 0,...
'energyLevelMSE', 0,...
'convLength', 100,...
'gammaLog', ones(setupStruct.updateno,1),...
'muLog', ones(setupStruct.updateno,1),...
'enerFar', zeros(setupStruct.updateno,1),...
'enerNear', zeros(setupStruct.updateno,1),...
'enerEcho', zeros(setupStruct.updateno,1),...
'enerEchoStored', zeros(setupStruct.updateno,1),...
'enerOut', zeros(setupStruct.updateno,1),...
'runningfmean', 0,...
'muStruct', muStruct,...
'varMean', 0,...
'countMseH', 0,...
'mseHThreshold', 1.1,...
'mseHStoredOld', inf,...
'mseHLatestOld', inf,...
'delayLatestS', zeros(1,51),...
'feedbackDelay', 0,...
'feedbackDelayUpdate', 0,...
'cntIn', 0,...
'cntOut', 0,...
'FAR_ENERGY_MIN', 1,...
'ENERGY_DEV_OFFSET', 0.5,...
'ENERGY_DEV_TOL', 1.5,...
'MU_MIN', -16,...
'MU_MAX', -2,...
'newDelayCurve', 0);
% Adjust speech signals
% Prepend hsupport1-1 zeros, then re-truncate to tlength samples.
xFar = [zeros(setupStruct.hsupport1-1,1);xFar(1:setupStruct.tlength)];
yNear = [zeros(setupStruct.hsupport1-1,1);yNear(1:setupStruct.tlength)];
yNearSpeech = [zeros(setupStruct.hsupport1-1,1);yNearSpeech(1:setupStruct.tlength)];
xFar = xFar(1:setupStruct.tlength);
yNear = yNear(1:setupStruct.tlength);
% Set figure settings
if aecmStruct.plotIt
figure(13)
set(gcf,'doublebuffer','on')
end
%%%%%%%%%%
% Here starts the algorithm
% Dividing into frames and then estimating the near end speech
%%%%%%%%%%
% Pre-allocate per-block spectra (f* complex frames, af* magnitudes) and
% the output buffer emicrophone for the overlap-add synthesis.
fTheFarEnd = complex(zeros(setupStruct.hsupport1,1));
afTheFarEnd = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
fFar = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
fmicrophone = complex(zeros(setupStruct.hsupport1,1));
afmicrophone = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
fNear = zeros(setupStruct.hsupport1,setupStruct.updateno+1);
femicrophone = complex(zeros(setupStruct.hsupport1,1));
emicrophone = zeros(setupStruct.tlength,1);
% Hand-measured delay table for the "manual delay" mode (mode 2):
% delSamples/delStarts are converted from samples to block indices.
% NaN entries mark segments with no usable measurement.
if (setupStruct.useDelayEstimation == 2)
delSamples = [1641 1895 2032 1895 2311 2000 2350 2222 NaN 2332 2330 2290 2401 2415 NaN 2393 2305 2381 2398];
delBlocks = round(delSamples/setupStruct.updatel);
delStarts = floor([25138 46844 105991 169901 195739 218536 241803 333905 347703 362660 373753 745135 765887 788078 806257 823835 842443 860139 881869]/setupStruct.updatel);
else
delStarts = [];
end
% --- mainProgram: per-block processing loop ---
% For each overlapping block: windowed FFT analysis of far/near signals,
% delay estimation (align / manual table / fixed offset), AECM echo
% suppression, and overlap-add synthesis into emicrophone.
for i=1:setupStruct.updateno
setupStruct.currentBlock = i;
% Sample range of the current block (updatel hop, support-long frame).
sb = (i-1)*setupStruct.updatel + 1;
se = sb + setupStruct.support - 1;
%%%%%%%
% Analysis FFTs
%%%%%%%
% Far end signal
temp = fft(setupStruct.win .* xFar(sb:se))/frameSize;
fTheFarEnd = temp(1:setupStruct.hsupport1);
afTheFarEnd(:,i) = abs(fTheFarEnd);
fFar(:,i) = fTheFarEnd;
% Near end signal
temp = fft(setupStruct.win .* yNear(sb:se))/frameSize;%,pause
fmicrophone = temp(1:setupStruct.hsupport1);
afmicrophone(:,i) = abs(fmicrophone);
fNear(:,i) = fmicrophone;
%abs(fmicrophone),pause
% The true near end speaker (if we have such info)
temp = fft(setupStruct.win .* yNearSpeech(sb:se));
aftrueSpeech = abs(temp(1:setupStruct.hsupport1));
if(i == 1000)
%break;
end
% Perform delay estimation
if (setupStruct.useDelayEstimation == 1)
% Delay Estimation
delayStruct = align(fTheFarEnd, fmicrophone, delayStruct, i);
%delayStruct.delay(i) = 39;%19;
% idel: index of the far-end block matching the current near-end block.
idel = max(i - delayStruct.delay(i) + 1,1);
if delayCompensation_flag
% If we have a new delay estimate from Bastiaan's alg. update the offset
if (delayStruct.delay(i) ~= delayStruct.delay(max(1,i-1)))
delayStruct.delayAdjust = delayStruct.delayAdjust + delayStruct.delay(i) - delayStruct.delay(i-1);
end
% Store the compensated delay
delayStruct.delayNew(i) = delayStruct.delay(i) - delayStruct.delayAdjust;
if (delayStruct.delayNew(i) < 1)
% Something's wrong
pause,break
end
% Compensate with the offset estimate
idel = idel + delayStruct.delayAdjust;
end
if 0%aecmStruct.plotIt
figure(1)
plot(1:i,delayStruct.delay(1:i),'k:',1:i,delayStruct.delayNew(1:i),'k--','LineWidth',2),drawnow
end
elseif (setupStruct.useDelayEstimation == 2)
% Use "manual delay"
delIndex = find(delStarts<i);
if isempty(delIndex)
idel = i;
else
idel = i - delBlocks(max(delIndex));
if isnan(idel)
% Fall back to the previous table entry when the current one is NaN.
idel = i - delBlocks(max(delIndex)-1);
end
end
else
% No delay estimation
%idel = max(i - 18, 1);
idel = max(i - 50, 1);
end
%%%%%%%%
% This is the AECM algorithm
%
% Output is the new frequency domain signal (hopefully) echo compensated
%%%%%%%%
[femicrophone, aecmStruct] = AECMobile(fmicrophone, afTheFarEnd(:,idel), setupStruct, aecmStruct);
%[femicrophone, aecmStruct] = AECMobile(fmicrophone, FARENDFFT(idel,:)'/2^F(idel,end-1), setupStruct, aecmStruct);
if aecmStruct.feedbackDelayUpdate
% If the feedback tells us there is a new offset out there update the enhancement
delayStruct.delayAdjust = delayStruct.delayAdjust + aecmStruct.feedbackDelay;
aecmStruct.feedbackDelayUpdate = 0;
end
% reconstruction; first make spectrum odd
% (mirror the conjugate half so the IFFT is real-valued)
temp = [femicrophone; flipud(conj(femicrophone(2:(setupStruct.hsupport1-1))))];
emicrophone(sb:se) = emicrophone(sb:se) + setupStruct.factor * setupStruct.win .* real(ifft(temp))*frameSize;
if max(isnan(emicrophone(sb:se)))
% Something's wrong with the output at block i
i
break
end
end
% --- mainProgram: write / inspect results ---
% PCM path: dump the echo-compensated signal as 16-bit PCM. The commented
% lines dump intermediate FFT data for comparison with the C implementation.
if useHTC
fid=fopen('aecOutMatlabC.pcm','w');fwrite(fid,int16(emicrophone),'short');fclose(fid);
%fid=fopen('farendFFT.txt','w');fwrite(fid,int16(afTheFarEnd(:)),'short');fclose(fid);
%fid=fopen('farendFFTreal.txt','w');fwrite(fid,int16(imag(fFar(:))),'short');fclose(fid);
%fid=fopen('farendFFTimag.txt','w');fwrite(fid,int16(real(fFar(:))),'short');fclose(fid);
%fid=fopen('nearendFFT.txt','w');fwrite(fid,int16(afmicrophone(:)),'short');fclose(fid);
%fid=fopen('nearendFFTreal.txt','w');fwrite(fid,int16(real(fNear(:))),'short');fclose(fid);
%fid=fopen('nearendFFTimag.txt','w');fwrite(fid,int16(imag(fNear(:))),'short');fclose(fid);
end
% WAV path: open the signals in spclab for listening/inspection.
if useHTC
%spclab(setupStruct.samplingfreq,xFar,yNear,emicrophone)
else
spclab(setupStruct.samplingfreq,xFar,yNear,emicrophone,yNearSpeech)
end

View File

@ -1,15 +0,0 @@
% simEnvironment: batch driver for mainProgram.
% Sets speakerType/scenario (consumed by mainProgram's WAV-loading branch),
% runs mainProgram for each selected scenario k, and writes the resulting
% echo-compensated signal (emicrophone) as 16-bit PCM.
speakerType = 'fm';
%for k=2:5
%for k=[2 4 5]
for k=3
scenario = int2str(k);
fprintf('Current scenario: %d\n',k)
mainProgram
%saveFile = [speakerType, '_s_',scenario,'_delayEst_v2_vad_man.wav'];
%wavwrite(emic,fs,nbits,saveFile);
%saveFile = ['P:\Engineering_share\BjornV\AECM\',speakerType, '_s_',scenario,'_delayEst_v2_vad_man.pcm'];
%saveFile = [speakerType, '_s_',scenario,'_adaptMu_adaptGamma_withVar_gammFilt_HSt.pcm'];
saveFile = ['scenario_',scenario,'_090417_backupH_nlp.pcm'];
fid=fopen(saveFile,'w');fwrite(fid,int16(emicrophone),'short');fclose(fid);
%pause
end

View File

@ -1,94 +0,0 @@
function [setupStructNew, delayStructNew] = updateSettings(microphone, TheFarEnd, setupStruct, delayStruct);
% updateSettings  Derive framing constants, the analysis window, the
% Bark-scale sub-band filterbank, and the fully-populated delay-estimator
% state from the user-supplied setupStruct/delayStruct and the input signals.
%
% Inputs:
%   microphone  - near-end signal (used only for its length)
%   TheFarEnd   - far-end signal (used only for its length)
%   setupStruct - partially filled configuration (see mainProgram)
%   delayStruct - delay-estimator parameters (bandfirst/bandlast/maxDelay/..)
% Outputs:
%   setupStructNew - setupStruct with derived fields filled in
%   delayStructNew - fresh delay-estimator state with history buffers sized
%                    from the derived block counts
% other, constants
setupStruct.hsupport1 = setupStruct.support/2 + 1;
setupStruct.factor = 2 / setupStruct.oversampling;
setupStruct.updatel = setupStruct.support/setupStruct.oversampling;
setupStruct.estLen = round(setupStruct.avtime * setupStruct.samplingfreq/setupStruct.updatel);
% compute some constants
blockLen = setupStruct.support/setupStruct.oversampling;
delayStruct.maxDelayb = floor(setupStruct.samplingfreq*delayStruct.maxDelay/setupStruct.updatel); % in blocks
%input
% Trim to the shorter of the two signals, rounded to whole blocks.
tlength = min([length(microphone),length(TheFarEnd)]);
updateno = floor(tlength/setupStruct.updatel);
setupStruct.tlength = setupStruct.updatel*updateno;
setupStruct.updateno = updateno - setupStruct.oversampling + 1;
% signal length
n = floor(min([length(TheFarEnd), length(microphone)])/setupStruct.support)*setupStruct.support;
setupStruct.nb = n/blockLen - setupStruct.oversampling + 1; % in blocks
% sqrt-Hann analysis/synthesis window (first sample forced to 0).
setupStruct.win = sqrt([0 ; hanning(setupStruct.support-1)]);
% Construct filterbank in Bark-scale
K = setupStruct.subBandLength; %Something's wrong when K even
% ERB-spaced center frequencies from 0 to samplingfreq/2.
erbs = 21.4*log10(0.00437*setupStruct.samplingfreq/2+1);
fe = (10.^((0:K)'*erbs/K/21.4)-1)/0.00437;
setupStruct.centerFreq = fe;
% Solve for band edges x such that each band straddles its center frequency.
H = diag(ones(1,K-1))+diag(ones(1,K-2),-1);
Hinv = inv(H);
aty = 2*Hinv(end,:)*fe(2:end-1);
boundary = aty - (setupStruct.samplingfreq/2 + fe(end-1))/2;
if rem(K,2)
x1 = min([fe(2)/2, -boundary]);
else
x1 = max([0, boundary]);
end
%x1
g = fe(2:end-1);
g(1) = g(1) - x1/2;
x = 2*Hinv*g;
x = [x1;x];
%figure(42), clf
% xy/yy trace the triangular band shapes; only used by the disabled plots.
xy = zeros((K+1)*4,1);
yy = zeros((K+1)*4,1);
xy(1:4) = [fe(1) fe(1) x(1) x(1)]';
yy(1:4) = [0 1 1 0]'/x(1);
for kk=2:K
xy((kk-1)*4+(1:4)) = [x(kk-1) x(kk-1) x(kk) x(kk)]';
yy((kk-1)*4+(1:4)) = [0 1 1 0]'/(x(kk)-x(kk-1));
end
xy(end-3:end) = [x(K) x(K) fe(end) fe(end)]';
yy(end-3:end) = [0 1 1 0]'/(fe(end)*2-2*x(K));
%plot(xy,yy,'LineWidth',2)
%fill(xy,yy,'y')
x = [0;x];
% Convert band edges from Hz to FFT-bin units, then count bins per band.
xk = x*setupStruct.hsupport1/setupStruct.samplingfreq*2;
%setupStruct.erbBoundaries = xk;
numInBand = zeros(length(xk),1);
xh = (0:setupStruct.hsupport1-1);
for kk=1:length(xk)
if (kk==length(xk))
numInBand(kk) = length(find(xh>=xk(kk)));
else
numInBand(kk) = length(intersect(find(xh>=xk(kk)),find(xh<xk(kk+1))));
end
end
setupStruct.numInBand = numInBand;
setupStructNew = setupStruct;
% Fresh delay-estimator state: spectral histories, IIR filter states
% (z200/z500), binary-spectrum buffers and the per-block delay track.
delayStructNew = struct(...
'sxAll2',zeros(setupStructNew.hsupport1,setupStructNew.nb),...
'syAll2',zeros(setupStructNew.hsupport1,setupStructNew.nb),...
'z200',zeros(5,setupStructNew.hsupport1),...
'z500',zeros(5,delayStruct.maxDelayb+1),...
'bxspectrum',uint32(zeros(setupStructNew.nb,1)),...
'byspectrum',uint32(zeros(setupStructNew.nb,1)),...
'bandfirst',delayStruct.bandfirst,'bandlast',delayStruct.bandlast,...
'bxhist',uint32(zeros(delayStruct.maxDelayb+1,1)),...
'bcount',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),...
'fout',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),...
'new',zeros(1+delayStruct.maxDelayb,setupStructNew.nb),...
'smlength',delayStruct.smlength,...
'maxDelay', delayStruct.maxDelay,...
'maxDelayb', delayStruct.maxDelayb,...
'oneGoodEstimate', 0,...
'delayAdjust', 0,...
'delayNew',zeros(setupStructNew.nb,1),...
'delay',zeros(setupStructNew.nb,1));

View File

@ -1,234 +0,0 @@
function fout = waitbar_j(x,whichbar, varargin)
%WAITBAR Display wait bar.
% H = WAITBAR(X,'title', property, value, property, value, ...)
% creates and displays a waitbar of fractional length X. The
% handle to the waitbar figure is returned in H.
% X should be between 0 and 1. Optional arguments property and
% value allow to set corresponding waitbar figure properties.
% Property can also be an action keyword 'CreateCancelBtn', in
% which case a cancel button will be added to the figure, and
% the passed value string will be executed upon clicking on the
% cancel button or the close figure button.
%
% WAITBAR(X) will set the length of the bar in the most recently
% created waitbar window to the fractional length X.
%
% WAITBAR(X,H) will set the length of the bar in waitbar H
% to the fractional length X.
%
% WAITBAR(X,H,'updated title') will update the title text in
% the waitbar figure, in addition to setting the fractional
% length to X.
%
% WAITBAR is typically used inside a FOR loop that performs a
% lengthy computation. A sample usage is shown below:
%
% h = waitbar(0,'Please wait...');
% for i=1:100,
% % computation here %
% waitbar(i/100,h)
% end
% close(h)
% Clay M. Thompson 11-9-92
% Vlad Kolesnikov 06-7-99
% Copyright 1984-2001 The MathWorks, Inc.
% $Revision: 1.22 $ $Date: 2001/04/15 12:03:29 $
if nargin>=2
if ischar(whichbar)
type=2; %we are initializing
name=whichbar;
elseif isnumeric(whichbar)
type=1; %we are updating, given a handle
f=whichbar;
else
error(['Input arguments of type ' class(whichbar) ' not valid.'])
end
elseif nargin==1
f = findobj(allchild(0),'flat','Tag','TMWWaitbar');
if isempty(f)
type=2;
name='Waitbar';
else
type=1;
f=f(1);
end
else
error('Input arguments not valid.');
end
x = max(0,min(100*x,100));
switch type
case 1, % waitbar(x) update
p = findobj(f,'Type','patch');
l = findobj(f,'Type','line');
if isempty(f) | isempty(p) | isempty(l),
error('Couldn''t find waitbar handles.');
end
xpatch = get(p,'XData');
xpatch = [0 x x 0];
set(p,'XData',xpatch)
xline = get(l,'XData');
set(l,'XData',xline);
if nargin>2,
% Update waitbar title:
hAxes = findobj(f,'type','axes');
hTitle = get(hAxes,'title');
set(hTitle,'string',varargin{1});
end
case 2, % waitbar(x,name) initialize
vertMargin = 0;
if nargin > 2,
% we have optional arguments: property-value pairs
if rem (nargin, 2 ) ~= 0
error( 'Optional initialization arguments must be passed in pairs' );
end
end
oldRootUnits = get(0,'Units');
set(0, 'Units', 'points');
screenSize = get(0,'ScreenSize');
axFontSize=get(0,'FactoryAxesFontSize');
pointsPerPixel = 72/get(0,'ScreenPixelsPerInch');
width = 360 * pointsPerPixel;
height = 75 * pointsPerPixel;
pos = [screenSize(3)/2-width/2 screenSize(4)/2-height/2 width height];
%pos= [501.75 589.5 393.75 52.5];
f = figure(...
'Units', 'points', ...
'BusyAction', 'queue', ...
'Position', pos, ...
'Resize','on', ...
'CreateFcn','', ...
'NumberTitle','off', ...
'IntegerHandle','off', ...
'MenuBar', 'none', ...
'Tag','TMWWaitbar',...
'Interruptible', 'off', ...
'Visible','on');
%%%%%%%%%%%%%%%%%%%%%
% set figure properties as passed to the fcn
% pay special attention to the 'cancel' request
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
if nargin > 2,
propList = varargin(1:2:end);
valueList = varargin(2:2:end);
cancelBtnCreated = 0;
for ii = 1:length( propList )
try
if strcmp(lower(propList{ii}), 'createcancelbtn' ) & ~cancelBtnCreated
cancelBtnHeight = 23 * pointsPerPixel;
cancelBtnWidth = 60 * pointsPerPixel;
newPos = pos;
vertMargin = vertMargin + cancelBtnHeight;
newPos(4) = newPos(4)+vertMargin;
callbackFcn = [valueList{ii}];
set( f, 'Position', newPos, 'CloseRequestFcn', callbackFcn );
cancelButt = uicontrol('Parent',f, ...
'Units','points', ...
'Callback',callbackFcn, ...
'ButtonDownFcn', callbackFcn, ...
'Enable','on', ...
'Interruptible','off', ...
'Position', [pos(3)-cancelBtnWidth*1.4, 7, ...
cancelBtnWidth, cancelBtnHeight], ...
'String','Cancel', ...
'Tag','TMWWaitbarCancelButton');
cancelBtnCreated = 1;
else
% simply set the prop/value pair of the figure
set( f, propList{ii}, valueList{ii});
end
catch
disp ( ['Warning: could not set property ''' propList{ii} ''' with value ''' num2str(valueList{ii}) '''' ] );
end
end
end
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
colormap([]);
axNorm=[.05 .3 .9 .2];
% axNorm=[1 1 1 1];
axPos=axNorm.*[pos(3:4),pos(3:4)] + [0 vertMargin 0 0];
h = axes('XLim',[0 100],...
'YLim',[0 1],...
'Box','on', ...
'Units','Points',...
'FontSize', axFontSize,...
'Position',axPos,...
'XTickMode','manual',...
'YTickMode','manual',...
'XTick',[],...
'YTick',[],...
'XTickLabelMode','manual',...
'XTickLabel',[],...
'YTickLabelMode','manual',...
'YTickLabel',[]);
tHandle=title(name);
tHandle=get(h,'title');
oldTitleUnits=get(tHandle,'Units');
set(tHandle,...
'Units', 'points',...
'String', name);
tExtent=get(tHandle,'Extent');
set(tHandle,'Units',oldTitleUnits);
titleHeight=tExtent(4)+axPos(2)+axPos(4)+5;
if titleHeight>pos(4)
pos(4)=titleHeight;
pos(2)=screenSize(4)/2-pos(4)/2;
figPosDirty=logical(1);
else
figPosDirty=logical(0);
end
if tExtent(3)>pos(3)*1.10;
pos(3)=min(tExtent(3)*1.10,screenSize(3));
pos(1)=screenSize(3)/2-pos(3)/2;
axPos([1,3])=axNorm([1,3])*pos(3);
set(h,'Position',axPos);
figPosDirty=logical(1);
end
if figPosDirty
set(f,'Position',pos);
end
xpatch = [0 x x 0];
ypatch = [0 0 1 1];
xline = [100 0 0 100 100];
yline = [0 0 1 1 0];
p = patch(xpatch,ypatch,'r','EdgeColor','r','EraseMode','none');
l = line(xline,yline,'EraseMode','none');
set(l,'Color',get(gca,'XColor'));
set(f,'HandleVisibility','callback','visible','on', 'resize','off');
set(0, 'Units', oldRootUnits);
end % case
drawnow;
if nargout==1,
fout = f;
end

View File

@ -28,8 +28,6 @@
'echo_control_mobile.c',
'aecm_core.c',
'aecm_core.h',
'aecm_delay_estimator.c',
'aecm_delay_estimator.h',
],
},
],

View File

@ -13,8 +13,8 @@
#include <assert.h>
#include <stdlib.h>
#include "aecm_delay_estimator.h"
#include "echo_control_mobile.h"
#include "delay_estimator.h"
#include "ring_buffer.h"
#include "typedefs.h"
@ -153,11 +153,13 @@ int WebRtcAecm_CreateCore(AecmCore_t **aecmInst)
return -1;
}
if (WebRtcAecm_CreateDelayEstimator(&aecm->delay_estimator, PART_LEN1, MAX_DELAY) == -1)
{
WebRtcAecm_FreeCore(aecm);
aecm = NULL;
return -1;
if (WebRtc_CreateDelayEstimator(&aecm->delay_estimator,
PART_LEN1,
MAX_DELAY,
1) == -1) {
WebRtcAecm_FreeCore(aecm);
aecm = NULL;
return -1;
}
// Init some aecm pointers. 16 and 32 byte alignment is only necessary
@ -242,9 +244,8 @@ int WebRtcAecm_InitCore(AecmCore_t * const aecm, int samplingFreq)
aecm->seed = 666;
aecm->totCount = 0;
if (WebRtcAecm_InitDelayEstimator(aecm->delay_estimator) != 0)
{
return -1;
if (WebRtc_InitDelayEstimator(aecm->delay_estimator) != 0) {
return -1;
}
// Initialize to reasonable values
@ -339,7 +340,7 @@ int WebRtcAecm_FreeCore(AecmCore_t *aecm)
WebRtcApm_FreeBuffer(aecm->nearCleanFrameBuf);
WebRtcApm_FreeBuffer(aecm->outFrameBuf);
WebRtcAecm_FreeDelayEstimator(aecm->delay_estimator);
WebRtc_FreeDelayEstimator(aecm->delay_estimator);
free(aecm);
return 0;
@ -1161,6 +1162,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
WebRtc_Word16 supGain;
WebRtc_Word16 zeros32, zeros16;
WebRtc_Word16 zerosDBufNoisy, zerosDBufClean, zerosXBuf;
int far_q;
WebRtc_Word16 resolutionDiff, qDomainDiff;
const int kMinPrefBand = 4;
@ -1200,10 +1202,10 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
#endif
// Transform far end signal from time domain to frequency domain.
zerosXBuf = TimeToFrequencyDomain(aecm->xBuf,
dfw,
xfa,
&xfaSum);
far_q = TimeToFrequencyDomain(aecm->xBuf,
dfw,
xfa,
&xfaSum);
// Transform noisy near end signal from time domain to frequency domain.
zerosDBufNoisy = TimeToFrequencyDomain(aecm->dBufNoisy,
@ -1211,7 +1213,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
dfaNoisy,
&dfaNoisySum);
aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain;
aecm->dfaNoisyQDomain = zerosDBufNoisy;
aecm->dfaNoisyQDomain = (WebRtc_Word16)zerosDBufNoisy;
if (nearendClean == NULL)
@ -1228,7 +1230,7 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
dfaClean,
&dfaCleanSum);
aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain;
aecm->dfaCleanQDomain = zerosDBufClean;
aecm->dfaCleanQDomain = (WebRtc_Word16)zerosDBufClean;
}
#ifdef ARM_WINM_LOG_
@ -1243,12 +1245,12 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
// Get the delay
// Save far-end history and estimate delay
delay = WebRtcAecm_DelayEstimatorProcess(aecm->delay_estimator,
xfa,
dfaNoisy,
PART_LEN1,
zerosXBuf,
aecm->currentVADValue);
delay = WebRtc_DelayEstimatorProcess(aecm->delay_estimator,
xfa,
dfaNoisy,
PART_LEN1,
far_q,
aecm->currentVADValue);
if (delay < 0)
{
return -1;
@ -1272,16 +1274,21 @@ int WebRtcAecm_ProcessBlock(AecmCore_t * aecm,
QueryPerformanceCounter((LARGE_INTEGER*)&start);
#endif
// Get aligned far end spectrum
far_spectrum_ptr = WebRtcAecm_GetAlignedFarend(aecm->delay_estimator,
PART_LEN1,
&zerosXBuf);
far_spectrum_ptr = WebRtc_AlignedFarend(aecm->delay_estimator,
PART_LEN1,
&far_q);
zerosXBuf = (WebRtc_Word16) far_q;
if (far_spectrum_ptr == NULL)
{
return -1;
}
// Calculate log(energy) and update energy threshold levels
WebRtcAecm_CalcEnergies(aecm, far_spectrum_ptr, zerosXBuf, dfaNoisySum, echoEst32);
WebRtcAecm_CalcEnergies(aecm,
far_spectrum_ptr,
zerosXBuf,
dfaNoisySum,
echoEst32);
// Calculate stepsize
mu = WebRtcAecm_CalcStepSize(aecm);
@ -1923,4 +1930,3 @@ void WebRtcAecm_ResetAdaptiveChannel(AecmCore_t* aecm)
}
#endif // !(defined(WEBRTC_ANDROID) && defined(WEBRTC_ARCH_ARM_NEON))

View File

@ -178,7 +178,7 @@ typedef struct
WebRtc_Word16 farEnergyMaxMin;
WebRtc_Word16 farEnergyVAD;
WebRtc_Word16 farEnergyMSE;
WebRtc_Word16 currentVADValue;
int currentVADValue;
WebRtc_Word16 vadUpdateCount;
WebRtc_Word16 startupState;

View File

@ -1,604 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "aecm_delay_estimator.h"
#include <assert.h>
#include <stdlib.h>
#include "signal_processing_library.h"
#include "typedefs.h"
typedef struct
{
// Pointers to mean values of spectrum and bit counts
WebRtc_Word32* mean_far_spectrum;
WebRtc_Word32* mean_near_spectrum;
WebRtc_Word32* mean_bit_counts;
// Arrays only used locally in DelayEstimatorProcess() but whose size
// is determined at run-time.
WebRtc_Word32* bit_counts;
WebRtc_Word32* far_spectrum_32;
WebRtc_Word32* near_spectrum_32;
// Binary history variables
WebRtc_UWord32* binary_far_history;
// Far end history variables
WebRtc_UWord16* far_history;
int far_history_position;
WebRtc_Word16* far_q_domains;
// Delay histogram variables
WebRtc_Word16* delay_histogram;
WebRtc_Word16 vad_counter;
// Delay memory
int last_delay;
// Buffer size parameters
int history_size;
int spectrum_size;
} DelayEstimator_t;
// Only bit |kBandFirst| through bit |kBandLast| are processed
// |kBandFirst| - |kBandLast| must be < 32
static const int kBandFirst = 12;
static const int kBandLast = 43;
static __inline WebRtc_UWord32 SetBit(WebRtc_UWord32 in,
WebRtc_Word32 pos)
{
WebRtc_UWord32 mask = WEBRTC_SPL_LSHIFT_W32(1, pos);
WebRtc_UWord32 out = (in | mask);
return out;
}
// Compares the binary vector |binary_vector| with all rows of the binary
// matrix |binary_matrix| and counts per row the number of times they have the
// same value.
// Input:
// - binary_vector : binary "vector" stored in a long
// - binary_matrix : binary "matrix" stored as a vector of long
// - matrix_size : size of binary "matrix"
// Output:
// - bit_counts : "Vector" stored as a long, containing for each
// row the number of times the matrix row and the
// input vector have the same value
//
static void BitCountComparison(const WebRtc_UWord32 binary_vector,
const WebRtc_UWord32* binary_matrix,
int matrix_size,
WebRtc_Word32* bit_counts)
{
int n = 0;
WebRtc_UWord32 a = binary_vector;
register WebRtc_UWord32 tmp;
// compare binary vector |binary_vector| with all rows of the binary matrix
// |binary_matrix|
for (; n < matrix_size; n++)
{
a = (binary_vector ^ binary_matrix[n]);
// Returns bit counts in tmp
tmp = a - ((a >> 1) & 033333333333) - ((a >> 2) & 011111111111);
tmp = ((tmp + (tmp >> 3)) & 030707070707);
tmp = (tmp + (tmp >> 6));
tmp = (tmp + (tmp >> 12) + (tmp >> 24)) & 077;
bit_counts[n] = (WebRtc_Word32)tmp;
}
}
// Computes the binary spectrum by comparing the input |spectrum| with a
// |threshold_spectrum|.
//
// Input:
// - spectrum : Spectrum of which the binary spectrum should
// be calculated.
// - threshold_spectrum : Threshold spectrum with which the input
// spectrum is compared.
// Return:
// - out : Binary spectrum
//
static WebRtc_UWord32 GetBinarySpectrum(WebRtc_Word32* spectrum,
WebRtc_Word32* threshold_spectrum)
{
int k = kBandFirst;
WebRtc_UWord32 out = 0;
for (; k <= kBandLast; k++)
{
if (spectrum[k] > threshold_spectrum[k])
{
out = SetBit(out, k - kBandFirst);
}
}
return out;
}
// Calculates the mean recursively.
//
// Input:
// - new_value : new additional value
// - factor : factor for smoothing
//
// Input/Output:
// - mean_value : pointer to the mean value that should be updated
//
static void MeanEstimator(const WebRtc_Word32 new_value,
int factor,
WebRtc_Word32* mean_value)
{
WebRtc_Word32 mean_new = *mean_value;
WebRtc_Word32 diff = new_value - mean_new;
// mean_new = mean_value + ((new_value - mean_value) >> factor);
if (diff < 0)
{
diff = -WEBRTC_SPL_RSHIFT_W32(-diff, factor);
}
else
{
diff = WEBRTC_SPL_RSHIFT_W32(diff, factor);
}
mean_new += diff;
*mean_value = mean_new;
}
// Moves the pointer to the next entry and inserts new far end spectrum and
// corresponding Q-domain in its buffer.
//
// Input:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum
// - far_q : Q-domain of far end spectrum
//
static void UpdateFarHistory(DelayEstimator_t* self,
WebRtc_UWord16* far_spectrum,
WebRtc_Word16 far_q)
{
// Get new buffer position
self->far_history_position++;
if (self->far_history_position >= self->history_size)
{
self->far_history_position = 0;
}
// Update Q-domain buffer
self->far_q_domains[self->far_history_position] = far_q;
// Update far end spectrum buffer
memcpy(&(self->far_history[self->far_history_position * self->spectrum_size]),
far_spectrum,
sizeof(WebRtc_UWord16) * self->spectrum_size);
}
int WebRtcAecm_FreeDelayEstimator(void* handle)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
if (self == NULL)
{
return -1;
}
if (self->mean_far_spectrum != NULL)
{
free(self->mean_far_spectrum);
self->mean_far_spectrum = NULL;
}
if (self->mean_near_spectrum != NULL)
{
free(self->mean_near_spectrum);
self->mean_near_spectrum = NULL;
}
if (self->mean_bit_counts != NULL)
{
free(self->mean_bit_counts);
self->mean_bit_counts = NULL;
}
if (self->bit_counts != NULL)
{
free(self->bit_counts);
self->bit_counts = NULL;
}
if (self->far_spectrum_32 != NULL)
{
free(self->far_spectrum_32);
self->far_spectrum_32 = NULL;
}
if (self->near_spectrum_32 != NULL)
{
free(self->near_spectrum_32);
self->near_spectrum_32 = NULL;
}
if (self->far_history != NULL)
{
free(self->far_history);
self->far_history = NULL;
}
if (self->binary_far_history != NULL)
{
free(self->binary_far_history);
self->binary_far_history = NULL;
}
if (self->far_q_domains != NULL)
{
free(self->far_q_domains);
self->far_q_domains = NULL;
}
if (self->delay_histogram != NULL)
{
free(self->delay_histogram);
self->delay_histogram = NULL;
}
free(self);
return 0;
}
int WebRtcAecm_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size)
{
DelayEstimator_t *self = NULL;
// Check if the sub band used in the delay estimation is small enough to
// fit in a Word32.
assert(kBandLast - kBandFirst < 32);
if (spectrum_size < kBandLast)
{
return -1;
}
if (history_size < 0)
{
return -1;
}
self = malloc(sizeof(DelayEstimator_t));
*handle = self;
if (self == NULL)
{
return -1;
}
self->mean_far_spectrum = NULL;
self->mean_near_spectrum = NULL;
self->bit_counts = NULL;
self->far_spectrum_32 = NULL;
self->near_spectrum_32 = NULL;
self->far_history = NULL;
self->mean_bit_counts = NULL;
self->binary_far_history = NULL;
self->far_q_domains = NULL;
self->delay_histogram = NULL;
// Allocate memory for spectrum buffers
self->mean_far_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->mean_far_spectrum == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->mean_near_spectrum = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->mean_near_spectrum == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->mean_bit_counts = malloc(history_size * sizeof(WebRtc_Word32));
if (self->mean_bit_counts == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->bit_counts = malloc(history_size * sizeof(WebRtc_Word32));
if (self->bit_counts == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->far_spectrum_32 = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->far_spectrum_32 == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->near_spectrum_32 = malloc(spectrum_size * sizeof(WebRtc_Word32));
if (self->near_spectrum_32 == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
// Allocate memory for history buffers
self->far_history = malloc(spectrum_size * history_size *
sizeof(WebRtc_UWord16));
if (self->far_history == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->binary_far_history = malloc(history_size * sizeof(WebRtc_UWord32));
if (self->binary_far_history == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->far_q_domains = malloc(history_size * sizeof(WebRtc_Word16));
if (self->far_q_domains == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->delay_histogram = malloc(history_size * sizeof(WebRtc_Word16));
if (self->delay_histogram == NULL)
{
WebRtcAecm_FreeDelayEstimator(self);
self = NULL;
return -1;
}
self->spectrum_size = spectrum_size;
self->history_size = history_size;
return 0;
}
int WebRtcAecm_InitDelayEstimator(void* handle)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
if (self == NULL)
{
return -1;
}
// Set averaged far and near end spectra to zero
memset(self->mean_far_spectrum,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
memset(self->mean_near_spectrum,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
// Set averaged bit counts to zero
memset(self->mean_bit_counts,
0,
sizeof(WebRtc_Word32) * self->history_size);
memset(self->bit_counts,
0,
sizeof(WebRtc_Word32) * self->history_size);
memset(self->far_spectrum_32,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
memset(self->near_spectrum_32,
0,
sizeof(WebRtc_Word32) * self->spectrum_size);
// Set far end histories to zero
memset(self->binary_far_history,
0,
sizeof(WebRtc_UWord32) * self->history_size);
memset(self->far_history,
0,
sizeof(WebRtc_UWord16) * self->spectrum_size *
self->history_size);
memset(self->far_q_domains,
0,
sizeof(WebRtc_Word16) * self->history_size);
self->far_history_position = self->history_size;
// Set delay histogram to zero
memset(self->delay_histogram,
0,
sizeof(WebRtc_Word16) * self->history_size);
// Set VAD counter to zero
self->vad_counter = 0;
// Set delay memory to zero
self->last_delay = 0;
return 0;
}
int WebRtcAecm_DelayEstimatorProcess(void* handle,
WebRtc_UWord16* far_spectrum,
WebRtc_UWord16* near_spectrum,
int spectrum_size,
WebRtc_Word16 far_q,
WebRtc_Word16 vad_value)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
WebRtc_UWord32 bxspectrum, byspectrum;
int i;
WebRtc_Word32 dtmp1;
WebRtc_Word16 maxHistLvl = 0;
WebRtc_Word16 minpos = -1;
const int kVadCountThreshold = 25;
const int kMaxHistogram = 600;
if (self == NULL)
{
return -1;
}
if (spectrum_size != self->spectrum_size)
{
// Data sizes don't match
return -1;
}
if (far_q > 15)
{
// If far_Q is larger than 15 we can not guarantee no wrap around
return -1;
}
// Update far end history
UpdateFarHistory(self, far_spectrum, far_q);
// Update the far and near end means
for (i = 0; i < self->spectrum_size; i++)
{
self->far_spectrum_32[i] = (WebRtc_Word32)far_spectrum[i];
MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i]));
self->near_spectrum_32[i] = (WebRtc_Word32)near_spectrum[i];
MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i]));
}
// Shift binary spectrum history
memmove(&(self->binary_far_history[1]),
&(self->binary_far_history[0]),
(self->history_size - 1) * sizeof(WebRtc_UWord32));
// Get binary spectra
bxspectrum = GetBinarySpectrum(self->far_spectrum_32, self->mean_far_spectrum);
byspectrum = GetBinarySpectrum(self->near_spectrum_32, self->mean_near_spectrum);
// Insert new binary spectrum
self->binary_far_history[0] = bxspectrum;
// Compare with delayed spectra
BitCountComparison(byspectrum,
self->binary_far_history,
self->history_size,
self->bit_counts);
// Smooth bit count curve
for (i = 0; i < self->history_size; i++)
{
// Update sum
// |bit_counts| is constrained to [0, 32], meaning we can smooth with a
// factor up to 2^26. We use Q9.
dtmp1 = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9); // Q9
MeanEstimator(dtmp1, 9, &(self->mean_bit_counts[i]));
}
// Find minimum position of bit count curve
minpos = WebRtcSpl_MinIndexW32(self->mean_bit_counts, self->history_size);
// If the farend has been active sufficiently long, begin accumulating a
// histogram of the minimum positions. Search for the maximum bin to
// determine the delay.
if (vad_value == 1)
{
if (self->vad_counter >= kVadCountThreshold)
{
// Increment the histogram at the current minimum position.
if (self->delay_histogram[minpos] < kMaxHistogram)
{
self->delay_histogram[minpos] += 3;
}
#if (!defined ARM_WINM) && (!defined ARM9E_GCC) && (!defined ANDROID_AECOPT)
// Decrement the entire histogram.
// Select the histogram index corresponding to the maximum bin as
// the delay.
self->last_delay = 0;
for (i = 0; i < self->history_size; i++)
{
if (self->delay_histogram[i] > 0)
{
self->delay_histogram[i]--;
}
if (self->delay_histogram[i] > maxHistLvl)
{
maxHistLvl = self->delay_histogram[i];
self->last_delay = i;
}
}
#else
self->last_delay = 0;
for (i = 0; i < self->history_size; i++)
{
WebRtc_Word16 tempVar = self->delay_histogram[i];
// Decrement the entire histogram.
if (tempVar > 0)
{
tempVar--;
self->delay_histogram[i] = tempVar;
// Select the histogram index corresponding to the maximum
// bin as the delay.
if (tempVar > maxHistLvl)
{
maxHistLvl = tempVar;
self->last_delay = i;
}
}
}
#endif
} else
{
self->vad_counter++;
}
} else
{
self->vad_counter = 0;
}
return self->last_delay;
}
const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle,
int far_spectrum_size,
WebRtc_Word16* far_q)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
int buffer_position = 0;
if (self == NULL)
{
return NULL;
}
if (far_spectrum_size != self->spectrum_size)
{
return NULL;
}
// Get buffer position
buffer_position = self->far_history_position - self->last_delay;
if (buffer_position < 0)
{
buffer_position += self->history_size;
}
// Get Q-domain
*far_q = self->far_q_domains[buffer_position];
// Return far end spectrum
return (self->far_history + (buffer_position * self->spectrum_size));
}
int WebRtcAecm_GetLastDelay(void* handle)
{
DelayEstimator_t* self = (DelayEstimator_t*)handle;
if (self == NULL)
{
return -1;
}
// Return last calculated delay
return self->last_delay;
}

View File

@ -1,112 +0,0 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_
#include "typedefs.h"
// Releases the memory allocated by WebRtcAecm_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtcAecm_FreeDelayEstimator(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtcAecm_InitDelayEstimator(...) function.
//
// Input:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and near
// end. Used to allocate memory for spectrum specific
// buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
//
// Output:
// - handle : Created instance
//
int WebRtcAecm_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size);
// Initializes the delay estimation instance created with
// WebRtcAecm_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtcAecm_InitDelayEstimator(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Input:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - far_q : The Q-domain of the far end data
// - vad_value : The VAD decision of the current block
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtcAecm_DelayEstimatorProcess(void* handle,
WebRtc_UWord16* far_spectrum,
WebRtc_UWord16* near_spectrum,
int spectrum_size,
WebRtc_Word16 far_q,
WebRtc_Word16 vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtcAecm_DelayEstimatorProcess(...) should
// have been called before WebRtcAecm_GetAlignedFarend(...). Otherwise, you get
// the pointer to the previous frame. The memory is only valid until the next
// call of WebRtcAecm_DelayEstimatorProcess(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - far_spectrum_size : Size of far_spectrum allocated by the caller
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const WebRtc_UWord16* WebRtcAecm_GetAlignedFarend(void* handle,
int far_spectrum_size,
WebRtc_Word16* far_q);
// Returns the last calculated delay updated by the function
// WebRtcAecm_DelayEstimatorProcess(...)
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtcAecm_GetLastDelay(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_AECM_MAIN_SOURCE_AECM_DELAY_ESTIMATOR_H_

View File

@ -1,32 +0,0 @@
% Outputs a file for testing purposes.
%
% Adjust the following parameters to suit. Their purpose becomes more clear on
% viewing the gain plots.
% MaxGain: Max gain in dB
% MinGain: Min gain at overload (0 dBov) in dB
% CompRatio: Compression ratio, essentially determines the slope of the gain
% function between the max and min gains
% Knee: The smoothness of the transition to max gain (smaller is smoother)
MaxGain = 5; MinGain = 0; CompRatio = 3; Knee = 1;
% Compute gains
zeros = 0:31; lvl = 2.^(1-zeros);
A = -10*log10(lvl) * (CompRatio - 1) / CompRatio;
B = MaxGain - MinGain;
gains = round(2^16*10.^(0.05 * (MinGain + B * ( log(exp(-Knee*A)+exp(-Knee*B)) - log(1+exp(-Knee*B)) ) / log(1/(1+exp(Knee*B))))));
fprintf(1, '\t%i, %i, %i, %i,\n', gains);
% Save gains to file
fid = fopen('gains', 'wb');
if fid == -1
error(sprintf('Unable to open file %s', filename));
return
end
fwrite(fid, gains, 'int32');
fclose(fid);
% Plotting
in = 10*log10(lvl); out = 20*log10(gains/65536);
subplot(121); plot(in, out); axis([-60, 0, -5, 30]); grid on; xlabel('Input (dB)'); ylabel('Gain (dB)');
subplot(122); plot(in, in+out); axis([-60, 0, -60, 10]); grid on; xlabel('Input (dB)'); ylabel('Output (dB)');
zoom on;

View File

@ -27,6 +27,7 @@
'audio_processing',
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
'<(webrtc_root)/system_wrappers/source/system_wrappers.gyp:system_wrappers',
'<(webrtc_root)/../test/test.gyp:test_support',
'<(webrtc_root)/../testing/gtest.gyp:gtest',
'<(webrtc_root)/../testing/gtest.gyp:gtest_main',
'<(webrtc_root)/../third_party/protobuf/protobuf.gyp:protobuf_lite',

View File

@ -322,6 +322,16 @@ class EchoCancellation {
// TODO(ajm): discuss the metrics update period.
virtual int GetMetrics(Metrics* metrics) = 0;
// Enables computation and logging of delay values. Statistics are obtained
// through |GetDelayMetrics()|.
virtual int enable_delay_logging(bool enable) = 0;
virtual bool is_delay_logging_enabled() const = 0;
// The delay metrics consists of the delay |median| and the delay standard
// deviation |std|. The values are averaged over the time period since the
// last call to |GetDelayMetrics()|.
virtual int GetDelayMetrics(int* median, int* std) = 0;
protected:
virtual ~EchoCancellation() {};
};
@ -486,6 +496,7 @@ class HighPassFilter {
};
// An estimation component used to retrieve level metrics.
// NOTE: currently unavailable. All methods return errors.
class LevelEstimator {
public:
virtual int Enable(bool enable) = 0;
@ -539,6 +550,10 @@ class NoiseSuppression {
// The voice activity detection (VAD) component analyzes the stream to
// determine if voice is present. A facility is also provided to pass in an
// external VAD decision.
//
// In addition to |stream_has_voice()| the VAD decision is provided through the
// |AudioFrame| passed to |ProcessStream()|. The |_vadActivity| member will be
// modified to reflect the current decision.
class VoiceDetection {
public:
virtual int Enable(bool enable) = 0;

View File

@ -10,8 +10,6 @@
#include "audio_buffer.h"
#include "module_common_types.h"
namespace webrtc {
namespace {
@ -64,21 +62,22 @@ struct SplitAudioChannel {
WebRtc_Word32 synthesis_filter_state2[6];
};
// TODO(am): check range of input parameters?
AudioBuffer::AudioBuffer(WebRtc_Word32 max_num_channels,
WebRtc_Word32 samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
reference_copied_(false),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
// TODO(andrew): check range of input parameters?
AudioBuffer::AudioBuffer(int max_num_channels,
int samples_per_channel)
: max_num_channels_(max_num_channels),
num_channels_(0),
num_mixed_channels_(0),
num_mixed_low_pass_channels_(0),
samples_per_channel_(samples_per_channel),
samples_per_split_channel_(samples_per_channel),
reference_copied_(false),
activity_(AudioFrame::kVadUnknown),
data_(NULL),
channels_(NULL),
split_channels_(NULL),
mixed_low_pass_channels_(NULL),
low_pass_reference_channels_(NULL) {
if (max_num_channels_ > 1) {
channels_ = new AudioChannel[max_num_channels_];
mixed_low_pass_channels_ = new AudioChannel[max_num_channels_];
@ -109,7 +108,7 @@ AudioBuffer::~AudioBuffer() {
}
}
WebRtc_Word16* AudioBuffer::data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (data_ != NULL) {
return data_;
@ -118,7 +117,7 @@ WebRtc_Word16* AudioBuffer::data(WebRtc_Word32 channel) const {
return channels_[channel].data;
}
WebRtc_Word16* AudioBuffer::low_pass_split_data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::low_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return data(channel);
@ -127,7 +126,7 @@ WebRtc_Word16* AudioBuffer::low_pass_split_data(WebRtc_Word32 channel) const {
return split_channels_[channel].low_pass_data;
}
WebRtc_Word16* AudioBuffer::high_pass_split_data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::high_pass_split_data(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (split_channels_ == NULL) {
return NULL;
@ -136,13 +135,13 @@ WebRtc_Word16* AudioBuffer::high_pass_split_data(WebRtc_Word32 channel) const {
return split_channels_[channel].high_pass_data;
}
WebRtc_Word16* AudioBuffer::mixed_low_pass_data(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::mixed_low_pass_data(int channel) const {
assert(channel >= 0 && channel < num_mixed_low_pass_channels_);
return mixed_low_pass_channels_[channel].data;
}
WebRtc_Word16* AudioBuffer::low_pass_reference(WebRtc_Word32 channel) const {
WebRtc_Word16* AudioBuffer::low_pass_reference(int channel) const {
assert(channel >= 0 && channel < num_channels_);
if (!reference_copied_) {
return NULL;
@ -151,58 +150,67 @@ WebRtc_Word16* AudioBuffer::low_pass_reference(WebRtc_Word32 channel) const {
return low_pass_reference_channels_[channel].data;
}
WebRtc_Word32* AudioBuffer::analysis_filter_state1(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::analysis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state1;
}
WebRtc_Word32* AudioBuffer::analysis_filter_state2(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::analysis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].analysis_filter_state2;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state1(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::synthesis_filter_state1(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state1;
}
WebRtc_Word32* AudioBuffer::synthesis_filter_state2(WebRtc_Word32 channel) const {
WebRtc_Word32* AudioBuffer::synthesis_filter_state2(int channel) const {
assert(channel >= 0 && channel < num_channels_);
return split_channels_[channel].synthesis_filter_state2;
}
WebRtc_Word32 AudioBuffer::num_channels() const {
void AudioBuffer::set_activity(AudioFrame::VADActivity activity) {
activity_ = activity;
}
AudioFrame::VADActivity AudioBuffer::activity() {
return activity_;
}
int AudioBuffer::num_channels() const {
return num_channels_;
}
WebRtc_Word32 AudioBuffer::samples_per_channel() const {
int AudioBuffer::samples_per_channel() const {
return samples_per_channel_;
}
WebRtc_Word32 AudioBuffer::samples_per_split_channel() const {
int AudioBuffer::samples_per_split_channel() const {
return samples_per_split_channel_;
}
// TODO(ajm): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* audioFrame) {
assert(audioFrame->_audioChannel <= max_num_channels_);
assert(audioFrame->_payloadDataLengthInSamples == samples_per_channel_);
// TODO(andrew): Do deinterleaving and mixing in one step?
void AudioBuffer::DeinterleaveFrom(AudioFrame* frame) {
assert(frame->_audioChannel <= max_num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
num_channels_ = audioFrame->_audioChannel;
num_channels_ = frame->_audioChannel;
num_mixed_channels_ = 0;
num_mixed_low_pass_channels_ = 0;
reference_copied_ = false;
activity_ = frame->_vadActivity;
if (num_channels_ == 1) {
// We can get away with a pointer assignment in this case.
data_ = audioFrame->_payloadData;
data_ = frame->_payloadData;
return;
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
WebRtc_Word16* interleaved = audioFrame->_payloadData;
WebRtc_Word32 interleaved_idx = i;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
deinterleaved[j] = interleaved[interleaved_idx];
interleaved_idx += num_channels_;
@ -210,27 +218,28 @@ void AudioBuffer::DeinterleaveFrom(AudioFrame* audioFrame) {
}
}
void AudioBuffer::InterleaveTo(AudioFrame* audioFrame) const {
assert(audioFrame->_audioChannel == num_channels_);
assert(audioFrame->_payloadDataLengthInSamples == samples_per_channel_);
void AudioBuffer::InterleaveTo(AudioFrame* frame) const {
assert(frame->_audioChannel == num_channels_);
assert(frame->_payloadDataLengthInSamples == samples_per_channel_);
frame->_vadActivity = activity_;
if (num_channels_ == 1) {
if (num_mixed_channels_ == 1) {
memcpy(audioFrame->_payloadData,
memcpy(frame->_payloadData,
channels_[0].data,
sizeof(WebRtc_Word16) * samples_per_channel_);
} else {
// These should point to the same buffer in this case.
assert(data_ == audioFrame->_payloadData);
assert(data_ == frame->_payloadData);
}
return;
}
WebRtc_Word16* interleaved = frame->_payloadData;
for (int i = 0; i < num_channels_; i++) {
WebRtc_Word16* deinterleaved = channels_[i].data;
WebRtc_Word16* interleaved = audioFrame->_payloadData;
WebRtc_Word32 interleaved_idx = i;
int interleaved_idx = i;
for (int j = 0; j < samples_per_channel_; j++) {
interleaved[interleaved_idx] = deinterleaved[j];
interleaved_idx += num_channels_;
@ -238,9 +247,10 @@ void AudioBuffer::InterleaveTo(AudioFrame* audioFrame) const {
}
}
// TODO(ajm): would be good to support the no-mix case with pointer assignment.
// TODO(ajm): handle mixing to multiple channels?
void AudioBuffer::Mix(WebRtc_Word32 num_mixed_channels) {
// TODO(andrew): would be good to support the no-mix case with pointer
// assignment.
// TODO(andrew): handle mixing to multiple channels?
void AudioBuffer::Mix(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);
@ -254,7 +264,7 @@ void AudioBuffer::Mix(WebRtc_Word32 num_mixed_channels) {
num_mixed_channels_ = num_mixed_channels;
}
void AudioBuffer::CopyAndMixLowPass(WebRtc_Word32 num_mixed_channels) {
void AudioBuffer::CopyAndMixLowPass(int num_mixed_channels) {
// We currently only support the stereo to mono case.
assert(num_channels_ == 2);
assert(num_mixed_channels == 1);

View File

@ -11,55 +11,58 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_MAIN_SOURCE_AUDIO_BUFFER_H_
#include "module_common_types.h"
#include "typedefs.h"
namespace webrtc {
struct AudioChannel;
struct SplitAudioChannel;
class AudioFrame;
class AudioBuffer {
public:
AudioBuffer(WebRtc_Word32 max_num_channels, WebRtc_Word32 samples_per_channel);
AudioBuffer(int max_num_channels, int samples_per_channel);
virtual ~AudioBuffer();
WebRtc_Word32 num_channels() const;
WebRtc_Word32 samples_per_channel() const;
WebRtc_Word32 samples_per_split_channel() const;
int num_channels() const;
int samples_per_channel() const;
int samples_per_split_channel() const;
WebRtc_Word16* data(WebRtc_Word32 channel) const;
WebRtc_Word16* low_pass_split_data(WebRtc_Word32 channel) const;
WebRtc_Word16* high_pass_split_data(WebRtc_Word32 channel) const;
WebRtc_Word16* mixed_low_pass_data(WebRtc_Word32 channel) const;
WebRtc_Word16* low_pass_reference(WebRtc_Word32 channel) const;
WebRtc_Word16* data(int channel) const;
WebRtc_Word16* low_pass_split_data(int channel) const;
WebRtc_Word16* high_pass_split_data(int channel) const;
WebRtc_Word16* mixed_low_pass_data(int channel) const;
WebRtc_Word16* low_pass_reference(int channel) const;
WebRtc_Word32* analysis_filter_state1(WebRtc_Word32 channel) const;
WebRtc_Word32* analysis_filter_state2(WebRtc_Word32 channel) const;
WebRtc_Word32* synthesis_filter_state1(WebRtc_Word32 channel) const;
WebRtc_Word32* synthesis_filter_state2(WebRtc_Word32 channel) const;
WebRtc_Word32* analysis_filter_state1(int channel) const;
WebRtc_Word32* analysis_filter_state2(int channel) const;
WebRtc_Word32* synthesis_filter_state1(int channel) const;
WebRtc_Word32* synthesis_filter_state2(int channel) const;
void set_activity(AudioFrame::VADActivity activity);
AudioFrame::VADActivity activity();
void DeinterleaveFrom(AudioFrame* audioFrame);
void InterleaveTo(AudioFrame* audioFrame) const;
void Mix(WebRtc_Word32 num_mixed_channels);
void CopyAndMixLowPass(WebRtc_Word32 num_mixed_channels);
void Mix(int num_mixed_channels);
void CopyAndMixLowPass(int num_mixed_channels);
void CopyLowPassToReference();
private:
const WebRtc_Word32 max_num_channels_;
WebRtc_Word32 num_channels_;
WebRtc_Word32 num_mixed_channels_;
WebRtc_Word32 num_mixed_low_pass_channels_;
const WebRtc_Word32 samples_per_channel_;
WebRtc_Word32 samples_per_split_channel_;
const int max_num_channels_;
int num_channels_;
int num_mixed_channels_;
int num_mixed_low_pass_channels_;
const int samples_per_channel_;
int samples_per_split_channel_;
bool reference_copied_;
AudioFrame::VADActivity activity_;
WebRtc_Word16* data_;
// TODO(ajm): Prefer to make these vectors if permitted...
// TODO(andrew): use vectors here.
AudioChannel* channels_;
SplitAudioChannel* split_channels_;
// TODO(ajm): improve this, we don't need the full 32 kHz space here.
// TODO(andrew): improve this, we don't need the full 32 kHz space here.
AudioChannel* mixed_low_pass_channels_;
AudioChannel* low_pass_reference_channels_;
};

View File

@ -66,7 +66,8 @@ EchoCancellationImpl::EchoCancellationImpl(const AudioProcessingImpl* apm)
device_sample_rate_hz_(48000),
stream_drift_samples_(0),
was_stream_drift_set_(false),
stream_has_echo_(false) {}
stream_has_echo_(false),
delay_logging_enabled_(false) {}
EchoCancellationImpl::~EchoCancellationImpl() {}
@ -283,6 +284,39 @@ bool EchoCancellationImpl::stream_has_echo() const {
return stream_has_echo_;
}
int EchoCancellationImpl::enable_delay_logging(bool enable) {
CriticalSectionScoped crit_scoped(*apm_->crit());
delay_logging_enabled_ = enable;
return Configure();
}
bool EchoCancellationImpl::is_delay_logging_enabled() const {
return delay_logging_enabled_;
}
// TODO(bjornv): How should we handle the multi-channel case?
int EchoCancellationImpl::GetDelayMetrics(int* median, int* std) {
CriticalSectionScoped crit_scoped(*apm_->crit());
if (median == NULL) {
return apm_->kNullPointerError;
}
if (std == NULL) {
return apm_->kNullPointerError;
}
if (!is_component_enabled() || !delay_logging_enabled_) {
return apm_->kNotEnabledError;
}
Handle* my_handle = static_cast<Handle*>(handle(0));
if (WebRtcAec_GetDelayMetrics(my_handle, median, std) !=
apm_->kNoError) {
return GetHandleError(my_handle);
}
return apm_->kNoError;
}
int EchoCancellationImpl::Initialize() {
int err = ProcessingComponent::Initialize();
if (err != apm_->kNoError || !is_component_enabled()) {
@ -332,6 +366,7 @@ int EchoCancellationImpl::ConfigureHandle(void* handle) const {
config.metricsMode = metrics_enabled_;
config.nlpMode = MapSetting(suppression_level_);
config.skewMode = drift_compensation_enabled_;
config.delay_logging = delay_logging_enabled_;
return WebRtcAec_set_config(static_cast<Handle*>(handle), config);
}

View File

@ -49,6 +49,9 @@ class EchoCancellationImpl : public EchoCancellation,
virtual bool are_metrics_enabled() const;
virtual bool stream_has_echo() const;
virtual int GetMetrics(Metrics* metrics);
virtual int enable_delay_logging(bool enable);
virtual bool is_delay_logging_enabled() const;
virtual int GetDelayMetrics(int* median, int* std);
// ProcessingComponent implementation.
virtual void* CreateHandle() const;
@ -66,6 +69,7 @@ class EchoCancellationImpl : public EchoCancellation,
int stream_drift_samples_;
bool was_stream_drift_set_;
bool stream_has_echo_;
bool delay_logging_enabled_;
};
} // namespace webrtc

View File

@ -74,16 +74,16 @@ int VoiceDetectionImpl::ProcessCaptureAudio(AudioBuffer* audio) {
// TODO(ajm): concatenate data in frame buffer here.
int vad_ret_val;
vad_ret_val = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
apm_->split_sample_rate_hz(),
mixed_data,
frame_size_samples_);
if (vad_ret_val == 0) {
int vad_ret = WebRtcVad_Process(static_cast<Handle*>(handle(0)),
apm_->split_sample_rate_hz(),
mixed_data,
frame_size_samples_);
if (vad_ret == 0) {
stream_has_voice_ = false;
} else if (vad_ret_val == 1) {
audio->set_activity(AudioFrame::kVadPassive);
} else if (vad_ret == 1) {
stream_has_voice_ = true;
audio->set_activity(AudioFrame::kVadActive);
} else {
return apm_->kUnspecifiedError;
}

View File

@ -28,6 +28,7 @@
using webrtc::AudioFrame;
using webrtc::AudioProcessing;
using webrtc::EchoCancellation;
using webrtc::GainControl;
using webrtc::NoiseSuppression;
using webrtc::TickInterval;
@ -61,6 +62,12 @@ bool ReadMessageFromFile(FILE* file,
return msg->ParseFromArray(array, usize);
}
void PrintStat(const AudioProcessing::Statistic& stat) {
printf("%d, %d, %d\n", stat.average,
stat.maximum,
stat.minimum);
}
void usage() {
printf(
"Usage: process_test [options] [-pb PROTOBUF_FILE]\n"
@ -86,6 +93,8 @@ void usage() {
printf("\n -aec Echo cancellation\n");
printf(" --drift_compensation\n");
printf(" --no_drift_compensation\n");
printf(" --no_echo_metrics\n");
printf(" --no_delay_logging\n");
printf("\n -aecm Echo control mobile\n");
printf(" --aecm_echo_path_in_file FILE\n");
printf(" --aecm_echo_path_out_file FILE\n");
@ -107,6 +116,7 @@ void usage() {
printf(" --vad_out_file FILE\n");
printf("\n");
printf("Modifiers:\n");
printf(" --noasm Disable SSE optimization.\n");
printf(" --perf Measure performance.\n");
printf(" --quiet Suppress text output.\n");
printf(" --no_progress Suppress progress.\n");
@ -156,7 +166,7 @@ void void_main(int argc, char* argv[]) {
//bool interleaved = true;
for (int i = 1; i < argc; i++) {
if (strcmp(argv[i], "-pb") == 0) {
if (strcmp(argv[i], "-pb") == 0) {
i++;
ASSERT_LT(i, argc) << "Specify protobuf filename after -pb";
pb_filename = argv[i];
@ -208,9 +218,10 @@ void void_main(int argc, char* argv[]) {
} else if (strcmp(argv[i], "-aec") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
} else if (strcmp(argv[i], "-noasm") == 0) {
WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM;
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_metrics(true));
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_delay_logging(true));
} else if (strcmp(argv[i], "--drift_compensation") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
@ -223,6 +234,16 @@ void void_main(int argc, char* argv[]) {
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_drift_compensation(false));
} else if (strcmp(argv[i], "--no_echo_metrics") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_metrics(false));
} else if (strcmp(argv[i], "--no_delay_logging") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_cancellation()->Enable(true));
ASSERT_EQ(apm->kNoError,
apm->echo_cancellation()->enable_delay_logging(false));
} else if (strcmp(argv[i], "-aecm") == 0) {
ASSERT_EQ(apm->kNoError, apm->echo_control_mobile()->Enable(true));
@ -316,6 +337,11 @@ void void_main(int argc, char* argv[]) {
ASSERT_LT(i, argc) << "Specify filename after --vad_out_file";
vad_out_filename = argv[i];
} else if (strcmp(argv[i], "--noasm") == 0) {
WebRtc_GetCPUInfo = WebRtc_GetCPUInfoNoASM;
// We need to reinitialize here if components have already been enabled.
ASSERT_EQ(apm->kNoError, apm->Initialize());
} else if (strcmp(argv[i], "--perf") == 0) {
perf_testing = true;
@ -460,13 +486,6 @@ void void_main(int argc, char* argv[]) {
<< aecm_echo_path_out_filename;
}
enum Events {
kInitializeEvent,
kRenderEvent,
kCaptureEvent,
kResetEventDeprecated
};
int16_t event = 0;
size_t read_count = 0;
int reverse_count = 0;
int primary_count = 0;
@ -642,9 +661,15 @@ void void_main(int argc, char* argv[]) {
}
ASSERT_TRUE(feof(pb_file));
printf("100%% complete\r");
} else {
enum Events {
kInitializeEvent,
kRenderEvent,
kCaptureEvent,
kResetEventDeprecated
};
int16_t event = 0;
while (simulating || feof(event_file) == 0) {
std::ostringstream trace_stream;
trace_stream << "Processed frames: " << reverse_count << " (reverse), "
@ -708,6 +733,10 @@ void void_main(int argc, char* argv[]) {
if (simulating) {
if (read_count != far_frame._payloadDataLengthInSamples) {
// Read an equal amount from the near file to avoid errors due to
// not reaching end-of-file.
EXPECT_EQ(0, fseek(near_file, read_count * sizeof(WebRtc_Word16),
SEEK_CUR));
break; // This is expected.
}
} else {
@ -828,6 +857,7 @@ void void_main(int argc, char* argv[]) {
}
}
}
printf("100%% complete\r");
if (aecm_echo_path_out_file != NULL) {
const size_t path_size =
@ -845,6 +875,27 @@ void void_main(int argc, char* argv[]) {
if (verbose) {
printf("\nProcessed frames: %d (primary), %d (reverse)\n",
primary_count, reverse_count);
if (apm->echo_cancellation()->are_metrics_enabled()) {
EchoCancellation::Metrics metrics;
apm->echo_cancellation()->GetMetrics(&metrics);
printf("\n--Echo metrics--\n");
printf("(avg, max, min)\n");
printf("ERL: ");
PrintStat(metrics.echo_return_loss);
printf("ERLE: ");
PrintStat(metrics.echo_return_loss_enhancement);
printf("ANLP: ");
PrintStat(metrics.a_nlp);
}
if (apm->echo_cancellation()->is_delay_logging_enabled()) {
int median = 0;
int std = 0;
apm->echo_cancellation()->GetDelayMetrics(&median, &std);
printf("\n--Delay metrics--\n");
printf("Median: %3d\n", median);
printf("Standard deviation: %3d\n", std);
}
}
if (!pb_file) {

View File

@ -10,12 +10,13 @@
#include <stdio.h>
#include <gtest/gtest.h>
#include "gtest/gtest.h"
#include "audio_processing.h"
#include "event_wrapper.h"
#include "module_common_types.h"
#include "signal_processing_library.h"
#include "testsupport/fileutils.h"
#include "thread_wrapper.h"
#include "trace.h"
#ifdef WEBRTC_ANDROID
@ -42,12 +43,6 @@ namespace {
// be set to true with the command-line switch --write_output_data.
bool write_output_data = false;
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
const char kOutputFileName[] = "output_data_fixed.pb";
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
const char kOutputFileName[] = "output_data_float.pb";
#endif
class ApmEnvironment : public ::testing::Environment {
public:
virtual void SetUp() {
@ -65,7 +60,9 @@ class ApmTest : public ::testing::Test {
ApmTest();
virtual void SetUp();
virtual void TearDown();
// Path to where the resource files to be used for this test are located.
const std::string kResourcePath;
const std::string kOutputFileName;
webrtc::AudioProcessing* apm_;
webrtc::AudioFrame* frame_;
webrtc::AudioFrame* revframe_;
@ -74,7 +71,14 @@ class ApmTest : public ::testing::Test {
};
ApmTest::ApmTest()
: apm_(NULL),
: kResourcePath(webrtc::test::GetProjectRootPath() +
"test/data/audio_processing/"),
#if defined(WEBRTC_APM_UNIT_TEST_FIXED_PROFILE)
kOutputFileName(kResourcePath + "output_data_fixed.pb"),
#elif defined(WEBRTC_APM_UNIT_TEST_FLOAT_PROFILE)
kOutputFileName(kResourcePath + "output_data_float.pb"),
#endif
apm_(NULL),
frame_(NULL),
revframe_(NULL),
far_file_(NULL),
@ -98,10 +102,14 @@ void ApmTest::SetUp() {
revframe_->_audioChannel = 2;
revframe_->_frequencyInHz = 32000;
far_file_ = fopen("aec_far.pcm", "rb");
ASSERT_TRUE(far_file_ != NULL) << "Could not open input file aec_far.pcm\n";
near_file_ = fopen("aec_near.pcm", "rb");
ASSERT_TRUE(near_file_ != NULL) << "Could not open input file aec_near.pcm\n";
std::string input_filename = kResourcePath + "aec_far.pcm";
far_file_ = fopen(input_filename.c_str(), "rb");
ASSERT_TRUE(far_file_ != NULL) << "Could not open input file " <<
input_filename << "\n";
input_filename = kResourcePath + "aec_near.pcm";
near_file_ = fopen(input_filename.c_str(), "rb");
ASSERT_TRUE(near_file_ != NULL) << "Could not open input file " <<
input_filename << "\n";
}
void ApmTest::TearDown() {
@ -177,11 +185,9 @@ void WriteStatsMessage(const AudioProcessing::Statistic& output,
message->set_minimum(output.minimum);
}
void WriteMessageLiteToFile(const char* filename,
void WriteMessageLiteToFile(const std::string filename,
const ::google::protobuf::MessageLite& message) {
assert(filename != NULL);
FILE* file = fopen(filename, "wb");
FILE* file = fopen(filename.c_str(), "wb");
ASSERT_TRUE(file != NULL) << "Could not open " << filename;
int size = message.ByteSize();
ASSERT_GT(size, 0);
@ -196,12 +202,11 @@ void WriteMessageLiteToFile(const char* filename,
fclose(file);
}
void ReadMessageLiteFromFile(const char* filename,
void ReadMessageLiteFromFile(const std::string filename,
::google::protobuf::MessageLite* message) {
assert(filename != NULL);
assert(message != NULL);
FILE* file = fopen(filename, "rb");
FILE* file = fopen(filename.c_str(), "rb");
ASSERT_TRUE(file != NULL) << "Could not open " << filename;
int size = 0;
ASSERT_EQ(1u, fread(&size, sizeof(int), 1, file));
@ -457,6 +462,8 @@ TEST_F(ApmTest, Process) {
apm_->echo_cancellation()->enable_drift_compensation(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_metrics(true));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(true));
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
EXPECT_EQ(apm_->kNoError,
@ -555,6 +562,7 @@ TEST_F(ApmTest, Process) {
&temp_data[0],
sizeof(WebRtc_Word16) * read_count);
}
frame_->_vadActivity = AudioFrame::kVadUnknown;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
@ -571,6 +579,9 @@ TEST_F(ApmTest, Process) {
}
if (apm_->voice_detection()->stream_has_voice()) {
has_voice_count++;
EXPECT_EQ(AudioFrame::kVadActive, frame_->_vadActivity);
} else {
EXPECT_EQ(AudioFrame::kVadPassive, frame_->_vadActivity);
}
frame_count++;
@ -587,6 +598,10 @@ TEST_F(ApmTest, Process) {
EchoCancellation::Metrics echo_metrics;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetMetrics(&echo_metrics));
int median = 0;
int std = 0;
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
#endif
if (!write_output_data) {
@ -608,6 +623,11 @@ TEST_F(ApmTest, Process) {
reference.echo_return_loss_enhancement());
TestStats(echo_metrics.a_nlp,
reference.a_nlp());
webrtc::audioproc::Test::DelayMetrics reference_delay =
test->delay_metrics();
EXPECT_EQ(median, reference_delay.median());
EXPECT_EQ(std, reference_delay.std());
#endif
} else {
test->set_has_echo_count(has_echo_count);
@ -628,6 +648,11 @@ TEST_F(ApmTest, Process) {
message->mutable_echo_return_loss_enhancement());
WriteStatsMessage(echo_metrics.a_nlp,
message->mutable_a_nlp());
webrtc::audioproc::Test::DelayMetrics* message_delay =
test->mutable_delay_metrics();
message_delay->set_median(median);
message_delay->set_std(std);
#endif
}
@ -692,6 +717,18 @@ TEST_F(ApmTest, EchoCancellation) {
apm_->echo_cancellation()->enable_metrics(false));
EXPECT_FALSE(apm_->echo_cancellation()->are_metrics_enabled());
int median = 0;
int std = 0;
EXPECT_EQ(apm_->kNotEnabledError,
apm_->echo_cancellation()->GetDelayMetrics(&median, &std));
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(true));
EXPECT_TRUE(apm_->echo_cancellation()->is_delay_logging_enabled());
EXPECT_EQ(apm_->kNoError,
apm_->echo_cancellation()->enable_delay_logging(false));
EXPECT_FALSE(apm_->echo_cancellation()->is_delay_logging_enabled());
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(true));
EXPECT_TRUE(apm_->echo_cancellation()->is_enabled());
EXPECT_EQ(apm_->kNoError, apm_->echo_cancellation()->Enable(false));
@ -966,27 +1003,27 @@ TEST_F(ApmTest, VoiceDetection) {
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
EXPECT_FALSE(apm_->voice_detection()->is_enabled());
// Test that AudioFrame activity is maintained when VAD is disabled.
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(false));
AudioFrame::VADActivity activity[] = {
AudioFrame::kVadActive,
AudioFrame::kVadPassive,
AudioFrame::kVadUnknown
};
for (size_t i = 0; i < sizeof(activity)/sizeof(*activity); i++) {
frame_->_vadActivity = activity[i];
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_EQ(activity[i], frame_->_vadActivity);
}
// Test that AudioFrame activity is set when VAD is enabled.
EXPECT_EQ(apm_->kNoError, apm_->voice_detection()->Enable(true));
frame_->_vadActivity = AudioFrame::kVadUnknown;
EXPECT_EQ(apm_->kNoError, apm_->ProcessStream(frame_));
EXPECT_NE(AudioFrame::kVadUnknown, frame_->_vadActivity);
// TODO(bjornv): Add tests for streamed voice; stream_has_voice()
}
// Below are some ideas for tests from VPM.
/*TEST_F(VideoProcessingModuleTest, GetVersionTest)
{
}
TEST_F(VideoProcessingModuleTest, HandleNullBuffer)
{
}
TEST_F(VideoProcessingModuleTest, HandleBadSize)
{
}
TEST_F(VideoProcessingModuleTest, IdenticalResultsAfterReset)
{
}
*/
} // namespace
int main(int argc, char** argv) {

View File

@ -35,6 +35,13 @@ message Test {
}
optional EchoMetrics echo_metrics = 11;
message DelayMetrics {
optional int32 median = 1;
optional int32 std = 2;
}
optional DelayMetrics delay_metrics = 12;
}
message OutputData {

View File

@ -30,7 +30,7 @@ extern "C" {
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNs_get_version(char *version, short length);
int WebRtcNs_get_version(char* version, short length);
/*
@ -46,7 +46,7 @@ int WebRtcNs_get_version(char *version, short length);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Create(NsHandle **NS_inst);
int WebRtcNs_Create(NsHandle** NS_inst);
/*
@ -59,7 +59,7 @@ int WebRtcNs_Create(NsHandle **NS_inst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Free(NsHandle *NS_inst);
int WebRtcNs_Free(NsHandle* NS_inst);
/*
@ -75,7 +75,7 @@ int WebRtcNs_Free(NsHandle *NS_inst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs);
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
@ -90,7 +90,7 @@ int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy(NsHandle *NS_inst, int mode);
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode);
/*
@ -111,11 +111,11 @@ int WebRtcNs_set_policy(NsHandle *NS_inst, int mode);
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNs_Process(NsHandle *NS_inst,
short *spframe,
short *spframe_H,
short *outframe,
short *outframe_H);
int WebRtcNs_Process(NsHandle* NS_inst,
short* spframe,
short* spframe_H,
short* outframe,
short* outframe_H);
#ifdef __cplusplus
}

View File

@ -11,7 +11,7 @@
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_NS_MAIN_INTERFACE_NOISE_SUPPRESSION_X_H_
#include "signal_processing_library.h"
#include "typedefs.h"
typedef struct NsxHandleT NsxHandle;
@ -30,7 +30,7 @@ extern "C" {
* Return value : 0 - Ok
* -1 - Error (probably length is not sufficient)
*/
int WebRtcNsx_get_version(char *version, short length);
int WebRtcNsx_get_version(char* version, short length);
/*
@ -46,7 +46,7 @@ int WebRtcNsx_get_version(char *version, short length);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Create(NsxHandle **nsxInst);
int WebRtcNsx_Create(NsxHandle** nsxInst);
/*
@ -59,7 +59,7 @@ int WebRtcNsx_Create(NsxHandle **nsxInst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Free(NsxHandle *nsxInst);
int WebRtcNsx_Free(NsxHandle* nsxInst);
/*
@ -75,7 +75,7 @@ int WebRtcNsx_Free(NsxHandle *nsxInst);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs);
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs);
/*
* This changes the aggressiveness of the noise suppression method.
@ -90,7 +90,7 @@ int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode);
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode);
/*
* This functions does noise suppression for the inserted speech frame. The
@ -110,11 +110,11 @@ int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode);
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNsx_Process(NsxHandle *nsxInst,
short *speechFrame,
short *speechFrameHB,
short *outFrame,
short *outFrameHB);
int WebRtcNsx_Process(NsxHandle* nsxInst,
short* speechFrame,
short* speechFrameHB,
short* outFrame,
short* outFrameHB);
#ifdef __cplusplus
}

View File

@ -15,55 +15,51 @@
#include "ns_core.h"
#include "defines.h"
int WebRtcNs_get_version(char *versionStr, short length)
{
const char version[] = "NS 2.2.0";
const short versionLen = (short)strlen(version) + 1; // +1 for null-termination
int WebRtcNs_get_version(char* versionStr, short length) {
const char version[] = "NS 2.2.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL) {
return -1;
}
if (versionStr == NULL) {
return -1;
}
if (versionLen > length) {
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
strncpy(versionStr, version, versionLen);
return 0;
}
int WebRtcNs_Create(NsHandle** NS_inst) {
*NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
if (*NS_inst != NULL) {
(*(NSinst_t**)NS_inst)->initFlag = 0;
return 0;
}
int WebRtcNs_Create(NsHandle **NS_inst)
{
*NS_inst = (NsHandle*) malloc(sizeof(NSinst_t));
if (*NS_inst!=NULL) {
(*(NSinst_t**)NS_inst)->initFlag=0;
return 0;
} else {
return -1;
}
} else {
return -1;
}
}
int WebRtcNs_Free(NsHandle *NS_inst)
{
free(NS_inst);
return 0;
int WebRtcNs_Free(NsHandle* NS_inst) {
free(NS_inst);
return 0;
}
int WebRtcNs_Init(NsHandle *NS_inst, WebRtc_UWord32 fs)
{
return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
int WebRtcNs_Init(NsHandle* NS_inst, WebRtc_UWord32 fs) {
return WebRtcNs_InitCore((NSinst_t*) NS_inst, fs);
}
int WebRtcNs_set_policy(NsHandle *NS_inst, int mode)
{
return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
int WebRtcNs_set_policy(NsHandle* NS_inst, int mode) {
return WebRtcNs_set_policy_core((NSinst_t*) NS_inst, mode);
}
int WebRtcNs_Process(NsHandle *NS_inst, short *spframe, short *spframe_H, short *outframe, short *outframe_H)
{
return WebRtcNs_ProcessCore((NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
int WebRtcNs_Process(NsHandle* NS_inst, short* spframe, short* spframe_H,
short* outframe, short* outframe_H) {
return WebRtcNs_ProcessCore(
(NSinst_t*) NS_inst, spframe, spframe_H, outframe, outframe_H);
}

View File

@ -15,60 +15,51 @@
#include "nsx_core.h"
#include "nsx_defines.h"
int WebRtcNsx_get_version(char *versionStr, short length)
{
const char version[] = "NS\t3.1.0";
const short versionLen = (short)strlen(version) + 1; // +1 for null-termination
int WebRtcNsx_get_version(char* versionStr, short length) {
const char version[] = "NS\t3.1.0";
const short versionLen = (short)strlen(version) + 1; // +1: null-termination
if (versionStr == NULL)
{
return -1;
}
if (versionStr == NULL) {
return -1;
}
if (versionLen > length)
{
return -1;
}
if (versionLen > length) {
return -1;
}
strncpy(versionStr, version, versionLen);
strncpy(versionStr, version, versionLen);
return 0;
}
int WebRtcNsx_Create(NsxHandle** nsxInst) {
*nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t));
if (*nsxInst != NULL) {
(*(NsxInst_t**)nsxInst)->initFlag = 0;
return 0;
}
int WebRtcNsx_Create(NsxHandle **nsxInst)
{
*nsxInst = (NsxHandle*)malloc(sizeof(NsxInst_t));
if (*nsxInst != NULL)
{
(*(NsxInst_t**)nsxInst)->initFlag = 0;
return 0;
} else
{
return -1;
}
} else {
return -1;
}
}
int WebRtcNsx_Free(NsxHandle *nsxInst)
{
free(nsxInst);
return 0;
int WebRtcNsx_Free(NsxHandle* nsxInst) {
free(nsxInst);
return 0;
}
int WebRtcNsx_Init(NsxHandle *nsxInst, WebRtc_UWord32 fs)
{
return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs);
int WebRtcNsx_Init(NsxHandle* nsxInst, WebRtc_UWord32 fs) {
return WebRtcNsx_InitCore((NsxInst_t*)nsxInst, fs);
}
int WebRtcNsx_set_policy(NsxHandle *nsxInst, int mode)
{
return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
int WebRtcNsx_set_policy(NsxHandle* nsxInst, int mode) {
return WebRtcNsx_set_policy_core((NsxInst_t*)nsxInst, mode);
}
int WebRtcNsx_Process(NsxHandle *nsxInst, short *speechFrame, short *speechFrameHB,
short *outFrame, short *outFrameHB)
{
return WebRtcNsx_ProcessCore((NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame,
outFrameHB);
int WebRtcNsx_Process(NsxHandle* nsxInst, short* speechFrame,
short* speechFrameHB, short* outFrame,
short* outFrameHB) {
return WebRtcNsx_ProcessCore(
(NsxInst_t*)nsxInst, speechFrame, speechFrameHB, outFrame, outFrameHB);
}

File diff suppressed because it is too large Load Diff

View File

@ -15,95 +15,95 @@
typedef struct NSParaExtract_t_ {
//bin size of histogram
float binSizeLrt;
float binSizeSpecFlat;
float binSizeSpecDiff;
//range of histogram over which lrt threshold is computed
float rangeAvgHistLrt;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
//thresholds for prior model
float factor1ModelPars; //for lrt and spectral difference
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
//peak limit for spectral flatness (varies between 0 and 1)
float thresPosSpecFlat;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
float limitPeakSpacingSpecFlat;
float limitPeakSpacingSpecDiff;
//limit on relevance of second peak:
float limitPeakWeightsSpecFlat;
float limitPeakWeightsSpecDiff;
//limit on fluctuation of lrt feature
float thresFluctLrt;
//limit on the max and min values for the feature thresholds
float maxLrt;
float minLrt;
float maxSpecFlat;
float minSpecFlat;
float maxSpecDiff;
float minSpecDiff;
//criteria of weight of histogram peak to accept/reject feature
int thresWeightSpecFlat;
int thresWeightSpecDiff;
//bin size of histogram
float binSizeLrt;
float binSizeSpecFlat;
float binSizeSpecDiff;
//range of histogram over which lrt threshold is computed
float rangeAvgHistLrt;
//scale parameters: multiply dominant peaks of the histograms by scale factor to obtain
//thresholds for prior model
float factor1ModelPars; //for lrt and spectral difference
float factor2ModelPars; //for spectral_flatness: used when noise is flatter than speech
//peak limit for spectral flatness (varies between 0 and 1)
float thresPosSpecFlat;
//limit on spacing of two highest peaks in histogram: spacing determined by bin size
float limitPeakSpacingSpecFlat;
float limitPeakSpacingSpecDiff;
//limit on relevance of second peak:
float limitPeakWeightsSpecFlat;
float limitPeakWeightsSpecDiff;
//limit on fluctuation of lrt feature
float thresFluctLrt;
//limit on the max and min values for the feature thresholds
float maxLrt;
float minLrt;
float maxSpecFlat;
float minSpecFlat;
float maxSpecDiff;
float minSpecDiff;
//criteria of weight of histogram peak to accept/reject feature
int thresWeightSpecFlat;
int thresWeightSpecDiff;
} NSParaExtract_t;
typedef struct NSinst_t_ {
WebRtc_UWord32 fs;
int blockLen;
int blockLen10ms;
int windShift;
int outLen;
int anaLen;
int magnLen;
int aggrMode;
const float* window;
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
float outBuf[3 * BLOCKL_MAX];
WebRtc_UWord32 fs;
int blockLen;
int blockLen10ms;
int windShift;
int outLen;
int anaLen;
int magnLen;
int aggrMode;
const float* window;
float dataBuf[ANAL_BLOCKL_MAX];
float syntBuf[ANAL_BLOCKL_MAX];
float outBuf[3 * BLOCKL_MAX];
int initFlag;
// parameters for quantile noise estimation
float density[SIMULT * HALF_ANAL_BLOCKL];
float lquantile[SIMULT * HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// parameters for Wiener filter
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// fft work arrays.
int ip[IP_LENGTH];
float wfft[W_LENGTH];
int initFlag;
// parameters for quantile noise estimation
float density[SIMULT* HALF_ANAL_BLOCKL];
float lquantile[SIMULT* HALF_ANAL_BLOCKL];
float quantile[HALF_ANAL_BLOCKL];
int counter[SIMULT];
int updates;
// parameters for Wiener filter
float smooth[HALF_ANAL_BLOCKL];
float overdrive;
float denoiseBound;
int gainmap;
// fft work arrays.
int ip[IP_LENGTH];
float wfft[W_LENGTH];
// parameters for new method: some not needed, will reduce/cleanup later
WebRtc_Word32 blockInd; //frame index counter
int modelUpdatePars[4]; //parameters for updating or estimating
// thresholds/weights for prior model
float priorModelPars[7]; //parameters for prior model
float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
float priorSpeechProb; //prior speech/noise probability
float featureData[7]; //data for features
float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
float signalEnergy; //energy of magn
float sumMagn; //sum of magn
float whiteNoiseLevel; //initial noise estimate
float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
float pinkNoiseNumerator; //pink noise parameter: numerator
float pinkNoiseExp; //pink noise parameter: power of freq
NSParaExtract_t featureExtractionParams; //parameters for feature extraction
//histograms for parameter estimation
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB
// parameters for new method: some not needed, will reduce/cleanup later
WebRtc_Word32 blockInd; //frame index counter
int modelUpdatePars[4]; //parameters for updating or estimating
// thresholds/weights for prior model
float priorModelPars[7]; //parameters for prior model
float noisePrev[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
float magnPrev[HALF_ANAL_BLOCKL]; //magnitude spectrum of previous frame
float logLrtTimeAvg[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing
float priorSpeechProb; //prior speech/noise probability
float featureData[7]; //data for features
float magnAvgPause[HALF_ANAL_BLOCKL]; //conservative noise spectrum estimate
float signalEnergy; //energy of magn
float sumMagn; //sum of magn
float whiteNoiseLevel; //initial noise estimate
float initMagnEst[HALF_ANAL_BLOCKL]; //initial magnitude spectrum estimate
float pinkNoiseNumerator; //pink noise parameter: numerator
float pinkNoiseExp; //pink noise parameter: power of freq
NSParaExtract_t featureExtractionParams; //parameters for feature extraction
//histograms for parameter estimation
int histLrt[HIST_PAR_EST];
int histSpecFlat[HIST_PAR_EST];
int histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
float speechProbHB[HALF_ANAL_BLOCKL]; //final speech/noise prob: prior + LRT
float dataBufHB[ANAL_BLOCKL_MAX]; //buffering data for HB
} NSinst_t;
@ -127,7 +127,7 @@ extern "C" {
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_InitCore(NSinst_t *inst, WebRtc_UWord32 fs);
int WebRtcNs_InitCore(NSinst_t* inst, WebRtc_UWord32 fs);
/****************************************************************************
* WebRtcNs_set_policy_core(...)
@ -144,7 +144,7 @@ int WebRtcNs_InitCore(NSinst_t *inst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNs_set_policy_core(NSinst_t *inst, int mode);
int WebRtcNs_set_policy_core(NSinst_t* inst, int mode);
/****************************************************************************
* WebRtcNs_ProcessCore
@ -166,11 +166,11 @@ int WebRtcNs_set_policy_core(NSinst_t *inst, int mode);
*/
int WebRtcNs_ProcessCore(NSinst_t *inst,
short *inFrameLow,
short *inFrameHigh,
short *outFrameLow,
short *outFrameHigh);
int WebRtcNs_ProcessCore(NSinst_t* inst,
short* inFrameLow,
short* inFrameHigh,
short* outFrameLow,
short* outFrameHigh);
#ifdef __cplusplus

File diff suppressed because it is too large Load Diff

View File

@ -20,85 +20,84 @@
#include <stdio.h>
#endif
typedef struct NsxInst_t_
{
WebRtc_UWord32 fs;
typedef struct NsxInst_t_ {
WebRtc_UWord32 fs;
const WebRtc_Word16* window;
WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX];
WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX];
WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL];
WebRtc_UWord16 overdrive; /* Q8 */
WebRtc_UWord16 denoiseBound; /* Q14 */
const WebRtc_Word16* factor2Table;
WebRtc_Word16 noiseEstLogQuantile[SIMULT * HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstDensity[SIMULT * HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstCounter[SIMULT];
WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL];
const WebRtc_Word16* window;
WebRtc_Word16 analysisBuffer[ANAL_BLOCKL_MAX];
WebRtc_Word16 synthesisBuffer[ANAL_BLOCKL_MAX];
WebRtc_UWord16 noiseSupFilter[HALF_ANAL_BLOCKL];
WebRtc_UWord16 overdrive; /* Q8 */
WebRtc_UWord16 denoiseBound; /* Q14 */
const WebRtc_Word16* factor2Table;
WebRtc_Word16 noiseEstLogQuantile[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstDensity[SIMULT* HALF_ANAL_BLOCKL];
WebRtc_Word16 noiseEstCounter[SIMULT];
WebRtc_Word16 noiseEstQuantile[HALF_ANAL_BLOCKL];
WebRtc_Word16 anaLen;
int anaLen2;
int magnLen;
int aggrMode;
int stages;
int initFlag;
int gainMap;
WebRtc_Word16 anaLen;
int anaLen2;
int magnLen;
int aggrMode;
int stages;
int initFlag;
int gainMap;
WebRtc_Word32 maxLrt;
WebRtc_Word32 minLrt;
WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8
WebRtc_Word32 featureLogLrt;
WebRtc_Word32 thresholdLogLrt;
WebRtc_Word16 weightLogLrt;
WebRtc_Word32 maxLrt;
WebRtc_Word32 minLrt;
WebRtc_Word32 logLrtTimeAvgW32[HALF_ANAL_BLOCKL]; //log lrt factor with time-smoothing in Q8
WebRtc_Word32 featureLogLrt;
WebRtc_Word32 thresholdLogLrt;
WebRtc_Word16 weightLogLrt;
WebRtc_UWord32 featureSpecDiff;
WebRtc_UWord32 thresholdSpecDiff;
WebRtc_Word16 weightSpecDiff;
WebRtc_UWord32 featureSpecDiff;
WebRtc_UWord32 thresholdSpecDiff;
WebRtc_Word16 weightSpecDiff;
WebRtc_UWord32 featureSpecFlat;
WebRtc_UWord32 thresholdSpecFlat;
WebRtc_Word16 weightSpecFlat;
WebRtc_UWord32 featureSpecFlat;
WebRtc_UWord32 thresholdSpecFlat;
WebRtc_Word16 weightSpecFlat;
WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum
WebRtc_UWord32 magnEnergy;
WebRtc_UWord32 sumMagn;
WebRtc_UWord32 curAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergyTmp;
WebRtc_Word32 avgMagnPause[HALF_ANAL_BLOCKL]; //conservative estimate of noise spectrum
WebRtc_UWord32 magnEnergy;
WebRtc_UWord32 sumMagn;
WebRtc_UWord32 curAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergy;
WebRtc_UWord32 timeAvgMagnEnergyTmp;
WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate
WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate
WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator
WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq
int minNorm; //smallest normalization factor
int zeroInputSignal; //zero input signal flag
WebRtc_UWord32 whiteNoiseLevel; //initial noise estimate
WebRtc_UWord32 initMagnEst[HALF_ANAL_BLOCKL];//initial magnitude spectrum estimate
WebRtc_Word32 pinkNoiseNumerator; //pink noise parameter: numerator
WebRtc_Word32 pinkNoiseExp; //pink noise parameter: power of freq
int minNorm; //smallest normalization factor
int zeroInputSignal; //zero input signal flag
WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame
WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14
WebRtc_UWord32 prevNoiseU32[HALF_ANAL_BLOCKL]; //noise spectrum from previous frame
WebRtc_UWord16 prevMagnU16[HALF_ANAL_BLOCKL]; //magnitude spectrum from previous frame
WebRtc_Word16 priorNonSpeechProb; //prior speech/noise probability // Q14
int blockIndex; //frame index counter
int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model
int cntThresUpdate;
int blockIndex; //frame index counter
int modelUpdate; //parameter for updating or estimating thresholds/weights for prior model
int cntThresUpdate;
//histograms for parameter estimation
WebRtc_Word16 histLrt[HIST_PAR_EST];
WebRtc_Word16 histSpecFlat[HIST_PAR_EST];
WebRtc_Word16 histSpecDiff[HIST_PAR_EST];
//histograms for parameter estimation
WebRtc_Word16 histLrt[HIST_PAR_EST];
WebRtc_Word16 histSpecFlat[HIST_PAR_EST];
WebRtc_Word16 histSpecDiff[HIST_PAR_EST];
//quantities for high band estimate
WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */
//quantities for high band estimate
WebRtc_Word16 dataBufHBFX[ANAL_BLOCKL_MAX]; /* Q0 */
int qNoise;
int prevQNoise;
int prevQMagn;
int blockLen10ms;
int qNoise;
int prevQNoise;
int prevQMagn;
int blockLen10ms;
WebRtc_Word16 real[ANAL_BLOCKL_MAX];
WebRtc_Word16 imag[ANAL_BLOCKL_MAX];
WebRtc_Word32 energyIn;
int scaleEnergyIn;
int normData;
WebRtc_Word16 real[ANAL_BLOCKL_MAX];
WebRtc_Word16 imag[ANAL_BLOCKL_MAX];
WebRtc_Word32 energyIn;
int scaleEnergyIn;
int normData;
} NsxInst_t;
@ -122,7 +121,7 @@ extern "C"
* Return value : 0 - Ok
* -1 - Error
*/
WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t *inst, WebRtc_UWord32 fs);
WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t* inst, WebRtc_UWord32 fs);
/****************************************************************************
* WebRtcNsx_set_policy_core(...)
@ -139,7 +138,7 @@ WebRtc_Word32 WebRtcNsx_InitCore(NsxInst_t *inst, WebRtc_UWord32 fs);
* Return value : 0 - Ok
* -1 - Error
*/
int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode);
int WebRtcNsx_set_policy_core(NsxInst_t* inst, int mode);
/****************************************************************************
* WebRtcNsx_ProcessCore
@ -159,16 +158,16 @@ int WebRtcNsx_set_policy_core(NsxInst_t *inst, int mode);
* Return value : 0 - OK
* -1 - Error
*/
int WebRtcNsx_ProcessCore(NsxInst_t *inst, short *inFrameLow, short *inFrameHigh,
short *outFrameLow, short *outFrameHigh);
int WebRtcNsx_ProcessCore(NsxInst_t* inst, short* inFrameLow, short* inFrameHigh,
short* outFrameLow, short* outFrameHigh);
/****************************************************************************
* Internal functions and variable declarations shared with optimized code.
*/
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t *inst, int offset);
void WebRtcNsx_UpdateNoiseEstimate(NsxInst_t* inst, int offset);
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
WebRtc_Word16 *qNoise);
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise);
extern const WebRtc_Word16 WebRtcNsx_kLogTable[9];
extern const WebRtc_Word16 WebRtcNsx_kLogTableFrac[256];

View File

@ -11,215 +11,230 @@
#if defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)
#include "nsx_core.h"
#include <arm_neon.h>
#include <assert.h>
void WebRtcNsx_NoiseEstimation(NsxInst_t *inst, WebRtc_UWord16 *magn, WebRtc_UWord32 *noise,
WebRtc_Word16 *qNoise)
{
WebRtc_Word32 numerator;
void WebRtcNsx_NoiseEstimation(NsxInst_t* inst, WebRtc_UWord16* magn, WebRtc_UWord32* noise,
WebRtc_Word16* qNoise) {
WebRtc_Word32 numerator;
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
WebRtc_Word16 log2Const = 22713;
WebRtc_Word16 widthFactor = 21845;
WebRtc_Word16 lmagn[HALF_ANAL_BLOCKL], counter, countDiv, countProd, delta, zeros, frac;
WebRtc_Word16 log2, tabind, logval, tmp16, tmp16no1, tmp16no2;
WebRtc_Word16 log2Const = 22713;
WebRtc_Word16 widthFactor = 21845;
int i, s, offset;
int i, s, offset;
numerator = FACTOR_Q16;
numerator = FACTOR_Q16;
tabind = inst->stages - inst->normData;
if (tabind < 0)
{
logval = -WebRtcNsx_kLogTable[-tabind];
} else
{
logval = WebRtcNsx_kLogTable[tabind];
tabind = inst->stages - inst->normData;
assert(tabind < 9);
assert(tabind > -9);
if (tabind < 0) {
logval = -WebRtcNsx_kLogTable[-tabind];
} else {
logval = WebRtcNsx_kLogTable[tabind];
}
int16x8_t logval_16x8 = vdupq_n_s16(logval);
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
// magn is in Q(-stages), and the real lmagn values are:
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++) {
if (magn[i]) {
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
assert(frac < 256);
// log2(magn(i))
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
// + log(2^stages)
lmagn[i] += logval;
} else {
lmagn[i] = logval;
}
}
// lmagn(i)=log(magn(i))=log(2)*log2(magn(i))
// magn is in Q(-stages), and the real lmagn values are:
// real_lmagn(i)=log(magn(i)*2^stages)=log(magn(i))+log(2^stages)
// lmagn in Q8
for (i = 0; i < inst->magnLen; i++)
{
if (magn[i])
{
zeros = WebRtcSpl_NormU32((WebRtc_UWord32)magn[i]);
frac = (WebRtc_Word16)((((WebRtc_UWord32)magn[i] << zeros) & 0x7FFFFFFF) >> 23);
// log2(magn(i))
log2 = (WebRtc_Word16)(((31 - zeros) << 8) + WebRtcNsx_kLogTableFrac[frac]);
// log2(magn(i))*log(2)
lmagn[i] = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(log2, log2Const, 15);
// + log(2^stages)
lmagn[i] += logval;
} else
{
lmagn[i] = logval;
int16x4_t Q3_16x4 = vdup_n_s16(3);
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor);
WebRtc_Word16 factor = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG)
factor = FACTOR_Q7_STARTUP;
// Loop over simultaneous estimates
for (s = 0; s < SIMULT; s++) {
offset = s * inst->magnLen;
// Get counter values from state
counter = inst->noiseEstCounter[s];
assert(counter < 201);
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
// quant_est(...)
WebRtc_Word16 deltaBuff[8];
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
int16x8_t tmp16x8_1;
int16x8_t tmp16x8_2;
int16x8_t tmp16x8_3;
int16x8_t tmp16x8_4;
int16x8_t tmp16x8_5;
int32x4_t tmp32x4;
for (i = 0; i < inst->magnLen - 7; i += 8) {
// Compute delta.
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
tmp16x8_0 = vdupq_n_s16(factor);
vst1q_s16(deltaBuff, tmp16x8_0);
int j;
for (j = 0; j < 8; j++) {
if (inst->noiseEstDensity[offset + i + j] > 512) {
deltaBuff[j] = WebRtcSpl_DivW32W16ResW16(
numerator, inst->noiseEstDensity[offset + i + j]);
}
}
}
int16x4_t Q3_16x4 = vdup_n_s16(3);
int16x8_t WIDTHQ8_16x8 = vdupq_n_s16(WIDTH_Q8);
int16x8_t WIDTHFACTOR_16x8 = vdupq_n_s16(widthFactor);
// Update log quantile estimate
// Loop over simultaneous estimates
for (s = 0; s < SIMULT; s++)
{
offset = s * inst->magnLen;
// tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[0]), countDiv_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
tmp32x4 = vmull_s16(vld1_s16(&deltaBuff[4]), countDiv_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
// Get counter values from state
counter = inst->noiseEstCounter[s];
countDiv = WebRtcNsx_kCounterDiv[counter];
countProd = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16(counter, countDiv);
// prepare for the "if" branch
// tmp16 += 2;
// tmp16_1 = (Word16)(tmp16>>2);
tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
// quant_est(...)
WebRtc_Word16 delta_[8];
int16x4_t tmp16x4_0;
int16x4_t tmp16x4_1;
int16x4_t countDiv_16x4 = vdup_n_s16(countDiv);
int16x8_t countProd_16x8 = vdupq_n_s16(countProd);
int16x8_t tmp16x8_0 = vdupq_n_s16(countDiv);
int16x8_t prod16x8 = vqrdmulhq_s16(WIDTHFACTOR_16x8, tmp16x8_0);
int16x8_t tmp16x8_1;
int16x8_t tmp16x8_2;
int16x8_t tmp16x8_3;
int16x8_t tmp16x8_4;
int16x8_t tmp16x8_5;
int32x4_t tmp32x4;
// inst->noiseEstLogQuantile[offset+i] + tmp16_1;
tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
for (i = 0; i < inst->magnLen - 7; i += 8) {
// compute delta
tmp16x8_0 = vdupq_n_s16(FACTOR_Q7);
vst1q_s16(delta_, tmp16x8_0);
int j;
for (j = 0; j < 8; j++) {
if (inst->noiseEstDensity[offset + i + j] > 512)
delta_[j] = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i + j]);
}
// Prepare for the "else" branch
// tmp16 += 1;
// tmp16_1 = (Word16)(tmp16>>1);
tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
// Update log quantile estimate
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
// tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
tmp32x4 = vmull_s16(vld1_s16(&delta_[0]), countDiv_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 14);
tmp32x4 = vmull_s16(vld1_s16(&delta_[4]), countDiv_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 14);
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // Keep for several lines.
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
// prepare for the "if" branch
// tmp16 += 2;
// tmp16_1 = (Word16)(tmp16>>2);
tmp16x8_1 = vrshrq_n_s16(tmp16x8_0, 2);
// inst->noiseEstLogQuantile[offset + i] - tmp16_2;
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
// inst->noiseEstLogQuantile[offset+i] + tmp16_1;
tmp16x8_2 = vld1q_s16(&inst->noiseEstLogQuantile[offset + i]); // Keep
tmp16x8_1 = vaddq_s16(tmp16x8_2, tmp16x8_1); // Keep for several lines
// logval is the smallest fixed point representation we can have. Values below
// that will correspond to values in the interval [0, 1], which can't possibly
// occur.
tmp16x8_0 = vmaxq_s16(tmp16x8_0, logval_16x8);
// Prepare for the "else" branch
// tmp16 += 1;
// tmp16_1 = (Word16)(tmp16>>1);
tmp16x8_0 = vrshrq_n_s16(tmp16x8_0, 1);
// Do the if-else branches:
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
__asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
__asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_low_s16(tmp16x8_0), Q3_16x4);
tmp16x4_1 = vshrn_n_s32(tmp32x4, 1);
// Update density estimate
// tmp16_1 + tmp16_2
tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
// tmp16_2 = (Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16_1,3,1);
tmp32x4 = vmull_s16(vget_high_s16(tmp16x8_0), Q3_16x4);
tmp16x4_0 = vshrn_n_s32(tmp32x4, 1);
// lmagn[i] - inst->noiseEstLogQuantile[offset + i]
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
tmp16x8_3 = vabsq_s16(tmp16x8_3);
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
} // End loop over magnitude spectrum
// inst->noiseEstLogQuantile[offset + i] - tmp16_2;
tmp16x8_0 = vcombine_s16(tmp16x4_1, tmp16x4_0); // keep
tmp16x8_0 = vsubq_s16(tmp16x8_2, tmp16x8_0);
// Do the if-else branches:
tmp16x8_3 = vld1q_s16(&lmagn[i]); // keep for several lines
tmp16x8_5 = vsubq_s16(tmp16x8_3, tmp16x8_2);
__asm__("vcgt.s16 %q0, %q1, #0"::"w"(tmp16x8_4), "w"(tmp16x8_5));
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_1), "w"(tmp16x8_4));
__asm__("vbif %q0, %q1, %q2"::"w"(tmp16x8_2), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstLogQuantile[offset + i], tmp16x8_2);
// Update density estimate
// tmp16_1 + tmp16_2
tmp16x8_1 = vld1q_s16(&inst->noiseEstDensity[offset + i]);
tmp16x8_0 = vqrdmulhq_s16(tmp16x8_1, countProd_16x8);
tmp16x8_0 = vaddq_s16(tmp16x8_0, prod16x8);
// lmagn[i] - inst->noiseEstLogQuantile[offset + i]
tmp16x8_3 = vsubq_s16(tmp16x8_3, tmp16x8_2);
tmp16x8_3 = vabsq_s16(tmp16x8_3);
tmp16x8_4 = vcgtq_s16(WIDTHQ8_16x8, tmp16x8_3);
__asm__("vbit %q0, %q1, %q2"::"w"(tmp16x8_1), "w"(tmp16x8_0), "w"(tmp16x8_4));
vst1q_s16(&inst->noiseEstDensity[offset + i], tmp16x8_1);
} // End loop over magnitude spectrum
for (; i < inst->magnLen; i++)
{
// compute delta
if (inst->noiseEstDensity[offset + i] > 512)
{
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else
{
delta = FACTOR_Q7;
}
// update log quantile estimate
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i])
{
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/inst->counter[s] in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else
{
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
}
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8)
{
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(widthFactor,
countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
} // end loop over magnitude spectrum
if (counter >= END_STARTUP_LONG)
{
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG)
{
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
for (; i < inst->magnLen; i++) {
// compute delta
if (inst->noiseEstDensity[offset + i] > 512) {
delta = WebRtcSpl_DivW32W16ResW16(numerator,
inst->noiseEstDensity[offset + i]);
} else {
delta = FACTOR_Q7;
if (inst->blockIndex < END_STARTUP_LONG) {
// Smaller step size during startup. This prevents from using
// unrealistic values causing overflow.
delta = FACTOR_Q7_STARTUP;
}
inst->noiseEstCounter[s]++;
}
} // end loop over simultaneous estimates
// update log quantile estimate
tmp16 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(delta, countDiv, 14);
if (lmagn[i] > inst->noiseEstLogQuantile[offset + i]) {
// +=QUANTILE*delta/(inst->counter[s]+1) QUANTILE=0.25, =1 in Q2
// CounterDiv=1/(inst->counter[s]+1) in Q15
tmp16 += 2;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 2);
inst->noiseEstLogQuantile[offset + i] += tmp16no1;
} else {
tmp16 += 1;
tmp16no1 = WEBRTC_SPL_RSHIFT_W16(tmp16, 1);
// *(1-QUANTILE), in Q2 QUANTILE=0.25, 1-0.25=0.75=3 in Q2
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT(tmp16no1, 3, 1);
inst->noiseEstLogQuantile[offset + i] -= tmp16no2;
if (inst->noiseEstLogQuantile[offset + i] < logval) {
// logval is the smallest fixed point representation we can have.
// Values below that will correspond to values in the interval
// [0, 1], which can't possibly occur.
inst->noiseEstLogQuantile[offset + i] = logval;
}
}
// Sequentially update the noise during startup
if (inst->blockIndex < END_STARTUP_LONG)
{
// update density estimate
if (WEBRTC_SPL_ABS_W16(lmagn[i] - inst->noiseEstLogQuantile[offset + i])
< WIDTH_Q8) {
tmp16no1 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
inst->noiseEstDensity[offset + i], countProd, 15);
tmp16no2 = (WebRtc_Word16)WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(
widthFactor, countDiv, 15);
inst->noiseEstDensity[offset + i] = tmp16no1 + tmp16no2;
}
} // end loop over magnitude spectrum
if (counter >= END_STARTUP_LONG) {
inst->noiseEstCounter[s] = 0;
if (inst->blockIndex >= END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
}
inst->noiseEstCounter[s]++;
for (i = 0; i < inst->magnLen; i++)
{
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*qNoise) = (WebRtc_Word16)inst->qNoise;
} // end loop over simultaneous estimates
// Sequentially update the noise during startup
if (inst->blockIndex < END_STARTUP_LONG) {
WebRtcNsx_UpdateNoiseEstimate(inst, offset);
}
for (i = 0; i < inst->magnLen; i++) {
noise[i] = (WebRtc_UWord32)(inst->noiseEstQuantile[i]); // Q(qNoise)
}
(*qNoise) = (WebRtc_Word16)inst->qNoise;
}
#endif // defined(WEBRTC_ARCH_ARM_NEON) && defined(WEBRTC_ANDROID)

View File

@ -18,6 +18,7 @@
#define END_STARTUP_SHORT 50
#define FACTOR_Q16 (WebRtc_Word32)2621440 // 40 in Q16
#define FACTOR_Q7 (WebRtc_Word16)5120 // 40 in Q7
#define FACTOR_Q7_STARTUP (WebRtc_Word16)1024 // 8 in Q7
#define WIDTH_Q8 3 // 0.01 in Q8 (or 25 )
//PARAMETERS FOR NEW METHOD
#define DD_PR_SNR_Q11 2007 // ~= Q11(0.98) DD update of prior SNR

File diff suppressed because it is too large Load Diff

View File

@ -1,7 +1,12 @@
noinst_LTLIBRARIES = libapm_util.la
libapm_util_la_SOURCES = ring_buffer.c \
ring_buffer.h \
libapm_util_la_SOURCES = delay_estimator_float.c \
delay_estimator_float.h \
delay_estimator.c \
delay_estimator.h \
fft4g.c \
fft4g.h
libapm_util_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS)
fft4g.h \
ring_buffer.c \
ring_buffer.h
libapm_util_la_CFLAGS = $(AM_CFLAGS) $(COMMON_CFLAGS) \
-I$(top_srcdir)/src/common_audio/signal_processing_library/main/interface

View File

@ -0,0 +1,550 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "delay_estimator.h"
#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include "signal_processing_library.h"
typedef struct {
// Pointers to mean values of spectrum and bit counts
int32_t* mean_far_spectrum;
int32_t* mean_near_spectrum;
int32_t* mean_bit_counts;
// Arrays only used locally in DelayEstimatorProcess() but whose size
// is determined at run-time.
int32_t* bit_counts;
int32_t* far_spectrum_32;
int32_t* near_spectrum_32;
// Binary history variables
uint32_t* binary_far_history;
// Far end history variables
uint16_t* far_history;
int far_history_pos;
int* far_q_domains;
// Delay histogram variables
int* delay_histogram;
int vad_counter;
// Delay memory
int last_delay;
// Used to enable far end alignment. If it is disabled, only delay values are
// produced
int alignment_enabled;
// Buffer size parameters
int history_size;
int spectrum_size;
} DelayEstimator_t;
// Only bit |kBandFirst| through bit |kBandLast| are processed
// |kBandFirst| - |kBandLast| must be < 32
static const int kBandFirst = 12;
static const int kBandLast = 43;
static __inline uint32_t SetBit(uint32_t in, int32_t pos) {
uint32_t mask = WEBRTC_SPL_LSHIFT_W32(1, pos);
uint32_t out = (in | mask);
return out;
}
// For each of the |matrix_size| rows of |binary_matrix|, counts the number
// of bit positions in which the row and |binary_vector| differ (population
// count of the XOR), storing the counts in |bit_counts|. A small count
// therefore means a close match.
//
// Inputs:
//      - binary_vector     : binary "vector" stored in a uint32_t
//      - binary_matrix     : binary "matrix" stored as an array of uint32_t
//      - matrix_size       : number of rows in the binary "matrix"
//
// Output:
//      - bit_counts        : per-row count of differing bits
//
static void BitCountComparison(uint32_t binary_vector,
                               const uint32_t* binary_matrix,
                               int matrix_size,
                               int32_t* bit_counts) {
  int row;

  for (row = 0; row < matrix_size; row++) {
    // Parallel (SWAR) population count of the differing bits.
    uint32_t bits = binary_vector ^ binary_matrix[row];
    bits = bits - ((bits >> 1) & 0x55555555);
    bits = (bits & 0x33333333) + ((bits >> 2) & 0x33333333);
    bits = (bits + (bits >> 4)) & 0x0F0F0F0F;
    bit_counts[row] = (int32_t) ((bits * 0x01010101) >> 24);
  }
}
// Derives a binary spectrum from |spectrum|: bit (k - kBandFirst) of the
// result is set iff spectrum[k] exceeds threshold_spectrum[k], for the bands
// kBandFirst through kBandLast.
//
// Inputs:
//      - spectrum           : spectrum to binarize
//      - threshold_spectrum : per-band thresholds
// Return:
//      - out                : binary spectrum
//
static uint32_t BinarySpectrum(int32_t* spectrum, int32_t* threshold_spectrum) {
  uint32_t binary = 0;
  int band;

  for (band = kBandFirst; band <= kBandLast; band++) {
    if (spectrum[band] > threshold_spectrum[band]) {
      binary = SetBit(binary, band - kBandFirst);
    }
  }
  return binary;
}
// First-order recursive (IIR) mean: adds (new_value - mean) / 2^factor to
// the running mean, rounding the step toward zero.
//
// Inputs:
//      - new_value  : new sample to fold into the mean
//      - factor     : smoothing exponent; larger => slower adaptation
//
// Input/Output:
//      - mean_value : running mean, updated in place
//
static void MeanEstimator(const int32_t new_value,
                          int factor,
                          int32_t* mean_value) {
  int32_t delta = new_value - *mean_value;

  // Shift the magnitude rather than the signed value: right-shifting a
  // negative integer is implementation-defined in C, and shifting the
  // magnitude also makes the step round toward zero in both directions.
  if (delta < 0) {
    delta = -((-delta) >> factor);
  } else {
    delta >>= factor;
  }
  *mean_value += delta;
}
// Advances the circular far-end buffer one slot and stores |far_spectrum|
// together with its Q-domain |far_q| in the new slot, so that
// WebRtc_AlignedFarend() can later retrieve both. Only called when
// alignment is enabled.
//
// Inputs:
//      - self          : Pointer to the delay estimation instance
//      - far_spectrum  : Pointer to the far end spectrum
//      - far_q         : Q-domain of far end spectrum
//
static void UpdateFarHistory(DelayEstimator_t* self,
                             uint16_t* far_spectrum,
                             int far_q) {
  int pos = self->far_history_pos + 1;

  // Wrap around at the end of the circular buffer.
  if (pos >= self->history_size) {
    pos = 0;
  }
  self->far_history_pos = pos;
  self->far_q_domains[pos] = far_q;
  memcpy(&self->far_history[pos * self->spectrum_size],
         far_spectrum,
         sizeof(uint16_t) * self->spectrum_size);
}
// Releases all memory owned by a delay estimator instance, including
// partially constructed instances coming from a failed
// WebRtc_CreateDelayEstimator().
//
// Input:
//      - handle : Pointer to the delay estimation instance, or NULL.
//
// Return value: 0 on success, -1 if |handle| is NULL.
int WebRtc_FreeDelayEstimator(void* handle) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;

  if (self == NULL) {
    return -1;
  }
  // free(NULL) is a well-defined no-op, so neither per-pointer NULL checks
  // nor the subsequent NULL assignments are needed: the whole struct is
  // released below.
  free(self->mean_far_spectrum);
  free(self->mean_near_spectrum);
  free(self->mean_bit_counts);
  free(self->bit_counts);
  free(self->far_spectrum_32);
  free(self->near_spectrum_32);
  free(self->binary_far_history);
  free(self->far_history);
  free(self->far_q_domains);
  free(self->delay_histogram);
  free(self);

  return 0;
}
// Allocates a delay estimator instance and all of its buffers. The instance
// must be initialized with WebRtc_InitDelayEstimator() before use.
//
// Inputs:
//      - spectrum_size    : Number of bands in the far/near spectra. Must
//                           exceed kBandLast, since BinarySpectrum() reads
//                           bands kBandFirst..kBandLast inclusive.
//      - history_size     : Number of far-end history entries.
//      - enable_alignment : 1 to additionally keep a far-end spectrum
//                           history for WebRtc_AlignedFarend(); 0 to
//                           produce delay values only.
//
// Output:
//      - handle           : Created instance on success; NULL on failure.
//
// Return value: 0 on success, -1 on failure.
int WebRtc_CreateDelayEstimator(void** handle,
                                int spectrum_size,
                                int history_size,
                                int enable_alignment) {
  DelayEstimator_t* self = NULL;

  // Check if the sub band used in the delay estimation is small enough to
  // fit the binary spectra in a uint32.
  assert(kBandLast - kBandFirst < 32);

  // BinarySpectrum() accesses index kBandLast, so the spectrum needs at
  // least kBandLast + 1 entries. (The previous check, spectrum_size <
  // kBandLast, was off by one.)
  if (spectrum_size <= kBandLast) {
    return -1;
  }
  if (history_size < 0) {
    return -1;
  }
  if ((enable_alignment != 0) && (enable_alignment != 1)) {
    return -1;
  }

  self = malloc(sizeof(DelayEstimator_t));
  *handle = self;
  if (self == NULL) {
    return -1;
  }
  // Null all buffer pointers up front so that any failure below can be
  // cleaned up uniformly by WebRtc_FreeDelayEstimator().
  self->mean_far_spectrum = NULL;
  self->mean_near_spectrum = NULL;
  self->mean_bit_counts = NULL;
  self->bit_counts = NULL;
  self->far_spectrum_32 = NULL;
  self->near_spectrum_32 = NULL;
  self->binary_far_history = NULL;
  self->far_history = NULL;
  self->far_q_domains = NULL;
  self->delay_histogram = NULL;

  // Allocate memory for spectrum buffers.
  self->mean_far_spectrum = malloc(spectrum_size * sizeof(int32_t));
  if (self->mean_far_spectrum == NULL) {
    goto fail;
  }
  self->mean_near_spectrum = malloc(spectrum_size * sizeof(int32_t));
  if (self->mean_near_spectrum == NULL) {
    goto fail;
  }
  self->mean_bit_counts = malloc(history_size * sizeof(int32_t));
  if (self->mean_bit_counts == NULL) {
    goto fail;
  }
  self->bit_counts = malloc(history_size * sizeof(int32_t));
  if (self->bit_counts == NULL) {
    goto fail;
  }
  self->far_spectrum_32 = malloc(spectrum_size * sizeof(int32_t));
  if (self->far_spectrum_32 == NULL) {
    goto fail;
  }
  self->near_spectrum_32 = malloc(spectrum_size * sizeof(int32_t));
  if (self->near_spectrum_32 == NULL) {
    goto fail;
  }
  // Allocate memory for history buffers.
  self->binary_far_history = malloc(history_size * sizeof(uint32_t));
  if (self->binary_far_history == NULL) {
    goto fail;
  }
  if (enable_alignment) {
    self->far_history = malloc(spectrum_size * history_size * sizeof(uint16_t));
    if (self->far_history == NULL) {
      goto fail;
    }
    self->far_q_domains = malloc(history_size * sizeof(int));
    if (self->far_q_domains == NULL) {
      goto fail;
    }
  }
  self->delay_histogram = malloc(history_size * sizeof(int));
  if (self->delay_histogram == NULL) {
    goto fail;
  }

  self->spectrum_size = spectrum_size;
  self->history_size = history_size;
  self->alignment_enabled = enable_alignment;

  return 0;

fail:
  WebRtc_FreeDelayEstimator(self);
  // Don't leave the caller holding a pointer to freed memory; a NULL handle
  // is also safe to pass to WebRtc_FreeDelayEstimator() later.
  *handle = NULL;
  return -1;
}
// Resets a created delay estimator instance to its initial state: all
// averages, histories, the histogram, the VAD counter and the delay memory
// are cleared.
//
// Input:
//      - handle : Pointer to the delay estimation instance
//
// Return value: 0 on success, -1 if |handle| is NULL.
int WebRtc_InitDelayEstimator(void* handle) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;

  if (self == NULL) {
    return -1;
  }
  // Clear all spectrum-sized buffers.
  memset(self->mean_far_spectrum, 0, sizeof(int32_t) * self->spectrum_size);
  memset(self->mean_near_spectrum, 0, sizeof(int32_t) * self->spectrum_size);
  memset(self->far_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size);
  memset(self->near_spectrum_32, 0, sizeof(int32_t) * self->spectrum_size);
  // Clear all history-sized buffers.
  memset(self->mean_bit_counts, 0, sizeof(int32_t) * self->history_size);
  memset(self->bit_counts, 0, sizeof(int32_t) * self->history_size);
  memset(self->binary_far_history, 0, sizeof(uint32_t) * self->history_size);
  memset(self->delay_histogram, 0, sizeof(int) * self->history_size);
  if (self->alignment_enabled) {
    memset(self->far_history,
           0,
           sizeof(uint16_t) * self->spectrum_size * self->history_size);
    memset(self->far_q_domains, 0, sizeof(int) * self->history_size);
    // Start one slot past the end so the first UpdateFarHistory() call
    // wraps around to slot 0.
    self->far_history_pos = self->history_size;
  }
  // Reset VAD counter and delay memory.
  self->vad_counter = 0;
  self->last_delay = 0;

  return 0;
}
// Estimates and returns the delay (in blocks) between the far end and the
// near end. Each call:
//   1) Updates running means of both spectra (used as per-band thresholds).
//   2) Forms binary spectra: one bit per band, set when the band exceeds
//      its running mean.
//   3) Compares the near-end binary spectrum against the far-end binary
//      history (count of differing bits per lag) and smooths the counts.
//   4) While the far end is active, accumulates a histogram of best-match
//      lags and reports the histogram peak as the delay.
//
// Inputs:
//      - handle        : Pointer to the delay estimation instance
//      - far_spectrum  : Far-end magnitude spectrum
//      - near_spectrum : Near-end magnitude spectrum of the current block
//      - spectrum_size : Size of the spectra; must match creation time
//      - far_q         : Q-domain of the far-end data (must be <= 15)
//      - vad_value     : VAD decision for the current block (1 = active)
//
// Return value: estimated delay (>= 0), or -1 on error.
int WebRtc_DelayEstimatorProcess(void* handle,
                                 uint16_t* far_spectrum,
                                 uint16_t* near_spectrum,
                                 int spectrum_size,
                                 int far_q,
                                 int vad_value) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;
  // Number of consecutive active-VAD blocks required before the histogram
  // starts accumulating.
  const int kVadCountThreshold = 25;
  // Upper cap on a histogram bin, bounding how long a stale delay estimate
  // can dominate.
  const int kMaxHistogram = 600;
  int histogram_bin = 0;
  int i = 0;
  int max_histogram_level = 0;
  int min_position = -1;
  uint32_t binary_far_spectrum = 0;
  uint32_t binary_near_spectrum = 0;
  int32_t bit_counts_tmp = 0;
  if (self == NULL) {
    return -1;
  }
  if (spectrum_size != self->spectrum_size) {
    // Data sizes don't match.
    return -1;
  }
  if (far_q > 15) {
    // If |far_q| is larger than 15 we cannot guarantee no wrap around.
    return -1;
  }
  if (self->alignment_enabled) {
    // Store the far-end spectrum and its Q-domain so WebRtc_AlignedFarend()
    // can later return the entry matching the estimated delay.
    UpdateFarHistory(self, far_spectrum, far_q);
  }
  // Update the running means of the far and near end spectra; these act as
  // the per-band thresholds when forming the binary spectra below.
  for (i = 0; i < self->spectrum_size; i++) {
    self->far_spectrum_32[i] = (int32_t) far_spectrum[i];
    MeanEstimator(self->far_spectrum_32[i], 6, &(self->mean_far_spectrum[i]));
    self->near_spectrum_32[i] = (int32_t) near_spectrum[i];
    MeanEstimator(self->near_spectrum_32[i], 6, &(self->mean_near_spectrum[i]));
  }
  // Shift the binary far-end history one step; index 0 holds the most
  // recent entry.
  memmove(&(self->binary_far_history[1]), &(self->binary_far_history[0]),
          (self->history_size - 1) * sizeof(uint32_t));
  // Get binary spectra.
  binary_far_spectrum = BinarySpectrum(self->far_spectrum_32,
                                       self->mean_far_spectrum);
  binary_near_spectrum = BinarySpectrum(self->near_spectrum_32,
                                        self->mean_near_spectrum);
  // Insert new binary spectrum.
  self->binary_far_history[0] = binary_far_spectrum;
  // Count differing bits between the near-end spectrum and each delayed
  // far-end spectrum; a small count means a good match at that lag.
  BitCountComparison(binary_near_spectrum,
                     self->binary_far_history,
                     self->history_size,
                     self->bit_counts);
  // Smooth bit count curve.
  for (i = 0; i < self->history_size; i++) {
    // |bit_counts| is constrained to [0, 32], meaning we can smooth with a
    // factor up to 2^26. We use Q9.
    bit_counts_tmp = WEBRTC_SPL_LSHIFT_W32(self->bit_counts[i], 9);  // Q9
    MeanEstimator(bit_counts_tmp, 9, &(self->mean_bit_counts[i]));
  }
  // The lag with the smallest smoothed bit count is the current best match.
  min_position = (int) WebRtcSpl_MinIndexW32(self->mean_bit_counts,
                                             (int16_t) self->history_size);
  // If the far end has been active sufficiently long, begin accumulating a
  // histogram of the minimum positions. Search for the maximum bin to
  // determine the delay.
  if (vad_value == 1) {
    if (self->vad_counter >= kVadCountThreshold) {
      // Increment the histogram at the current minimum position. A hit adds
      // 3 while every non-empty bin decays by 1 below, so frequently chosen
      // lags out-grow stale ones.
      if (self->delay_histogram[min_position] < kMaxHistogram) {
        self->delay_histogram[min_position] += 3;
      }
      self->last_delay = 0;
      for (i = 0; i < self->history_size; i++) {
        histogram_bin = self->delay_histogram[i];
        // Decrement the histogram bin.
        if (histogram_bin > 0) {
          histogram_bin--;
          self->delay_histogram[i] = histogram_bin;
          // Select the histogram index corresponding to the maximum bin as
          // the delay.
          if (histogram_bin > max_histogram_level) {
            max_histogram_level = histogram_bin;
            self->last_delay = i;
          }
        }
      }
    } else {
      self->vad_counter++;
    }
  } else {
    // Far end inactive: restart the activity counter. |last_delay| keeps
    // its previous value.
    self->vad_counter = 0;
  }
  return self->last_delay;
}
// Returns the stored far-end spectrum aligned with the current near end,
// i.e. the history entry |last_delay| slots behind the write position, and
// its Q-domain through |far_q|. Returns NULL on error or when alignment is
// disabled.
const uint16_t* WebRtc_AlignedFarend(void* handle,
                                     int far_spectrum_size,
                                     int* far_q) {
  DelayEstimator_t* self = (DelayEstimator_t*) handle;
  int position;

  if (self == NULL) {
    return NULL;
  }
  if (far_spectrum_size != self->spectrum_size ||
      self->alignment_enabled == 0) {
    return NULL;
  }
  // Step back |last_delay| slots in the circular buffer.
  position = self->far_history_pos - self->last_delay;
  if (position < 0) {
    position += self->history_size;
  }
  *far_q = self->far_q_domains[position];
  return self->far_history + position * far_spectrum_size;
}
// Returns the delay estimated by the last WebRtc_DelayEstimatorProcess()
// call, or -1 if |handle| is NULL.
int WebRtc_last_delay(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->last_delay;
}
// Returns the far-end history size of the estimator, or -1 if |handle| is
// NULL.
int WebRtc_history_size(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->history_size;
}
// Returns the spectrum size the estimator was created with, or -1 if
// |handle| is NULL.
int WebRtc_spectrum_size(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->spectrum_size;
}
// Returns 1 if far-end alignment is enabled, 0 if disabled, or -1 if
// |handle| is NULL.
int WebRtc_is_alignment_enabled(void* handle) {
  const DelayEstimator_t* self = (const DelayEstimator_t*) handle;
  return (self == NULL) ? -1 : self->alignment_enabled;
}

View File

@ -0,0 +1,154 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_
#include "typedefs.h"
// Releases the memory allocated by WebRtc_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtc_FreeDelayEstimator(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtc_InitDelayEstimator(...)
// function.
//
// Inputs:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and
// near end. Used to allocate memory for spectrum
// specific buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
// - enable_alignment : With this mode set to 1, a far end history is
// created, so that the user can retrieve aligned
// far end spectra using
// WebRtc_AlignedFarend(...). Otherwise, only delay
// values are calculated.
//
// Output:
// - handle : Created instance
//
int WebRtc_CreateDelayEstimator(void** handle,
int spectrum_size,
int history_size,
int enable_alignment);
// Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimator(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtc_InitDelayEstimator(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - far_q : The Q-domain of the far end data
// - vad_value : The VAD decision of the current block
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtc_DelayEstimatorProcess(void* handle,
uint16_t* far_spectrum,
uint16_t* near_spectrum,
int spectrum_size,
int far_q,
int vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcess(...) should have been
// called before WebRtc_AlignedFarend(...). Otherwise, you get the pointer to
// the previous frame. The memory is only valid until the next call of
// WebRtc_DelayEstimatorProcess(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum_size : Size of far_spectrum allocated by the caller
//
// Output:
// - far_q : The Q-domain of the aligned far end spectrum
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const uint16_t* WebRtc_AlignedFarend(void* handle,
int far_spectrum_size,
int* far_q);
// Returns the last calculated delay updated by the function
// WebRtc_DelayEstimatorProcess(...)
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtc_last_delay(void* handle);
// Returns the history size used in the far end buffers to calculate the delay
// over.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - history_size : > 0 - Far end history size
// -1 - Error
//
int WebRtc_history_size(void* handle);
// Returns the fixed spectrum size used in the algorithm.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - spectrum_size : > 0 - Spectrum size
// -1 - Error
//
int WebRtc_spectrum_size(void* handle);
// Returns 1 if the far end alignment is enabled and 0 otherwise.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - alignment_enabled : 1 - Enabled
// 0 - Disabled
// -1 - Error
//
int WebRtc_is_alignment_enabled(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_H_

View File

@ -0,0 +1,288 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
#include "delay_estimator_float.h"
#include <assert.h>
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include "delay_estimator.h"
#include "signal_processing_library.h"
// Internal state of the floating-point wrapper around the fixed-point delay
// estimator.
typedef struct {
  // Fixed point spectra: scaled uint16_t copies of the current far/near
  // float spectra, fed to the fixed-point core estimator.
  uint16_t* far_spectrum_u16;
  uint16_t* near_spectrum_u16;
  // Far end history variables (circular float spectrum buffer, only
  // allocated when alignment is enabled).
  float* far_history;
  int far_history_pos;  // Current write slot in |far_history|.
  // Fixed point delay estimator performing the actual estimation.
  void* fixed_handle;
} DelayEstimatorFloat_t;
// Advances the circular far-end float buffer one slot and copies
// |far_spectrum| into the new slot. Only used when alignment is enabled.
//
// Inputs:
//      - self          : Pointer to the delay estimation instance
//      - far_spectrum  : Pointer to the far end spectrum
//
static void UpdateFarHistory(DelayEstimatorFloat_t* self, float* far_spectrum) {
  const int spectrum_size = WebRtc_spectrum_size(self->fixed_handle);
  int pos = self->far_history_pos + 1;

  // Wrap around at the end of the circular buffer.
  if (pos >= WebRtc_history_size(self->fixed_handle)) {
    pos = 0;
  }
  self->far_history_pos = pos;
  memcpy(&self->far_history[pos * spectrum_size],
         far_spectrum,
         sizeof(float) * spectrum_size);
}
// Releases all memory owned by a float delay estimator instance, including
// the underlying fixed-point estimator and partially constructed instances
// from a failed WebRtc_CreateDelayEstimatorFloat().
//
// Input:
//      - handle : Pointer to the delay estimation instance, or NULL.
//
// Return value: 0 on success, -1 if |handle| is NULL.
int WebRtc_FreeDelayEstimatorFloat(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;

  if (self == NULL) {
    return -1;
  }
  // free(NULL) is a well-defined no-op, so no per-pointer NULL guards are
  // needed. WebRtc_FreeDelayEstimator() likewise tolerates a NULL handle.
  free(self->far_history);
  free(self->far_spectrum_u16);
  free(self->near_spectrum_u16);
  WebRtc_FreeDelayEstimator(self->fixed_handle);
  free(self);

  return 0;
}
// Allocates a float delay estimator wrapper together with the underlying
// fixed-point estimator. The instance must be initialized with
// WebRtc_InitDelayEstimatorFloat() before use.
//
// Inputs: as for WebRtc_CreateDelayEstimator().
//
// Output:
//      - handle : Created instance on success; NULL on failure.
//
// Return value: 0 on success, -1 on failure.
int WebRtc_CreateDelayEstimatorFloat(void** handle,
                                     int spectrum_size,
                                     int history_size,
                                     int enable_alignment) {
  DelayEstimatorFloat_t* self = NULL;

  if ((enable_alignment != 0) && (enable_alignment != 1)) {
    return -1;
  }

  self = malloc(sizeof(DelayEstimatorFloat_t));
  *handle = self;
  if (self == NULL) {
    return -1;
  }
  // Null all buffer pointers up front so that any failure below can be
  // cleaned up uniformly by WebRtc_FreeDelayEstimatorFloat().
  self->far_history = NULL;
  self->far_spectrum_u16 = NULL;
  self->near_spectrum_u16 = NULL;

  // Create the fixed point core delay estimator.
  if (WebRtc_CreateDelayEstimator(&self->fixed_handle,
                                  spectrum_size,
                                  history_size,
                                  enable_alignment) != 0) {
    // A failed create may leave |fixed_handle| pointing at already-freed
    // memory; clear it so the cleanup below does not double-free.
    self->fixed_handle = NULL;
    goto fail;
  }
  // Allocate memory for the far end history buffer.
  if (enable_alignment) {
    self->far_history = malloc(spectrum_size * history_size * sizeof(float));
    if (self->far_history == NULL) {
      goto fail;
    }
  }
  // Allocate memory for the fixed point spectra.
  self->far_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t));
  if (self->far_spectrum_u16 == NULL) {
    goto fail;
  }
  self->near_spectrum_u16 = malloc(spectrum_size * sizeof(uint16_t));
  if (self->near_spectrum_u16 == NULL) {
    goto fail;
  }

  return 0;

fail:
  WebRtc_FreeDelayEstimatorFloat(self);
  // Don't leave the caller holding a pointer to freed memory.
  *handle = NULL;
  return -1;
}
// Resets a float delay estimator instance (and its fixed-point core) to the
// initial state.
//
// Input:
//      - handle : Pointer to the delay estimation instance
//
// Return value: 0 on success, -1 on error.
int WebRtc_InitDelayEstimatorFloat(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  int history_size;
  int spectrum_size;

  if (self == NULL) {
    return -1;
  }
  if (WebRtc_InitDelayEstimator(self->fixed_handle) != 0) {
    return -1;
  }
  history_size = WebRtc_history_size(self->fixed_handle);
  spectrum_size = WebRtc_spectrum_size(self->fixed_handle);
  if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) {
    // Clear the far end history.
    memset(self->far_history,
           0,
           sizeof(float) * spectrum_size * history_size);
    // Start one slot past the end so the first UpdateFarHistory() call
    // wraps around to slot 0.
    self->far_history_pos = history_size;
  }
  // Clear the fixed point spectra.
  memset(self->far_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size);
  memset(self->near_spectrum_u16, 0, sizeof(uint16_t) * spectrum_size);

  return 0;
}
// Scales the float magnitude |spectrum| into |out| as uint16_t so that the
// largest value fits in a signed 16-bit range, and returns the power-of-two
// scaling exponent (the Q-domain). The FFT size is factored out because the
// float and fixed-point FFTs scale differently.
// TODO(bjornv): I've taken the size of FFT into account, since there is a
// different scaling in float vs fixed point FFTs. I'm not completely sure
// this is necessary.
static int ScaleSpectrumToUint16(const float* spectrum,
                                 int spectrum_size,
                                 float fft_size,
                                 uint16_t* out) {
  const float kLogOf2Inverse = 1.4426950f;  // 1 / ln(2); log() * this = log2().
  float max_value = 0.0f;
  float scaling = 0.0f;
  int scaling_log = 0;
  int i = 0;

  // 1) Find the largest value.
  for (i = 0; i < spectrum_size; ++i) {
    if (spectrum[i] > max_value) {
      max_value = spectrum[i];
    }
  }
  // 2) Find the largest power-of-two scaling such that the scaled maximum
  // still fits in a Word16. The "+ 1" keeps the log() argument >= 1, so
  // scaling_log <= 14 (and exactly 14 for an all-zero spectrum).
  scaling_log = 14 - (int) (log(max_value / fft_size + 1) * kLogOf2Inverse);
  scaling = (float) (1 << scaling_log) / fft_size;
  for (i = 0; i < spectrum_size; ++i) {
    out[i] = (uint16_t) (spectrum[i] * scaling);
  }
  return scaling_log;
}

// Converts the float spectra to fixed point and runs the fixed-point delay
// estimator on them. Returns the estimated delay (>= 0) or -1 on error.
//
// Inputs:
//      - handle        : Pointer to the delay estimation instance
//      - far_spectrum  : Far-end magnitude spectrum
//      - near_spectrum : Near-end magnitude spectrum of the current block
//      - spectrum_size : Size of the spectra; must match creation time
//      - vad_value     : VAD decision for the current block
//
int WebRtc_DelayEstimatorProcessFloat(void* handle,
                                      float* far_spectrum,
                                      float* near_spectrum,
                                      int spectrum_size,
                                      int vad_value) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  const float kFftSize = (float) (2 * (spectrum_size - 1));
  int far_q = 0;

  if (self == NULL) {
    return -1;
  }
  if (far_spectrum == NULL) {
    // Empty far end spectrum.
    return -1;
  }
  if (near_spectrum == NULL) {
    // Empty near end spectrum.
    return -1;
  }
  if (spectrum_size != WebRtc_spectrum_size(self->fixed_handle)) {
    // Data sizes don't match.
    return -1;
  }

  // The near-end Q-domain is not needed downstream, only the far-end one.
  (void) ScaleSpectrumToUint16(near_spectrum, spectrum_size, kFftSize,
                               self->near_spectrum_u16);
  far_q = ScaleSpectrumToUint16(far_spectrum, spectrum_size, kFftSize,
                                self->far_spectrum_u16);
  assert(far_q < 16);  // Catch too large scaling, which should never be able
                       // to occur.

  if (WebRtc_is_alignment_enabled(self->fixed_handle) == 1) {
    // Update far end history.
    UpdateFarHistory(self, far_spectrum);
  }
  return WebRtc_DelayEstimatorProcess(self->fixed_handle,
                                      self->far_spectrum_u16,
                                      self->near_spectrum_u16,
                                      spectrum_size,
                                      far_q,
                                      vad_value);
}
// Returns the stored far-end float spectrum aligned with the current near
// end, i.e. the history entry |last_delay| slots behind the write position.
// Returns NULL on error or when alignment is disabled.
const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  int position;

  if (self == NULL) {
    return NULL;
  }
  if (far_spectrum_size != WebRtc_spectrum_size(self->fixed_handle) ||
      WebRtc_is_alignment_enabled(self->fixed_handle) != 1) {
    return NULL;
  }
  // Step back |last_delay| slots in the circular buffer.
  position = self->far_history_pos - WebRtc_last_delay(self->fixed_handle);
  if (position < 0) {
    position += WebRtc_history_size(self->fixed_handle);
  }
  return self->far_history + position * far_spectrum_size;
}
// Returns the delay estimated by the last
// WebRtc_DelayEstimatorProcessFloat() call, or -1 if |handle| is NULL.
int WebRtc_last_delay_float(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  return (self == NULL) ? -1 : WebRtc_last_delay(self->fixed_handle);
}
// Returns 1 if far-end alignment is enabled, 0 if disabled, or -1 if
// |handle| is NULL.
int WebRtc_is_alignment_enabled_float(void* handle) {
  DelayEstimatorFloat_t* self = (DelayEstimatorFloat_t*) handle;
  return (self == NULL) ? -1 : WebRtc_is_alignment_enabled(self->fixed_handle);
}

View File

@ -0,0 +1,125 @@
/*
* Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
*
* Use of this source code is governed by a BSD-style license
* that can be found in the LICENSE file in the root of the source
* tree. An additional intellectual property rights grant can be found
* in the file PATENTS. All contributing project authors may
* be found in the AUTHORS file in the root of the source tree.
*/
// Performs delay estimation on a block by block basis
// The return value is 0 - OK and -1 - Error, unless otherwise stated.
#ifndef WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_
#define WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_
// Releases the memory allocated by WebRtc_CreateDelayEstimatorFloat(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
int WebRtc_FreeDelayEstimatorFloat(void* handle);
// Allocates the memory needed by the delay estimation. The memory needs to be
// initialized separately using the WebRtc_InitDelayEstimatorFloat(...)
// function.
//
// Inputs:
// - handle : Instance that should be created
// - spectrum_size : Size of the spectrum used both in far end and
// near end. Used to allocate memory for spectrum
// specific buffers.
// - history_size : Size of the far end history used to estimate the
// delay from. Used to allocate memory for history
// specific buffers.
// - enable_alignment : With this mode set to 1, a far end history is
// created, so that the user can retrieve aligned
// far end spectra using
// WebRtc_AlignedFarendFloat(...). Otherwise, only
// delay values are calculated.
//
// Output:
// - handle : Created instance
//
int WebRtc_CreateDelayEstimatorFloat(void** handle,
int spectrum_size,
int history_size,
int enable_alignment);
// Initializes the delay estimation instance created with
// WebRtc_CreateDelayEstimatorFloat(...)
// Input:
// - handle : Pointer to the delay estimation instance
//
// Output:
// - handle : Initialized instance
//
int WebRtc_InitDelayEstimatorFloat(void* handle);
// Estimates and returns the delay between the far end and near end blocks.
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum : Pointer to the far end spectrum data
// - near_spectrum : Pointer to the near end spectrum data of the current
// block
// - spectrum_size : The size of the data arrays (same for both far and
// near end)
// - vad_value : The VAD decision of the current block
// (Unlike the fixed-point API, there is no far_q input: the Q-domain
// is derived internally when the float data is converted to fixed point.)
//
// Output:
// - handle : Updated instance
//
// Return value:
// - delay : >= 0 - Calculated delay value
// -1 - Error
//
int WebRtc_DelayEstimatorProcessFloat(void* handle,
float* far_spectrum,
float* near_spectrum,
int spectrum_size,
int vad_value);
// Returns a pointer to the far end spectrum aligned to current near end
// spectrum. The function WebRtc_DelayEstimatorProcessFloat(...) should
// have been called before WebRtc_AlignedFarendFloat(...). Otherwise, you get
// the pointer to the previous frame. The memory is only valid until the
// next call of WebRtc_DelayEstimatorProcessFloat(...).
//
// Inputs:
// - handle : Pointer to the delay estimation instance
// - far_spectrum_size : Size of far_spectrum allocated by the caller
//
// Output:
//
// Return value:
// - far_spectrum : Pointer to the aligned far end spectrum
// NULL - Error
//
const float* WebRtc_AlignedFarendFloat(void* handle, int far_spectrum_size);
// Returns the last calculated delay updated by the function
// WebRtc_DelayEstimatorProcessFloat(...)
//
// Inputs:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - delay : >= 0 - Last calculated delay value
// -1 - Error
//
int WebRtc_last_delay_float(void* handle);
// Returns 1 if the far end alignment is enabled and 0 otherwise.
//
// Input:
// - handle : Pointer to the delay estimation instance
//
// Return value:
// - alignment_enabled : 1 - Enabled
// 0 - Disabled
// -1 - Error
//
int WebRtc_is_alignment_enabled_float(void* handle);
#endif // WEBRTC_MODULES_AUDIO_PROCESSING_UTILITY_DELAY_ESTIMATOR_FLOAT_H_

View File

@ -11,16 +11,23 @@
{
'target_name': 'apm_util',
'type': '<(library)',
'dependencies': [
'<(webrtc_root)/common_audio/common_audio.gyp:spl',
],
'direct_dependent_settings': {
'include_dirs': [
'.',
],
},
'sources': [
'ring_buffer.c',
'ring_buffer.h',
'delay_estimator_float.c',
'delay_estimator_float.h',
'delay_estimator.c',
'delay_estimator.h',
'fft4g.c',
'fft4g.h',
'ring_buffer.c',
'ring_buffer.h',
],
},
],

View File

@ -42,9 +42,12 @@ struct RTPVideoHeaderH263
bool bits; // H.263 mode B, Xor the lasy byte of previus packet with the
// first byte of this packet
};
enum {kNoPictureId = -1};
enum {kNoTl0PicIdx = -1};
enum {kNoTemporalIdx = -1};
enum {kNoSimulcastIdx = 0};
struct RTPVideoHeaderVP8
{
void InitRTPVideoHeaderVP8()
@ -89,6 +92,8 @@ struct RTPVideoHeader
WebRtc_UWord16 height;
bool isFirstPacket; // first packet in frame
WebRtc_UWord8 simulcastIdx; // Index if the simulcast encoder creating
// this frame, 0 if not using simulcast.
RTPVideoCodecTypes codec;
RTPVideoTypeHeader codecHeader;
};

View File

@ -33,6 +33,7 @@
'../interface/cpu_features_wrapper.h',
'../interface/critical_section_wrapper.h',
'../interface/data_log.h',
'../interface/data_log_c.h',
'../interface/data_log_impl.h',
'../interface/event_wrapper.h',
'../interface/file_wrapper.h',
@ -63,6 +64,7 @@
'critical_section.cc',
'critical_section_posix.h',
'critical_section_windows.h',
'data_log_c.cc',
'event.cc',
'event_posix.h',
'event_windows.h',

View File

@ -14,12 +14,14 @@
#define WEBRTC_TYPEDEFS_H_
// Reserved words definitions
// TODO(andrew): Look at removing these.
#define WEBRTC_EXTERN extern
#define G_CONST const
#define WEBRTC_INLINE extern __inline
// Define WebRTC preprocessor identifiers based on the current build platform.
// TODO(ajm): Clean these up. We can probably remove everything in this block.
// TODO(andrew): Clean these up. We can probably remove everything in this
// block.
// - TARGET_MAC_INTEL and TARGET_MAC aren't used anywhere.
// - In the few places where TARGET_PC is used, it should be replaced by
// something more specific.
@ -32,7 +34,7 @@
#endif
#elif defined(__APPLE__)
// Mac OS X
#if defined(__LITTLE_ENDIAN__ ) //TODO: is this used?
#if defined(__LITTLE_ENDIAN__ )
#if !defined(WEBRTC_TARGET_MAC_INTEL)
#define WEBRTC_TARGET_MAC_INTEL
#endif
@ -53,7 +55,7 @@
// http://msdn.microsoft.com/en-us/library/b0084kay.aspx
// http://www.agner.org/optimize/calling_conventions.pdf
// or with gcc, run: "echo | gcc -E -dM -"
// TODO(ajm): replace WEBRTC_LITTLE_ENDIAN with WEBRTC_ARCH_LITTLE_ENDIAN?
// TODO(andrew): replace WEBRTC_LITTLE_ENDIAN with WEBRTC_ARCH_LITTLE_ENDIAN?
#if defined(_M_X64) || defined(__x86_64__)
#define WEBRTC_ARCH_X86_FAMILY
#define WEBRTC_ARCH_X86_64
@ -65,8 +67,11 @@
#define WEBRTC_ARCH_32_BITS
#define WEBRTC_ARCH_LITTLE_ENDIAN
#elif defined(__ARMEL__)
// TODO(ajm): Chromium uses the two commented defines. Should we switch?
#define WEBRTC_ARCH_ARM
// TODO(andrew): We'd prefer to control platform defines here, but this is
// currently provided by the Android makefiles. Commented to avoid duplicate
// definition warnings.
//#define WEBRTC_ARCH_ARM
// TODO(andrew): Chromium uses the following two defines. Should we switch?
//#define WEBRTC_ARCH_ARM_FAMILY
//#define WEBRTC_ARCH_ARMEL
#define WEBRTC_ARCH_32_BITS
@ -75,10 +80,7 @@
#error Please add support for your architecture in typedefs.h
#endif
// TODO(ajm): SSE2 is disabled on Windows for the moment, because AEC
// optimization is broken. Enable it as soon as AEC is fixed.
//#if defined(__SSE2__) || defined(_MSC_VER)
#if defined(__SSE2__)
#if defined(__SSE2__) || defined(_MSC_VER)
#define WEBRTC_USE_SSE2
#endif