Bump to WebRTC M120 release

Some API deprecation -- ExperimentalAgc and ExperimentalNs are gone.
We're continuing to carry iSAC even though it's gone upstream, but maybe
we'll want to drop that soon.
This commit is contained in:
Arun Raghavan
2023-12-12 10:42:58 -05:00
parent 9a202fb8c2
commit c6abf6cd3f
479 changed files with 20900 additions and 11996 deletions

View File

@ -72,9 +72,9 @@ void WebRtcSpl_CrossCorrelationNeon(int32_t* cross_correlation,
size_t dim_cross_correlation,
int right_shifts,
int step_seq2) {
size_t i = 0;
int i = 0;
for (i = 0; i < dim_cross_correlation; i++) {
for (i = 0; i < (int)dim_cross_correlation; i++) {
const int16_t* seq1_ptr = seq1;
const int16_t* seq2_ptr = seq2 + (step_seq2 * i);

View File

@ -98,8 +98,7 @@ int32_t WebRtcSpl_DivResultInQ31(int32_t num, int32_t den)
return div;
}
int32_t RTC_NO_SANITIZE("signed-integer-overflow") // bugs.webrtc.org/5486
WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
int32_t WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
{
int16_t approx, tmp_hi, tmp_low, num_hi, num_low;
int32_t tmpW32;
@ -111,8 +110,8 @@ WebRtcSpl_DivW32HiLow(int32_t num, int16_t den_hi, int16_t den_low)
tmpW32 = (den_hi * approx << 1) + ((den_low * approx >> 15) << 1);
// tmpW32 = den * approx
tmpW32 = (int32_t)0x7fffffffL - tmpW32; // result in Q30 (tmpW32 = 2.0-(den*approx))
// UBSan: 2147483647 - -2 cannot be represented in type 'int'
// result in Q30 (tmpW32 = 2.0-(den*approx))
tmpW32 = (int32_t)((int64_t)0x7fffffffL - tmpW32);
// Store tmpW32 in hi and low format
tmp_hi = (int16_t)(tmpW32 >> 16);

View File

@ -26,7 +26,7 @@ extern "C" {
// - vector_length : Number of samples used in the dot product
// - scaling : The number of right bit shifts to apply on each term
// during calculation to avoid overflow, i.e., the
// output will be in Q(-|scaling|)
// output will be in Q(-`scaling`)
//
// Return value : The dot product in Q(-scaling)
int32_t WebRtcSpl_DotProductWithScale(const int16_t* vector1,

View File

@ -8,9 +8,11 @@
* be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h>
#include "common_audio/signal_processing/include/signal_processing_library.h"
#include <arm_neon.h>
#include "rtc_base/checks.h"
// NEON intrinsics version of WebRtcSpl_DownsampleFast()
// for ARM 32-bit/64-bit platforms.
@ -22,19 +24,24 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
size_t coefficients_length,
int factor,
size_t delay) {
size_t i = 0;
size_t j = 0;
// Using signed indexes to be able to compute negative i-j that
// is used to index data_in.
int i = 0;
int j = 0;
int32_t out_s32 = 0;
size_t endpos = delay + factor * (data_out_length - 1) + 1;
int endpos = delay + factor * (data_out_length - 1) + 1;
size_t res = data_out_length & 0x7;
size_t endpos1 = endpos - factor * res;
int endpos1 = endpos - factor * res;
// Return error if any of the running conditions doesn't meet.
if (data_out_length == 0 || coefficients_length == 0
|| data_in_length < endpos) {
|| (int)data_in_length < endpos) {
return -1;
}
RTC_DCHECK_GE(endpos, 0);
RTC_DCHECK_GE(endpos1, 0);
// First part, unroll the loop 8 times, with 3 subcases
// (factor == 2, 4, others).
switch (factor) {
@ -46,7 +53,7 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
#if defined(WEBRTC_ARCH_ARM64)
// Unroll the loop 2 times.
for (j = 0; j < coefficients_length - 1; j += 2) {
for (j = 0; j < (int)coefficients_length - 1; j += 2) {
int32x2_t coeff32 = vld1_dup_s32((int32_t*)&coefficients[j]);
int16x4_t coeff16x4 = vreinterpret_s16_s32(coeff32);
int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j - 1]);
@ -68,7 +75,7 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_3, coeff16x4, 0);
}
for (; j < coefficients_length; j++) {
for (; j < (int)coefficients_length; j++) {
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
@ -87,7 +94,7 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
#else
// On ARMv7, the loop unrolling 2 times results in performance
// regression.
for (j = 0; j < coefficients_length; j++) {
for (j = 0; j < (int)coefficients_length; j++) {
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
int16x8x2_t in16x8x2 = vld2q_s16(&data_in[i - j]);
@ -114,7 +121,7 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
int32x4_t out32x4_1 = vdupq_n_s32(2048);
// Unroll the loop 4 times.
for (j = 0; j < coefficients_length - 3; j += 4) {
for (j = 0; j < (int)coefficients_length - 3; j += 4) {
int16x4_t coeff16x4 = vld1_s16(&coefficients[j]);
int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j - 3]);
@ -143,7 +150,7 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
out32x4_1 = vmlal_lane_s16(out32x4_1, in16x4_7, coeff16x4, 0);
}
for (; j < coefficients_length; j++) {
for (; j < (int)coefficients_length; j++) {
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
int16x8x4_t in16x8x4 = vld4q_s16(&data_in[i - j]);
@ -174,7 +181,7 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
int32x4_t out32x4_0 = vdupq_n_s32(2048);
int32x4_t out32x4_1 = vdupq_n_s32(2048);
for (j = 0; j < coefficients_length; j++) {
for (j = 0; j < (int)coefficients_length; j++) {
int16x4_t coeff16x4 = vld1_dup_s16(&coefficients[j]);
int16x4_t in16x4_0 = vld1_dup_s16(&data_in[i - j]);
in16x4_0 = vld1_lane_s16(&data_in[i + factor - j], in16x4_0, 1);
@ -204,7 +211,7 @@ int WebRtcSpl_DownsampleFastNeon(const int16_t* data_in,
for (; i < endpos; i += factor) {
out_s32 = 2048; // Round value, 0.5 in Q12.
for (j = 0; j < coefficients_length; j++) {
for (j = 0; j < (int)coefficients_length; j++) {
out_s32 = WebRtc_MulAccumW16(coefficients[j], data_in[i - j], out_s32);
}

View File

@ -81,7 +81,7 @@ int WebRtcSpl_RealForwardFFT(struct RealFFT* self,
// boundary.
//
// Return Value:
// 0 or a positive number - a value that the elements in the |real_data_out|
// 0 or a positive number - a value that the elements in the `real_data_out`
// should be shifted left with in order to get
// correct physical values.
// -1 - Error with bad arguments (null pointers).

View File

@ -166,7 +166,7 @@ int32_t WebRtcSpl_MaxAbsValueW32_mips(const int32_t* vector, size_t length);
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
// Return value : Maximum sample value in `vector`.
typedef int16_t (*MaxValueW16)(const int16_t* vector, size_t length);
extern const MaxValueW16 WebRtcSpl_MaxValueW16;
int16_t WebRtcSpl_MaxValueW16C(const int16_t* vector, size_t length);
@ -183,7 +183,7 @@ int16_t WebRtcSpl_MaxValueW16_mips(const int16_t* vector, size_t length);
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Maximum sample value in |vector|.
// Return value : Maximum sample value in `vector`.
typedef int32_t (*MaxValueW32)(const int32_t* vector, size_t length);
extern const MaxValueW32 WebRtcSpl_MaxValueW32;
int32_t WebRtcSpl_MaxValueW32C(const int32_t* vector, size_t length);
@ -200,7 +200,7 @@ int32_t WebRtcSpl_MaxValueW32_mips(const int32_t* vector, size_t length);
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
// Return value : Minimum sample value in `vector`.
typedef int16_t (*MinValueW16)(const int16_t* vector, size_t length);
extern const MinValueW16 WebRtcSpl_MinValueW16;
int16_t WebRtcSpl_MinValueW16C(const int16_t* vector, size_t length);
@ -217,7 +217,7 @@ int16_t WebRtcSpl_MinValueW16_mips(const int16_t* vector, size_t length);
// - vector : 32-bit input vector.
// - length : Number of samples in vector.
//
// Return value : Minimum sample value in |vector|.
// Return value : Minimum sample value in `vector`.
typedef int32_t (*MinValueW32)(const int32_t* vector, size_t length);
extern const MinValueW32 WebRtcSpl_MinValueW32;
int32_t WebRtcSpl_MinValueW32C(const int32_t* vector, size_t length);
@ -228,6 +228,25 @@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length);
int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length);
#endif
// Returns both the minimum and maximum values of a 16-bit vector.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
// Output:
// - max_val : Maximum sample value in `vector`.
// - min_val : Minimum sample value in `vector`.
void WebRtcSpl_MinMaxW16(const int16_t* vector,
size_t length,
int16_t* min_val,
int16_t* max_val);
#if defined(WEBRTC_HAS_NEON)
void WebRtcSpl_MinMaxW16Neon(const int16_t* vector,
size_t length,
int16_t* min_val,
int16_t* max_val);
#endif
// Returns the vector index to the largest absolute value of a 16-bit vector.
//
// Input:
@ -240,6 +259,17 @@ int32_t WebRtcSpl_MinValueW32_mips(const int32_t* vector, size_t length);
// -32768 presenting an int16 absolute value of 32767).
size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length);
// Returns the element with the largest absolute value of a 16-bit vector. Note
// that this function can return a negative value.
//
// Input:
// - vector : 16-bit input vector.
// - length : Number of samples in vector.
//
// Return value : The element with the largest absolute value. Note that this
// may be a negative value.
int16_t WebRtcSpl_MaxAbsElementW16(const int16_t* vector, size_t length);
// Returns the vector index to the maximum sample value of a 16-bit vector.
//
// Input:
@ -396,7 +426,7 @@ void WebRtcSpl_AffineTransformVector(int16_t* out_vector,
//
// Input:
// - in_vector : Vector to calculate autocorrelation upon
// - in_vector_length : Length (in samples) of |vector|
// - in_vector_length : Length (in samples) of `vector`
// - order : The order up to which the autocorrelation should be
// calculated
//
@ -408,7 +438,7 @@ void WebRtcSpl_AffineTransformVector(int16_t* out_vector,
// - scale : The number of left shifts required to obtain the
// auto-correlation in Q0
//
// Return value : Number of samples in |result|, i.e. (order+1)
// Return value : Number of samples in `result`, i.e. (order+1)
size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
size_t in_vector_length,
size_t order,
@ -419,7 +449,7 @@ size_t WebRtcSpl_AutoCorrelation(const int16_t* in_vector,
// does NOT use the 64 bit class
//
// Input:
// - auto_corr : Vector with autocorrelation values of length >= |order|+1
// - auto_corr : Vector with autocorrelation values of length >= `order`+1
// - order : The LPC filter order (support up to order 20)
//
// Output:
@ -432,7 +462,7 @@ int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr,
int16_t* refl_coef,
size_t order);
// Converts reflection coefficients |refl_coef| to LPC coefficients |lpc_coef|.
// Converts reflection coefficients `refl_coef` to LPC coefficients `lpc_coef`.
// This version is a 16 bit operation.
//
// NOTE: The 16 bit refl_coef -> lpc_coef conversion might result in a
@ -442,7 +472,7 @@ int16_t WebRtcSpl_LevinsonDurbin(const int32_t* auto_corr,
// Input:
// - refl_coef : Reflection coefficients in Q15 that should be converted
// to LPC coefficients
// - use_order : Number of coefficients in |refl_coef|
// - use_order : Number of coefficients in `refl_coef`
//
// Output:
// - lpc_coef : LPC coefficients in Q12
@ -450,14 +480,14 @@ void WebRtcSpl_ReflCoefToLpc(const int16_t* refl_coef,
int use_order,
int16_t* lpc_coef);
// Converts LPC coefficients |lpc_coef| to reflection coefficients |refl_coef|.
// Converts LPC coefficients `lpc_coef` to reflection coefficients `refl_coef`.
// This version is a 16 bit operation.
// The conversion is implemented by the step-down algorithm.
//
// Input:
// - lpc_coef : LPC coefficients in Q12, that should be converted to
// reflection coefficients
// - use_order : Number of coefficients in |lpc_coef|
// - use_order : Number of coefficients in `lpc_coef`
//
// Output:
// - refl_coef : Reflection coefficients in Q15.
@ -478,24 +508,24 @@ void WebRtcSpl_AutoCorrToReflCoef(const int32_t* auto_corr,
int16_t* refl_coef);
// The functions (with related pointer) calculate the cross-correlation between
// two sequences |seq1| and |seq2|.
// |seq1| is fixed and |seq2| slides as the pointer is increased with the
// amount |step_seq2|. Note the arguments should obey the relationship:
// |dim_seq| - 1 + |step_seq2| * (|dim_cross_correlation| - 1) <
// buffer size of |seq2|
// two sequences `seq1` and `seq2`.
// `seq1` is fixed and `seq2` slides as the pointer is increased with the
// amount `step_seq2`. Note the arguments should obey the relationship:
// `dim_seq` - 1 + `step_seq2` * (`dim_cross_correlation` - 1) <
// buffer size of `seq2`
//
// Input:
// - seq1 : First sequence (fixed throughout the correlation)
// - seq2 : Second sequence (slides |step_vector2| for each
// - seq2 : Second sequence (slides `step_vector2` for each
// new correlation)
// - dim_seq : Number of samples to use in the cross-correlation
// - dim_cross_correlation : Number of cross-correlations to calculate (the
// start position for |vector2| is updated for each
// start position for `vector2` is updated for each
// new one)
// - right_shifts : Number of right bit shifts to use. This will
// become the output Q-domain.
// - step_seq2 : How many (positive or negative) steps the
// |vector2| pointer should be updated for each new
// `vector2` pointer should be updated for each new
// cross-correlation value.
//
// Output:
@ -545,11 +575,11 @@ void WebRtcSpl_CrossCorrelation_mips(int32_t* cross_correlation,
void WebRtcSpl_GetHanningWindow(int16_t* window, size_t size);
// Calculates y[k] = sqrt(1 - x[k]^2) for each element of the input vector
// |in_vector|. Input and output values are in Q15.
// `in_vector`. Input and output values are in Q15.
//
// Inputs:
// - in_vector : Values to calculate sqrt(1 - x^2) of
// - vector_length : Length of vector |in_vector|
// - vector_length : Length of vector `in_vector`
//
// Output:
// - out_vector : Output values in Q15
@ -637,9 +667,9 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
// Input:
// - data_in : Input samples (state in positions
// data_in[-order] .. data_in[-1])
// - data_in_length : Number of samples in |data_in| to be filtered.
// - data_in_length : Number of samples in `data_in` to be filtered.
// This must be at least
// |delay| + |factor|*(|out_vector_length|-1) + 1)
// `delay` + `factor`*(`out_vector_length`-1) + 1)
// - data_out_length : Number of down sampled samples desired
// - coefficients : Filter coefficients (in Q12)
// - coefficients_length: Number of coefficients (order+1)
@ -647,7 +677,7 @@ void WebRtcSpl_FilterARFastQ12(const int16_t* data_in,
// - delay : Delay of filter (compensated for in out_vector)
// Output:
// - data_out : Filtered samples
// Return value : 0 if OK, -1 if |in_vector| is too short
// Return value : 0 if OK, -1 if `in_vector` is too short
typedef int (*DownsampleFast)(const int16_t* data_in,
size_t data_in_length,
int16_t* data_out,
@ -693,12 +723,12 @@ int WebRtcSpl_DownsampleFast_mips(const int16_t* data_in,
int WebRtcSpl_ComplexFFT(int16_t vector[], int stages, int mode);
int WebRtcSpl_ComplexIFFT(int16_t vector[], int stages, int mode);
// Treat a 16-bit complex data buffer |complex_data| as an array of 32-bit
// Treat a 16-bit complex data buffer `complex_data` as an array of 32-bit
// values, and swap elements whose indexes are bit-reverses of each other.
//
// Input:
// - complex_data : Complex data buffer containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary
// - complex_data : Complex data buffer containing 2^`stages` real
// elements interleaved with 2^`stages` imaginary
// elements: [Re Im Re Im Re Im....]
// - stages : Number of FFT stages. Must be at least 3 and at most
// 10, since the table WebRtcSpl_kSinTable1024[] is 1024
@ -908,7 +938,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// WebRtcSpl_AddSatW32(...)
//
// Returns the result of a saturated 16-bit, respectively 32-bit, addition of
// the numbers specified by the |var1| and |var2| parameters.
// the numbers specified by the `var1` and `var2` parameters.
//
// Input:
// - var1 : Input variable 1
@ -922,7 +952,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// WebRtcSpl_SubSatW32(...)
//
// Returns the result of a saturated 16-bit, respectively 32-bit, subtraction
// of the numbers specified by the |var1| and |var2| parameters.
// of the numbers specified by the `var1` and `var2` parameters.
//
// Input:
// - var1 : Input variable 1
@ -935,61 +965,61 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// WebRtcSpl_GetSizeInBits(...)
//
// Returns the # of bits that are needed at the most to represent the number
// specified by the |value| parameter.
// specified by the `value` parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bits needed to represent |value|
// Return value : Number of bits needed to represent `value`
//
//
// WebRtcSpl_NormW32(...)
//
// Norm returns the # of left shifts required to 32-bit normalize the 32-bit
// signed number specified by the |value| parameter.
// signed number specified by the `value` parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 32-bit normalize |value|
// Return value : Number of bit shifts needed to 32-bit normalize `value`
//
//
// WebRtcSpl_NormW16(...)
//
// Norm returns the # of left shifts required to 16-bit normalize the 16-bit
// signed number specified by the |value| parameter.
// signed number specified by the `value` parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 16-bit normalize |value|
// Return value : Number of bit shifts needed to 16-bit normalize `value`
//
//
// WebRtcSpl_NormU32(...)
//
// Norm returns the # of left shifts required to 32-bit normalize the unsigned
// 32-bit number specified by the |value| parameter.
// 32-bit number specified by the `value` parameter.
//
// Input:
// - value : Input value
//
// Return value : Number of bit shifts needed to 32-bit normalize |value|
// Return value : Number of bit shifts needed to 32-bit normalize `value`
//
//
// WebRtcSpl_GetScalingSquare(...)
//
// Returns the # of bits required to scale the samples specified in the
// |in_vector| parameter so that, if the squares of the samples are added the
// # of times specified by the |times| parameter, the 32-bit addition will not
// `in_vector` parameter so that, if the squares of the samples are added the
// # of times specified by the `times` parameter, the 32-bit addition will not
// overflow (result in int32_t).
//
// Input:
// - in_vector : Input vector to check scaling on
// - in_vector_length : Samples in |in_vector|
// - in_vector_length : Samples in `in_vector`
// - times : Number of additions to be performed
//
// Return value : Number of right bit shifts needed to avoid
@ -999,8 +1029,8 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_MemSetW16(...)
//
// Sets all the values in the int16_t vector |vector| of length
// |vector_length| to the specified value |set_value|
// Sets all the values in the int16_t vector `vector` of length
// `vector_length` to the specified value `set_value`
//
// Input:
// - vector : Pointer to the int16_t vector
@ -1011,8 +1041,8 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_MemSetW32(...)
//
// Sets all the values in the int32_t vector |vector| of length
// |vector_length| to the specified value |set_value|
// Sets all the values in the int32_t vector `vector` of length
// `vector_length` to the specified value `set_value`
//
// Input:
// - vector : Pointer to the int32_t vector
@ -1023,34 +1053,34 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_MemCpyReversedOrder(...)
//
// Copies all the values from the source int16_t vector |in_vector| to a
// destination int16_t vector |out_vector|. It is done in reversed order,
// meaning that the first sample of |in_vector| is copied to the last sample of
// the |out_vector|. The procedure continues until the last sample of
// |in_vector| has been copied to the first sample of |out_vector|. This
// Copies all the values from the source int16_t vector `in_vector` to a
// destination int16_t vector `out_vector`. It is done in reversed order,
// meaning that the first sample of `in_vector` is copied to the last sample of
// the `out_vector`. The procedure continues until the last sample of
// `in_vector` has been copied to the first sample of `out_vector`. This
// creates a reversed vector. Used in e.g. prediction in iLBC.
//
// Input:
// - in_vector : Pointer to the first sample in a int16_t vector
// of length |length|
// of length `length`
// - vector_length : Number of elements to copy
//
// Output:
// - out_vector : Pointer to the last sample in a int16_t vector
// of length |length|
// of length `length`
//
//
// WebRtcSpl_CopyFromEndW16(...)
//
// Copies the rightmost |samples| of |in_vector| (of length |in_vector_length|)
// to the vector |out_vector|.
// Copies the rightmost `samples` of `in_vector` (of length `in_vector_length`)
// to the vector `out_vector`.
//
// Input:
// - in_vector : Input vector
// - in_vector_length : Number of samples in |in_vector|
// - in_vector_length : Number of samples in `in_vector`
// - samples : Number of samples to extract (from right side)
// from |in_vector|
// from `in_vector`
//
// Output:
// - out_vector : Vector with the requested samples
@ -1085,7 +1115,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// Output:
// - out_vector : Pointer to the result vector (can be the same as
// |in_vector|)
// `in_vector`)
//
//
@ -1103,7 +1133,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// Output:
// - out_vector : Pointer to the result vector (can be the same as
// |in_vector|)
// `in_vector`)
//
//
@ -1115,11 +1145,11 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// Input:
// - in_vector : Input vector
// - gain : Scaling gain
// - vector_length : Elements in the |in_vector|
// - vector_length : Elements in the `in_vector`
// - right_shifts : Number of right bit shifts applied
//
// Output:
// - out_vector : Output vector (can be the same as |in_vector|)
// - out_vector : Output vector (can be the same as `in_vector`)
//
//
@ -1131,11 +1161,11 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// Input:
// - in_vector : Input vector
// - gain : Scaling gain
// - vector_length : Elements in the |in_vector|
// - vector_length : Elements in the `in_vector`
// - right_shifts : Number of right bit shifts applied
//
// Output:
// - out_vector : Output vector (can be the same as |in_vector|)
// - out_vector : Output vector (can be the same as `in_vector`)
//
//
@ -1170,10 +1200,10 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// should be set to the last value in the vector
// - right_shifts : Number of right bit shift to be applied after the
// multiplication
// - vector_length : Number of elements in |in_vector|
// - vector_length : Number of elements in `in_vector`
//
// Output:
// - out_vector : Output vector (can be same as |in_vector|)
// - out_vector : Output vector (can be same as `in_vector`)
//
//
@ -1187,10 +1217,10 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - window : Window vector.
// - right_shifts : Number of right bit shift to be applied after the
// multiplication
// - vector_length : Number of elements in |in_vector|
// - vector_length : Number of elements in `in_vector`
//
// Output:
// - out_vector : Output vector (can be same as |in_vector|)
// - out_vector : Output vector (can be same as `in_vector`)
//
//
@ -1204,16 +1234,16 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - in_vector2 : Input vector 2
// - right_shifts : Number of right bit shift to be applied after the
// multiplication
// - vector_length : Number of elements in |in_vector1| and |in_vector2|
// - vector_length : Number of elements in `in_vector1` and `in_vector2`
//
// Output:
// - out_vector : Output vector (can be same as |in_vector1|)
// - out_vector : Output vector (can be same as `in_vector1`)
//
//
// WebRtcSpl_AddAffineVectorToVector(...)
//
// Adds an affine transformed vector to another vector |out_vector|, i.e,
// Adds an affine transformed vector to another vector `out_vector`, i.e,
// performs
// out_vector[k] += (in_vector[k]*gain+add_constant)>>right_shifts
//
@ -1223,7 +1253,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
// but others can be used as well
// - right_shifts : Number of right bit shifts (0-16)
// - vector_length : Number of samples in |in_vector| and |out_vector|
// - vector_length : Number of samples in `in_vector` and `out_vector`
//
// Output:
// - out_vector : Vector with the output
@ -1241,7 +1271,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - add_constant : Constant value to add (usually 1<<(right_shifts-1),
// but others can be used as well
// - right_shifts : Number of right bit shifts (0-16)
// - vector_length : Number of samples in |in_vector| and |out_vector|
// - vector_length : Number of samples in `in_vector` and `out_vector`
//
// Output:
// - out_vector : Vector with the output
@ -1304,15 +1334,15 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - vector : Vector with the uniform values
// - seed : Updated seed value
//
// Return value : Number of samples in vector, i.e., |vector_length|
// Return value : Number of samples in vector, i.e., `vector_length`
//
//
// WebRtcSpl_Sqrt(...)
//
// Returns the square root of the input value |value|. The precision of this
// Returns the square root of the input value `value`. The precision of this
// function is integer precision, i.e., sqrt(8) gives 2 as answer.
// If |value| is a negative number then 0 is returned.
// If `value` is a negative number then 0 is returned.
//
// Algorithm:
//
@ -1332,9 +1362,9 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_DivU32U16(...)
//
// Divides a uint32_t |num| by a uint16_t |den|.
// Divides a uint32_t `num` by a uint16_t `den`.
//
// If |den|==0, (uint32_t)0xFFFFFFFF is returned.
// If `den`==0, (uint32_t)0xFFFFFFFF is returned.
//
// Input:
// - num : Numerator
@ -1347,9 +1377,9 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_DivW32W16(...)
//
// Divides a int32_t |num| by a int16_t |den|.
// Divides a int32_t `num` by a int16_t `den`.
//
// If |den|==0, (int32_t)0x7FFFFFFF is returned.
// If `den`==0, (int32_t)0x7FFFFFFF is returned.
//
// Input:
// - num : Numerator
@ -1362,10 +1392,10 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_DivW32W16ResW16(...)
//
// Divides a int32_t |num| by a int16_t |den|, assuming that the
// Divides a int32_t `num` by a int16_t `den`, assuming that the
// result is less than 32768, otherwise an unpredictable result will occur.
//
// If |den|==0, (int16_t)0x7FFF is returned.
// If `den`==0, (int16_t)0x7FFF is returned.
//
// Input:
// - num : Numerator
@ -1378,7 +1408,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_DivResultInQ31(...)
//
// Divides a int32_t |num| by a int16_t |den|, assuming that the
// Divides a int32_t `num` by a int16_t `den`, assuming that the
// absolute value of the denominator is larger than the numerator, otherwise
// an unpredictable result will occur.
//
@ -1392,7 +1422,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// WebRtcSpl_DivW32HiLow(...)
//
// Divides a int32_t |num| by a denominator in hi, low format. The
// Divides a int32_t `num` by a denominator in hi, low format. The
// absolute value of the denominator has to be larger (or equal to) the
// numerator.
//
@ -1417,7 +1447,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - scale_factor : Number of left bit shifts needed to get the physical
// energy value, i.e, to get the Q0 value
//
// Return value : Energy value in Q(-|scale_factor|)
// Return value : Energy value in Q(-`scale_factor`)
//
//
@ -1428,15 +1458,15 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// Input:
// - ar_coef : AR-coefficient vector (values in Q12),
// ar_coef[0] must be 4096.
// - ar_coef_length : Number of coefficients in |ar_coef|.
// - ar_coef_length : Number of coefficients in `ar_coef`.
// - in_vector : Vector to be filtered.
// - in_vector_length : Number of samples in |in_vector|.
// - in_vector_length : Number of samples in `in_vector`.
// - filter_state : Current state (higher part) of the filter.
// - filter_state_length : Length (in samples) of |filter_state|.
// - filter_state_length : Length (in samples) of `filter_state`.
// - filter_state_low : Current state (lower part) of the filter.
// - filter_state_low_length : Length (in samples) of |filter_state_low|.
// - filter_state_low_length : Length (in samples) of `filter_state_low`.
// - out_vector_low_length : Maximum length (in samples) of
// |out_vector_low|.
// `out_vector_low`.
//
// Output:
// - filter_state : Updated state (upper part) vector.
@ -1446,7 +1476,7 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - out_vector_low : Vector containing the lower part of the
// filtered values.
//
// Return value : Number of samples in the |out_vector|.
// Return value : Number of samples in the `out_vector`.
//
//
@ -1454,11 +1484,11 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// Complex Inverse FFT
//
// Computes an inverse complex 2^|stages|-point FFT on the input vector, which
// Computes an inverse complex 2^`stages`-point FFT on the input vector, which
// is in bit-reversed order. The original content of the vector is destroyed in
// the process, since the input is overwritten by the output, normal-ordered,
// FFT vector. With X as the input complex vector, y as the output complex
// vector and with M = 2^|stages|, the following is computed:
// vector and with M = 2^`stages`, the following is computed:
//
// M-1
// y(k) = sum[X(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
@ -1468,8 +1498,8 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// decimation-in-time algorithm with radix-2 butterfly technique.
//
// Input:
// - vector : In pointer to complex vector containing 2^|stages|
// real elements interleaved with 2^|stages| imaginary
// - vector : In pointer to complex vector containing 2^`stages`
// real elements interleaved with 2^`stages` imaginary
// elements.
// [ReImReImReIm....]
// The elements are in Q(-scale) domain, see more on Return
@ -1488,10 +1518,10 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// - vector : Out pointer to the FFT vector (the same as input).
//
// Return Value : The scale value that tells the number of left bit shifts
// that the elements in the |vector| should be shifted with
// that the elements in the `vector` should be shifted with
// in order to get Q0 values, i.e. the physically correct
// values. The scale parameter is always 0 or positive,
// except if N>1024 (|stages|>10), which returns a scale
// except if N>1024 (`stages`>10), which returns a scale
// value of -1, indicating error.
//
@ -1500,11 +1530,11 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
//
// Complex FFT
//
// Computes a complex 2^|stages|-point FFT on the input vector, which is in
// Computes a complex 2^`stages`-point FFT on the input vector, which is in
// bit-reversed order. The original content of the vector is destroyed in
// the process, since the input is overwritten by the output, normal-ordered,
// FFT vector. With x as the input complex vector, Y as the output complex
// vector and with M = 2^|stages|, the following is computed:
// vector and with M = 2^`stages`, the following is computed:
//
// M-1
// Y(k) = 1/M * sum[x(i)*[cos(2*pi*i*k/M) + j*sin(2*pi*i*k/M)]]
@ -1519,8 +1549,8 @@ void WebRtcSpl_SynthesisQMF(const int16_t* low_band,
// accuracy.
//
// Input:
// - vector : In pointer to complex vector containing 2^|stages| real
// elements interleaved with 2^|stages| imaginary elements.
// - vector : In pointer to complex vector containing 2^`stages` real
// elements interleaved with 2^`stages` imaginary elements.
// [ReImReImReIm....]
// The output is in the Q0 domain.
//

View File

@ -14,6 +14,8 @@
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_H_
#include <stdint.h>
#include "rtc_base/compile_assert_c.h"
extern const int8_t kWebRtcSpl_CountLeadingZeros32_Table[64];

View File

@ -15,6 +15,8 @@
#ifndef COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_
#define COMMON_AUDIO_SIGNAL_PROCESSING_INCLUDE_SPL_INL_ARMV7_H_
#include <stdint.h>
/* TODO(kma): Replace some assembly code with GCC intrinsics
* (e.g. __builtin_clz).
*/

View File

@ -25,6 +25,7 @@
*/
#include <stdlib.h>
#include <limits.h>
#include "rtc_base/checks.h"
#include "common_audio/signal_processing/include/signal_processing_library.h"
@ -67,7 +68,8 @@ int32_t WebRtcSpl_MaxAbsValueW32C(const int32_t* vector, size_t length) {
RTC_DCHECK_GT(length, 0);
for (i = 0; i < length; i++) {
absolute = abs((int)vector[i]);
absolute =
(vector[i] != INT_MIN) ? abs((int)vector[i]) : INT_MAX + (uint32_t)1;
if (absolute > maximum) {
maximum = absolute;
}
@ -155,6 +157,15 @@ size_t WebRtcSpl_MaxAbsIndexW16(const int16_t* vector, size_t length) {
return index;
}
// Returns the element of `vector` that has the largest magnitude, with its
// sign preserved. The minimum and maximum are obtained from
// WebRtcSpl_MinMaxW16(); the minimum is returned when it is strictly further
// from zero than the maximum (or when both are equal), otherwise the maximum
// is returned. A tie in magnitude (min == -max) therefore yields the maximum.
int16_t WebRtcSpl_MaxAbsElementW16(const int16_t* vector, size_t length) {
  int16_t smallest, largest;
  WebRtcSpl_MinMaxW16(vector, length, &smallest, &largest);

  // `-largest` is evaluated after integer promotion to int, so negating
  // the most negative int16_t value cannot overflow here.
  if (smallest != largest && smallest >= -largest) {
    return largest;
  }
  return smallest;
}
// Index of maximum value in a word16 vector.
size_t WebRtcSpl_MaxIndexW16(const int16_t* vector, size_t length) {
size_t i = 0, index = 0;
@ -222,3 +233,26 @@ size_t WebRtcSpl_MinIndexW32(const int32_t* vector, size_t length) {
return index;
}
// Finds both the minimum and maximum elements in an array of 16-bit integers.
//
// Input:
//      - vector    : Pointer to the 16-bit input samples.
//      - length    : Number of samples in `vector`; asserted to be > 0 via
//                    RTC_DCHECK_GT.
//
// Output:
//      - min_val   : Receives the smallest element of `vector`.
//      - max_val   : Receives the largest element of `vector`.
//
// When WEBRTC_HAS_NEON is defined the work is delegated to
// WebRtcSpl_MinMaxW16Neon(); otherwise a plain scalar scan is used.
void WebRtcSpl_MinMaxW16(const int16_t* vector, size_t length,
                         int16_t* min_val, int16_t* max_val) {
#if defined(WEBRTC_HAS_NEON)
  // Call as a plain statement: ISO C does not allow `return <expr>;` in a
  // function returning void, even when <expr> itself has type void.
  WebRtcSpl_MinMaxW16Neon(vector, length, min_val, max_val);
#else
  int16_t minimum = WEBRTC_SPL_WORD16_MAX;
  int16_t maximum = WEBRTC_SPL_WORD16_MIN;
  size_t i = 0;

  RTC_DCHECK_GT(length, 0);

  for (i = 0; i < length; i++) {
    // Two independent tests (not else-if): given the initial sentinels, the
    // first sample can update both the minimum and the maximum.
    if (vector[i] < minimum)
      minimum = vector[i];
    if (vector[i] > maximum)
      maximum = vector[i];
  }
  *min_val = minimum;
  *max_val = maximum;
#endif
}

View File

@ -281,3 +281,53 @@ int32_t WebRtcSpl_MinValueW32Neon(const int32_t* vector, size_t length) {
return minimum;
}
// Finds both the minimum and maximum elements in an array of 16-bit integers.
void WebRtcSpl_MinMaxW16Neon(const int16_t* vector, size_t length,
int16_t* min_val, int16_t* max_val) {
int16_t minimum = WEBRTC_SPL_WORD16_MAX;
int16_t maximum = WEBRTC_SPL_WORD16_MIN;
size_t i = 0;
size_t residual = length & 0x7;
RTC_DCHECK_GT(length, 0);
const int16_t* p_start = vector;
int16x8_t min16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MAX);
int16x8_t max16x8 = vdupq_n_s16(WEBRTC_SPL_WORD16_MIN);
// First part, unroll the loop 8 times.
for (i = 0; i < length - residual; i += 8) {
int16x8_t in16x8 = vld1q_s16(p_start);
min16x8 = vminq_s16(min16x8, in16x8);
max16x8 = vmaxq_s16(max16x8, in16x8);
p_start += 8;
}
#if defined(WEBRTC_ARCH_ARM64)
minimum = vminvq_s16(min16x8);
maximum = vmaxvq_s16(max16x8);
#else
int16x4_t min16x4 = vmin_s16(vget_low_s16(min16x8), vget_high_s16(min16x8));
min16x4 = vpmin_s16(min16x4, min16x4);
min16x4 = vpmin_s16(min16x4, min16x4);
minimum = vget_lane_s16(min16x4, 0);
int16x4_t max16x4 = vmax_s16(vget_low_s16(max16x8), vget_high_s16(max16x8));
max16x4 = vpmax_s16(max16x4, max16x4);
max16x4 = vpmax_s16(max16x4, max16x4);
maximum = vget_lane_s16(max16x4, 0);
#endif
// Second part, do the remaining iterations (if any).
for (i = residual; i > 0; i--) {
if (*p_start < minimum)
minimum = *p_start;
if (*p_start > maximum)
maximum = *p_start;
p_start++;
}
*min_val = minimum;
*max_val = maximum;
}

View File

@ -41,35 +41,39 @@ static const uint16_t WebRtcSpl_kAllPassFilter2[3] = {21333, 49062, 63010};
//
// Output:
// - out_data : Output data sequence (Q10), length equal to
// |data_length|
// `data_length`
//
void WebRtcSpl_AllPassQMF(int32_t* in_data, size_t data_length,
int32_t* out_data, const uint16_t* filter_coefficients,
int32_t* filter_state)
static void WebRtcSpl_AllPassQMF(int32_t* in_data,
size_t data_length,
int32_t* out_data,
const uint16_t* filter_coefficients,
int32_t* filter_state)
{
// The procedure is to filter the input with three first order all pass filters
// (cascade operations).
// The procedure is to filter the input with three first order all pass
// filters (cascade operations).
//
// a_3 + q^-1 a_2 + q^-1 a_1 + q^-1
// y[n] = ----------- ----------- ----------- x[n]
// 1 + a_3q^-1 1 + a_2q^-1 1 + a_1q^-1
//
// The input vector |filter_coefficients| includes these three filter coefficients.
// The filter state contains the in_data state, in_data[-1], followed by
// the out_data state, out_data[-1]. This is repeated for each cascade.
// The first cascade filter will filter the |in_data| and store the output in
// |out_data|. The second will the take the |out_data| as input and make an
// intermediate storage in |in_data|, to save memory. The third, and final, cascade
// filter operation takes the |in_data| (which is the output from the previous cascade
// filter) and store the output in |out_data|.
// Note that the input vector values are changed during the process.
// The input vector `filter_coefficients` includes these three filter
// coefficients. The filter state contains the in_data state, in_data[-1],
// followed by the out_data state, out_data[-1]. This is repeated for each
// cascade. The first cascade filter will filter the `in_data` and store
// the output in `out_data`. The second will then take the `out_data` as
// input and make an intermediate storage in `in_data`, to save memory. The
// third, and final, cascade filter operation takes the `in_data` (which is
// the output from the previous cascade filter) and stores the output in
// `out_data`. Note that the input vector values are changed during the
// process.
size_t k;
int32_t diff;
// First all-pass cascade; filter from in_data to out_data.
// Let y_i[n] indicate the output of cascade filter i (with filter coefficient a_i) at
// vector position n. Then the final output will be y[n] = y_3[n]
// Let y_i[n] indicate the output of cascade filter i (with filter
// coefficient a_i) at vector position n. Then the final output will be
// y[n] = y_3[n]
// First loop, use the states stored in memory.
// "diff" should be safe from wrap around since max values are 2^25