webrtc-audio-processing/patches/0001-Fix-up-XMM-intrinsics-usage-on-MSVC.patch
Arun Raghavan a1ccd6c700 Track patches to WebRTC code in patches/
This should make it easier to not lose track during the next bump.
2024-12-30 14:05:31 -05:00

69 lines
2.5 KiB
Diff

From ad563b095cea13730ca95e77d50e352ea9e344a9 Mon Sep 17 00:00:00 2001
From: Arun Raghavan <arun@asymptotic.io>
Date: Fri, 15 Dec 2023 16:06:05 -0500
Subject: [PATCH] Fix up XMM intrinsics usage on MSVC
Reapplying 0a0050746bc20ef970b9f260d485e4367c7ba854 after M131 bump.
---
.../aec3/matched_filter_avx2.cc | 30 ++++++++++++-------
1 file changed, 20 insertions(+), 10 deletions(-)
diff --git a/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc b/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
index 8c2ffcb..65a1b76 100644
--- a/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
+++ b/webrtc/modules/audio_processing/aec3/matched_filter_avx2.cc
@@ -13,6 +13,16 @@
#include "modules/audio_processing/aec3/matched_filter.h"
#include "rtc_base/checks.h"
+#ifdef _MSC_VER
+// Visual Studio: read a float lane through the m128_f32 / m256_f32 member
+#define LOOKUP_M128(v, i) v.m128_f32[i]
+#define LOOKUP_M256(v, i) v.m256_f32[i]
+#else
+// GCC/Clang: the vector value can be indexed directly with []
+#define LOOKUP_M128(v, i) v[i]
+#define LOOKUP_M256(v, i) v[i]
+#endif
+
namespace webrtc {
namespace aec3 {
@@ -81,14 +91,14 @@ void MatchedFilterCore_AccumulatedError_AVX2(
s_inst_256_8 = _mm256_mul_ps(h_k_8, x_k_8);
s_inst_hadd_256 = _mm256_hadd_ps(s_inst_256, s_inst_256_8);
s_inst_hadd_256 = _mm256_hadd_ps(s_inst_hadd_256, s_inst_hadd_256);
- s_acum += s_inst_hadd_256[0];
- e_128[0] = s_acum - y[i];
- s_acum += s_inst_hadd_256[4];
- e_128[1] = s_acum - y[i];
- s_acum += s_inst_hadd_256[1];
- e_128[2] = s_acum - y[i];
- s_acum += s_inst_hadd_256[5];
- e_128[3] = s_acum - y[i];
+ s_acum += LOOKUP_M256(s_inst_hadd_256, 0);
+ LOOKUP_M128(e_128, 0) = s_acum - y[i];
+ s_acum += LOOKUP_M256(s_inst_hadd_256,4);
+ LOOKUP_M128(e_128, 1) = s_acum - y[i];
+ s_acum += LOOKUP_M256(s_inst_hadd_256,1);
+ LOOKUP_M128(e_128, 2) = s_acum - y[i];
+ s_acum += LOOKUP_M256(s_inst_hadd_256,5);
+ LOOKUP_M128(e_128, 3) = s_acum - y[i];
__m128 accumulated_error = _mm_load_ps(a_p);
accumulated_error = _mm_fmadd_ps(e_128, e_128, accumulated_error);
@@ -209,8 +219,8 @@ void MatchedFilterCore_AVX2(size_t x_start_index,
x2_sum_256 = _mm256_add_ps(x2_sum_256, x2_sum_256_8);
s_256 = _mm256_add_ps(s_256, s_256_8);
__m128 sum = hsum_ab(x2_sum_256, s_256);
- x2_sum += sum[0];
- s += sum[1];
+ x2_sum += LOOKUP_M128(sum, 0);
+ s += LOOKUP_M128(sum, 1);
// Compute the matched filter error.
float e = y[i] - s;
--
2.47.1