Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32fc_magnitude_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
71#ifndef INCLUDED_volk_32fc_magnitude_32f_u_H
72#define INCLUDED_volk_32fc_magnitude_32f_u_H
73
74#include <inttypes.h>
75#include <math.h>
76#include <stdio.h>
77
78#ifdef LV_HAVE_AVX
79#include <immintrin.h>
81
82static inline void volk_32fc_magnitude_32f_u_avx(float* magnitudeVector,
83 const lv_32fc_t* complexVector,
84 unsigned int num_points)
85{
86 unsigned int number = 0;
87 const unsigned int eighthPoints = num_points / 8;
88
89 const float* complexVectorPtr = (float*)complexVector;
90 float* magnitudeVectorPtr = magnitudeVector;
91
92 __m256 cplxValue1, cplxValue2, result;
93
94 for (; number < eighthPoints; number++) {
95 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
96 cplxValue2 = _mm256_loadu_ps(complexVectorPtr + 8);
97 result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
98 _mm256_storeu_ps(magnitudeVectorPtr, result);
99
100 complexVectorPtr += 16;
101 magnitudeVectorPtr += 8;
102 }
103
104 number = eighthPoints * 8;
105 for (; number < num_points; number++) {
106 float val1Real = *complexVectorPtr++;
107 float val1Imag = *complexVectorPtr++;
108 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
109 }
110}
111#endif /* LV_HAVE_AVX */
112
113#ifdef LV_HAVE_SSE3
114#include <pmmintrin.h>
116
117static inline void volk_32fc_magnitude_32f_u_sse3(float* magnitudeVector,
118 const lv_32fc_t* complexVector,
119 unsigned int num_points)
120{
121 unsigned int number = 0;
122 const unsigned int quarterPoints = num_points / 4;
123
124 const float* complexVectorPtr = (float*)complexVector;
125 float* magnitudeVectorPtr = magnitudeVector;
126
127 __m128 cplxValue1, cplxValue2, result;
128 for (; number < quarterPoints; number++) {
129 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
130 complexVectorPtr += 4;
131
132 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
133 complexVectorPtr += 4;
134
135 result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
136
137 _mm_storeu_ps(magnitudeVectorPtr, result);
138 magnitudeVectorPtr += 4;
139 }
140
141 number = quarterPoints * 4;
142 for (; number < num_points; number++) {
143 float val1Real = *complexVectorPtr++;
144 float val1Imag = *complexVectorPtr++;
145 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
146 }
147}
148#endif /* LV_HAVE_SSE3 */
149
150
151#ifdef LV_HAVE_SSE
153#include <xmmintrin.h>
154
155static inline void volk_32fc_magnitude_32f_u_sse(float* magnitudeVector,
156 const lv_32fc_t* complexVector,
157 unsigned int num_points)
158{
159 unsigned int number = 0;
160 const unsigned int quarterPoints = num_points / 4;
161
162 const float* complexVectorPtr = (float*)complexVector;
163 float* magnitudeVectorPtr = magnitudeVector;
164
165 __m128 cplxValue1, cplxValue2, result;
166
167 for (; number < quarterPoints; number++) {
168 cplxValue1 = _mm_loadu_ps(complexVectorPtr);
169 complexVectorPtr += 4;
170
171 cplxValue2 = _mm_loadu_ps(complexVectorPtr);
172 complexVectorPtr += 4;
173
174 result = _mm_magnitude_ps(cplxValue1, cplxValue2);
175 _mm_storeu_ps(magnitudeVectorPtr, result);
176 magnitudeVectorPtr += 4;
177 }
178
179 number = quarterPoints * 4;
180 for (; number < num_points; number++) {
181 float val1Real = *complexVectorPtr++;
182 float val1Imag = *complexVectorPtr++;
183 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
184 }
185}
186#endif /* LV_HAVE_SSE */
187
188
189#ifdef LV_HAVE_GENERIC
190
191static inline void volk_32fc_magnitude_32f_generic(float* magnitudeVector,
192 const lv_32fc_t* complexVector,
193 unsigned int num_points)
194{
195 const float* complexVectorPtr = (float*)complexVector;
196 float* magnitudeVectorPtr = magnitudeVector;
197 unsigned int number = 0;
198 for (number = 0; number < num_points; number++) {
199 const float real = *complexVectorPtr++;
200 const float imag = *complexVectorPtr++;
201 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
202 }
203}
204#endif /* LV_HAVE_GENERIC */
205
206
207#endif /* INCLUDED_volk_32fc_magnitude_32f_u_H */
208#ifndef INCLUDED_volk_32fc_magnitude_32f_a_H
209#define INCLUDED_volk_32fc_magnitude_32f_a_H
210
211#include <inttypes.h>
212#include <math.h>
213#include <stdio.h>
214
215#ifdef LV_HAVE_AVX
216#include <immintrin.h>
218
219static inline void volk_32fc_magnitude_32f_a_avx(float* magnitudeVector,
220 const lv_32fc_t* complexVector,
221 unsigned int num_points)
222{
223 unsigned int number = 0;
224 const unsigned int eighthPoints = num_points / 8;
225
226 const float* complexVectorPtr = (float*)complexVector;
227 float* magnitudeVectorPtr = magnitudeVector;
228
229 __m256 cplxValue1, cplxValue2, result;
230 for (; number < eighthPoints; number++) {
231 cplxValue1 = _mm256_load_ps(complexVectorPtr);
232 complexVectorPtr += 8;
233
234 cplxValue2 = _mm256_load_ps(complexVectorPtr);
235 complexVectorPtr += 8;
236
237 result = _mm256_magnitude_ps(cplxValue1, cplxValue2);
238 _mm256_store_ps(magnitudeVectorPtr, result);
239 magnitudeVectorPtr += 8;
240 }
241
242 number = eighthPoints * 8;
243 for (; number < num_points; number++) {
244 float val1Real = *complexVectorPtr++;
245 float val1Imag = *complexVectorPtr++;
246 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
247 }
248}
249#endif /* LV_HAVE_AVX */
250
251#ifdef LV_HAVE_SSE3
252#include <pmmintrin.h>
254
255static inline void volk_32fc_magnitude_32f_a_sse3(float* magnitudeVector,
256 const lv_32fc_t* complexVector,
257 unsigned int num_points)
258{
259 unsigned int number = 0;
260 const unsigned int quarterPoints = num_points / 4;
261
262 const float* complexVectorPtr = (float*)complexVector;
263 float* magnitudeVectorPtr = magnitudeVector;
264
265 __m128 cplxValue1, cplxValue2, result;
266 for (; number < quarterPoints; number++) {
267 cplxValue1 = _mm_load_ps(complexVectorPtr);
268 complexVectorPtr += 4;
269
270 cplxValue2 = _mm_load_ps(complexVectorPtr);
271 complexVectorPtr += 4;
272
273 result = _mm_magnitude_ps_sse3(cplxValue1, cplxValue2);
274 _mm_store_ps(magnitudeVectorPtr, result);
275 magnitudeVectorPtr += 4;
276 }
277
278 number = quarterPoints * 4;
279 for (; number < num_points; number++) {
280 float val1Real = *complexVectorPtr++;
281 float val1Imag = *complexVectorPtr++;
282 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
283 }
284}
285#endif /* LV_HAVE_SSE3 */
286
287#ifdef LV_HAVE_SSE
289#include <xmmintrin.h>
290
291static inline void volk_32fc_magnitude_32f_a_sse(float* magnitudeVector,
292 const lv_32fc_t* complexVector,
293 unsigned int num_points)
294{
295 unsigned int number = 0;
296 const unsigned int quarterPoints = num_points / 4;
297
298 const float* complexVectorPtr = (float*)complexVector;
299 float* magnitudeVectorPtr = magnitudeVector;
300
301 __m128 cplxValue1, cplxValue2, result;
302 for (; number < quarterPoints; number++) {
303 cplxValue1 = _mm_load_ps(complexVectorPtr);
304 complexVectorPtr += 4;
305
306 cplxValue2 = _mm_load_ps(complexVectorPtr);
307 complexVectorPtr += 4;
308
309 result = _mm_magnitude_ps(cplxValue1, cplxValue2);
310 _mm_store_ps(magnitudeVectorPtr, result);
311 magnitudeVectorPtr += 4;
312 }
313
314 number = quarterPoints * 4;
315 for (; number < num_points; number++) {
316 float val1Real = *complexVectorPtr++;
317 float val1Imag = *complexVectorPtr++;
318 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
319 }
320}
321#endif /* LV_HAVE_SSE */
322
323
324#ifdef LV_HAVE_GENERIC
325
326static inline void volk_32fc_magnitude_32f_a_generic(float* magnitudeVector,
327 const lv_32fc_t* complexVector,
328 unsigned int num_points)
329{
330 const float* complexVectorPtr = (float*)complexVector;
331 float* magnitudeVectorPtr = magnitudeVector;
332 unsigned int number = 0;
333 for (number = 0; number < num_points; number++) {
334 const float real = *complexVectorPtr++;
335 const float imag = *complexVectorPtr++;
336 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
337 }
338}
339#endif /* LV_HAVE_GENERIC */
340
341
342#ifdef LV_HAVE_NEON
343#include <arm_neon.h>
344
/*!
  \brief Computes the magnitude of each complex sample using NEON.

  Four complex samples are handled per vector pass. The vector path does not
  call a square root: it takes the reciprocal-square-root estimate of
  re^2 + im^2 and then the reciprocal estimate of that, so its results come
  from estimate instructions. The remaining num_points % 4 samples are
  computed with sqrtf.

  \param magnitudeVector Output buffer receiving num_points floats
  \param complexVector Input buffer holding num_points complex samples
  \param num_points Number of complex samples to process
*/
static inline void volk_32fc_magnitude_32f_neon(float* magnitudeVector,
                                                const lv_32fc_t* complexVector,
                                                unsigned int num_points)
{
    const float* in = (const float*)complexVector;
    float* out = magnitudeVector;

    unsigned int vectorPasses = num_points / 4;
    unsigned int leftover = num_points % 4;

    for (; vectorPasses != 0; vectorPasses--) {
        /* De-interleaving load: val[0] holds the reals, val[1] the imaginaries. */
        const float32x4x2_t iq = vld2q_f32(in);

        const float32x4_t realSquared = vmulq_f32(iq.val[0], iq.val[0]);
        const float32x4_t power = vmlaq_f32(realSquared, iq.val[1], iq.val[1]);

        /* sqrt(x) obtained as 1 / (1 / sqrt(x)) from the two estimate ops. */
        const float32x4_t inverseRoot = vrsqrteq_f32(power);
        vst1q_f32(out, vrecpeq_f32(inverseRoot));

        in += 8;
        out += 4;
    }

    /* Scalar tail for the samples that do not fill a whole vector pass. */
    for (; leftover != 0; leftover--) {
        const float re = in[0];
        const float im = in[1];
        in += 2;
        *out++ = sqrtf((re * re) + (im * im));
    }
}
375#endif /* LV_HAVE_NEON */
376
377
378#ifdef LV_HAVE_NEON
/*!
  \brief Calculates the magnitude of the complexVector and stores the results in the
  magnitudeVector.

  The NEON path avoids a square root. For each sample it forms the weighted sum
  a * max(|re|, |im|) + b * min(|re|, |im|), where (a, b) is (0.84, 0.561) when
  min > 0.4142135 * max and (0.99, 0.197) otherwise. The result is therefore an
  approximation of the true magnitude.
  NOTE(review): the error bound of this approximation is not stated in this
  file -- confirm before relying on a specific accuracy.

  The remaining num_points % 4 samples are computed exactly with sqrtf, so
  tail outputs can differ slightly from vector-path outputs.

  \param magnitudeVector Output buffer receiving num_points floats
  \param complexVector Input buffer holding num_points complex samples
  \param num_points Number of complex samples to process
*/
static inline void volk_32fc_magnitude_32f_neon_fancy_sweet(
    float* magnitudeVector, const lv_32fc_t* complexVector, unsigned int num_points)
{
    unsigned int number;
    unsigned int quarter_points = num_points / 4;
    const float* complexVectorPtr = (const float*)complexVector;
    float* magnitudeVectorPtr = magnitudeVector;

    const float threshold = 0.4142135;

    float32x4_t a_vec, b_vec, a_high, a_low, b_high, b_low;
    a_high = vdupq_n_f32(0.84);
    b_high = vdupq_n_f32(0.561);
    a_low = vdupq_n_f32(0.99);
    b_low = vdupq_n_f32(0.197);

    uint32x4_t comp0, comp1;

    float32x4x2_t complex_vec;
    float32x4_t min_vec, max_vec, magnitude_vec;
    float32x4_t real_abs, imag_abs;
    for (number = 0; number < quarter_points; number++) {
        /* De-interleaving load: val[0] holds the reals, val[1] the imaginaries. */
        complex_vec = vld2q_f32(complexVectorPtr);

        real_abs = vabsq_f32(complex_vec.val[0]);
        imag_abs = vabsq_f32(complex_vec.val[1]);

        min_vec = vminq_f32(real_abs, imag_abs);
        max_vec = vmaxq_f32(real_abs, imag_abs);

        // effective branch to choose coefficient pair: each lane of comp0/comp1
        // is all-ones where its comparison holds and all-zeros otherwise.
        comp0 = vcgtq_f32(min_vec, vmulq_n_f32(max_vec, threshold));
        comp1 = vcleq_f32(min_vec, vmulq_n_f32(max_vec, threshold));

        // mask each coefficient with its lane mask and add the two masked
        // values; the bit patterns are reinterpreted, never converted.
        a_vec = vreinterpretq_f32_s32(
            vaddq_s32(vandq_s32(vreinterpretq_s32_u32(comp0),
                                vreinterpretq_s32_f32(a_high)),
                      vandq_s32(vreinterpretq_s32_u32(comp1),
                                vreinterpretq_s32_f32(a_low))));
        b_vec = vreinterpretq_f32_s32(
            vaddq_s32(vandq_s32(vreinterpretq_s32_u32(comp0),
                                vreinterpretq_s32_f32(b_high)),
                      vandq_s32(vreinterpretq_s32_u32(comp1),
                                vreinterpretq_s32_f32(b_low))));

        // coefficients chosen, do the weighted sum
        min_vec = vmulq_f32(min_vec, b_vec);
        max_vec = vmulq_f32(max_vec, a_vec);

        magnitude_vec = vaddq_f32(min_vec, max_vec);
        vst1q_f32(magnitudeVectorPtr, magnitude_vec);

        complexVectorPtr += 8;
        magnitudeVectorPtr += 4;
    }

    /* Scalar tail for the samples that do not fill a whole vector pass. */
    for (number = quarter_points * 4; number < num_points; number++) {
        const float real = *complexVectorPtr++;
        const float imag = *complexVectorPtr++;
        *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
    }
}
452#endif /* LV_HAVE_NEON */
453
454
455#ifdef LV_HAVE_ORC
456
457extern void volk_32fc_magnitude_32f_a_orc_impl(float* magnitudeVector,
458 const lv_32fc_t* complexVector,
459 unsigned int num_points);
460
461static inline void volk_32fc_magnitude_32f_u_orc(float* magnitudeVector,
462 const lv_32fc_t* complexVector,
463 unsigned int num_points)
464{
465 volk_32fc_magnitude_32f_a_orc_impl(magnitudeVector, complexVector, num_points);
466}
467#endif /* LV_HAVE_ORC */
468
469
470#endif /* INCLUDED_volk_32fc_magnitude_32f_a_H */
static void volk_32fc_magnitude_32f_a_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:326
static void volk_32fc_magnitude_32f_u_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:155
static void volk_32fc_magnitude_32f_u_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:117
static void volk_32fc_magnitude_32f_u_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:82
static void volk_32fc_magnitude_32f_neon_fancy_sweet(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Calculates the magnitude of the complexVector and stores the results in the magnitudeVector.
Definition: volk_32fc_magnitude_32f.h:395
static void volk_32fc_magnitude_32f_generic(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:191
static void volk_32fc_magnitude_32f_a_avx(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:219
static void volk_32fc_magnitude_32f_neon(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:345
static void volk_32fc_magnitude_32f_a_sse3(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:255
static void volk_32fc_magnitude_32f_a_sse(float *magnitudeVector, const lv_32fc_t *complexVector, unsigned int num_points)
Definition: volk_32fc_magnitude_32f.h:291
static __m256 _mm256_magnitude_ps(__m256 cplxValue1, __m256 cplxValue2)
Definition: volk_avx_intrinsics.h:83
float complex lv_32fc_t
Definition: volk_complex.h:65
static __m128 _mm_magnitude_ps_sse3(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse3_intrinsics.h:58
static __m128 _mm_magnitude_ps(__m128 cplxValue1, __m128 cplxValue2)
Definition: volk_sse_intrinsics.h:44