Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_16ic_s32f_magnitude_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
55#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
56#define INCLUDED_volk_16ic_s32f_magnitude_32f_a_H
57
58#include <inttypes.h>
59#include <math.h>
60#include <stdio.h>
61#include <volk/volk_common.h>
62
63#ifdef LV_HAVE_AVX2
64#include <immintrin.h>
65
66static inline void volk_16ic_s32f_magnitude_32f_a_avx2(float* magnitudeVector,
67 const lv_16sc_t* complexVector,
68 const float scalar,
69 unsigned int num_points)
70{
71 unsigned int number = 0;
72 const unsigned int eighthPoints = num_points / 8;
73
74 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
75 float* magnitudeVectorPtr = magnitudeVector;
76
77 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
78
79 __m256 cplxValue1, cplxValue2, result;
80 __m256i int1, int2;
81 __m128i short1, short2;
82 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
83
84 for (; number < eighthPoints; number++) {
85
86 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
87 complexVectorPtr += 16;
88 short1 = _mm256_extracti128_si256(int1, 0);
89 short2 = _mm256_extracti128_si256(int1, 1);
90
91 int1 = _mm256_cvtepi16_epi32(short1);
92 int2 = _mm256_cvtepi16_epi32(short2);
93 cplxValue1 = _mm256_cvtepi32_ps(int1);
94 cplxValue2 = _mm256_cvtepi32_ps(int2);
95
96 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
97 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
98
99 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
100 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
101
102 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
103 result = _mm256_permutevar8x32_ps(result, idx);
104
105 result = _mm256_sqrt_ps(result); // Square root the values
106
107 _mm256_store_ps(magnitudeVectorPtr, result);
108
109 magnitudeVectorPtr += 8;
110 }
111
112 number = eighthPoints * 8;
113 magnitudeVectorPtr = &magnitudeVector[number];
114 complexVectorPtr = (const int16_t*)&complexVector[number];
115 for (; number < num_points; number++) {
116 float val1Real = (float)(*complexVectorPtr++) / scalar;
117 float val1Imag = (float)(*complexVectorPtr++) / scalar;
118 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
119 }
120}
121#endif /* LV_HAVE_AVX2 */
122
123
124#ifdef LV_HAVE_SSE3
125#include <pmmintrin.h>
126
127static inline void volk_16ic_s32f_magnitude_32f_a_sse3(float* magnitudeVector,
128 const lv_16sc_t* complexVector,
129 const float scalar,
130 unsigned int num_points)
131{
132 unsigned int number = 0;
133 const unsigned int quarterPoints = num_points / 4;
134
135 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
136 float* magnitudeVectorPtr = magnitudeVector;
137
138 __m128 invScalar = _mm_set_ps1(1.0 / scalar);
139
140 __m128 cplxValue1, cplxValue2, result;
141
142 __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
143
144 for (; number < quarterPoints; number++) {
145
146 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
147 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
148 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
149 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
150
151 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
152 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
153 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
154 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
155
156 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
157 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
158
159 complexVectorPtr += 8;
160
161 cplxValue1 = _mm_mul_ps(cplxValue1, invScalar);
162 cplxValue2 = _mm_mul_ps(cplxValue2, invScalar);
163
164 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
165 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
166
167 result = _mm_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
168
169 result = _mm_sqrt_ps(result); // Square root the values
170
171 _mm_store_ps(magnitudeVectorPtr, result);
172
173 magnitudeVectorPtr += 4;
174 }
175
176 number = quarterPoints * 4;
177 magnitudeVectorPtr = &magnitudeVector[number];
178 complexVectorPtr = (const int16_t*)&complexVector[number];
179 for (; number < num_points; number++) {
180 float val1Real = (float)(*complexVectorPtr++) / scalar;
181 float val1Imag = (float)(*complexVectorPtr++) / scalar;
182 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
183 }
184}
185#endif /* LV_HAVE_SSE3 */
186
187#ifdef LV_HAVE_SSE
188#include <xmmintrin.h>
189
190static inline void volk_16ic_s32f_magnitude_32f_a_sse(float* magnitudeVector,
191 const lv_16sc_t* complexVector,
192 const float scalar,
193 unsigned int num_points)
194{
195 unsigned int number = 0;
196 const unsigned int quarterPoints = num_points / 4;
197
198 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
199 float* magnitudeVectorPtr = magnitudeVector;
200
201 const float iScalar = 1.0 / scalar;
202 __m128 invScalar = _mm_set_ps1(iScalar);
203
204 __m128 cplxValue1, cplxValue2, result, re, im;
205
206 __VOLK_ATTR_ALIGNED(16) float inputFloatBuffer[8];
207
208 for (; number < quarterPoints; number++) {
209 inputFloatBuffer[0] = (float)(complexVectorPtr[0]);
210 inputFloatBuffer[1] = (float)(complexVectorPtr[1]);
211 inputFloatBuffer[2] = (float)(complexVectorPtr[2]);
212 inputFloatBuffer[3] = (float)(complexVectorPtr[3]);
213
214 inputFloatBuffer[4] = (float)(complexVectorPtr[4]);
215 inputFloatBuffer[5] = (float)(complexVectorPtr[5]);
216 inputFloatBuffer[6] = (float)(complexVectorPtr[6]);
217 inputFloatBuffer[7] = (float)(complexVectorPtr[7]);
218
219 cplxValue1 = _mm_load_ps(&inputFloatBuffer[0]);
220 cplxValue2 = _mm_load_ps(&inputFloatBuffer[4]);
221
222 re = _mm_shuffle_ps(cplxValue1, cplxValue2, 0x88);
223 im = _mm_shuffle_ps(cplxValue1, cplxValue2, 0xdd);
224
225 complexVectorPtr += 8;
226
227 cplxValue1 = _mm_mul_ps(re, invScalar);
228 cplxValue2 = _mm_mul_ps(im, invScalar);
229
230 cplxValue1 = _mm_mul_ps(cplxValue1, cplxValue1); // Square the values
231 cplxValue2 = _mm_mul_ps(cplxValue2, cplxValue2); // Square the Values
232
233 result = _mm_add_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
234
235 result = _mm_sqrt_ps(result); // Square root the values
236
237 _mm_store_ps(magnitudeVectorPtr, result);
238
239 magnitudeVectorPtr += 4;
240 }
241
242 number = quarterPoints * 4;
243 magnitudeVectorPtr = &magnitudeVector[number];
244 complexVectorPtr = (const int16_t*)&complexVector[number];
245 for (; number < num_points; number++) {
246 float val1Real = (float)(*complexVectorPtr++) * iScalar;
247 float val1Imag = (float)(*complexVectorPtr++) * iScalar;
248 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
249 }
250}
251
252
253#endif /* LV_HAVE_SSE */
254
255#ifdef LV_HAVE_GENERIC
256
257static inline void volk_16ic_s32f_magnitude_32f_generic(float* magnitudeVector,
258 const lv_16sc_t* complexVector,
259 const float scalar,
260 unsigned int num_points)
261{
262 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
263 float* magnitudeVectorPtr = magnitudeVector;
264 unsigned int number = 0;
265 const float invScalar = 1.0 / scalar;
266 for (number = 0; number < num_points; number++) {
267 float real = ((float)(*complexVectorPtr++)) * invScalar;
268 float imag = ((float)(*complexVectorPtr++)) * invScalar;
269 *magnitudeVectorPtr++ = sqrtf((real * real) + (imag * imag));
270 }
271}
272#endif /* LV_HAVE_GENERIC */
273
274#ifdef LV_HAVE_ORC_DISABLED
275
276extern void volk_16ic_s32f_magnitude_32f_a_orc_impl(float* magnitudeVector,
277 const lv_16sc_t* complexVector,
278 const float scalar,
279 unsigned int num_points);
280
281static inline void volk_16ic_s32f_magnitude_32f_u_orc(float* magnitudeVector,
282 const lv_16sc_t* complexVector,
283 const float scalar,
284 unsigned int num_points)
285{
286 volk_16ic_s32f_magnitude_32f_a_orc_impl(
287 magnitudeVector, complexVector, scalar, num_points);
288}
289#endif /* LV_HAVE_ORC_DISABLED */
290
291
292#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_a_H */
293
294#ifndef INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
295#define INCLUDED_volk_16ic_s32f_magnitude_32f_u_H
296
297#include <inttypes.h>
298#include <math.h>
299#include <stdio.h>
300#include <volk/volk_common.h>
301
302#ifdef LV_HAVE_AVX2
303#include <immintrin.h>
304
305static inline void volk_16ic_s32f_magnitude_32f_u_avx2(float* magnitudeVector,
306 const lv_16sc_t* complexVector,
307 const float scalar,
308 unsigned int num_points)
309{
310 unsigned int number = 0;
311 const unsigned int eighthPoints = num_points / 8;
312
313 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
314 float* magnitudeVectorPtr = magnitudeVector;
315
316 __m256 invScalar = _mm256_set1_ps(1.0 / scalar);
317
318 __m256 cplxValue1, cplxValue2, result;
319 __m256i int1, int2;
320 __m128i short1, short2;
321 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
322
323 for (; number < eighthPoints; number++) {
324
325 int1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
326 complexVectorPtr += 16;
327 short1 = _mm256_extracti128_si256(int1, 0);
328 short2 = _mm256_extracti128_si256(int1, 1);
329
330 int1 = _mm256_cvtepi16_epi32(short1);
331 int2 = _mm256_cvtepi16_epi32(short2);
332 cplxValue1 = _mm256_cvtepi32_ps(int1);
333 cplxValue2 = _mm256_cvtepi32_ps(int2);
334
335 cplxValue1 = _mm256_mul_ps(cplxValue1, invScalar);
336 cplxValue2 = _mm256_mul_ps(cplxValue2, invScalar);
337
338 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1); // Square the values
339 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2); // Square the Values
340
341 result = _mm256_hadd_ps(cplxValue1, cplxValue2); // Add the I2 and Q2 values
342 result = _mm256_permutevar8x32_ps(result, idx);
343
344 result = _mm256_sqrt_ps(result); // Square root the values
345
346 _mm256_storeu_ps(magnitudeVectorPtr, result);
347
348 magnitudeVectorPtr += 8;
349 }
350
351 number = eighthPoints * 8;
352 magnitudeVectorPtr = &magnitudeVector[number];
353 complexVectorPtr = (const int16_t*)&complexVector[number];
354 for (; number < num_points; number++) {
355 float val1Real = (float)(*complexVectorPtr++) / scalar;
356 float val1Imag = (float)(*complexVectorPtr++) / scalar;
357 *magnitudeVectorPtr++ = sqrtf((val1Real * val1Real) + (val1Imag * val1Imag));
358 }
359}
360#endif /* LV_HAVE_AVX2 */
361
362#endif /* INCLUDED_volk_16ic_s32f_magnitude_32f_u_H */
static void volk_16ic_s32f_magnitude_32f_generic(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:257
static void volk_16ic_s32f_magnitude_32f_a_sse(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:190
static void volk_16ic_s32f_magnitude_32f_a_sse3(float *magnitudeVector, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_magnitude_32f.h:127
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
short complex lv_16sc_t
Definition: volk_complex.h:62