Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_8ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
55#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
56#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H
57
58#include <inttypes.h>
59#include <stdio.h>
60#include <volk/volk_common.h>
61
62#ifdef LV_HAVE_AVX2
63#include <immintrin.h>
64
65static inline void
66volk_8ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
67 const lv_8sc_t* complexVector,
68 const float scalar,
69 unsigned int num_points)
70{
71 float* iBufferPtr = iBuffer;
72
73 unsigned int number = 0;
74 const unsigned int sixteenthPoints = num_points / 16;
75 __m256 iFloatValue;
76
77 const float iScalar = 1.0 / scalar;
78 __m256 invScalar = _mm256_set1_ps(iScalar);
79 __m256i complexVal, iIntVal;
80 int8_t* complexVectorPtr = (int8_t*)complexVector;
81
82 __m256i moveMask = _mm256_set_epi8(0x80,
83 0x80,
84 0x80,
85 0x80,
86 0x80,
87 0x80,
88 0x80,
89 0x80,
90 14,
91 12,
92 10,
93 8,
94 6,
95 4,
96 2,
97 0,
98 0x80,
99 0x80,
100 0x80,
101 0x80,
102 0x80,
103 0x80,
104 0x80,
105 0x80,
106 14,
107 12,
108 10,
109 8,
110 6,
111 4,
112 2,
113 0);
114 for (; number < sixteenthPoints; number++) {
115 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
116 complexVectorPtr += 32;
117 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
118
119 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
120 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
121 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
122 _mm256_store_ps(iBufferPtr, iFloatValue);
123 iBufferPtr += 8;
124
125 complexVal = _mm256_permute4x64_epi64(complexVal, 0b11000110);
126 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(complexVal));
127 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
128 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
129 _mm256_store_ps(iBufferPtr, iFloatValue);
130 iBufferPtr += 8;
131 }
132
133 number = sixteenthPoints * 16;
134 for (; number < num_points; number++) {
135 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
136 complexVectorPtr++;
137 }
138}
139#endif /* LV_HAVE_AVX2 */
140
141
142#ifdef LV_HAVE_SSE4_1
143#include <smmintrin.h>
144
145static inline void
146volk_8ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
147 const lv_8sc_t* complexVector,
148 const float scalar,
149 unsigned int num_points)
150{
151 float* iBufferPtr = iBuffer;
152
153 unsigned int number = 0;
154 const unsigned int eighthPoints = num_points / 8;
155 __m128 iFloatValue;
156
157 const float iScalar = 1.0 / scalar;
158 __m128 invScalar = _mm_set_ps1(iScalar);
159 __m128i complexVal, iIntVal;
160 int8_t* complexVectorPtr = (int8_t*)complexVector;
161
162 __m128i moveMask = _mm_set_epi8(
163 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
164
165 for (; number < eighthPoints; number++) {
166 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
167 complexVectorPtr += 16;
168 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
169
170 iIntVal = _mm_cvtepi8_epi32(complexVal);
171 iFloatValue = _mm_cvtepi32_ps(iIntVal);
172
173 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
174
175 _mm_store_ps(iBufferPtr, iFloatValue);
176
177 iBufferPtr += 4;
178
179 complexVal = _mm_srli_si128(complexVal, 4);
180 iIntVal = _mm_cvtepi8_epi32(complexVal);
181 iFloatValue = _mm_cvtepi32_ps(iIntVal);
182
183 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
184
185 _mm_store_ps(iBufferPtr, iFloatValue);
186
187 iBufferPtr += 4;
188 }
189
190 number = eighthPoints * 8;
191 for (; number < num_points; number++) {
192 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
193 complexVectorPtr++;
194 }
195}
196#endif /* LV_HAVE_SSE4_1 */
197
198
199#ifdef LV_HAVE_SSE
200#include <xmmintrin.h>
201
202static inline void
204 const lv_8sc_t* complexVector,
205 const float scalar,
206 unsigned int num_points)
207{
208 float* iBufferPtr = iBuffer;
209
210 unsigned int number = 0;
211 const unsigned int quarterPoints = num_points / 4;
212 __m128 iValue;
213
214 const float iScalar = 1.0 / scalar;
215 __m128 invScalar = _mm_set_ps1(iScalar);
216 int8_t* complexVectorPtr = (int8_t*)complexVector;
217
218 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
219
220 for (; number < quarterPoints; number++) {
221 floatBuffer[0] = (float)(*complexVectorPtr);
222 complexVectorPtr += 2;
223 floatBuffer[1] = (float)(*complexVectorPtr);
224 complexVectorPtr += 2;
225 floatBuffer[2] = (float)(*complexVectorPtr);
226 complexVectorPtr += 2;
227 floatBuffer[3] = (float)(*complexVectorPtr);
228 complexVectorPtr += 2;
229
230 iValue = _mm_load_ps(floatBuffer);
231
232 iValue = _mm_mul_ps(iValue, invScalar);
233
234 _mm_store_ps(iBufferPtr, iValue);
235
236 iBufferPtr += 4;
237 }
238
239 number = quarterPoints * 4;
240 for (; number < num_points; number++) {
241 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
242 complexVectorPtr++;
243 }
244}
245#endif /* LV_HAVE_SSE */
246
247
248#ifdef LV_HAVE_GENERIC
249
250static inline void
252 const lv_8sc_t* complexVector,
253 const float scalar,
254 unsigned int num_points)
255{
256 unsigned int number = 0;
257 const int8_t* complexVectorPtr = (const int8_t*)complexVector;
258 float* iBufferPtr = iBuffer;
259 const float invScalar = 1.0 / scalar;
260 for (number = 0; number < num_points; number++) {
261 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
262 complexVectorPtr++;
263 }
264}
265#endif /* LV_HAVE_GENERIC */
266
267
268#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_a_H */
269
270#ifndef INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
271#define INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H
272
273#include <inttypes.h>
274#include <stdio.h>
275#include <volk/volk_common.h>
276
277#ifdef LV_HAVE_AVX2
278#include <immintrin.h>
279
280static inline void
281volk_8ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
282 const lv_8sc_t* complexVector,
283 const float scalar,
284 unsigned int num_points)
285{
286 float* iBufferPtr = iBuffer;
287
288 unsigned int number = 0;
289 const unsigned int sixteenthPoints = num_points / 16;
290 __m256 iFloatValue;
291
292 const float iScalar = 1.0 / scalar;
293 __m256 invScalar = _mm256_set1_ps(iScalar);
294 __m256i complexVal, iIntVal;
295 __m128i hcomplexVal;
296 int8_t* complexVectorPtr = (int8_t*)complexVector;
297
298 __m256i moveMask = _mm256_set_epi8(0x80,
299 0x80,
300 0x80,
301 0x80,
302 0x80,
303 0x80,
304 0x80,
305 0x80,
306 14,
307 12,
308 10,
309 8,
310 6,
311 4,
312 2,
313 0,
314 0x80,
315 0x80,
316 0x80,
317 0x80,
318 0x80,
319 0x80,
320 0x80,
321 0x80,
322 14,
323 12,
324 10,
325 8,
326 6,
327 4,
328 2,
329 0);
330
331 for (; number < sixteenthPoints; number++) {
332 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
333 complexVectorPtr += 32;
334 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
335
336 hcomplexVal = _mm256_extracti128_si256(complexVal, 0);
337 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
338 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
339
340 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
341
342 _mm256_storeu_ps(iBufferPtr, iFloatValue);
343
344 iBufferPtr += 8;
345
346 hcomplexVal = _mm256_extracti128_si256(complexVal, 1);
347 iIntVal = _mm256_cvtepi8_epi32(hcomplexVal);
348 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
349
350 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
351
352 _mm256_storeu_ps(iBufferPtr, iFloatValue);
353
354 iBufferPtr += 8;
355 }
356
357 number = sixteenthPoints * 16;
358 for (; number < num_points; number++) {
359 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
360 complexVectorPtr++;
361 }
362}
363#endif /* LV_HAVE_AVX2 */
364
365
366#endif /* INCLUDED_volk_8ic_s32f_deinterleave_real_32f_u_H */
static void volk_8ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:251
static void volk_8ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_8sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_8ic_s32f_deinterleave_real_32f.h:203
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
char complex lv_8sc_t
Provide typedefs and operators for all complex types in C and C++.
Definition: volk_complex.h:61