Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_16ic_s32f_deinterleave_real_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
56#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
57#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H
58
59#include <inttypes.h>
60#include <stdio.h>
61#include <volk/volk_common.h>
62
63#ifdef LV_HAVE_AVX2
64#include <immintrin.h>
65
66static inline void
67volk_16ic_s32f_deinterleave_real_32f_a_avx2(float* iBuffer,
68 const lv_16sc_t* complexVector,
69 const float scalar,
70 unsigned int num_points)
71{
72 float* iBufferPtr = iBuffer;
73
74 unsigned int number = 0;
75 const unsigned int eighthPoints = num_points / 8;
76
77 __m256 iFloatValue;
78
79 const float iScalar = 1.0 / scalar;
80 __m256 invScalar = _mm256_set1_ps(iScalar);
81 __m256i complexVal, iIntVal;
82 __m128i complexVal128;
83 int8_t* complexVectorPtr = (int8_t*)complexVector;
84
85 __m256i moveMask = _mm256_set_epi8(0x80,
86 0x80,
87 0x80,
88 0x80,
89 0x80,
90 0x80,
91 0x80,
92 0x80,
93 13,
94 12,
95 9,
96 8,
97 5,
98 4,
99 1,
100 0,
101 0x80,
102 0x80,
103 0x80,
104 0x80,
105 0x80,
106 0x80,
107 0x80,
108 0x80,
109 13,
110 12,
111 9,
112 8,
113 5,
114 4,
115 1,
116 0);
117
118 for (; number < eighthPoints; number++) {
119 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
120 complexVectorPtr += 32;
121 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
122 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
123 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
124
125 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
126 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
127
128 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
129
130 _mm256_store_ps(iBufferPtr, iFloatValue);
131
132 iBufferPtr += 8;
133 }
134
135 number = eighthPoints * 8;
136 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
137 for (; number < num_points; number++) {
138 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
139 sixteenTComplexVectorPtr++;
140 }
141}
142#endif /* LV_HAVE_AVX2 */
143
144#ifdef LV_HAVE_SSE4_1
145#include <smmintrin.h>
146
147static inline void
148volk_16ic_s32f_deinterleave_real_32f_a_sse4_1(float* iBuffer,
149 const lv_16sc_t* complexVector,
150 const float scalar,
151 unsigned int num_points)
152{
153 float* iBufferPtr = iBuffer;
154
155 unsigned int number = 0;
156 const unsigned int quarterPoints = num_points / 4;
157
158 __m128 iFloatValue;
159
160 const float iScalar = 1.0 / scalar;
161 __m128 invScalar = _mm_set_ps1(iScalar);
162 __m128i complexVal, iIntVal;
163 int8_t* complexVectorPtr = (int8_t*)complexVector;
164
165 __m128i moveMask = _mm_set_epi8(
166 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
167
168 for (; number < quarterPoints; number++) {
169 complexVal = _mm_load_si128((__m128i*)complexVectorPtr);
170 complexVectorPtr += 16;
171 complexVal = _mm_shuffle_epi8(complexVal, moveMask);
172
173 iIntVal = _mm_cvtepi16_epi32(complexVal);
174 iFloatValue = _mm_cvtepi32_ps(iIntVal);
175
176 iFloatValue = _mm_mul_ps(iFloatValue, invScalar);
177
178 _mm_store_ps(iBufferPtr, iFloatValue);
179
180 iBufferPtr += 4;
181 }
182
183 number = quarterPoints * 4;
184 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
185 for (; number < num_points; number++) {
186 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
187 sixteenTComplexVectorPtr++;
188 }
189}
190#endif /* LV_HAVE_SSE4_1 */
191
192#ifdef LV_HAVE_SSE
193#include <xmmintrin.h>
194
195static inline void
197 const lv_16sc_t* complexVector,
198 const float scalar,
199 unsigned int num_points)
200{
201 float* iBufferPtr = iBuffer;
202
203 unsigned int number = 0;
204 const unsigned int quarterPoints = num_points / 4;
205 __m128 iValue;
206
207 const float iScalar = 1.0 / scalar;
208 __m128 invScalar = _mm_set_ps1(iScalar);
209 int16_t* complexVectorPtr = (int16_t*)complexVector;
210
211 __VOLK_ATTR_ALIGNED(16) float floatBuffer[4];
212
213 for (; number < quarterPoints; number++) {
214 floatBuffer[0] = (float)(*complexVectorPtr);
215 complexVectorPtr += 2;
216 floatBuffer[1] = (float)(*complexVectorPtr);
217 complexVectorPtr += 2;
218 floatBuffer[2] = (float)(*complexVectorPtr);
219 complexVectorPtr += 2;
220 floatBuffer[3] = (float)(*complexVectorPtr);
221 complexVectorPtr += 2;
222
223 iValue = _mm_load_ps(floatBuffer);
224
225 iValue = _mm_mul_ps(iValue, invScalar);
226
227 _mm_store_ps(iBufferPtr, iValue);
228
229 iBufferPtr += 4;
230 }
231
232 number = quarterPoints * 4;
233 complexVectorPtr = (int16_t*)&complexVector[number];
234 for (; number < num_points; number++) {
235 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * iScalar;
236 complexVectorPtr++;
237 }
238}
239#endif /* LV_HAVE_SSE */
240
241#ifdef LV_HAVE_GENERIC
242static inline void
244 const lv_16sc_t* complexVector,
245 const float scalar,
246 unsigned int num_points)
247{
248 unsigned int number = 0;
249 const int16_t* complexVectorPtr = (const int16_t*)complexVector;
250 float* iBufferPtr = iBuffer;
251 const float invScalar = 1.0 / scalar;
252 for (number = 0; number < num_points; number++) {
253 *iBufferPtr++ = ((float)(*complexVectorPtr++)) * invScalar;
254 complexVectorPtr++;
255 }
256}
257#endif /* LV_HAVE_GENERIC */
258
259
260#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_a_H */
261
262#ifndef INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
263#define INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H
264
265#include <inttypes.h>
266#include <stdio.h>
267#include <volk/volk_common.h>
268
269#ifdef LV_HAVE_AVX2
270#include <immintrin.h>
271
272static inline void
273volk_16ic_s32f_deinterleave_real_32f_u_avx2(float* iBuffer,
274 const lv_16sc_t* complexVector,
275 const float scalar,
276 unsigned int num_points)
277{
278 float* iBufferPtr = iBuffer;
279
280 unsigned int number = 0;
281 const unsigned int eighthPoints = num_points / 8;
282
283 __m256 iFloatValue;
284
285 const float iScalar = 1.0 / scalar;
286 __m256 invScalar = _mm256_set1_ps(iScalar);
287 __m256i complexVal, iIntVal;
288 __m128i complexVal128;
289 int8_t* complexVectorPtr = (int8_t*)complexVector;
290
291 __m256i moveMask = _mm256_set_epi8(0x80,
292 0x80,
293 0x80,
294 0x80,
295 0x80,
296 0x80,
297 0x80,
298 0x80,
299 13,
300 12,
301 9,
302 8,
303 5,
304 4,
305 1,
306 0,
307 0x80,
308 0x80,
309 0x80,
310 0x80,
311 0x80,
312 0x80,
313 0x80,
314 0x80,
315 13,
316 12,
317 9,
318 8,
319 5,
320 4,
321 1,
322 0);
323
324 for (; number < eighthPoints; number++) {
325 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
326 complexVectorPtr += 32;
327 complexVal = _mm256_shuffle_epi8(complexVal, moveMask);
328 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
329 complexVal128 = _mm256_extracti128_si256(complexVal, 0);
330
331 iIntVal = _mm256_cvtepi16_epi32(complexVal128);
332 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
333
334 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
335
336 _mm256_storeu_ps(iBufferPtr, iFloatValue);
337
338 iBufferPtr += 8;
339 }
340
341 number = eighthPoints * 8;
342 int16_t* sixteenTComplexVectorPtr = (int16_t*)&complexVector[number];
343 for (; number < num_points; number++) {
344 *iBufferPtr++ = ((float)(*sixteenTComplexVectorPtr++)) * iScalar;
345 sixteenTComplexVectorPtr++;
346 }
347}
348#endif /* LV_HAVE_AVX2 */
349
350#endif /* INCLUDED_volk_16ic_s32f_deinterleave_real_32f_u_H */
static void volk_16ic_s32f_deinterleave_real_32f_generic(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:243
static void volk_16ic_s32f_deinterleave_real_32f_a_sse(float *iBuffer, const lv_16sc_t *complexVector, const float scalar, unsigned int num_points)
Definition: volk_16ic_s32f_deinterleave_real_32f.h:196
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56
short complex lv_16sc_t
Definition: volk_complex.h:62