Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32i_s32f_convert_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
64#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
65#define INCLUDED_volk_32i_s32f_convert_32f_u_H
66
67#include <inttypes.h>
68#include <stdio.h>
69
70#ifdef LV_HAVE_AVX512F
71#include <immintrin.h>
72
73static inline void volk_32i_s32f_convert_32f_u_avx512f(float* outputVector,
74 const int32_t* inputVector,
75 const float scalar,
76 unsigned int num_points)
77{
78 unsigned int number = 0;
79 const unsigned int onesixteenthPoints = num_points / 16;
80
81 float* outputVectorPtr = outputVector;
82 const float iScalar = 1.0 / scalar;
83 __m512 invScalar = _mm512_set1_ps(iScalar);
84 int32_t* inputPtr = (int32_t*)inputVector;
85 __m512i inputVal;
86 __m512 ret;
87
88 for (; number < onesixteenthPoints; number++) {
89 // Load the values
90 inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
91
92 ret = _mm512_cvtepi32_ps(inputVal);
93 ret = _mm512_mul_ps(ret, invScalar);
94
95 _mm512_storeu_ps(outputVectorPtr, ret);
96
97 outputVectorPtr += 16;
98 inputPtr += 16;
99 }
100
101 number = onesixteenthPoints * 16;
102 for (; number < num_points; number++) {
103 outputVector[number] = ((float)(inputVector[number])) * iScalar;
104 }
105}
106#endif /* LV_HAVE_AVX512F */
107
108
109#ifdef LV_HAVE_AVX2
110#include <immintrin.h>
111
112static inline void volk_32i_s32f_convert_32f_u_avx2(float* outputVector,
113 const int32_t* inputVector,
114 const float scalar,
115 unsigned int num_points)
116{
117 unsigned int number = 0;
118 const unsigned int oneEightPoints = num_points / 8;
119
120 float* outputVectorPtr = outputVector;
121 const float iScalar = 1.0 / scalar;
122 __m256 invScalar = _mm256_set1_ps(iScalar);
123 int32_t* inputPtr = (int32_t*)inputVector;
124 __m256i inputVal;
125 __m256 ret;
126
127 for (; number < oneEightPoints; number++) {
128 // Load the 4 values
129 inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
130
131 ret = _mm256_cvtepi32_ps(inputVal);
132 ret = _mm256_mul_ps(ret, invScalar);
133
134 _mm256_storeu_ps(outputVectorPtr, ret);
135
136 outputVectorPtr += 8;
137 inputPtr += 8;
138 }
139
140 number = oneEightPoints * 8;
141 for (; number < num_points; number++) {
142 outputVector[number] = ((float)(inputVector[number])) * iScalar;
143 }
144}
145#endif /* LV_HAVE_AVX2 */
146
147
148#ifdef LV_HAVE_SSE2
149#include <emmintrin.h>
150
/*
 * SSE2 kernel using unaligned loads and stores.
 *
 * Converts num_points 32-bit signed integers from inputVector to float,
 * multiplies each by the reciprocal of scalar, and writes the results to
 * outputVector.
 */
static inline void volk_32i_s32f_convert_32f_u_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int vectorCount = num_points / 4;
    const unsigned int vectorizedPoints = vectorCount * 4;
    /* The division happens once; every element is multiplied by the result. */
    const float reciprocal = (float)(1.0 / scalar);
    const __m128 reciprocalVec = _mm_set_ps1(reciprocal);
    unsigned int i;

    /* Main loop: 4 elements per iteration. */
    for (i = 0; i < vectorizedPoints; i += 4) {
        const __m128i ints = _mm_loadu_si128((const __m128i*)(inputVector + i));
        const __m128 scaled = _mm_mul_ps(_mm_cvtepi32_ps(ints), reciprocalVec);

        _mm_storeu_ps(outputVector + i, scaled);
    }

    /* Scalar loop for the num_points % 4 elements left over. */
    for (i = vectorizedPoints; i < num_points; ++i) {
        outputVector[i] = (float)inputVector[i] * reciprocal;
    }
}
184#endif /* LV_HAVE_SSE2 */
185
186
187#ifdef LV_HAVE_GENERIC
188
/*
 * Portable scalar kernel.
 *
 * Converts num_points 32-bit signed integers from inputVector to float,
 * multiplies each by the reciprocal of scalar, and writes the results to
 * outputVector.
 */
static inline void volk_32i_s32f_convert_32f_generic(float* outputVector,
                                                     const int32_t* inputVector,
                                                     const float scalar,
                                                     unsigned int num_points)
{
    /* The division happens once; every element is multiplied by the result. */
    const float reciprocal = (float)(1.0 / scalar);
    unsigned int i;

    for (i = 0; i < num_points; ++i) {
        outputVector[i] = (float)inputVector[i] * reciprocal;
    }
}
203#endif /* LV_HAVE_GENERIC */
204
205#endif /* INCLUDED_volk_32i_s32f_convert_32f_u_H */
206
207
208#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
209#define INCLUDED_volk_32i_s32f_convert_32f_a_H
210
211#include <inttypes.h>
212#include <stdio.h>
213
214#ifdef LV_HAVE_AVX512F
215#include <immintrin.h>
216
217static inline void volk_32i_s32f_convert_32f_a_avx512f(float* outputVector,
218 const int32_t* inputVector,
219 const float scalar,
220 unsigned int num_points)
221{
222 unsigned int number = 0;
223 const unsigned int onesixteenthPoints = num_points / 16;
224
225 float* outputVectorPtr = outputVector;
226 const float iScalar = 1.0 / scalar;
227 __m512 invScalar = _mm512_set1_ps(iScalar);
228 int32_t* inputPtr = (int32_t*)inputVector;
229 __m512i inputVal;
230 __m512 ret;
231
232 for (; number < onesixteenthPoints; number++) {
233 // Load the values
234 inputVal = _mm512_load_si512((__m512i*)inputPtr);
235
236 ret = _mm512_cvtepi32_ps(inputVal);
237 ret = _mm512_mul_ps(ret, invScalar);
238
239 _mm512_store_ps(outputVectorPtr, ret);
240
241 outputVectorPtr += 16;
242 inputPtr += 16;
243 }
244
245 number = onesixteenthPoints * 16;
246 for (; number < num_points; number++) {
247 outputVector[number] = ((float)(inputVector[number])) * iScalar;
248 }
249}
250#endif /* LV_HAVE_AVX512F */
251
252#ifdef LV_HAVE_AVX2
253#include <immintrin.h>
254
255static inline void volk_32i_s32f_convert_32f_a_avx2(float* outputVector,
256 const int32_t* inputVector,
257 const float scalar,
258 unsigned int num_points)
259{
260 unsigned int number = 0;
261 const unsigned int oneEightPoints = num_points / 8;
262
263 float* outputVectorPtr = outputVector;
264 const float iScalar = 1.0 / scalar;
265 __m256 invScalar = _mm256_set1_ps(iScalar);
266 int32_t* inputPtr = (int32_t*)inputVector;
267 __m256i inputVal;
268 __m256 ret;
269
270 for (; number < oneEightPoints; number++) {
271 // Load the 4 values
272 inputVal = _mm256_load_si256((__m256i*)inputPtr);
273
274 ret = _mm256_cvtepi32_ps(inputVal);
275 ret = _mm256_mul_ps(ret, invScalar);
276
277 _mm256_store_ps(outputVectorPtr, ret);
278
279 outputVectorPtr += 8;
280 inputPtr += 8;
281 }
282
283 number = oneEightPoints * 8;
284 for (; number < num_points; number++) {
285 outputVector[number] = ((float)(inputVector[number])) * iScalar;
286 }
287}
288#endif /* LV_HAVE_AVX2 */
289
290
291#ifdef LV_HAVE_SSE2
292#include <emmintrin.h>
293
/*
 * SSE2 kernel using aligned loads and stores.
 *
 * Converts num_points 32-bit signed integers from inputVector to float,
 * multiplies each by the reciprocal of scalar, and writes the results to
 * outputVector. The aligned load/store intrinsics used here require both
 * buffers to start on a 16-byte boundary.
 */
static inline void volk_32i_s32f_convert_32f_a_sse2(float* outputVector,
                                                    const int32_t* inputVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int vectorCount = num_points / 4;
    const unsigned int vectorizedPoints = vectorCount * 4;
    /* The division happens once; every element is multiplied by the result. */
    const float reciprocal = (float)(1.0 / scalar);
    const __m128 reciprocalVec = _mm_set_ps1(reciprocal);
    unsigned int i;

    /* Main loop: 4 elements per iteration. */
    for (i = 0; i < vectorizedPoints; i += 4) {
        const __m128i ints = _mm_load_si128((const __m128i*)(inputVector + i));
        const __m128 scaled = _mm_mul_ps(_mm_cvtepi32_ps(ints), reciprocalVec);

        _mm_store_ps(outputVector + i, scaled);
    }

    /* Scalar loop for the num_points % 4 elements left over. */
    for (i = vectorizedPoints; i < num_points; ++i) {
        outputVector[i] = (float)inputVector[i] * reciprocal;
    }
}
327#endif /* LV_HAVE_SSE2 */
328
329
330#ifdef LV_HAVE_GENERIC
331
/*
 * Portable scalar kernel registered under the aligned (_a_) name.
 *
 * Converts num_points 32-bit signed integers from inputVector to float,
 * multiplies each by the reciprocal of scalar, and writes the results to
 * outputVector. Only plain element accesses are used, so nothing in this
 * implementation depends on buffer alignment.
 */
static inline void volk_32i_s32f_convert_32f_a_generic(float* outputVector,
                                                       const int32_t* inputVector,
                                                       const float scalar,
                                                       unsigned int num_points)
{
    /* The division happens once; every element is multiplied by the result. */
    const float reciprocal = (float)(1.0 / scalar);
    unsigned int i;

    for (i = 0; i < num_points; ++i) {
        outputVector[i] = (float)inputVector[i] * reciprocal;
    }
}
346#endif /* LV_HAVE_GENERIC */
347
348
349#endif /* INCLUDED_volk_32i_s32f_convert_32f_a_H */
static void volk_32i_s32f_convert_32f_a_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:332
static void volk_32i_s32f_convert_32f_u_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:151
static void volk_32i_s32f_convert_32f_a_sse2(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:294
static void volk_32i_s32f_convert_32f_generic(float *outputVector, const int32_t *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32i_s32f_convert_32f.h:189