Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32f_s32f_convert_32i.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
70#ifndef INCLUDED_volk_32f_s32f_convert_32i_u_H
71#define INCLUDED_volk_32f_s32f_convert_32i_u_H
72
73#include <inttypes.h>
74#include <limits.h>
75#include <stdio.h>
76
77#ifdef LV_HAVE_AVX
78#include <immintrin.h>
79
/*!
 * \brief Multiplies each float by \p scalar and converts it to a saturated
 *        int32_t (AVX, unaligned loads/stores).
 *
 * Products at or above 2^31 saturate to INT_MAX; products at or below -2^31
 * saturate to INT_MIN. Everything else is rounded to an integer using the
 * current floating-point rounding mode.
 *
 * \param outputVector destination for num_points converted values
 * \param inputVector  source of num_points floats
 * \param scalar       factor applied to every input before conversion
 * \param num_points   number of elements to process
 */
static inline void volk_32f_s32f_convert_32i_u_avx(int32_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float* inputVectorPtr = inputVector;
    int32_t* outputVectorPtr = outputVector;

    /* (float)INT_MIN is exactly -2^31, but (float)INT_MAX rounds UP to +2^31,
     * which does not fit in an int32_t. */
    const float min_val = (float)INT_MIN;
    const float max_val = (float)INT_MAX;

    const __m256 vScalar = _mm256_set1_ps(scalar);
    const __m256 vmin_val = _mm256_set1_ps(min_val);
    const __m256 vmax_val = _mm256_set1_ps(max_val);

    for (; number < eighthPoints; number++) {
        const __m256 scaled = _mm256_mul_ps(_mm256_loadu_ps(inputVectorPtr), vScalar);
        /* All-ones in every lane whose product is >= 2^31. The compare is
         * ordered, so NaN lanes are never flagged. */
        const __m256 overflow = _mm256_cmp_ps(scaled, vmax_val, _CMP_GE_OQ);
        const __m256 clamped =
            _mm256_max_ps(_mm256_min_ps(scaled, vmax_val), vmin_val);
        /* Converting +2^31 yields 0x80000000 (INT_MIN). XOR-ing the flagged
         * lanes with all-ones turns that into 0x7FFFFFFF (INT_MAX). Plain AVX
         * has no 256-bit integer XOR, hence the float-domain XOR. */
        const __m256i converted = _mm256_castps_si256(_mm256_xor_ps(
            _mm256_castsi256_ps(_mm256_cvtps_epi32(clamped)), overflow));

        _mm256_storeu_si256((__m256i*)outputVectorPtr, converted);
        inputVectorPtr += 8;
        outputVectorPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const float r = inputVector[number] * scalar;
        if (r >= max_val)
            outputVector[number] = INT_MAX;
        else if (r > min_val)
            outputVector[number] = (int32_t)rintf(r);
        else
            outputVector[number] = INT_MIN; /* r <= -2^31, or r is NaN */
    }
}
124
125#endif /* LV_HAVE_AVX */
126
127#ifdef LV_HAVE_SSE2
128#include <emmintrin.h>
129
130static inline void volk_32f_s32f_convert_32i_u_sse2(int32_t* outputVector,
131 const float* inputVector,
132 const float scalar,
133 unsigned int num_points)
134{
135 unsigned int number = 0;
136
137 const unsigned int quarterPoints = num_points / 4;
138
139 const float* inputVectorPtr = (const float*)inputVector;
140 int32_t* outputVectorPtr = outputVector;
141
142 float min_val = INT_MIN;
143 float max_val = INT_MAX;
144 float r;
145
146 __m128 vScalar = _mm_set_ps1(scalar);
147 __m128 inputVal1;
148 __m128i intInputVal1;
149 __m128 vmin_val = _mm_set_ps1(min_val);
150 __m128 vmax_val = _mm_set_ps1(max_val);
151
152 for (; number < quarterPoints; number++) {
153 inputVal1 = _mm_loadu_ps(inputVectorPtr);
154 inputVectorPtr += 4;
155
156 inputVal1 =
157 _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
158 intInputVal1 = _mm_cvtps_epi32(inputVal1);
159
160 _mm_storeu_si128((__m128i*)outputVectorPtr, intInputVal1);
161 outputVectorPtr += 4;
162 }
163
164 number = quarterPoints * 4;
165 for (; number < num_points; number++) {
166 r = inputVector[number] * scalar;
167 if (r > max_val)
168 r = max_val;
169 else if (r < min_val)
170 r = min_val;
171 outputVector[number] = (int32_t)rintf(r);
172 }
173}
174
175#endif /* LV_HAVE_SSE2 */
176
177
178#ifdef LV_HAVE_SSE
179#include <xmmintrin.h>
180
181static inline void volk_32f_s32f_convert_32i_u_sse(int32_t* outputVector,
182 const float* inputVector,
183 const float scalar,
184 unsigned int num_points)
185{
186 unsigned int number = 0;
187
188 const unsigned int quarterPoints = num_points / 4;
189
190 const float* inputVectorPtr = (const float*)inputVector;
191 int32_t* outputVectorPtr = outputVector;
192
193 float min_val = INT_MIN;
194 float max_val = INT_MAX;
195 float r;
196
197 __m128 vScalar = _mm_set_ps1(scalar);
198 __m128 ret;
199 __m128 vmin_val = _mm_set_ps1(min_val);
200 __m128 vmax_val = _mm_set_ps1(max_val);
201
202 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
203
204 for (; number < quarterPoints; number++) {
205 ret = _mm_loadu_ps(inputVectorPtr);
206 inputVectorPtr += 4;
207
208 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
209
210 _mm_store_ps(outputFloatBuffer, ret);
211 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
212 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
213 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
214 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
215 }
216
217 number = quarterPoints * 4;
218 for (; number < num_points; number++) {
219 r = inputVector[number] * scalar;
220 if (r > max_val)
221 r = max_val;
222 else if (r < min_val)
223 r = min_val;
224 outputVector[number] = (int32_t)rintf(r);
225 }
226}
227
228#endif /* LV_HAVE_SSE */
229
230
231#ifdef LV_HAVE_GENERIC
232
233static inline void volk_32f_s32f_convert_32i_generic(int32_t* outputVector,
234 const float* inputVector,
235 const float scalar,
236 unsigned int num_points)
237{
238 int32_t* outputVectorPtr = outputVector;
239 const float* inputVectorPtr = inputVector;
240 unsigned int number = 0;
241 float min_val = INT_MIN;
242 float max_val = INT_MAX;
243 float r;
244
245 for (number = 0; number < num_points; number++) {
246 r = *inputVectorPtr++ * scalar;
247 if (r > max_val)
248 r = max_val;
249 else if (r < min_val)
250 r = min_val;
251 *outputVectorPtr++ = (int32_t)rintf(r);
252 }
253}
254
255#endif /* LV_HAVE_GENERIC */
256
257
258#endif /* INCLUDED_volk_32f_s32f_convert_32i_u_H */
259#ifndef INCLUDED_volk_32f_s32f_convert_32i_a_H
260#define INCLUDED_volk_32f_s32f_convert_32i_a_H
261
262#include <inttypes.h>
263#include <stdio.h>
264#include <volk/volk_common.h>
265
266#ifdef LV_HAVE_AVX
267#include <immintrin.h>
268
/*!
 * \brief Multiplies each float by \p scalar and converts it to a saturated
 *        int32_t (AVX, aligned loads/stores).
 *
 * Uses _mm256_load_ps / _mm256_store_si256, so both buffers must be 32-byte
 * aligned. Products at or above 2^31 saturate to INT_MAX; products at or
 * below -2^31 saturate to INT_MIN. Everything else is rounded to an integer
 * using the current floating-point rounding mode.
 *
 * \param outputVector 32-byte aligned destination for num_points values
 * \param inputVector  32-byte aligned source of num_points floats
 * \param scalar       factor applied to every input before conversion
 * \param num_points   number of elements to process
 */
static inline void volk_32f_s32f_convert_32i_a_avx(int32_t* outputVector,
                                                   const float* inputVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    const float* inputVectorPtr = inputVector;
    int32_t* outputVectorPtr = outputVector;

    /* (float)INT_MIN is exactly -2^31, but (float)INT_MAX rounds UP to +2^31,
     * which does not fit in an int32_t. */
    const float min_val = (float)INT_MIN;
    const float max_val = (float)INT_MAX;

    const __m256 vScalar = _mm256_set1_ps(scalar);
    const __m256 vmin_val = _mm256_set1_ps(min_val);
    const __m256 vmax_val = _mm256_set1_ps(max_val);

    for (; number < eighthPoints; number++) {
        const __m256 scaled = _mm256_mul_ps(_mm256_load_ps(inputVectorPtr), vScalar);
        /* All-ones in every lane whose product is >= 2^31. The compare is
         * ordered, so NaN lanes are never flagged. */
        const __m256 overflow = _mm256_cmp_ps(scaled, vmax_val, _CMP_GE_OQ);
        const __m256 clamped =
            _mm256_max_ps(_mm256_min_ps(scaled, vmax_val), vmin_val);
        /* Converting +2^31 yields 0x80000000 (INT_MIN). XOR-ing the flagged
         * lanes with all-ones turns that into 0x7FFFFFFF (INT_MAX). Plain AVX
         * has no 256-bit integer XOR, hence the float-domain XOR. */
        const __m256i converted = _mm256_castps_si256(_mm256_xor_ps(
            _mm256_castsi256_ps(_mm256_cvtps_epi32(clamped)), overflow));

        _mm256_store_si256((__m256i*)outputVectorPtr, converted);
        inputVectorPtr += 8;
        outputVectorPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const float r = inputVector[number] * scalar;
        if (r >= max_val)
            outputVector[number] = INT_MAX;
        else if (r > min_val)
            outputVector[number] = (int32_t)rintf(r);
        else
            outputVector[number] = INT_MIN; /* r <= -2^31, or r is NaN */
    }
}
313
314#endif /* LV_HAVE_AVX */
315
316
317#ifdef LV_HAVE_SSE2
318#include <emmintrin.h>
319
320static inline void volk_32f_s32f_convert_32i_a_sse2(int32_t* outputVector,
321 const float* inputVector,
322 const float scalar,
323 unsigned int num_points)
324{
325 unsigned int number = 0;
326
327 const unsigned int quarterPoints = num_points / 4;
328
329 const float* inputVectorPtr = (const float*)inputVector;
330 int32_t* outputVectorPtr = outputVector;
331
332 float min_val = INT_MIN;
333 float max_val = INT_MAX;
334 float r;
335
336 __m128 vScalar = _mm_set_ps1(scalar);
337 __m128 inputVal1;
338 __m128i intInputVal1;
339 __m128 vmin_val = _mm_set_ps1(min_val);
340 __m128 vmax_val = _mm_set_ps1(max_val);
341
342 for (; number < quarterPoints; number++) {
343 inputVal1 = _mm_load_ps(inputVectorPtr);
344 inputVectorPtr += 4;
345
346 inputVal1 =
347 _mm_max_ps(_mm_min_ps(_mm_mul_ps(inputVal1, vScalar), vmax_val), vmin_val);
348 intInputVal1 = _mm_cvtps_epi32(inputVal1);
349
350 _mm_store_si128((__m128i*)outputVectorPtr, intInputVal1);
351 outputVectorPtr += 4;
352 }
353
354 number = quarterPoints * 4;
355 for (; number < num_points; number++) {
356 r = inputVector[number] * scalar;
357 if (r > max_val)
358 r = max_val;
359 else if (r < min_val)
360 r = min_val;
361 outputVector[number] = (int32_t)rintf(r);
362 }
363}
364
365#endif /* LV_HAVE_SSE2 */
366
367
368#ifdef LV_HAVE_SSE
369#include <xmmintrin.h>
370
371static inline void volk_32f_s32f_convert_32i_a_sse(int32_t* outputVector,
372 const float* inputVector,
373 const float scalar,
374 unsigned int num_points)
375{
376 unsigned int number = 0;
377
378 const unsigned int quarterPoints = num_points / 4;
379
380 const float* inputVectorPtr = (const float*)inputVector;
381 int32_t* outputVectorPtr = outputVector;
382
383 float min_val = INT_MIN;
384 float max_val = INT_MAX;
385 float r;
386
387 __m128 vScalar = _mm_set_ps1(scalar);
388 __m128 ret;
389 __m128 vmin_val = _mm_set_ps1(min_val);
390 __m128 vmax_val = _mm_set_ps1(max_val);
391
392 __VOLK_ATTR_ALIGNED(16) float outputFloatBuffer[4];
393
394 for (; number < quarterPoints; number++) {
395 ret = _mm_load_ps(inputVectorPtr);
396 inputVectorPtr += 4;
397
398 ret = _mm_max_ps(_mm_min_ps(_mm_mul_ps(ret, vScalar), vmax_val), vmin_val);
399
400 _mm_store_ps(outputFloatBuffer, ret);
401 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[0]);
402 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[1]);
403 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[2]);
404 *outputVectorPtr++ = (int32_t)rintf(outputFloatBuffer[3]);
405 }
406
407 number = quarterPoints * 4;
408 for (; number < num_points; number++) {
409 r = inputVector[number] * scalar;
410 if (r > max_val)
411 r = max_val;
412 else if (r < min_val)
413 r = min_val;
414 outputVector[number] = (int32_t)rintf(r);
415 }
416}
417
418#endif /* LV_HAVE_SSE */
419
420
421#ifdef LV_HAVE_GENERIC
422
423static inline void volk_32f_s32f_convert_32i_a_generic(int32_t* outputVector,
424 const float* inputVector,
425 const float scalar,
426 unsigned int num_points)
427{
428 int32_t* outputVectorPtr = outputVector;
429 const float* inputVectorPtr = inputVector;
430 unsigned int number = 0;
431 float min_val = INT_MIN;
432 float max_val = INT_MAX;
433 float r;
434
435 for (number = 0; number < num_points; number++) {
436 r = *inputVectorPtr++ * scalar;
437 if (r > max_val)
438 r = max_val;
439 else if (r < min_val)
440 r = min_val;
441 *outputVectorPtr++ = (int32_t)rintf(r);
442 }
443}
444
445#endif /* LV_HAVE_GENERIC */
446
447#endif /* INCLUDED_volk_32f_s32f_convert_32i_a_H */
static float rintf(float x)
Definition: config.h:37
static void volk_32f_s32f_convert_32i_a_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:371
static void volk_32f_s32f_convert_32i_a_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:269
static void volk_32f_s32f_convert_32i_a_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:423
static void volk_32f_s32f_convert_32i_a_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:320
static void volk_32f_s32f_convert_32i_u_sse(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:181
static void volk_32f_s32f_convert_32i_generic(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:233
static void volk_32f_s32f_convert_32i_u_avx(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:80
static void volk_32f_s32f_convert_32i_u_sse2(int32_t *outputVector, const float *inputVector, const float scalar, unsigned int num_points)
Definition: volk_32f_s32f_convert_32i.h:130
#define __VOLK_ATTR_ALIGNED(x)
Definition: volk_common.h:56