Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_32f_x2_min_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
71#ifndef INCLUDED_volk_32f_x2_min_32f_a_H
72#define INCLUDED_volk_32f_x2_min_32f_a_H
73
74#include <inttypes.h>
75#include <stdio.h>
76
77#ifdef LV_HAVE_SSE
78#include <xmmintrin.h>
79
/*
 * Element-wise minimum of two float buffers, SSE variant using aligned
 * loads and stores.
 *
 * cVector    - output buffer; receives min(aVector[i], bVector[i])
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 *
 * Full groups of four floats go through _mm_min_ps; the remaining
 * (num_points % 4) elements are handled by a scalar loop. The aligned
 * load/store intrinsics are used, so all three buffers are expected to be
 * 16-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_sse(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    /* Index one past the last element covered by a full 4-float vector. */
    const unsigned int vec_end = (num_points / 4) * 4;
    unsigned int i = 0;

    while (i < vec_end) {
        const __m128 lhs = _mm_load_ps(aVector + i);
        const __m128 rhs = _mm_load_ps(bVector + i);

        _mm_store_ps(cVector + i, _mm_min_ps(lhs, rhs));
        i += 4;
    }

    /* Scalar tail: fewer than four elements are left at this point. */
    for (; i < num_points; i++) {
        const float x = aVector[i];
        const float y = bVector[i];

        if (x < y) {
            cVector[i] = x;
        } else {
            cVector[i] = y;
        }
    }
}
113#endif /* LV_HAVE_SSE */
114
115
116#ifdef LV_HAVE_NEON
117#include <arm_neon.h>
118
/*
 * Element-wise minimum of two float buffers, NEON variant.
 *
 * cVector    - output buffer; receives the per-element minimum
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 *
 * Full groups of four floats go through vminq_f32; the remaining
 * (num_points % 4) elements are handled by a scalar loop.
 */
static inline void volk_32f_x2_min_32f_neon(float* cVector,
                                            const float* aVector,
                                            const float* bVector,
                                            unsigned int num_points)
{
    /* Index one past the last element covered by a full 4-float vector. */
    const unsigned int vec_end = (num_points / 4) * 4;
    unsigned int i = 0;

    while (i < vec_end) {
        const float32x4_t lhs = vld1q_f32(aVector + i);
        const float32x4_t rhs = vld1q_f32(bVector + i);

        vst1q_f32(cVector + i, vminq_f32(lhs, rhs));
        i += 4;
    }

    /* Scalar tail: fewer than four elements are left at this point. */
    for (; i < num_points; i++) {
        const float x = aVector[i];
        const float y = bVector[i];

        if (x < y) {
            cVector[i] = x;
        } else {
            cVector[i] = y;
        }
    }
}
149#endif /* LV_HAVE_NEON */
150
151
152#ifdef LV_HAVE_GENERIC
153
/*
 * Element-wise minimum of two float buffers, portable C variant.
 *
 * cVector    - output buffer; receives the per-element minimum
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 *
 * For each index the element of aVector is chosen when it compares strictly
 * less than the element of bVector; otherwise the element of bVector is used.
 */
static inline void volk_32f_x2_min_32f_generic(float* cVector,
                                               const float* aVector,
                                               const float* bVector,
                                               unsigned int num_points)
{
    unsigned int i;

    for (i = 0; i < num_points; i++) {
        const float x = aVector[i];
        const float y = bVector[i];

        if (x < y) {
            cVector[i] = x;
        } else {
            cVector[i] = y;
        }
    }
}
170#endif /* LV_HAVE_GENERIC */
171
172
173#ifdef LV_HAVE_ORC
174
/* Declared here only; the definition is not part of this header. */
extern void volk_32f_x2_min_32f_a_orc_impl(float* cVector,
                                           const float* aVector,
                                           const float* bVector,
                                           unsigned int num_points);

/*
 * ORC entry point for the element-wise minimum kernel.
 *
 * Forwards all four arguments, unchanged and in the same order, to
 * volk_32f_x2_min_32f_a_orc_impl and does no work of its own.
 *
 * cVector    - output buffer
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 */
static inline void
volk_32f_x2_min_32f_u_orc(float* cVector,
                          const float* aVector,
                          const float* bVector,
                          unsigned int num_points)
{
    volk_32f_x2_min_32f_a_orc_impl(cVector, aVector, bVector, num_points);
}
187#endif /* LV_HAVE_ORC */
188
189#ifdef LV_HAVE_AVX
190#include <immintrin.h>
191
/*
 * Element-wise minimum of two float buffers, AVX variant using aligned
 * loads and stores.
 *
 * cVector    - output buffer; receives the per-element minimum
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 *
 * Full groups of eight floats go through _mm256_min_ps; the remaining
 * (num_points % 8) elements are handled by a scalar loop. The aligned
 * load/store intrinsics are used, so all three buffers are expected to be
 * 32-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    /* Index one past the last element covered by a full 8-float vector. */
    const unsigned int vec_end = (num_points / 8) * 8;
    unsigned int i = 0;

    while (i < vec_end) {
        const __m256 lhs = _mm256_load_ps(aVector + i);
        const __m256 rhs = _mm256_load_ps(bVector + i);

        _mm256_store_ps(cVector + i, _mm256_min_ps(lhs, rhs));
        i += 8;
    }

    /* Scalar tail: fewer than eight elements are left at this point. */
    for (; i < num_points; i++) {
        const float x = aVector[i];
        const float y = bVector[i];

        if (x < y) {
            cVector[i] = x;
        } else {
            cVector[i] = y;
        }
    }
}
225#endif /* LV_HAVE_AVX */
226
227#ifdef LV_HAVE_AVX512F
228#include <immintrin.h>
229
/*
 * Element-wise minimum of two float buffers, AVX-512F variant using aligned
 * loads and stores.
 *
 * cVector    - output buffer; receives the per-element minimum
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 *
 * Full groups of sixteen floats go through _mm512_min_ps; the remaining
 * (num_points % 16) elements are handled by a scalar loop. The aligned
 * load/store intrinsics are used, so all three buffers are expected to be
 * 64-byte aligned.
 */
static inline void volk_32f_x2_min_32f_a_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    /* Index one past the last element covered by a full 16-float vector. */
    const unsigned int vec_end = (num_points / 16) * 16;
    unsigned int i = 0;

    while (i < vec_end) {
        const __m512 lhs = _mm512_load_ps(aVector + i);
        const __m512 rhs = _mm512_load_ps(bVector + i);

        _mm512_store_ps(cVector + i, _mm512_min_ps(lhs, rhs));
        i += 16;
    }

    /* Scalar tail: fewer than sixteen elements are left at this point. */
    for (; i < num_points; i++) {
        const float x = aVector[i];
        const float y = bVector[i];

        if (x < y) {
            cVector[i] = x;
        } else {
            cVector[i] = y;
        }
    }
}
263#endif /* LV_HAVE_AVX512F */
264
265#endif /* INCLUDED_volk_32f_x2_min_32f_a_H */
266
267
268#ifndef INCLUDED_volk_32f_x2_min_32f_u_H
269#define INCLUDED_volk_32f_x2_min_32f_u_H
270
271#include <inttypes.h>
272#include <stdio.h>
273
274#ifdef LV_HAVE_AVX512F
275#include <immintrin.h>
276
/*
 * Element-wise minimum of two float buffers, AVX-512F variant using
 * unaligned loads and stores.
 *
 * cVector    - output buffer; receives the per-element minimum
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 *
 * Full groups of sixteen floats go through _mm512_min_ps; the remaining
 * (num_points % 16) elements are handled by a scalar loop. Memory is accessed
 * with the loadu/storeu intrinsics, which do not require aligned pointers.
 */
static inline void volk_32f_x2_min_32f_u_avx512f(float* cVector,
                                                 const float* aVector,
                                                 const float* bVector,
                                                 unsigned int num_points)
{
    /* Index one past the last element covered by a full 16-float vector. */
    const unsigned int vec_end = (num_points / 16) * 16;
    unsigned int i = 0;

    while (i < vec_end) {
        const __m512 lhs = _mm512_loadu_ps(aVector + i);
        const __m512 rhs = _mm512_loadu_ps(bVector + i);

        _mm512_storeu_ps(cVector + i, _mm512_min_ps(lhs, rhs));
        i += 16;
    }

    /* Scalar tail: fewer than sixteen elements are left at this point. */
    for (; i < num_points; i++) {
        const float x = aVector[i];
        const float y = bVector[i];

        if (x < y) {
            cVector[i] = x;
        } else {
            cVector[i] = y;
        }
    }
}
310#endif /* LV_HAVE_AVX512F */
311
312#ifdef LV_HAVE_AVX
313#include <immintrin.h>
314
/*
 * Element-wise minimum of two float buffers, AVX variant using unaligned
 * loads and stores.
 *
 * cVector    - output buffer; receives the per-element minimum
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of floats to process
 *
 * Full groups of eight floats go through _mm256_min_ps; the remaining
 * (num_points % 8) elements are handled by a scalar loop. Memory is accessed
 * with the loadu/storeu intrinsics, which do not require aligned pointers.
 */
static inline void volk_32f_x2_min_32f_u_avx(float* cVector,
                                             const float* aVector,
                                             const float* bVector,
                                             unsigned int num_points)
{
    /* Index one past the last element covered by a full 8-float vector. */
    const unsigned int vec_end = (num_points / 8) * 8;
    unsigned int i = 0;

    while (i < vec_end) {
        const __m256 lhs = _mm256_loadu_ps(aVector + i);
        const __m256 rhs = _mm256_loadu_ps(bVector + i);

        _mm256_storeu_ps(cVector + i, _mm256_min_ps(lhs, rhs));
        i += 8;
    }

    /* Scalar tail: fewer than eight elements are left at this point. */
    for (; i < num_points; i++) {
        const float x = aVector[i];
        const float y = bVector[i];

        if (x < y) {
            cVector[i] = x;
        } else {
            cVector[i] = y;
        }
    }
}
348#endif /* LV_HAVE_AVX */
349
350#endif /* INCLUDED_volk_32f_x2_min_32f_u_H */
static void volk_32f_x2_min_32f_a_sse(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:80
static void volk_32f_x2_min_32f_neon(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:119
static void volk_32f_x2_min_32f_generic(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:154
static void volk_32f_x2_min_32f_u_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:315
static void volk_32f_x2_min_32f_a_avx(float *cVector, const float *aVector, const float *bVector, unsigned int num_points)
Definition: volk_32f_x2_min_32f.h:192