Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_64f_x2_min_64f.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2012, 2014 Free Software Foundation, Inc.
 *
 * This file is part of GNU Radio
 *
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Radio; see the file COPYING. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street,
 * Boston, MA 02110-1301, USA.
 */
22
72#ifndef INCLUDED_volk_64f_x2_min_64f_a_H
73#define INCLUDED_volk_64f_x2_min_64f_a_H
74
75#include <inttypes.h>
76#include <stdio.h>
77
78#ifdef LV_HAVE_AVX512F
79#include <immintrin.h>
80
/*!
 * \brief Element-wise minimum of two double vectors (AVX-512F, aligned buffers).
 *
 * For every index i in [0, num_points) stores the smaller of aVector[i] and
 * bVector[i] into cVector[i]. Complete groups of eight elements are handled
 * with 512-bit operations; the remaining num_points % 8 elements are handled
 * by a scalar loop.
 *
 * The aligned load/store intrinsics (_mm512_load_pd / _mm512_store_pd) are
 * used, so all three buffers must be 64-byte aligned.
 *
 * \param cVector    Output buffer, receives num_points values.
 * \param aVector    First input buffer, num_points values.
 * \param bVector    Second input buffer, num_points values.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_a_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    for (; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_load_pd(aPtr);
        const __m512d bVal = _mm512_load_pd(bPtr);

        // Store the eight minima back into the C container
        _mm512_store_pd(cPtr, _mm512_min_pd(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    // Scalar tail: b is selected whenever (a < b) is false.
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
115#endif /* LV_HAVE_AVX512F */
116
117
118#ifdef LV_HAVE_AVX
119#include <immintrin.h>
120
/*!
 * \brief Element-wise minimum of two double vectors (AVX, aligned buffers).
 *
 * For every index i in [0, num_points) stores the smaller of aVector[i] and
 * bVector[i] into cVector[i]. Complete groups of four elements are handled
 * with 256-bit operations; the remaining num_points % 4 elements are handled
 * by a scalar loop.
 *
 * The aligned load/store intrinsics (_mm256_load_pd / _mm256_store_pd) are
 * used, so all three buffers must be 32-byte aligned.
 *
 * \param cVector    Output buffer, receives num_points values.
 * \param aVector    First input buffer, num_points values.
 * \param bVector    Second input buffer, num_points values.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_a_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    for (; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_load_pd(aPtr);
        const __m256d bVal = _mm256_load_pd(bPtr);

        // Store the four minima back into the C container
        _mm256_store_pd(cPtr, _mm256_min_pd(aVal, bVal));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    // Scalar tail: b is selected whenever (a < b) is false.
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
155#endif /* LV_HAVE_AVX */
156
157
158#ifdef LV_HAVE_SSE2
159#include <emmintrin.h>
160
/*!
 * \brief Element-wise minimum of two double vectors (SSE2, aligned buffers).
 *
 * For every index i in [0, num_points) stores the smaller of aVector[i] and
 * bVector[i] into cVector[i]. Complete pairs of elements are handled with
 * 128-bit operations; if num_points is odd, the last element is handled by a
 * scalar loop.
 *
 * The aligned load/store intrinsics (_mm_load_pd / _mm_store_pd) are used,
 * so all three buffers must be 16-byte aligned.
 *
 * \param cVector    Output buffer, receives num_points values.
 * \param aVector    First input buffer, num_points values.
 * \param bVector    Second input buffer, num_points values.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int halfPoints = num_points / 2;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    for (; number < halfPoints; number++) {
        const __m128d aVal = _mm_load_pd(aPtr);
        const __m128d bVal = _mm_load_pd(bPtr);

        // Store the two minima back into the C container
        _mm_store_pd(cPtr, _mm_min_pd(aVal, bVal));

        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    // Scalar tail: b is selected whenever (a < b) is false.
    for (number = halfPoints * 2; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
195#endif /* LV_HAVE_SSE2 */
196
197
198#ifdef LV_HAVE_GENERIC
199
/*!
 * \brief Element-wise minimum of two double vectors (portable scalar version).
 *
 * For every index i in [0, num_points) stores aVector[i] into cVector[i] when
 * aVector[i] < bVector[i], and bVector[i] otherwise. No alignment is required
 * by this implementation.
 *
 * \param cVector    Output buffer, receives num_points values.
 * \param aVector    First input buffer, num_points values.
 * \param bVector    Second input buffer, num_points values.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    for (number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        // b is selected whenever (a < b) is false.
        *cPtr++ = (a < b ? a : b);
    }
}
216#endif /* LV_HAVE_GENERIC */
217
218
219#endif /* INCLUDED_volk_64f_x2_min_64f_a_H */
220
221#ifndef INCLUDED_volk_64f_x2_min_64f_u_H
222#define INCLUDED_volk_64f_x2_min_64f_u_H
223
224#include <inttypes.h>
225#include <stdio.h>
226
227#ifdef LV_HAVE_AVX512F
228#include <immintrin.h>
229
/*!
 * \brief Element-wise minimum of two double vectors (AVX-512F, unaligned buffers).
 *
 * For every index i in [0, num_points) stores the smaller of aVector[i] and
 * bVector[i] into cVector[i]. Complete groups of eight elements are handled
 * with 512-bit operations; the remaining num_points % 8 elements are handled
 * by a scalar loop.
 *
 * The unaligned load/store intrinsics (_mm512_loadu_pd / _mm512_storeu_pd)
 * are used, so this variant does not rely on 64-byte aligned buffers.
 *
 * \param cVector    Output buffer, receives num_points values.
 * \param aVector    First input buffer, num_points values.
 * \param bVector    Second input buffer, num_points values.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_u_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int eighthPoints = num_points / 8;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    for (; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_loadu_pd(aPtr);
        const __m512d bVal = _mm512_loadu_pd(bPtr);

        // Store the eight minima back into the C container
        _mm512_storeu_pd(cPtr, _mm512_min_pd(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    // Scalar tail: b is selected whenever (a < b) is false.
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
264#endif /* LV_HAVE_AVX512F */
265
266
267#ifdef LV_HAVE_AVX
268#include <immintrin.h>
269
/*!
 * \brief Element-wise minimum of two double vectors (AVX, unaligned buffers).
 *
 * For every index i in [0, num_points) stores the smaller of aVector[i] and
 * bVector[i] into cVector[i]. Complete groups of four elements are handled
 * with 256-bit operations; the remaining num_points % 4 elements are handled
 * by a scalar loop.
 *
 * The unaligned load/store intrinsics (_mm256_loadu_pd / _mm256_storeu_pd)
 * are used, so this variant does not rely on 32-byte aligned buffers.
 *
 * \param cVector    Output buffer, receives num_points values.
 * \param aVector    First input buffer, num_points values.
 * \param bVector    Second input buffer, num_points values.
 * \param num_points Number of elements to process; 0 writes nothing.
 */
static inline void volk_64f_x2_min_64f_u_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarterPoints = num_points / 4;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    for (; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_loadu_pd(aPtr);
        const __m256d bVal = _mm256_loadu_pd(bPtr);

        // Store the four minima back into the C container
        _mm256_storeu_pd(cPtr, _mm256_min_pd(aVal, bVal));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    // Scalar tail: b is selected whenever (a < b) is false.
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a < b ? a : b);
    }
}
304#endif /* LV_HAVE_AVX */
305
306
307#endif /* INCLUDED_volk_64f_x2_min_64f_u_H */
static void volk_64f_x2_min_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:270
static void volk_64f_x2_min_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:161
static void volk_64f_x2_min_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:121
static void volk_64f_x2_min_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_min_64f.h:200