Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_64f_x2_max_64f.h
Go to the documentation of this file.
/* -*- c++ -*- */
/*
 * Copyright 2012, 2014 Free Software Foundation, Inc.
 *
 * This file is part of GNU Radio
 *
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 3, or (at your option)
 * any later version.
 *
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with GNU Radio; see the file COPYING. If not, write to
 * the Free Software Foundation, Inc., 51 Franklin Street,
 * Boston, MA 02110-1301, USA.
 */
22
71#ifndef INCLUDED_volk_64f_x2_max_64f_a_H
72#define INCLUDED_volk_64f_x2_max_64f_a_H
73
74#include <inttypes.h>
75#include <stdio.h>
76
77#ifdef LV_HAVE_AVX512F
78#include <immintrin.h>
79
/*
 * Element-wise maximum of two double-precision vectors (AVX-512F, aligned).
 *
 * cVector    - output buffer; element i receives the larger of aVector[i]
 *              and bVector[i]
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * The vector loop uses _mm512_load_pd / _mm512_store_pd, which require
 * 64-byte aligned addresses, so all three buffers must be 64-byte aligned.
 * Elements left over after the last full group of 8 are handled by a scalar
 * loop.
 */
static inline void volk_64f_x2_max_64f_a_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector part: 8 doubles per iteration. Both loads happen before the
     * store, so cVector may be the same buffer as either input. */
    for (number = 0; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_load_pd(aPtr);
        const __m512d bVal = _mm512_load_pd(bPtr);

        _mm512_store_pd(cPtr, _mm512_max_pd(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. b is chosen
     * whenever "a > b" is false, which includes the case where either value
     * is NaN (per the Intel SDM, the packed max instruction likewise returns
     * its second source operand for NaN inputs). */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
114#endif /* LV_HAVE_AVX512F */
115
116
117#ifdef LV_HAVE_AVX
118#include <immintrin.h>
119
/*
 * Element-wise maximum of two double-precision vectors (AVX, aligned).
 *
 * cVector    - output buffer; element i receives the larger of aVector[i]
 *              and bVector[i]
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * The vector loop uses _mm256_load_pd / _mm256_store_pd, which require
 * 32-byte aligned addresses, so all three buffers must be 32-byte aligned.
 * Elements left over after the last full group of 4 are handled by a scalar
 * loop.
 */
static inline void volk_64f_x2_max_64f_a_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector part: 4 doubles per iteration. Both loads happen before the
     * store, so cVector may be the same buffer as either input. */
    for (number = 0; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_load_pd(aPtr);
        const __m256d bVal = _mm256_load_pd(bPtr);

        _mm256_store_pd(cPtr, _mm256_max_pd(aVal, bVal));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) elements. b is chosen
     * whenever "a > b" is false, which includes the case where either value
     * is NaN (per the Intel SDM, the packed max instruction likewise returns
     * its second source operand for NaN inputs). */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
154#endif /* LV_HAVE_AVX */
155
156
157#ifdef LV_HAVE_SSE2
158#include <emmintrin.h>
159
/*
 * Element-wise maximum of two double-precision vectors (SSE2, aligned).
 *
 * cVector    - output buffer; element i receives the larger of aVector[i]
 *              and bVector[i]
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * The vector loop uses _mm_load_pd / _mm_store_pd, which require 16-byte
 * aligned addresses, so all three buffers must be 16-byte aligned. An odd
 * trailing element is handled by a scalar loop.
 */
static inline void volk_64f_x2_max_64f_a_sse2(double* cVector,
                                              const double* aVector,
                                              const double* bVector,
                                              unsigned int num_points)
{
    const unsigned int halfPoints = num_points / 2;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector part: 2 doubles per iteration. Both loads happen before the
     * store, so cVector may be the same buffer as either input. */
    for (number = 0; number < halfPoints; number++) {
        const __m128d aVal = _mm_load_pd(aPtr);
        const __m128d bVal = _mm_load_pd(bPtr);

        _mm_store_pd(cPtr, _mm_max_pd(aVal, bVal));

        aPtr += 2;
        bPtr += 2;
        cPtr += 2;
    }

    /* Scalar tail for the remaining (num_points % 2) element. b is chosen
     * whenever "a > b" is false, which includes the case where either value
     * is NaN (per the Intel SDM, MAXPD likewise returns its second source
     * operand for NaN inputs). */
    for (number = halfPoints * 2; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
194#endif /* LV_HAVE_SSE2 */
195
196
197#ifdef LV_HAVE_GENERIC
198
/*
 * Element-wise maximum of two double-precision vectors (portable C).
 *
 * cVector    - output buffer; element i receives the larger of aVector[i]
 *              and bVector[i]
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * No alignment is required. For each element, b is chosen whenever "a > b"
 * is false, which includes the case where either value is NaN.
 */
static inline void volk_64f_x2_max_64f_generic(double* cVector,
                                               const double* aVector,
                                               const double* bVector,
                                               unsigned int num_points)
{
    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;
    unsigned int number;

    /* Both inputs are read before the output is written, so cVector may be
     * the same buffer as either input. */
    for (number = 0; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
215#endif /* LV_HAVE_GENERIC */
216
217
218#endif /* INCLUDED_volk_64f_x2_max_64f_a_H */
219
220
221#ifndef INCLUDED_volk_64f_x2_max_64f_u_H
222#define INCLUDED_volk_64f_x2_max_64f_u_H
223
224#include <inttypes.h>
225#include <stdio.h>
226
227#ifdef LV_HAVE_AVX512F
228#include <immintrin.h>
229
/*
 * Element-wise maximum of two double-precision vectors (AVX-512F, unaligned).
 *
 * cVector    - output buffer; element i receives the larger of aVector[i]
 *              and bVector[i]
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * The vector loop uses the unaligned _mm512_loadu_pd / _mm512_storeu_pd
 * intrinsics, so the buffers need no particular alignment. Elements left
 * over after the last full group of 8 are handled by a scalar loop.
 */
static inline void volk_64f_x2_max_64f_u_avx512f(double* cVector,
                                                 const double* aVector,
                                                 const double* bVector,
                                                 unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector part: 8 doubles per iteration. Both loads happen before the
     * store, so cVector may be the same buffer as either input. */
    for (number = 0; number < eighthPoints; number++) {
        const __m512d aVal = _mm512_loadu_pd(aPtr);
        const __m512d bVal = _mm512_loadu_pd(bPtr);

        _mm512_storeu_pd(cPtr, _mm512_max_pd(aVal, bVal));

        aPtr += 8;
        bPtr += 8;
        cPtr += 8;
    }

    /* Scalar tail for the remaining (num_points % 8) elements. b is chosen
     * whenever "a > b" is false, which includes the case where either value
     * is NaN (per the Intel SDM, the packed max instruction likewise returns
     * its second source operand for NaN inputs). */
    for (number = eighthPoints * 8; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
264#endif /* LV_HAVE_AVX512F */
265
266
267#ifdef LV_HAVE_AVX
268#include <immintrin.h>
269
/*
 * Element-wise maximum of two double-precision vectors (AVX, unaligned).
 *
 * cVector    - output buffer; element i receives the larger of aVector[i]
 *              and bVector[i]
 * aVector    - first input buffer
 * bVector    - second input buffer
 * num_points - number of elements to process
 *
 * The vector loop uses the unaligned _mm256_loadu_pd / _mm256_storeu_pd
 * intrinsics, so the buffers need no particular alignment. Elements left
 * over after the last full group of 4 are handled by a scalar loop.
 */
static inline void volk_64f_x2_max_64f_u_avx(double* cVector,
                                             const double* aVector,
                                             const double* bVector,
                                             unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;
    unsigned int number;

    double* cPtr = cVector;
    const double* aPtr = aVector;
    const double* bPtr = bVector;

    /* Vector part: 4 doubles per iteration. Both loads happen before the
     * store, so cVector may be the same buffer as either input. */
    for (number = 0; number < quarterPoints; number++) {
        const __m256d aVal = _mm256_loadu_pd(aPtr);
        const __m256d bVal = _mm256_loadu_pd(bPtr);

        _mm256_storeu_pd(cPtr, _mm256_max_pd(aVal, bVal));

        aPtr += 4;
        bPtr += 4;
        cPtr += 4;
    }

    /* Scalar tail for the remaining (num_points % 4) elements. b is chosen
     * whenever "a > b" is false, which includes the case where either value
     * is NaN (per the Intel SDM, the packed max instruction likewise returns
     * its second source operand for NaN inputs). */
    for (number = quarterPoints * 4; number < num_points; number++) {
        const double a = *aPtr++;
        const double b = *bPtr++;
        *cPtr++ = (a > b ? a : b);
    }
}
304#endif /* LV_HAVE_AVX */
305
306
307#endif /* INCLUDED_volk_64f_x2_max_64f_u_H */
static void volk_64f_x2_max_64f_a_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:120
static void volk_64f_x2_max_64f_u_avx(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:270
static void volk_64f_x2_max_64f_a_sse2(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:160
static void volk_64f_x2_max_64f_generic(double *cVector, const double *aVector, const double *bVector, unsigned int num_points)
Definition: volk_64f_x2_max_64f.h:199