Vector Optimized Library of Kernels 2.5.1
Architecture-tuned implementations of math kernels
volk_64f_convert_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of GNU Radio
6 *
7 * GNU Radio is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 3, or (at your option)
10 * any later version.
11 *
12 * GNU Radio is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with GNU Radio; see the file COPYING. If not, write to
19 * the Free Software Foundation, Inc., 51 Franklin Street,
20 * Boston, MA 02110-1301, USA.
21 */
22
64#ifndef INCLUDED_volk_64f_convert_32f_u_H
65#define INCLUDED_volk_64f_convert_32f_u_H
66
67#include <inttypes.h>
68#include <stdio.h>
69
70#ifdef LV_HAVE_AVX512F
71#include <immintrin.h>
72
73static inline void volk_64f_convert_32f_u_avx512f(float* outputVector,
74 const double* inputVector,
75 unsigned int num_points)
76{
77 unsigned int number = 0;
78
79 const unsigned int oneSixteenthPoints = num_points / 16;
80
81 const double* inputVectorPtr = (const double*)inputVector;
82 float* outputVectorPtr = outputVector;
83 __m256 ret1, ret2;
84 __m512d inputVal1, inputVal2;
85
86 for (; number < oneSixteenthPoints; number++) {
87 inputVal1 = _mm512_loadu_pd(inputVectorPtr);
88 inputVectorPtr += 8;
89 inputVal2 = _mm512_loadu_pd(inputVectorPtr);
90 inputVectorPtr += 8;
91
92 ret1 = _mm512_cvtpd_ps(inputVal1);
93 ret2 = _mm512_cvtpd_ps(inputVal2);
94
95 _mm256_storeu_ps(outputVectorPtr, ret1);
96 outputVectorPtr += 8;
97
98 _mm256_storeu_ps(outputVectorPtr, ret2);
99 outputVectorPtr += 8;
100 }
101
102 number = oneSixteenthPoints * 16;
103 for (; number < num_points; number++) {
104 outputVector[number] = (float)(inputVector[number]);
105 }
106}
107#endif /* LV_HAVE_AVX512F */
108
109
110#ifdef LV_HAVE_AVX
111#include <immintrin.h>
112
113static inline void volk_64f_convert_32f_u_avx(float* outputVector,
114 const double* inputVector,
115 unsigned int num_points)
116{
117 unsigned int number = 0;
118
119 const unsigned int oneEightPoints = num_points / 8;
120
121 const double* inputVectorPtr = (const double*)inputVector;
122 float* outputVectorPtr = outputVector;
123 __m128 ret1, ret2;
124 __m256d inputVal1, inputVal2;
125
126 for (; number < oneEightPoints; number++) {
127 inputVal1 = _mm256_loadu_pd(inputVectorPtr);
128 inputVectorPtr += 4;
129 inputVal2 = _mm256_loadu_pd(inputVectorPtr);
130 inputVectorPtr += 4;
131
132 ret1 = _mm256_cvtpd_ps(inputVal1);
133 ret2 = _mm256_cvtpd_ps(inputVal2);
134
135 _mm_storeu_ps(outputVectorPtr, ret1);
136 outputVectorPtr += 4;
137
138 _mm_storeu_ps(outputVectorPtr, ret2);
139 outputVectorPtr += 4;
140 }
141
142 number = oneEightPoints * 8;
143 for (; number < num_points; number++) {
144 outputVector[number] = (float)(inputVector[number]);
145 }
146}
147#endif /* LV_HAVE_AVX */
148
149
150#ifdef LV_HAVE_SSE2
151#include <emmintrin.h>
152
/*
 * Narrow num_points doubles from inputVector into floats in outputVector.
 *
 * SSE2 kernel using unaligned loads and stores. Full groups of 4 points are
 * converted with vector instructions; the remaining points (fewer than 4)
 * are converted one element at a time with a plain cast.
 */
static inline void volk_64f_convert_32f_u_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    /* Number of points covered by whole 4-element groups. */
    const unsigned int vectorizedCount = (num_points / 4) * 4;
    unsigned int idx;

    for (idx = 0; idx < vectorizedCount; idx += 4) {
        const __m128d firstPair = _mm_loadu_pd(inputVector + idx);
        const __m128d secondPair = _mm_loadu_pd(inputVector + idx + 2);

        /* Each conversion yields two floats in the low half of its result. */
        const __m128 firstFloats = _mm_cvtpd_ps(firstPair);
        const __m128 secondFloats = _mm_cvtpd_ps(secondPair);

        /* Pack the two low halves into one 4-float vector and write it out. */
        _mm_storeu_ps(outputVector + idx, _mm_movelh_ps(firstFloats, secondFloats));
    }

    /* Scalar tail: idx already equals vectorizedCount here. */
    for (; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
186#endif /* LV_HAVE_SSE2 */
187
188
189#ifdef LV_HAVE_GENERIC
190
/*
 * Narrow num_points doubles from inputVector into floats in outputVector.
 *
 * Portable reference kernel: each element is converted with a plain
 * double-to-float cast. Nothing is read or written when num_points is 0.
 */
static inline void volk_64f_convert_32f_generic(float* outputVector,
                                                const double* inputVector,
                                                unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
203#endif /* LV_HAVE_GENERIC */
204
205
206#endif /* INCLUDED_volk_64f_convert_32f_u_H */
207#ifndef INCLUDED_volk_64f_convert_32f_a_H
208#define INCLUDED_volk_64f_convert_32f_a_H
209
210#include <inttypes.h>
211#include <stdio.h>
212
213#ifdef LV_HAVE_AVX512F
214#include <immintrin.h>
215
216static inline void volk_64f_convert_32f_a_avx512f(float* outputVector,
217 const double* inputVector,
218 unsigned int num_points)
219{
220 unsigned int number = 0;
221
222 const unsigned int oneSixteenthPoints = num_points / 16;
223
224 const double* inputVectorPtr = (const double*)inputVector;
225 float* outputVectorPtr = outputVector;
226 __m256 ret1, ret2;
227 __m512d inputVal1, inputVal2;
228
229 for (; number < oneSixteenthPoints; number++) {
230 inputVal1 = _mm512_load_pd(inputVectorPtr);
231 inputVectorPtr += 8;
232 inputVal2 = _mm512_load_pd(inputVectorPtr);
233 inputVectorPtr += 8;
234
235 ret1 = _mm512_cvtpd_ps(inputVal1);
236 ret2 = _mm512_cvtpd_ps(inputVal2);
237
238 _mm256_store_ps(outputVectorPtr, ret1);
239 outputVectorPtr += 8;
240
241 _mm256_store_ps(outputVectorPtr, ret2);
242 outputVectorPtr += 8;
243 }
244
245 number = oneSixteenthPoints * 16;
246 for (; number < num_points; number++) {
247 outputVector[number] = (float)(inputVector[number]);
248 }
249}
250#endif /* LV_HAVE_AVX512F */
251
252
253#ifdef LV_HAVE_AVX
254#include <immintrin.h>
255
256static inline void volk_64f_convert_32f_a_avx(float* outputVector,
257 const double* inputVector,
258 unsigned int num_points)
259{
260 unsigned int number = 0;
261
262 const unsigned int oneEightPoints = num_points / 8;
263
264 const double* inputVectorPtr = (const double*)inputVector;
265 float* outputVectorPtr = outputVector;
266 __m128 ret1, ret2;
267 __m256d inputVal1, inputVal2;
268
269 for (; number < oneEightPoints; number++) {
270 inputVal1 = _mm256_load_pd(inputVectorPtr);
271 inputVectorPtr += 4;
272 inputVal2 = _mm256_load_pd(inputVectorPtr);
273 inputVectorPtr += 4;
274
275 ret1 = _mm256_cvtpd_ps(inputVal1);
276 ret2 = _mm256_cvtpd_ps(inputVal2);
277
278 _mm_store_ps(outputVectorPtr, ret1);
279 outputVectorPtr += 4;
280
281 _mm_store_ps(outputVectorPtr, ret2);
282 outputVectorPtr += 4;
283 }
284
285 number = oneEightPoints * 8;
286 for (; number < num_points; number++) {
287 outputVector[number] = (float)(inputVector[number]);
288 }
289}
290#endif /* LV_HAVE_AVX */
291
292
293#ifdef LV_HAVE_SSE2
294#include <emmintrin.h>
295
/*
 * Narrow num_points doubles from inputVector into floats in outputVector.
 *
 * SSE2 kernel using the aligned load/store intrinsics, so both buffers must
 * be 16-byte aligned. Full groups of 4 points are converted with vector
 * instructions; the remaining points (fewer than 4) are converted one
 * element at a time with a plain cast.
 */
static inline void volk_64f_convert_32f_a_sse2(float* outputVector,
                                               const double* inputVector,
                                               unsigned int num_points)
{
    /* Number of points covered by whole 4-element groups. */
    const unsigned int vectorizedCount = (num_points / 4) * 4;
    unsigned int idx;

    for (idx = 0; idx < vectorizedCount; idx += 4) {
        const __m128d firstPair = _mm_load_pd(inputVector + idx);
        const __m128d secondPair = _mm_load_pd(inputVector + idx + 2);

        /* Each conversion yields two floats in the low half of its result. */
        const __m128 firstFloats = _mm_cvtpd_ps(firstPair);
        const __m128 secondFloats = _mm_cvtpd_ps(secondPair);

        /* Pack the two low halves into one 4-float vector and write it out. */
        _mm_store_ps(outputVector + idx, _mm_movelh_ps(firstFloats, secondFloats));
    }

    /* Scalar tail: idx already equals vectorizedCount here. */
    for (; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
329#endif /* LV_HAVE_SSE2 */
330
331
332#ifdef LV_HAVE_GENERIC
333
/*
 * Narrow num_points doubles from inputVector into floats in outputVector.
 *
 * Portable kernel registered under the aligned ("_a") name: each element is
 * converted with a plain double-to-float cast, and no alignment-sensitive
 * instructions are used. Nothing is read or written when num_points is 0.
 */
static inline void volk_64f_convert_32f_a_generic(float* outputVector,
                                                  const double* inputVector,
                                                  unsigned int num_points)
{
    unsigned int idx;

    for (idx = 0; idx < num_points; idx++) {
        outputVector[idx] = (float)inputVector[idx];
    }
}
346#endif /* LV_HAVE_GENERIC */
347
348
349#endif /* INCLUDED_volk_64f_convert_32f_a_H */
static void volk_64f_convert_32f_u_avx(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:113
static void volk_64f_convert_32f_generic(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:191
static void volk_64f_convert_32f_a_avx(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:256
static void volk_64f_convert_32f_a_generic(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:334
static void volk_64f_convert_32f_u_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:153
static void volk_64f_convert_32f_a_sse2(float *outputVector, const double *inputVector, unsigned int num_points)
Definition: volk_64f_convert_32f.h:296